diff --git a/.gitignore b/.gitignore
index f7df387..eaaa4db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,4 +36,10 @@ target
 *.out
 *.log
 *.tab
-*.sam
\ No newline at end of file
+*.sam
+
+# Linux build dir used by the solo Docker benchmark/diff (CARGO_TARGET_DIR)
+/target-linux/
+
+# amd64 Linux build dir for the benchmark container
+/target-amd64/
diff --git a/ROADMAP.md b/ROADMAP.md
index ea27b10..d004df5 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -27,7 +27,7 @@ Phase 1 (CLI) ✅
                                                                                        └→ Phase 17.B (per-mate seeding) [planned]
                                                               └→ Phase 17.1 (Log.final.out) ✅
                                                                    └→ Phase 17.2+ (features + polish)
-                                                              └→ Phase 14 (STARsolo) [DEFERRED]
+                                                              └→ Phase 14 (STARsolo) 🚧 14.1 done
 ```
 
 **Phase ordering rationale**: Threading (Phase 9) done first to establish parallel architecture.
@@ -55,7 +55,7 @@ Paired-end (Phase 8) builds on threaded infrastructure. GTF/junctions (Phase 7)
 | [15](docs-old/phase15_sam_tags.md) | SAM Tags + PE Fix | ✅ | 235 | NH/HI/AS/NM/nM/XS/jM/jI/MD, PE fix |
 | [16](docs-old/phase16_algorithm.md) | Algorithm Parity | ✅* | 268 | SE: **8613/8926 (0 STAR-only, 99.815% tie-adj)**, 2.2% splice; PE: **8390/8390 exact**, **99.883% tie-adj PE faithfulness**, 0 MAPQ inflate/deflate, 0 NH diffs (Phase G2) |
 | [17](docs-old/phase17_features.md) | Features + Polish | ✅* | 396 | Log.final.out, GeneCounts, TranscriptomeSAM, SJDB insertion, --outSAMattrRGline, --runRNGseed, combined-read PE seeding (Phase E2), scoreSeedBest (17.A), sorted BAM (17.2), outReadsUnmapped (17.4), outStd (17.6), PE chimeric (17.3), WithinBAM (17.11), GTF tag params (17.7), outBAMcompression+limitBAMsortRAM (17.9), chimeric Tier 1b soft-clip re-seed (12.2), chimeric Tier 3 residual re-seed (17.10) |
-| 14 | STARsolo | DEFERRED | — | Waiting for accuracy parity |
+| [14](docs-old/phase14_starsolo.md) | STARsolo (single-cell) | 🚧 In progress | 475 | **MVP done (14.1–14.4)**: 10x Gene count matrix end-to-end (barcode plumbing, CB correction, gene assignment, UMI dedup, raw matrix.mtx) |
 
 *Partially complete — see linked docs for sub-phase status.
 
@@ -308,6 +308,37 @@ See [docs-old/phase17_features.md](docs-old/phase17_features.md) for sub-phase t
 
 ---
 
-## Phase 14: STARsolo (Single-Cell) — DEFERRED
+## Phase 14: STARsolo (Single-Cell) — IN PROGRESS
 
-Waiting for accuracy parity (position agreement >99%).
+**Prerequisite met**: position agreement >99% (SE 99.815% tie-adj, PE 99.883%). Phase unblocked 2026-06-10.
+
+Single-cell quantification layered around the existing aligner: the cDNA read aligns through the normal SE path; a paired **barcode read** (R1 = cell barcode + UMI) is parsed, corrected against a whitelist, assigned to a gene, UMI-deduplicated, and emitted as a sparse per-cell count matrix. Target: faithful port of STARsolo (all features). See [docs-old/phase14_starsolo.md](docs-old/phase14_starsolo.md) for the full design and sub-phase tracking.
+
+| Sub-phase | Description | Status |
+|-----------|-------------|--------|
+| 14.1 | `--solo*` params + barcode-read input plumbing (`src/solo/`, CB/UMI extraction, SE dispatch) | ✅ Complete |
+| 14.2 | Whitelist load + CB correction (`--soloCBmatchWLtype`) + UMI checks | ✅ Complete |
+| 14.3 | Per-read gene assignment + CB/UMI threaded into the alignment loop | ✅ Complete |
+| 14.4 | UMI dedup + raw `matrix.mtx` (**MVP complete**) | ✅ Complete |
+| 14.CR | CellRanger 4/5-matching flags (`1MM_CR`, `MultiGeneUMI_CR`, `1MM_multi_Nbase_pseudocounts`, `CellRanger4` clip) | ✅ Complete |
+| 14.5 | `Summary.csv` / `Barcodes.stats` / `Features.stats` | ⬜ Planned |
+| 14.6 | Cell filtering (`--soloCellFilter`: CellRanger2.2, EmptyDrops_CR) | ⬜ Planned |
+| 14.7 | `CB`/`UB`/`GX`/`GN` SAM tags + `CB_samTagOut` | ⬜ Planned |
+| 14.8 | More features: GeneFull, SJ, Velocyto | ⬜ Planned |
+| 14.9 | Multi-gene resolution (`--soloMultiMappers`) | ⬜ Planned |
+| 14.10 | Other chemistries: CB_UMI_Complex, SmartSeq | ⬜ Planned |
+| 14.11 | Differential test harness vs STARsolo + synthetic integration tests | ⬜ Planned |
+
+**Phase 14.1** (2026-06-10): `SoloType` enum + 12 `--solo*` params in `src/params/mod.rs`; new `src/solo/mod.rs` (`SoloBarcodeLayout` geometry, `CellBarcode` CB/UMI extraction, `SoloReadReader` lockstep cDNA+barcode FASTQ reader); solo validation (2 read files, GTF for Gene/GeneFull, CB/UMI length); `run_single_pass` + `run_pass1` dispatch routes solo runs to the SE cDNA path (file 0). 447 lib tests (+6 solo), 0 clippy warnings.
+
+**Phase 14.2** (2026-06-11): new `src/solo/whitelist.rs` — faithful port of STAR's `SoloReadBarcode_getCBandUMI.cpp` read stage. 2-bit barcode packing (`seq[0]` high bits, N-detection: 0/1/>1), sorted-array whitelist load (plain/gz), `match_cb` (exact → single-N → 1MM enumeration) honoring `--soloCBmatchWLtype` (Exact/1MM/1MM_multi/…); multi-match reads record all candidate WL indices + mismatch quality (`CbMatch::Multi`) for the Phase 14.4 posterior; exact-match count table accumulated as the posterior prior; UMI checks (N → reject, homopolymer → reject); `CbMatchStats` with STAR's cbMatch categories. Params: `--soloCBmatchWLtype` validation, `solo_cb_match_type()` / `solo_cb_whitelist_path()` helpers, None-whitelist-requires-Exact rule, CBlen≤32 guard. 460 lib tests (+13 solo), 0 clippy warnings.
+
+**Phase 14.3** (2026-06-11): per-read gene assignment + barcode threading into the alignment loop. New `src/solo/gene.rs` — `SoloStrand` (`--soloStrand`), `assign_gene_se` (union of strand-filtered `overlapping_genes` across all loci → `Gene`/`NoFeature`/`Ambiguous`/`Unmapped`; multi-locus-same-gene stays unique). `src/solo/mod.rs` gains `SoloContext` (whitelist + gene model + stats + recorder, `build()` from params), `SoloRecorder` (thread-safe `SoloCountRecord` / deferred `SoloMultiRecord`), and `process_read` (CB match → UMI check → gene assign → record). New `align_reads_solo` loop in `lib.rs` reads cDNA + barcode in lockstep (`SoloReadReader`), aligns the cDNA, writes SAM/BAM, and collects per-cell records; `run_single_pass`/`run_two_pass` thread `solo_ctx`. 467 lib + 10 integration tests, 0 clippy warnings.
+
+**Phase 14.CR — CellRanger 4.x/5.x matching** (2026-06-12): implemented the STARsolo.md CellRanger-matching flag set faithfully from STAR source. `--soloUMIdedup 1MM_CR` (`umiArrayCorrect_CR`: each UMI corrected to its highest-count 1MM neighbor, non-transitive, count = distinct corrected). `--soloUMIfiltering MultiGeneUMI_CR` (keep the top-read-count gene of a multi-gene UMI) + `MultiGeneUMI`; `build_matrix` restructured to per-cell `umi → gene → readcount`. `--soloCBmatchWLtype 1MM_multi_Nbase_pseudocounts` adds a +1 pseudocount to the CB posterior prior. `--clipAdapterType CellRanger4` (TSO 5' clip + polyA 3' trim, conservative no-op on adapter-free reads). All validated in params. Differential harness `test/solo_cellranger_diff.py` runs the full CellRanger flag set on both rustar-aligner and real STAR and compares decoded `{(barcode, gene_id): count}` matrices; committed cargo test `test_starsolo_cellranger_style_matrix` asserts the matrix (incl. 1MM_CR collapse) always.
+
+**Three-way benchmark** (see [docs-old/phase14_benchmark.md](docs-old/phase14_benchmark.md)): CellRanger 10.0.0 vs STARsolo 2.7.10b vs rustar-aligner on 10M reads of a real 5′ mouse 10x dataset (GRCm39-2024-A), all x86_64 in Docker. rustar produces a correct matrix (4.22M UMIs, exonic Gene, ~4% above STARsolo's 4.07M; CellRanger's 4.84M includes introns). After a buffered-I/O fix (raw-matrix write 1306s → 3s; barcodes.tsv was unbuffered), rustar's count is 670s vs STARsolo 152s / CellRanger 356s; index build 2801s (faster than STAR's 3626s under emulation). Peak RSS 37GB (index-dominated). `build_matrix` Step 1 (per-cell processing) bounds matrix-build memory.
+
+**Live verification — PASS:** rustar-aligner's `Gene/raw` matrix is **byte-identical to real STARsolo's** for the CellRanger-style run, confirmed deterministically (3/3 runs). The reference STAR (2.7.10b) and a Linux build of rustar-aligner run in a consistent Linux container (`test/Dockerfile.solodiff` + `test/solo_diff_docker.sh`, via colima — no Docker Desktop). This was necessary because STAR 2.7.11b reads 0 input reads on Apple-Silicon macOS (a known STAR/macOS bug, `nextChar=-1`). 479 lib + 11 integration tests, 0 clippy warnings.
+
+**Phase 14.4 — MVP COMPLETE** (2026-06-11): UMI deduplication + raw count-matrix output. New `src/solo/count.rs`: `UmiDedup` (`--soloUMIdedup`: Exact / NoDedup / 1MM_All [default, connected-components within Hamming-1] / 1MM_Directional / 1MM_Directional_UMItools, `dirCountAdd` 0/−1); deferred 1MM_multi CB resolution via STAR's count+quality posterior (weight = `exactCount·10^(−q/10)`, prior from `whitelist.exact_count_snapshot()`); `build_matrix` groups reads by (cell,gene), collapses UMIs, and `write_gene_matrix` writes `Solo.out/Gene/raw/{matrix.mtx, barcodes.tsv, features.tsv}` (MatrixMarket `nFeatures nBarcodes nEntries`, entries `gene+1 cell+1 count`, 1-based; CellRanger-v3 3-column features.tsv; whitelist-sorted barcodes.tsv). Wired into `align_reads` post-alignment. `--soloUMIdedup` validation in params. End-to-end test (`test_starsolo_gene_matrix`): 8 reads, one cell, two Hamming-distant UMI clouds → 2 deduped molecules → matrix `1 1 2`. **A working 10x Chromium Gene count matrix.** 475 lib + 10 integration tests, 0 clippy warnings.
diff --git a/docs-old/phase14_benchmark.md b/docs-old/phase14_benchmark.md
new file mode 100644
index 0000000..cdaf3ee
--- /dev/null
+++ b/docs-old/phase14_benchmark.md
@@ -0,0 +1,88 @@
+[← Back to ROADMAP](../ROADMAP.md) · [Phase 14](phase14_starsolo.md)
+
+# Phase 14 Benchmark: CellRanger vs STARsolo vs rustar-aligner
+
+Runtime + output-stats comparison of the three single-cell quantifiers on a real
+10x mouse dataset, run in one consistent Linux/x86_64 environment.
+
+## Setup
+
+- **Reference**: CellRanger mouse `refdata-gex-GRCm39-2024-A` (genome 2.79 Gb, 61
+  contigs, 33,696 genes). STAR + rustar build their indexes from the refdata
+  `fasta/genome.fa` + `genes/genes.gtf` (`--sjdbOverhang 89`); CellRanger uses
+  the refdata directly.
+- **Data**: 5k Mouse PBMCs, **5′ GEM-X** (SC5P-R2-v3); first **10,000,000 read
+  pairs** of the GEX library — identical reads for all three tools.
+- **Solo params** (CellRanger-matching, 5′): `--soloType CB_UMI_Simple`,
+  CB 16 / UMI 12, `--soloStrand Reverse`, whitelist `3M-5pgex-jan-2023`,
+  `--soloFeatures Gene`, `--soloCBmatchWLtype 1MM_multi_Nbase_pseudocounts`,
+  `--soloUMIfiltering MultiGeneUMI_CR`, `--soloUMIdedup 1MM_CR`.
+- **Environment**: Docker (colima) on Apple-Silicon macOS, **everything x86_64
+  via Rosetta** (CellRanger is x86_64-only), 14 cores / 40 GB. All absolute
+  times are inflated ~2–3× by emulation; the *relative* picture holds.
+- **Tooling**: CellRanger 10.0.0, STAR 2.7.10b, rustar-aligner (this branch).
+  Driver: [`test/solo_bench.py`](../test/solo_bench.py) (each step under
+  `/usr/bin/time -v`), image [`test/Dockerfile.bench`](../test/Dockerfile.bench).
+
+## Results
+
+| Tool | Index build | Count (align+quant) | Peak RSS | Raw barcodes | Genes | Total UMIs |
+|------|------------:|--------------------:|---------:|-------------:|------:|-----------:|
+| **CellRanger 10.0.0** | (prebuilt) | 356 s | 12.5 GB | 161,465 | 17,258 | 4,843,682 |
+| **STARsolo 2.7.10b** | 3,626 s | 152 s | 30 GB | 143,490 | 15,675 | 4,067,946 |
+| **rustar-aligner** | 2,801 s | **670 s** | 37 GB | 156,258 | 16,278 | 4,219,582 |
+
+CellRanger reported: 3,858 cells, 599 median genes/cell, 88.5 % valid barcodes,
+58.5 % reads mapped to transcriptome.
+
+### Correctness
+
+On identical reads, rustar's raw matrix is in line with the references:
+**4,219,582 UMIs** (exonic `Gene`), ~4 % above STARsolo's 4,067,946 (also exonic
+`Gene`). CellRanger's 4,843,682 is higher because it counts **intronic** reads by
+default (`include-introns`), whereas `--soloFeatures Gene` is exonic-only.
+rustar's read-stage barcode match rate was **86 % exact** on this real data.
+
+### The buffered-I/O fix
+
+The first rustar count run took 1,774 s. A breakdown showed the raw-matrix write
+dominated:
+
+```
+                  before        after
+matrix write:    1,306 s   →       3 s     (~435×; byte-identical output)
+align (10M):       402 s   →     627 s     (unchanged logic; emulation variance)
+count total:     1,774 s   →     670 s
+```
+
+Cause: `write_barcodes` / `write_matrix_mtx` wrote to a raw `std::fs::File`
+(unbuffered) — one `write(2)` syscall per line, so `barcodes.tsv` (the full
+3,686,400-barcode whitelist) cost ~3.7M syscalls, amplified by Rosetta+virtiofs.
+Fix: wrap the writers in `BufWriter` + a no-alloc barcode unpack
+(`unpack_barcode_into`). The write dropped to ~3 s.
+
+## Notes & limitations
+
+- **Index build**: rustar (2,801 s) was *faster* than STARsolo (3,626 s) under
+  emulation; CellRanger ships a prebuilt index (its 356 s "count" includes the
+  internal STAR alignment + cell calling + full metrics).
+- **Memory**: rustar's 37 GB peak is dominated by the **loaded index (~27 GB:
+  5.4 B-entry SA for the 2.79 Gb genome)** plus the alignment working set — *not*
+  the matrix build (Step 1 per-cell `build_matrix` already bounds that). Reducing
+  the peak further is about the SA representation and alignment buffers, not the
+  matrix.
+- **Read count**: 10M (of ~200M total) keeps the run tractable and memory under
+  the 40 GB cap. Stats scale with depth (CellRanger called 3,858 cells at this
+  subsample vs the dataset's ~4,725).
+
+## Reproduce
+
+```bash
+brew install colima docker && colima start --cpu 14 --memory 40 --vm-type vz --vz-rosetta
+# build the amd64 image (colima can't build amd64 directly; run+commit a base):
+docker run --platform linux/amd64 --name b rust:1-bookworm \
+  bash -c "apt-get update -qq && apt-get install -y -qq rna-star python3 procps time"
+docker commit b rustar-bench-amd64 && docker rm -f b
+# then run test/solo_bench.py inside it with the ref/whitelist/fastqs mounted
+# (see test/solo_bench.py header for the full argument list).
+```
diff --git a/docs-old/phase14_starsolo.md b/docs-old/phase14_starsolo.md
new file mode 100644
index 0000000..230190b
--- /dev/null
+++ b/docs-old/phase14_starsolo.md
@@ -0,0 +1,324 @@
+[← Back to ROADMAP](../ROADMAP.md)
+
+# Phase 14: STARsolo (Single-Cell)
+
+**Status**: In progress — **MVP complete (14.1–14.4)**
+
+**Goal**: A faithful port of STARsolo — turn the aligner into a single-cell RNA-seq
+quantifier that matches STAR's `--soloType` output (count matrices, barcode/UMI
+correction, cell calling, SAM tags) as closely as the bulk aligner already
+matches STAR.
+
+**Prerequisite (met)**: position agreement >99% — SE 99.815% (tie-adjusted),
+PE 99.883%. Phase unblocked 2026-06-10.
+
+---
+
+## Architecture
+
+STARsolo is a **layer around** the existing aligner, not a change to it. The core
+alignment is untouched:
+
+```
+ readFilesIn[0] = cDNA read  ──► existing SE alignment ──► Transcript(s)
+ readFilesIn[1] = barcode read (R1: CB+UMI) ──► parse ──► correct vs whitelist
+                                                              │
+              Transcript + corrected CB + UMI ──► gene assignment (overlapping_genes)
+                                                              │
+                              collate per (CB, gene) ──► UMI dedup ──► count
+                                                              │
+                                            Solo.out/<Feature>/raw/matrix.mtx
+```
+
+Key reuse points already in the codebase:
+- `Transcript` (`src/align/transcript.rs`) carries `chr_idx`, `genome_start/end`,
+  `is_reverse`, `exons` — everything gene assignment needs.
+- `GeneAnnotation::overlapping_genes()` (`src/quant/mod.rs`) maps an alignment to
+  gene indices and is directly reusable for per-cell counting.
+- The SE parallel batch loop (`align_reads_single_end` in `src/lib.rs`) is where
+  per-read barcode info threads through to a per-cell accumulator.
+
+**Read-file convention** (matches STAR): `--readFilesIn cDNA_read barcode_read`.
+The cDNA read is file 0, the barcode read is file 1. A solo run therefore supplies
+two files but is a *single-end alignment* run.
+
+---
+
+## Sub-phase plan
+
+| Sub-phase | Description | Status |
+|-----------|-------------|--------|
+| 14.1 | `--solo*` params + barcode-read input plumbing | ✅ Complete |
+| 14.2 | Whitelist load + CB correction (`--soloCBmatchWLtype`) + UMI checks | ✅ Complete |
+| 14.3 | Per-read gene assignment + CB/UMI threaded into the alignment loop | ✅ Complete |
+| 14.4 | UMI dedup + raw `matrix.mtx` (**MVP complete**) | ✅ Complete |
+| 14.5 | `Summary.csv` / `Barcodes.stats` / `Features.stats` | ⬜ Planned |
+| 14.6 | Cell filtering (`filtered/` matrix) | ⬜ Planned |
+| 14.7 | `CB`/`UB`/`GX`/`GN` SAM tags + `CB_samTagOut` | ⬜ Planned |
+| 14.8 | More features: GeneFull, SJ, Velocyto | ⬜ Planned |
+| 14.9 | Multi-gene resolution (`--soloMultiMappers`) | ⬜ Planned |
+| 14.10 | Other chemistries: CB_UMI_Complex, SmartSeq | ⬜ Planned |
+| 14.11 | Differential test harness vs STARsolo + integration tests | ⬜ Planned |
+
+**MVP = 14.1–14.5**: a working 10x Chromium `Gene` count matrix.
+
+### Faithfulness risk notes
+- **Read ordering**: cDNA read is FIRST in `--readFilesIn`, barcode read second.
+- **CB correction** posterior math and the **`1MM_Directional`** UMI-graph collapse
+  are the two algorithms where byte-parity with STAR is fiddly — budget extra
+  differential-testing time there (14.2, 14.4).
+- **Matrix conventions**: MatrixMarket coordinate format, features × barcodes,
+  1-based indices — must match Cell Ranger / STARsolo layout exactly.
+
+---
+
+## Phase 14.1: Params + barcode-read plumbing ✅ (2026-06-10)
+
+**Goal**: Accept `--soloType` and the barcode geometry on the CLI, read the barcode
+read alongside the cDNA read, and extract CB+UMI — without yet counting.
+
+**Implementation**:
+
+1. **`src/params/mod.rs`** — `SoloType` enum (`None`, `CbUmiSimple` [alias
+   `Droplet`], `CbUmiComplex`, `CbSamTagOut`, `SmartSeq`) with `FromStr`/`Display`.
+   12 new parameters:
+   - `--soloType`, `--soloCBwhitelist`, `--soloCBstart` (1), `--soloCBlen` (16),
+     `--soloUMIstart` (17), `--soloUMIlen` (10), `--soloFeatures` (`Gene`),
+     `--soloUMIdedup` (`1MM_All`), `--soloCBmatchWLtype` (`1MM_multi`),
+     `--soloCellFilter`, `--soloOutFileNames`, `--soloStrand` (`Forward`).
+   - Helpers: `solo_enabled()`, `cdna_read_file()`, `barcode_read_file()`,
+     `solo_cb_whitelist_none()`.
+   - Validation: solo needs exactly 2 read files; `Gene`/`GeneFull` need a GTF;
+     CB/UMI length > 0 for `CB_UMI_Simple`.
+
+2. **`src/solo/mod.rs`** (new) —
+   - `SoloBarcodeLayout` — fixed-position geometry, 1-based starts converted to
+     0-based; `from_params`, `min_read_len`, `extract`.
+   - `CellBarcode` — encoded CB/UMI seq + raw Phred qualities; `cb_has_n`,
+     `umi_has_n`, `cb_string`, `umi_string`.
+   - `SoloReadReader` / `SoloRead` — lockstep reader over the cDNA and barcode
+     FASTQ files; `read_batch`; errors on length mismatch. `open_reader(params)`
+     factory.
+
+3. **`src/lib.rs`** — `mod solo;`; `run_single_pass` + `run_pass1` compute
+   `n_align_files = if solo { 1 } else { read_files_in.len() }` so a 2-file solo
+   run routes to the SE cDNA path; `is_paired` excludes solo.
+
+**Boundary**: 14.1 makes a solo run *parse and validate* and aligns the cDNA read
+(producing `Aligned.out.sam`). Barcodes are extracted by `SoloReadReader` but not
+yet threaded into the parallel alignment loop or counted — that begins in 14.2,
+where per-read barcode handling pairs naturally with whitelist correction.
+
+**Tests**: 6 new in `src/solo/mod.rs` (layout conversion, v2 extraction, too-short
+read, N-detection, reader pairing, length-mismatch error) + CLI validation smoke
+tests. 447 lib tests, 0 clippy warnings.
+
+**Files**: `src/params/mod.rs`, `src/solo/mod.rs` (new), `src/lib.rs`
+
+---
+
+## Phase 14.2: Whitelist load + CB correction ✅ (2026-06-11)
+
+**Goal**: Load the cell-barcode whitelist and match each read's CB to it exactly
+as STAR's read stage does, plus validate the UMI.
+
+**Reference**: STAR `source/SoloReadBarcode_getCBandUMI.cpp` (read stage). The
+multi-match *posterior* resolution lives in the collation stage, not here — see
+the boundary note below.
+
+**Implementation** (`src/solo/whitelist.rs`, new):
+
+- **Packing** — `pack_barcode` 2-bit packs an encoded barcode into a `u64` with
+  `seq[0]` in the high bits (matching `convertNuclStrToInt64`). N-handling:
+  `NoN(u64)` / `OneN{packed,pos}` / `ManyN`. `unpack_barcode` reverses it.
+- **`CbMatchType`** — decodes `--soloCBmatchWLtype` into STAR's `mm1` /
+  `mm1_multi` / `mm1_multi_nbase` / `pseudocounts` flags (Exact, 1MM, 1MM_multi
+  [default], `_pseudocounts`, `_Nbase_pseudocounts`).
+- **`CbWhitelist`** — `List` (sorted unique packed `Vec<u64>` + original-order
+  index for `barcodes.tsv` + per-index `exact_counts` atomics) or `NoWhitelist`.
+  `load()` reads plain or gzip, validates equal lengths, rejects N-containing
+  whitelist entries.
+- **`match_cb`** follows STAR exactly: exact binary search (→ `Exact`, bumps the
+  exact-count prior); else single-N substitution (all 4 bases at the N position)
+  or 1MM enumeration (every position × 3 alternate bases). One candidate →
+  `Corrected`; >1 → `Multi(candidates)` when the multi flag is set (records WL
+  index + mismatch position + quality for later resolution) else
+  `MultMatchRejected`. Rejections map to STAR's cbMatch codes (`NoMatch` -1,
+  `NinCb` -2, `MultMatchRejected` -3).
+- **`check_umi`** — any N → `NinUmi` (-23); exact homopolymer → `Homopolymer`
+  (-24); else `Ok(packed)`.
+- **`CbMatchStats`** — atomic counters for STAR's cbMatch categories.
+
+**Params** (`src/params/mod.rs`): `--soloCBmatchWLtype` validity check;
+`solo_cb_match_type()` and `solo_cb_whitelist_path()` helpers; rules that
+`--soloCBwhitelist None` requires `Exact`, and `--soloCBlen ≤ 32`.
+
+**Boundary**: the count + quality **posterior** that resolves `CbMatch::Multi`
+into one corrected barcode needs the *global* `exact_counts` table, which is only
+complete after all reads are processed — so it is a collation-stage operation
+deferred to Phase 14.4. Phase 14.2 records the candidates (exactly as STAR's
+`cbMatchString`) and accumulates the prior. The matcher is also not yet wired
+into the alignment loop; that happens in 14.3 alongside gene assignment.
+
+**Tests**: 13 new in `src/solo/whitelist.rs` (pack roundtrip, N-detection, exact
+match + count, 1MM correction, ambiguous multi vs reject, no-match, single-N
+correction, many-N reject, Exact-only mode, UMI checks, length-mismatch error,
+gzip load, match-type parsing) + CLI validation smoke tests. 460 lib tests,
+0 clippy warnings.
+
+**Files**: `src/solo/whitelist.rs` (new), `src/solo/mod.rs`, `src/params/mod.rs`
+
+---
+
+## Phase 14.3: Gene assignment + barcode threading ✅ (2026-06-11)
+
+**Goal**: Assign each cDNA alignment to a gene and wire CB/UMI through the
+alignment loop so per-cell (CB, UMI, gene) records are collected.
+
+**Gene assignment** (`src/solo/gene.rs`, new):
+- `SoloStrand` (`--soloStrand`: Forward [default] / Reverse / Unstranded).
+- `assign_gene_se(transcripts, gene_ann, strand)` — the read's gene set is the
+  UNION of strand-filtered `GeneAnnotation::overlapping_genes` across ALL its
+  alignments. Exactly one gene → `Gene(idx)`; zero → `NoFeature`; >1 →
+  `Ambiguous`; no transcripts → `Unmapped`. A multi-locus read whose loci all
+  fall in one gene is therefore still gene-unique (matching STARsolo's default
+  `--soloMultiMappers Unique`, unlike `quantMode GeneCounts` which drops every
+  multimapper).
+
+**Context + recorder** (`src/solo/mod.rs`):
+- `SoloContext` — `build(params, genome)` loads the whitelist and builds the
+  gene model from `--sjdbGTFfile`; bundles layout + whitelist + match type +
+  strand + `CbMatchStats` + `SoloRecorder`, shared as an `Arc` across threads.
+- `SoloRecorder` — thread-safe sink for `SoloCountRecord{cb, umi, gene}` plus
+  deferred `SoloMultiRecord` (unresolved 1MM_multi CBs, resolved in 14.4).
+- `SoloContext::process_read` — CB match → UMI check → gene assign, recording
+  stats and producing a record only when all three succeed.
+
+**Loop** (`src/lib.rs`): new `align_reads_solo` reads cDNA (file 0) + barcode
+(file 1) in lockstep via `SoloReadReader`, aligns the cDNA exactly like the SE
+path (`align_read` → `build_alignment_records`), writes SAM/BAM, runs
+`process_read` per read, and appends records to the recorder in the sequential
+write phase. `run_single_pass` dispatches solo runs here; `run_single_pass` /
+`run_two_pass` thread `solo_ctx`. A run-end summary logs the barcode-match stats
+and record count.
+
+**Boundary / limitations**: the solo loop is single-pass and does not yet emit
+BySJout / chimeric / transcriptome-SAM side outputs (not part of the MVP). The
+count matrix (`raw/matrix.mtx` + `barcodes.tsv` + `features.tsv`) and 1MM_multi
+posterior resolution are Phase 14.4. `--soloStrand` validated in params.
+
+**Tests**: 7 new gene-assignment unit tests + end-to-end
+`test_starsolo_gene_assignment` (synthetic genome + GTF + whitelist: 16 cDNA
+reads → 16 exact CB matches → 16 resolved (CB,UMI,gene) records). 467 lib + 10
+integration tests, 0 clippy warnings.
+
+**Files**: `src/solo/gene.rs` (new), `src/solo/mod.rs`, `src/params/mod.rs`,
+`src/lib.rs`, `tests/alignment_features.rs`
+
+---
+
+## Phase 14.4: UMI dedup + raw matrix — MVP COMPLETE ✅ (2026-06-11)
+
+**Goal**: Collapse UMIs and write the raw per-cell count matrix — the first
+usable single-cell output.
+
+**Reference**: STAR `SoloFeature_collapseUMIall.cpp` (dedup),
+`SoloReadFeature_inputRecords.cpp` (CB multi-resolution),
+`SoloFeature_outputResults.cpp` (matrix format).
+
+**Implementation** (`src/solo/count.rs`, new):
+
+- **`UmiDedup`** (`--soloUMIdedup`): `Exact` (distinct UMIs), `NoDedup` (reads),
+  `1MM_All` (default — connected components where any two UMIs within Hamming-1
+  merge transitively, via union-find), `1MM_Directional` / `_UMItools`
+  (`count_hub ≥ 2·count_leaf + dirCountAdd`, `dirCountAdd` 0 / −1).
+- **Deferred 1MM_multi CB resolution** — `resolve_multi_cb` picks the candidate
+  maximizing STAR's posterior weight `exactCount[cand] · 10^(−q/10)` (prior =
+  `whitelist.exact_count_snapshot()`, `q` = mismatch-position Phred); rejects
+  when no candidate has positive weight.
+- **`build_matrix`** groups reads by `(cell, gene)` into UMI→multiplicity maps
+  (resolved multi-CB records folded in), then dedups each.
+- **`write_gene_matrix`** writes `Solo.out/Gene/raw/`:
+  - `matrix.mtx` — `%%MatrixMarket matrix coordinate integer general`; dims
+    `nFeatures nBarcodes nEntries`; entries `gene+1 cell+1 count` (1-based),
+    iterated in cell-column order.
+  - `features.tsv` — `gene_id <TAB> gene_id <TAB> Gene Expression` (CellRanger
+    v3; no gene names available so id is repeated).
+  - `barcodes.tsv` — full whitelist in sorted order (matrix column order).
+
+Wired into `align_reads` after alignment. `--soloUMIdedup` validated in params.
+
+**Known approximations to revisit** (differential testing, 14.11): the
+`1MM_Directional` absorption is a greedy hub model (faithful default path is
+`1MM_All`, which is exact); the CB-posterior acceptance uses no `cbMinP`
+threshold (always takes the argmax); `barcodes.tsv` uses sorted (not 10x-file)
+order; `--soloCBwhitelist None` matrix output is not yet supported.
+
+**Tests**: 8 new unit tests in `count.rs` (each dedup method incl. transitive
+chains and the directional thresholds; multi-CB posterior) + end-to-end
+`test_starsolo_gene_matrix` (8 reads, one cell, two Hamming-distant UMI clouds →
+2 deduped molecules → matrix `1 1 2`, validated `features.tsv` / `barcodes.tsv`).
+475 lib + 10 integration tests, 0 clippy warnings.
+
+**Files**: `src/solo/count.rs` (new), `src/solo/mod.rs`, `src/params/mod.rs`,
+`src/lib.rs`, `tests/alignment_features.rs`
+
+---
+
+## Phase 14.CR: CellRanger 4.x/5.x matching — VERIFIED vs real STARsolo ✅ (2026-06-12)
+
+**Goal**: Support the [STARsolo CellRanger-matching flag set](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#matching-cellranger-4xx-and-5xx-results)
+and prove the output matches real STARsolo.
+
+**Flags** (`--clipAdapterType CellRanger4 --outFilterScoreMin 30
+--soloCBmatchWLtype 1MM_multi_Nbase_pseudocounts --soloUMIfiltering MultiGeneUMI_CR
+--soloUMIdedup 1MM_CR`), implemented from STAR source:
+
+- **`1MM_CR`** (`src/solo/count.rs::cellranger_1mm`) — port of STAR
+  `umiArrayCorrect_CR`: UMIs sorted ascending by `(count, umi)`, each corrected
+  to its highest-count 1MM neighbor, **non-transitive** (points to the neighbor's
+  raw UMI), count = distinct corrected UMIs.
+- **`MultiGeneUMI_CR`** (`filter_multi_gene_umi`) — keep the top-read-count gene
+  of a multi-gene UMI. `build_matrix` restructured to per-cell
+  `umi → gene → read_count` so filtering precedes dedup.
+- **`1MM_multi_Nbase_pseudocounts`** — +1 pseudocount on the CB posterior prior
+  (`resolve_multi_cb`).
+- **`CellRanger4` clip** (`src/solo/mod.rs::clip_adapter_cr4`) — TSO 5' clip +
+  polyA 3' trim, conservative (no-op on adapter-free reads), applied in
+  `align_reads_solo` before fixed Nbases clipping.
+
+All four validated in `params.rs`.
+
+**Differential test** (`test/solo_cellranger_diff.py`): generates a synthetic 10x
+dataset (two 2-exon genes, whitelist, cDNA + barcode reads with a planted 1MM
+UMI pair), runs the full CellRanger flag set on BOTH rustar-aligner and real
+STAR, and compares the decoded `{(barcode, gene_id): count}` matrices.
+
+**Result — byte-identical match, 3/3 deterministic:**
+```
+(AAAACCCCGGGGTTTT, GENEA) = 2   # 1MM_CR collapsed M(x5)+M-1mm(x1) -> 1, +N(x3) -> 2
+(AAAACCCCGGGGTTTT, GENEB) = 1
+(ACACACACGTGTGTGT, GENEA) = 1
+```
+
+**Why a container**: STAR 2.7.11b reads 0 input reads on Apple-Silicon macOS (a
+known STAR/macOS bug — `nextChar=-1` immediate EOF — present in both the homebrew
+bottle and a from-source build). The reference therefore runs in a Linux
+container (`test/Dockerfile.solodiff` — Debian + `rna-star` 2.7.10b + Rust),
+driven by `test/solo_diff_docker.sh` via colima (no Docker Desktop needed). On a
+host with a working STAR, `python3 test/solo_cellranger_diff.py` runs it directly.
+
+A committed cargo test (`test_starsolo_cellranger_style_matrix`) asserts the same
+CellRanger-style matrix (including the 1MM_CR collapse) without needing STAR, and
+each CellRanger algorithm has unit tests in `src/solo/count.rs`.
+
+---
+
+## MVP status
+
+Phases 14.1–14.4 deliver a working **10x Chromium `Gene`** quantifier:
+`--soloType CB_UMI_Simple --soloCBwhitelist <wl> --soloFeatures Gene
+--sjdbGTFfile <gtf> --readFilesIn cDNA.fq barcode.fq` aligns the cDNA reads and
+writes `Solo.out/Gene/raw/{matrix.mtx, barcodes.tsv, features.tsv}`. Remaining
+phases (14.5–14.11) add stats files, cell filtering, SAM tags, more features,
+multi-gene resolution, other chemistries, and the differential-test harness.
diff --git a/src/align/read_align.rs b/src/align/read_align.rs
index 89ea66b..077d686 100644
--- a/src/align/read_align.rs
+++ b/src/align/read_align.rs
@@ -1505,7 +1505,7 @@ mod tests {
         }
 
         let genome = Genome {
-            sequence,
+            sequence: sequence.into(),
             n_genome,
             n_genome_real: n_genome,
             n_chr_real: 1,
diff --git a/src/align/score.rs b/src/align/score.rs
index 7790fa9..83e5842 100644
--- a/src/align/score.rs
+++ b/src/align/score.rs
@@ -634,7 +634,7 @@ mod tests {
         }
 
         Genome {
-            sequence,
+            sequence: sequence.into(),
             n_genome,
             n_genome_real: n_genome,
             n_chr_real: 1,
diff --git a/src/align/seed.rs b/src/align/seed.rs
index 7480756..31366ff 100644
--- a/src/align/seed.rs
+++ b/src/align/seed.rs
@@ -471,7 +471,7 @@ fn compare_seq_to_genome(
             return (match_len, true);
         }
 
-        let genome_base = index.genome.sequence[genome_idx];
+        let genome_base = index.genome.sequence.base(genome_idx);
 
         if genome_base >= 5 {
             // Padding character — STAR returns comp_res > 0 (read > genome)
diff --git a/src/align/stitch.rs b/src/align/stitch.rs
index c8ef6cf..b5fe2eb 100644
--- a/src/align/stitch.rs
+++ b/src/align/stitch.rs
@@ -3019,7 +3019,7 @@ mod tests {
         }
 
         let genome = Genome {
-            sequence,
+            sequence: sequence.into(),
             n_genome,
             n_genome_real: n_genome,
             n_chr_real: 1,
@@ -3141,7 +3141,7 @@ mod tests {
         }
 
         let genome = Genome {
-            sequence,
+            sequence: sequence.into(),
             n_genome,
             n_genome_real: n_genome,
             n_chr_real: 1,
diff --git a/src/bin/emptydrops.rs b/src/bin/emptydrops.rs
new file mode 100644
index 0000000..bbe1ef8
--- /dev/null
+++ b/src/bin/emptydrops.rs
@@ -0,0 +1,374 @@
+//! Standalone EmptyDrops_CR cell caller (Rust port of STAR
+//! `SoloFeature_emptyDrops_CR.cpp` / CellRanger's EmptyDrops variant).
+//!
+//! Reads a raw count matrix (MatrixMarket `matrix.mtx` [.gz] genes×cells +
+//! `barcodes.tsv`/`features.tsv`) and writes the called cells:
+//!   - guaranteed cells from the CellRanger-2.2 knee, plus
+//!   - extra cells whose expression profile is significantly different from the
+//!     ambient RNA profile (multinomial Monte-Carlo test, Benjamini-Hochberg).
+//!
+//! Output: `<out>/barcodes.tsv` (called cells) + `<out>/cells.txt` (one called
+//! barcode per line) and a `<out>/emptydrops.json` summary.
+//!
+//! Usage:
+//!   emptydrops --raw <raw_dir> --out <dir> [--seed N] [--fdr 0.01] [--sim-n 10000]
+//!
+//! Defaults mirror STAR `--soloCellFilter EmptyDrops_CR 3000 0.99 10 45000 90000 500 0.01 20000`.
+
+use std::fs::File;
+use std::io::{BufRead, BufReader, BufWriter, Read, Write};
+use std::path::{Path, PathBuf};
+
+use flate2::read::GzDecoder;
+use rand::SeedableRng;
+use rand::distr::{Distribution, weighted::WeightedIndex};
+use rand::rngs::StdRng;
+
+struct Args {
+    raw: PathBuf,
+    out: PathBuf,
+    seed: u64,
+    fdr: f64,
+    sim_n: usize,
+    n_expected: usize,
+    max_percentile: f64,
+    max_min_ratio: f64,
+    ind_min: usize,
+    ind_max: usize,
+    umi_min: u64,
+    umi_min_frac_median: f64,
+    cand_max_n: usize,
+}
+
+fn parse_args() -> Args {
+    let mut a = Args {
+        raw: PathBuf::new(),
+        out: PathBuf::new(),
+        seed: 19_760_110,
+        fdr: 0.01,
+        sim_n: 10_000,
+        n_expected: 3000,
+        max_percentile: 0.99,
+        max_min_ratio: 10.0,
+        ind_min: 45_000,
+        ind_max: 90_000,
+        umi_min: 500,
+        umi_min_frac_median: 0.01,
+        cand_max_n: 20_000,
+    };
+    let mut it = std::env::args().skip(1);
+    while let Some(k) = it.next() {
+        let mut v = || it.next().expect("missing value");
+        match k.as_str() {
+            "--raw" => a.raw = PathBuf::from(v()),
+            "--out" => a.out = PathBuf::from(v()),
+            "--seed" => a.seed = v().parse().unwrap(),
+            "--fdr" => a.fdr = v().parse().unwrap(),
+            "--sim-n" => a.sim_n = v().parse().unwrap(),
+            "--n-expected" => a.n_expected = v().parse().unwrap(),
+            "--cand-max-n" => a.cand_max_n = v().parse().unwrap(),
+            "--ind-min" => a.ind_min = v().parse().unwrap(),
+            "--ind-max" => a.ind_max = v().parse().unwrap(),
+            "--umi-min" => a.umi_min = v().parse().unwrap(),
+            other => panic!("unknown arg {other}"),
+        }
+    }
+    assert!(!a.raw.as_os_str().is_empty(), "--raw required");
+    assert!(!a.out.as_os_str().is_empty(), "--out required");
+    a
+}
+
+fn find(d: &Path, base: &str) -> PathBuf {
+    for c in [base.to_string(), format!("{base}.gz")] {
+        let p = d.join(&c);
+        if p.exists() {
+            return p;
+        }
+    }
+    panic!("{base}[.gz] not found in {}", d.display());
+}
+
+fn reader(p: &Path) -> Box<dyn BufRead> {
+    let f = File::open(p).unwrap();
+    if p.extension().is_some_and(|e| e == "gz") {
+        Box::new(BufReader::new(GzDecoder::new(f)))
+    } else {
+        Box::new(BufReader::new(f))
+    }
+}
+
+fn read_lines_first_col(p: &Path) -> Vec<String> {
+    reader(p)
+        .lines()
+        .map(|l| l.unwrap().split('\t').next().unwrap().trim().to_string())
+        .collect()
+}
+
+/// Per-cell sparse profile: (gene_idx, count). Plus per-cell total.
+struct Matrix {
+    n_genes: usize,
+    barcodes: Vec<String>,
+    cell_profiles: Vec<Vec<(u32, u32)>>,
+    totals: Vec<u64>,
+}
+
+fn load_matrix(raw: &Path) -> Matrix {
+    let barcodes = read_lines_first_col(&find(raw, "barcodes.tsv"));
+    let genes = read_lines_first_col(&find(raw, "features.tsv"));
+    let n_genes = genes.len();
+    let n_cells = barcodes.len();
+
+    // MatrixMarket: skip % header, then "nGenes nCells nnz", then "gene cell count".
+    let mut rd = reader(&find(raw, "matrix.mtx"));
+    let mut buf = String::new();
+    // header
+    loop {
+        buf.clear();
+        rd.read_line(&mut buf).unwrap();
+        if !buf.starts_with('%') {
+            break;
+        }
+    }
+    let mut cell_profiles: Vec<Vec<(u32, u32)>> = vec![Vec::new(); n_cells];
+    let mut totals = vec![0u64; n_cells];
+    let mut line = String::new();
+    let mut content = String::new();
+    rd.read_to_string(&mut content).unwrap();
+    for l in content.lines() {
+        line.clear();
+        let mut p = l.split_whitespace();
+        let g: usize = match p.next() {
+            Some(x) => x.parse().unwrap(),
+            None => continue,
+        };
+        let c: usize = p.next().unwrap().parse().unwrap();
+        let v: u64 = p.next().unwrap().parse::<f64>().unwrap() as u64;
+        if v == 0 {
+            continue;
+        }
+        let gi = g - 1;
+        let ci = c - 1;
+        cell_profiles[ci].push((gi as u32, v as u32));
+        totals[ci] += v;
+    }
+    Matrix {
+        n_genes,
+        barcodes,
+        cell_profiles,
+        totals,
+    }
+}
+
+/// CellRanger-2.2 knee: number of guaranteed cells (top barcodes by total).
+fn knee_n_cells(sorted_desc: &[u64], n_expected: usize, max_pct: f64, max_min_ratio: f64) -> usize {
+    if sorted_desc.is_empty() {
+        return 0;
+    }
+    let idx = ((n_expected as f64 * (1.0 - max_pct)).round() as usize).min(sorted_desc.len() - 1);
+    let robust_max = sorted_desc[idx] as f64;
+    let thr = robust_max / max_min_ratio;
+    sorted_desc.iter().take_while(|&&c| c as f64 >= thr).count()
+}
+
+fn main() {
+    let a = parse_args();
+    eprintln!("emptydrops: loading {}", a.raw.display());
+    let m = load_matrix(&a.raw);
+    let n_cells = m.totals.len();
+
+    // Rank barcodes by total UMI, descending (stable by index for ties).
+    let mut order: Vec<usize> = (0..n_cells).filter(|&i| m.totals[i] > 0).collect();
+    order.sort_by(|&i, &j| m.totals[j].cmp(&m.totals[i]).then(i.cmp(&j)));
+    let sorted_desc: Vec<u64> = order.iter().map(|&i| m.totals[i]).collect();
+
+    // (1) Guaranteed cells from the CR2.2 knee.
+    let n_simple = knee_n_cells(
+        &sorted_desc,
+        a.n_expected,
+        a.max_percentile,
+        a.max_min_ratio,
+    );
+    eprintln!("emptydrops: {n_simple} guaranteed cells from CR2.2 knee");
+
+    // (2) Ambient profile from rank [ind_min, ind_max).
+    let mut amb = vec![0f64; m.n_genes];
+    let mut amb_total = 0f64;
+    for &cell in order
+        .iter()
+        .skip(a.ind_min)
+        .take(a.ind_max.saturating_sub(a.ind_min))
+    {
+        for &(g, c) in &m.cell_profiles[cell] {
+            amb[g as usize] += c as f64;
+            amb_total += c as f64;
+        }
+    }
+    if amb_total == 0.0 {
+        eprintln!("emptydrops: empty ambient range — falling back to knee-only");
+        write_out(&a, &m, &order[..n_simple], n_simple, 0);
+        return;
+    }
+    // Good-Turing P0 (unseen mass) distributed over zero-count genes; seen genes
+    // get proportional mass scaled by (1 - P0). Approximates STAR's SGT.
+    let n1 = amb.iter().filter(|&&x| (x - 1.0).abs() < 0.5).count() as f64;
+    let p0 = (n1 / amb_total).clamp(1e-12, 0.5);
+    let n_zero = amb.iter().filter(|&&x| x == 0.0).count().max(1) as f64;
+    let amb_prob: Vec<f64> = amb
+        .iter()
+        .map(|&x| {
+            if x > 0.0 {
+                (1.0 - p0) * x / amb_total
+            } else {
+                p0 / n_zero
+            }
+        })
+        .collect();
+    let amb_logp: Vec<f64> = amb_prob.iter().map(|&p| p.max(1e-300).ln()).collect();
+
+    // (3) Candidate barcodes: rank >= n_simple, total >= minUMI, up to cand_max_n.
+    let median_top = if n_simple >= 2 {
+        sorted_desc[n_simple / 2]
+    } else if !sorted_desc.is_empty() {
+        sorted_desc[0]
+    } else {
+        0
+    };
+    let min_umi = a
+        .umi_min
+        .max((a.umi_min_frac_median * median_top as f64) as u64);
+    let mut cands: Vec<usize> = Vec::new();
+    for &cell in order.iter().skip(n_simple).take(a.cand_max_n) {
+        if m.totals[cell] < min_umi {
+            break;
+        }
+        cands.push(cell);
+    }
+    eprintln!(
+        "emptydrops: {} candidates (minUMI={min_umi}); running {} Monte-Carlo sims",
+        cands.len(),
+        a.sim_n
+    );
+    if cands.is_empty() {
+        write_out(&a, &m, &order[..n_simple], n_simple, 0);
+        return;
+    }
+
+    // logFactorial up to the largest candidate total.
+    let max_count = cands.iter().map(|&c| m.totals[c]).max().unwrap() as usize;
+    let mut log_fac = vec![0f64; max_count + 1];
+    for i in 2..=max_count {
+        log_fac[i] = log_fac[i - 1] + (i as f64).ln();
+    }
+
+    // Observed multinomial log-prob per candidate.
+    let obs_logp: Vec<f64> = cands
+        .iter()
+        .map(|&cell| {
+            let total = m.totals[cell] as usize;
+            let mut s = log_fac[total];
+            for &(g, c) in &m.cell_profiles[cell] {
+                s -= log_fac[c as usize];
+                s += c as f64 * amb_logp[g as usize];
+            }
+            s
+        })
+        .collect();
+
+    // (4/5) Monte Carlo: simulate sim_n barcodes from the ambient multinomial,
+    // recording the running log-prob at every count up to max_count. Each
+    // candidate of total t is compared against sim[*][t].
+    let nonzero: Vec<usize> = (0..m.n_genes).filter(|&g| amb_prob[g] > 0.0).collect();
+    let weights: Vec<f64> = nonzero.iter().map(|&g| amb_prob[g]).collect();
+    let dist = WeightedIndex::new(&weights).unwrap();
+    let mut rng = StdRng::seed_from_u64(a.seed);
+
+    // For each count t, collect the sim log-probs (so we can compare per candidate).
+    // Memory: sim_n * (max_count+1) f64 — fine for ~10k * a few-thousand.
+    let mut sim_at: Vec<Vec<f64>> = vec![Vec::with_capacity(a.sim_n); max_count + 1];
+    let mut curr = vec![0u32; m.n_genes];
+    for _ in 0..a.sim_n {
+        for v in curr.iter_mut() {
+            *v = 0;
+        }
+        let mut lp = 0f64;
+        sim_at[0].push(0.0);
+        #[allow(clippy::needless_range_loop)] // ic is both index and multinomial term
+        for ic in 1..=max_count {
+            let gi = nonzero[dist.sample(&mut rng)];
+            curr[gi] += 1;
+            lp += amb_logp[gi] + (ic as f64).ln() - (curr[gi] as f64).ln();
+            sim_at[ic].push(lp);
+        }
+    }
+
+    // p-value: fraction of sims with LOWER log-prob than observed (more extreme).
+    let mut pvals: Vec<(usize, f64)> = cands
+        .iter()
+        .enumerate()
+        .map(|(i, &cell)| {
+            let t = m.totals[cell] as usize;
+            let obs = obs_logp[i];
+            let n_lower = sim_at[t].iter().filter(|&&sp| sp < obs).count();
+            let p = (1 + n_lower) as f64 / (1 + a.sim_n) as f64;
+            (i, p)
+        })
+        .collect();
+
+    // (6) Benjamini-Hochberg.
+    pvals.sort_by(|x, y| x.1.partial_cmp(&y.1).unwrap());
+    let n = pvals.len() as f64;
+    let mut padj = vec![0f64; pvals.len()];
+    for (rank, &(_, p)) in pvals.iter().enumerate() {
+        padj[rank] = (p * n / (rank + 1) as f64).min(1.0);
+    }
+    for i in (0..padj.len() - 1).rev() {
+        padj[i] = padj[i].min(padj[i + 1]);
+    }
+
+    // Called cells = guaranteed + candidates with padj <= FDR.
+    let mut called: Vec<usize> = order[..n_simple].to_vec();
+    let mut extra = 0usize;
+    for (rank, &(ci, _)) in pvals.iter().enumerate() {
+        if padj[rank] <= a.fdr {
+            called.push(cands[ci]);
+            extra += 1;
+        }
+    }
+    eprintln!("emptydrops: {extra} extra cells (FDR<={})", a.fdr);
+    write_out(&a, &m, &called, n_simple, extra);
+}
+
+fn write_out(a: &Args, m: &Matrix, called: &[usize], n_simple: usize, extra: usize) {
+    std::fs::create_dir_all(&a.out).unwrap();
+    // Stable order: by descending total then barcode.
+    let mut cells: Vec<usize> = called.to_vec();
+    cells.sort_by(|&i, &j| {
+        m.totals[j]
+            .cmp(&m.totals[i])
+            .then(m.barcodes[i].cmp(&m.barcodes[j]))
+    });
+    cells.dedup();
+
+    let mut bc = BufWriter::new(File::create(a.out.join("barcodes.tsv")).unwrap());
+    let mut cl = BufWriter::new(File::create(a.out.join("cells.txt")).unwrap());
+    for &c in &cells {
+        writeln!(bc, "{}", m.barcodes[c]).unwrap();
+        writeln!(cl, "{}", m.barcodes[c]).unwrap();
+    }
+    let summary = format!(
+        "{{\"n_cells\": {}, \"n_guaranteed\": {}, \"n_emptydrops_extra\": {}, \"fdr\": {}, \"sim_n\": {}}}\n",
+        cells.len(),
+        n_simple,
+        extra,
+        a.fdr,
+        a.sim_n
+    );
+    std::fs::write(a.out.join("emptydrops.json"), &summary).unwrap();
+    println!(
+        "EmptyDrops_CR: {} cells ({} guaranteed + {} EmptyDrops) -> {}",
+        cells.len(),
+        n_simple,
+        extra,
+        a.out.display()
+    );
+}
diff --git a/src/chimeric/detect.rs b/src/chimeric/detect.rs
index 95ae396..de2d445 100644
--- a/src/chimeric/detect.rs
+++ b/src/chimeric/detect.rs
@@ -993,7 +993,7 @@ mod tests {
         let n_genome = chr_pad * 2;
         let sequence = vec![0u8; 2 * n_genome as usize];
         Genome {
-            sequence,
+            sequence: sequence.into(),
             n_genome,
             n_genome_real: n_genome,
             n_chr_real: 2,
diff --git a/src/chimeric/output.rs b/src/chimeric/output.rs
index 9da093a..d198dc7 100644
--- a/src/chimeric/output.rs
+++ b/src/chimeric/output.rs
@@ -434,7 +434,7 @@ mod tests {
     fn make_genome_2chr() -> crate::genome::Genome {
         use crate::genome::Genome;
         Genome {
-            sequence: vec![0u8; 2048],
+            sequence: vec![0u8; 2048].into(),
             n_genome: 1024,
             n_genome_real: 1024,
             n_chr_real: 2,
diff --git a/src/chimeric/score.rs b/src/chimeric/score.rs
index d703150..c84aa83 100644
--- a/src/chimeric/score.rs
+++ b/src/chimeric/score.rs
@@ -71,8 +71,8 @@ fn extract_motif(
     }
 
     let genome_idx = (chr_start + extract_pos) as usize;
-    let b1 = genome.sequence.get(genome_idx).copied().unwrap_or(4);
-    let b2 = genome.sequence.get(genome_idx + 1).copied().unwrap_or(4);
+    let b1 = genome.sequence.get(genome_idx).unwrap_or(4);
+    let b2 = genome.sequence.get(genome_idx + 1).unwrap_or(4);
 
     // Convert to bases
     let mut motif = vec![base_to_char(b1), base_to_char(b2)];
@@ -127,8 +127,8 @@ pub fn calculate_repeat_length(
             break;
         }
 
-        let d_base = genome.sequence.get(d_pos as usize).copied().unwrap_or(4);
-        let a_base = genome.sequence.get(a_pos as usize).copied().unwrap_or(4);
+        let d_base = genome.sequence.get(d_pos as usize).unwrap_or(4);
+        let a_base = genome.sequence.get(a_pos as usize).unwrap_or(4);
 
         if d_base == a_base && d_base < 4 {
             // Only count ACGT, not N
@@ -171,7 +171,7 @@ mod tests {
 
     fn mock_genome_with_sequence(seq: Vec<u8>) -> Genome {
         Genome {
-            sequence: seq,
+            sequence: seq.into(),
             n_genome: 100,
             n_genome_real: 100,
             n_chr_real: 1,
diff --git a/src/genome/mod.rs b/src/genome/mod.rs
index ca08848..8cb6938 100644
--- a/src/genome/mod.rs
+++ b/src/genome/mod.rs
@@ -10,6 +10,102 @@ use fasta::parse_fasta_files;
 /// STAR's genome spacing character (used for inter-chromosome padding).
 const GENOME_SPACING_CHAR: u8 = 5;
 
+/// Backing storage for a genome's `[forward | reverse-complement]` sequence.
+///
+/// `Owned` is the full `2*n_genome` byte buffer built at genomeGenerate time
+/// (it is the only variant that supports slicing/mutation). `Mapped` is a
+/// read-only memory map of the on-disk `Genome` file, which holds **only the
+/// forward strand** (`n_genome` bytes): the reverse-complement half is computed
+/// on access in [`GenomeSeq::base`], so loading never materializes the ~`n`-byte
+/// RC buffer and the forward bytes are reclaimable file-backed pages rather than
+/// an anonymous `Vec`. `Arc<Mmap>` keeps `Genome::clone` (two-pass) cheap.
+#[derive(Clone)]
+pub enum GenomeSeq {
+    Owned(Vec<u8>),
+    Mapped {
+        fwd: std::sync::Arc<memmap2::Mmap>,
+        n_genome: usize,
+    },
+}
+
+impl GenomeSeq {
+    /// Base at absolute position `i` — forward `[0, n_genome)` or
+    /// reverse-complement `[n_genome, 2*n_genome)`. For the `Mapped` RC half,
+    /// `base(i) = complement(forward[2*n_genome - 1 - i])`, exactly the bytes
+    /// the owned builder writes into the second half.
+    #[inline]
+    pub fn base(&self, i: usize) -> u8 {
+        match self {
+            GenomeSeq::Owned(v) => v[i],
+            GenomeSeq::Mapped { fwd, n_genome } => {
+                let n = *n_genome;
+                if i < n {
+                    fwd[i]
+                } else {
+                    let f = fwd[2 * n - 1 - i];
+                    if f < 4 { 3 - f } else { f }
+                }
+            }
+        }
+    }
+
+    /// Total sequence length (`2*n_genome` — forward + reverse complement).
+    #[inline]
+    pub fn len(&self) -> usize {
+        match self {
+            GenomeSeq::Owned(v) => v.len(),
+            GenomeSeq::Mapped { n_genome, .. } => 2 * n_genome,
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Bounds-checked [`base`](Self::base): the base at `i`, or `None` if out of
+    /// range.
+    #[inline]
+    pub fn get(&self, i: usize) -> Option<u8> {
+        if i < self.len() {
+            Some(self.base(i))
+        } else {
+            None
+        }
+    }
+
+    /// The contiguous byte buffer. For `Owned` this is the full
+    /// `[forward | RC]`; for `Mapped` it is the forward strand only — callers
+    /// that may touch the RC half must use [`base`](Self::base). Used at build
+    /// time (always `Owned`) for SA construction and the on-disk write.
+    pub fn as_slice(&self) -> &[u8] {
+        match self {
+            GenomeSeq::Owned(v) => v,
+            GenomeSeq::Mapped { fwd, .. } => fwd,
+        }
+    }
+}
+
+impl From<Vec<u8>> for GenomeSeq {
+    fn from(v: Vec<u8>) -> Self {
+        GenomeSeq::Owned(v)
+    }
+}
+
+// `memmap2::Mmap` is neither `Debug` nor `PartialEq`, so derive them by hand via
+// the byte view. `as_slice()` is the full buffer for `Owned` (the only variant
+// tests construct), so equality/printing behave like the old `Vec<u8>` field.
+impl std::fmt::Debug for GenomeSeq {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "GenomeSeq({} bytes)", self.len())
+    }
+}
+
+impl PartialEq for GenomeSeq {
+    fn eq(&self, other: &Self) -> bool {
+        self.as_slice() == other.as_slice()
+    }
+}
+
 /// Packed genome with chromosome metadata.
 ///
 /// The genome sequence is stored as one byte per base:
@@ -19,8 +115,9 @@ const GENOME_SPACING_CHAR: u8 = 5;
 #[derive(Clone)]
 pub struct Genome {
     /// Forward genome (0..n_genome) + reverse complement (n_genome..2*n_genome).
-    /// Initialized to GENOME_SPACING_CHAR (5), then overwritten with actual bases.
-    pub sequence: Vec<u8>,
+    /// Owned at build time; a memory map of the forward strand (RC computed on
+    /// access) when loaded from disk. Access bases via [`GenomeSeq::base`].
+    pub sequence: GenomeSeq,
 
     /// Total length of the forward (padded) genome.
     pub n_genome: u64,
@@ -115,7 +212,7 @@ impl Genome {
         }
 
         Ok(Genome {
-            sequence,
+            sequence: sequence.into(),
             n_genome,
             n_genome_real: n_genome,
             n_chr_real,
@@ -141,7 +238,7 @@ impl Genome {
         let new_n = old_n + gsj.len() as u64;
 
         let mut new_seq = vec![GENOME_SPACING_CHAR; (new_n * 2) as usize];
-        new_seq[..old_n as usize].copy_from_slice(&self.sequence[..old_n as usize]);
+        new_seq[..old_n as usize].copy_from_slice(&self.sequence.as_slice()[..old_n as usize]);
         new_seq[old_n as usize..new_n as usize].copy_from_slice(gsj);
 
         // Rebuild RC over the extended forward range (STAR stores Gsj_RC
@@ -152,7 +249,7 @@ impl Genome {
             new_seq[2 * new_n as usize - 1 - i] = complement;
         }
 
-        self.sequence = new_seq;
+        self.sequence = new_seq.into();
         self.n_genome = new_n;
     }
 
@@ -165,7 +262,7 @@ impl Genome {
     /// The base value (0-3 for ACGT, 4 for N, 5 for padding), or None if out of bounds.
     pub fn get_base(&self, pos: u64) -> Option<u8> {
         if pos < self.sequence.len() as u64 {
-            Some(self.sequence[pos as usize])
+            Some(self.sequence.base(pos as usize))
         } else {
             None
         }
@@ -210,8 +307,11 @@ impl Genome {
 
         // Write Genome file (forward strand only, n_genome bytes)
         let genome_path = dir.join("Genome");
-        fs::write(&genome_path, &self.sequence[..self.n_genome as usize])
-            .map_err(|e| Error::io(e, &genome_path))?;
+        fs::write(
+            &genome_path,
+            &self.sequence.as_slice()[..self.n_genome as usize],
+        )
+        .map_err(|e| Error::io(e, &genome_path))?;
 
         // Write chrName.txt
         let chr_name_path = dir.join("chrName.txt");
@@ -443,19 +543,19 @@ mod tests {
         let n = genome.n_genome as usize;
 
         // Forward: A C G T N (then padding)
-        assert_eq!(genome.sequence[0], 0); // A
-        assert_eq!(genome.sequence[1], 1); // C
-        assert_eq!(genome.sequence[2], 2); // G
-        assert_eq!(genome.sequence[3], 3); // T
-        assert_eq!(genome.sequence[4], 4); // N
+        assert_eq!(genome.sequence.base(0), 0); // A
+        assert_eq!(genome.sequence.base(1), 1); // C
+        assert_eq!(genome.sequence.base(2), 2); // G
+        assert_eq!(genome.sequence.base(3), 3); // T
+        assert_eq!(genome.sequence.base(4), 4); // N
 
         // Reverse complement should be at positions [2n-1, 2n-2, 2n-3, 2n-4, 2n-5]
         // which maps to the reverse of [0,1,2,3,4]
-        assert_eq!(genome.sequence[2 * n - 1], 3); // T (complement of A at pos 0)
-        assert_eq!(genome.sequence[2 * n - 1 - 1], 2); // G (complement of C at pos 1)
-        assert_eq!(genome.sequence[2 * n - 1 - 2], 1); // C (complement of G at pos 2)
-        assert_eq!(genome.sequence[2 * n - 1 - 3], 0); // A (complement of T at pos 3)
-        assert_eq!(genome.sequence[2 * n - 1 - 4], 4); // N (complement of N at pos 4)
+        assert_eq!(genome.sequence.base(2 * n - 1), 3); // T (complement of A at pos 0)
+        assert_eq!(genome.sequence.base(2 * n - 1 - 1), 2); // G (complement of C at pos 1)
+        assert_eq!(genome.sequence.base(2 * n - 1 - 2), 1); // C (complement of G at pos 2)
+        assert_eq!(genome.sequence.base(2 * n - 1 - 3), 0); // A (complement of T at pos 3)
+        assert_eq!(genome.sequence.base(2 * n - 1 - 4), 4); // N (complement of N at pos 4)
     }
 
     #[test]
@@ -509,13 +609,13 @@ mod tests {
         assert_eq!(genome.n_chr_real, 1);
 
         // Forward is [real 0..8 | gsj 8..13].
-        assert_eq!(&genome.sequence[..4], &[0, 1, 2, 3]);
-        assert_eq!(&genome.sequence[8..13], gsj.as_slice());
+        assert_eq!(&genome.sequence.as_slice()[..4], &[0, 1, 2, 3]);
+        assert_eq!(&genome.sequence.as_slice()[8..13], gsj.as_slice());
 
         // RC over the extended forward range. sequence[2n-1-i] = complement(sequence[i]).
         let new_n = genome.n_genome as usize;
-        assert_eq!(genome.sequence[2 * new_n - 1 - 8], 3); // complement of A at fwd[8]=0
-        assert_eq!(genome.sequence[2 * new_n - 1 - 12], 5); // spacer stays 5
+        assert_eq!(genome.sequence.base(2 * new_n - 1 - 8), 3); // complement of A at fwd[8]=0
+        assert_eq!(genome.sequence.base(2 * new_n - 1 - 12), 5); // spacer stays 5
         assert_eq!(genome.sequence.len(), 2 * new_n);
     }
 
diff --git a/src/index/io.rs b/src/index/io.rs
index 18779f4..f9430da 100644
--- a/src/index/io.rs
+++ b/src/index/io.rs
@@ -1,5 +1,4 @@
 use std::fs::File;
-use std::io::Read;
 use std::path::Path;
 
 use byteorder::{LittleEndian, ReadBytesExt};
@@ -187,28 +186,29 @@ fn load_genome(genome_dir: &Path, _params: &Parameters) -> Result<Genome, Error>
     let n_genome_real = chr_start[n_chr_real];
     let n_genome = read_genome_file_size(genome_dir)?.unwrap_or(n_genome_real);
 
-    // Load Genome sequence file
+    // Memory-map the Genome sequence file (forward strand only, `n_genome`
+    // bytes). The reverse-complement half is computed on access by
+    // `GenomeSeq::base`, so the ~`n_genome`-byte RC buffer is never
+    // materialized and the forward bytes are reclaimable file-backed pages
+    // rather than an anonymous `Vec`. The genome is accessed by single-byte
+    // lookups during alignment, which `base` serves from the map.
     let genome_path = genome_dir.join("Genome");
-    let genome_data = std::fs::read(&genome_path).map_err(|e| Error::io(e, &genome_path))?;
+    let file = File::open(&genome_path).map_err(|e| Error::io(e, &genome_path))?;
+    // SAFETY: Genome is opened read-only and never mutated while loaded.
+    let mmap = unsafe { memmap2::Mmap::map(&file).map_err(|e| Error::io(e, &genome_path))? };
 
-    if genome_data.len() != n_genome as usize {
+    if mmap.len() != n_genome as usize {
         return Err(Error::Index(format!(
             "Genome file size mismatch: expected {} bytes, got {}",
             n_genome,
-            genome_data.len()
+            mmap.len()
         )));
     }
 
-    // Build full sequence buffer (forward + reverse complement)
-    let mut sequence = vec![5u8; (n_genome * 2) as usize];
-    sequence[..n_genome as usize].copy_from_slice(&genome_data);
-
-    // Build reverse complement
-    for i in 0..n_genome as usize {
-        let base = sequence[i];
-        let complement = if base < 4 { 3 - base } else { base };
-        sequence[2 * n_genome as usize - 1 - i] = complement;
-    }
+    let sequence = crate::genome::GenomeSeq::Mapped {
+        fwd: std::sync::Arc::new(mmap),
+        n_genome: n_genome as usize,
+    };
 
     Ok(Genome {
         sequence,
@@ -222,9 +222,29 @@ fn load_genome(genome_dir: &Path, _params: &Parameters) -> Result<Genome, Error>
 }
 
 /// Load suffix array from disk.
+///
+/// The `SA` file is **memory-mapped** rather than read into a `Vec`: it is the
+/// largest index component (≈21 GB for mouse) and is accessed by random binary
+/// search during alignment. mmap keeps it as reclaimable file-backed memory
+/// (demand-loaded, dropped — not swapped — under pressure) instead of an
+/// un-reclaimable anonymous allocation. `MADV_RANDOM` disables readahead, which
+/// would waste I/O on the random access pattern.
+/// Best-effort `MADV_RANDOM` on a read-only mmap. `madvise` (and `memmap2::Advice`)
+/// is Unix-only, so this is a no-op on platforms without it (e.g. Windows).
+#[cfg(unix)]
+fn advise_random(mmap: &memmap2::Mmap) {
+    let _ = mmap.advise(memmap2::Advice::Random); // best-effort; ignore if unsupported
+}
+#[cfg(not(unix))]
+fn advise_random(_mmap: &memmap2::Mmap) {}
+
 fn load_suffix_array(genome_dir: &Path, genome: &Genome) -> Result<SuffixArray, Error> {
     let sa_path = genome_dir.join("SA");
-    let sa_data = std::fs::read(&sa_path).map_err(|e| Error::io(e, &sa_path))?;
+    let file = File::open(&sa_path).map_err(|e| Error::io(e, &sa_path))?;
+    // SAFETY: the SA file is opened read-only and not mutated elsewhere while
+    // the index is loaded; the mapping is only ever read.
+    let mmap = unsafe { memmap2::Mmap::map(&file).map_err(|e| Error::io(e, &sa_path))? };
+    advise_random(&mmap);
 
     let gstrand_bit = SuffixArray::calculate_gstrand_bit(genome.n_genome);
     let word_length = gstrand_bit + 1;
@@ -236,7 +256,7 @@ fn load_suffix_array(genome_dir: &Path, genome: &Genome) -> Result<SuffixArray,
     // total_bits = (lengthByte - 8) * 8
     // length = (total_bits / wordLength) + 1
     // BUT we need ceiling division to account for partial entries
-    let length_byte = sa_data.len();
+    let length_byte = mmap.len();
     let length = if length_byte < 8 {
         0
     } else {
@@ -245,7 +265,7 @@ fn load_suffix_array(genome_dir: &Path, genome: &Genome) -> Result<SuffixArray,
         entries + 1
     };
 
-    let data = PackedArray::from_bytes(word_length, length, sa_data);
+    let data = PackedArray::from_mmap(word_length, length, mmap);
 
     Ok(SuffixArray {
         data,
@@ -255,6 +275,11 @@ fn load_suffix_array(genome_dir: &Path, genome: &Genome) -> Result<SuffixArray,
 }
 
 /// Load SA index from disk.
+///
+/// The small fixed header (`nbases` + the `genomeSAindexStart` array) is read
+/// normally; the packed-data region (≈1.8 GB for mouse) is **memory-mapped**
+/// from its byte offset for the same reason as the SA — reclaimable, demand-
+/// loaded file-backed memory instead of an anonymous `Vec`.
 fn load_sa_index(genome_dir: &Path, gstrand_bit: u32) -> Result<SaIndex, Error> {
     let sai_path = genome_dir.join("SAindex");
     let mut file = File::open(&sai_path).map_err(|e| Error::io(e, &sai_path))?;
@@ -273,15 +298,23 @@ fn load_sa_index(genome_dir: &Path, gstrand_bit: u32) -> Result<SaIndex, Error>
         genome_sa_index_start.push(val);
     }
 
-    // Read packed data
-    let mut packed_data = Vec::new();
-    file.read_to_end(&mut packed_data)
-        .map_err(|e| Error::io(e, &sai_path))?;
+    // Map the packed-data region: header is `nbases` (8B) + (nbases+1)×8B.
+    let header_len = 8 + 8 * (u64::from(nbases) + 1);
+    // SAFETY: SAindex is opened read-only and never mutated while loaded.
+    // memmap2 handles non-page-aligned offsets internally; the map runs from
+    // `header_len` to EOF and is only ever read.
+    let mmap = unsafe {
+        memmap2::MmapOptions::new()
+            .offset(header_len)
+            .map(&file)
+            .map_err(|e| Error::io(e, &sai_path))?
+    };
+    advise_random(&mmap);
 
     let word_length = gstrand_bit + 3;
     let num_indices = SaIndex::calculate_num_indices(nbases);
 
-    let data = PackedArray::from_bytes(word_length, num_indices as usize, packed_data);
+    let data = PackedArray::from_mmap(word_length, num_indices as usize, mmap);
 
     Ok(SaIndex {
         nbases,
diff --git a/src/index/packed_array.rs b/src/index/packed_array.rs
index 02d334e..ac925b8 100644
--- a/src/index/packed_array.rs
+++ b/src/index/packed_array.rs
@@ -1,3 +1,39 @@
+/// Backing byte storage for a [`PackedArray`].
+///
+/// `Owned` is a heap `Vec` (used while *building* an index — it is the only
+/// variant that supports [`PackedArray::write`]). `Mapped` is a read-only
+/// memory map of an on-disk `SA` / `SAindex` file (used at *load* time): its
+/// pages are file-backed, so they are demand-loaded and **reclaimable under
+/// memory pressure** (dropped, never swapped) rather than the un-reclaimable
+/// anonymous memory a `Vec` would occupy. `Arc<Mmap>` keeps `Clone` cheap
+/// (two-pass mode clones the whole `GenomeIndex`).
+#[derive(Clone)]
+enum PackedBytes {
+    Owned(Vec<u8>),
+    Mapped(std::sync::Arc<memmap2::Mmap>),
+}
+
+impl PackedBytes {
+    #[inline]
+    fn as_slice(&self) -> &[u8] {
+        match self {
+            PackedBytes::Owned(v) => v,
+            PackedBytes::Mapped(m) => m,
+        }
+    }
+
+    fn as_mut_slice(&mut self) -> &mut [u8] {
+        match self {
+            PackedBytes::Owned(v) => v,
+            PackedBytes::Mapped(_) => {
+                panic!(
+                    "PackedArray: cannot mutate a memory-mapped array (build into an Owned array)"
+                )
+            }
+        }
+    }
+}
+
 /// Variable-width bit-packed array matching STAR's PackedArray format.
 ///
 /// Stores integers with a specified bit width, packing them at bit-level
@@ -17,8 +53,8 @@ pub struct PackedArray {
     /// Number of elements
     length: usize,
 
-    /// Raw byte storage
-    data: Vec<u8>,
+    /// Raw byte storage (owned heap buffer or a read-only memory map).
+    data: PackedBytes,
 }
 
 impl PackedArray {
@@ -44,7 +80,7 @@ impl PackedArray {
             ((length - 1) as u64 * word_length as u64) / 8 + 8
         };
 
-        let data = vec![0u8; length_byte as usize];
+        let data = PackedBytes::Owned(vec![0u8; length_byte as usize]);
 
         Self {
             word_length,
@@ -70,24 +106,26 @@ impl PackedArray {
         let masked_value = (value & self.bit_rec_mask) << bit_shift;
         let mask = self.bit_rec_mask << bit_shift;
 
+        let data = self.data.as_mut_slice();
+
         // Read current 8-byte word, update bits, write back
         let mut word = u64::from_le_bytes([
-            self.data.get(byte_offset).copied().unwrap_or(0),
-            self.data.get(byte_offset + 1).copied().unwrap_or(0),
-            self.data.get(byte_offset + 2).copied().unwrap_or(0),
-            self.data.get(byte_offset + 3).copied().unwrap_or(0),
-            self.data.get(byte_offset + 4).copied().unwrap_or(0),
-            self.data.get(byte_offset + 5).copied().unwrap_or(0),
-            self.data.get(byte_offset + 6).copied().unwrap_or(0),
-            self.data.get(byte_offset + 7).copied().unwrap_or(0),
+            data.get(byte_offset).copied().unwrap_or(0),
+            data.get(byte_offset + 1).copied().unwrap_or(0),
+            data.get(byte_offset + 2).copied().unwrap_or(0),
+            data.get(byte_offset + 3).copied().unwrap_or(0),
+            data.get(byte_offset + 4).copied().unwrap_or(0),
+            data.get(byte_offset + 5).copied().unwrap_or(0),
+            data.get(byte_offset + 6).copied().unwrap_or(0),
+            data.get(byte_offset + 7).copied().unwrap_or(0),
         ]);
 
         word = (word & !mask) | masked_value;
 
         let bytes = word.to_le_bytes();
         for (i, &byte) in bytes.iter().enumerate() {
-            if byte_offset + i < self.data.len() {
-                self.data[byte_offset + i] = byte;
+            if byte_offset + i < data.len() {
+                data[byte_offset + i] = byte;
             }
         }
     }
@@ -106,22 +144,22 @@ impl PackedArray {
         let byte_offset = b / 8;
         let bit_shift = (b % 8) as u32;
 
-        let word = if byte_offset + 8 <= self.data.len() {
+        let data = self.data.as_slice();
+        let word = if byte_offset + 8 <= data.len() {
             // Fast path: read 8 bytes directly (no per-byte bounds checks)
-            // SAFETY: We just verified byte_offset + 8 <= data.len()
-            let bytes = &self.data[byte_offset..byte_offset + 8];
+            let bytes = &data[byte_offset..byte_offset + 8];
             u64::from_le_bytes(bytes.try_into().unwrap())
         } else {
             // Slow path: near end of array, read byte-by-byte with bounds checks
             u64::from_le_bytes([
-                self.data.get(byte_offset).copied().unwrap_or(0),
-                self.data.get(byte_offset + 1).copied().unwrap_or(0),
-                self.data.get(byte_offset + 2).copied().unwrap_or(0),
-                self.data.get(byte_offset + 3).copied().unwrap_or(0),
-                self.data.get(byte_offset + 4).copied().unwrap_or(0),
-                self.data.get(byte_offset + 5).copied().unwrap_or(0),
-                self.data.get(byte_offset + 6).copied().unwrap_or(0),
-                self.data.get(byte_offset + 7).copied().unwrap_or(0),
+                data.get(byte_offset).copied().unwrap_or(0),
+                data.get(byte_offset + 1).copied().unwrap_or(0),
+                data.get(byte_offset + 2).copied().unwrap_or(0),
+                data.get(byte_offset + 3).copied().unwrap_or(0),
+                data.get(byte_offset + 4).copied().unwrap_or(0),
+                data.get(byte_offset + 5).copied().unwrap_or(0),
+                data.get(byte_offset + 6).copied().unwrap_or(0),
+                data.get(byte_offset + 7).copied().unwrap_or(0),
             ])
         };
 
@@ -162,7 +200,7 @@ impl PackedArray {
 
     /// Get a reference to the raw byte data.
     pub fn data(&self) -> &[u8] {
-        &self.data
+        self.data.as_slice()
     }
 
     /// Create a PackedArray from raw byte data.
@@ -172,6 +210,23 @@ impl PackedArray {
     /// * `length` - Number of elements
     /// * `data` - Raw byte data
     pub fn from_bytes(word_length: u32, length: usize, data: Vec<u8>) -> Self {
+        Self::from_store(word_length, length, PackedBytes::Owned(data))
+    }
+
+    /// Create a read-only PackedArray backed by a memory map of an on-disk
+    /// `SA` / `SAindex` file. The mapped pages are demand-loaded and
+    /// reclaimable under memory pressure (unlike an owned `Vec`), so loading a
+    /// multi-GB suffix array does not pin that much anonymous RAM. `write` will
+    /// panic on the result — memory-mapped arrays are read-only.
+    pub fn from_mmap(word_length: u32, length: usize, mmap: memmap2::Mmap) -> Self {
+        Self::from_store(
+            word_length,
+            length,
+            PackedBytes::Mapped(std::sync::Arc::new(mmap)),
+        )
+    }
+
+    fn from_store(word_length: u32, length: usize, data: PackedBytes) -> Self {
         assert!(word_length > 0 && word_length <= 64);
 
         let word_comp_length = 64 - word_length;
diff --git a/src/index/sa_build.rs b/src/index/sa_build.rs
index 28555a8..09f0434 100644
--- a/src/index/sa_build.rs
+++ b/src/index/sa_build.rs
@@ -212,7 +212,7 @@ pub(crate) fn build_impl(genome: &Genome, force_sentinel: bool) -> Result<Suffix
     //     we count here.
     let n_genome = genome.n_genome as usize;
     let n2 = 2 * n_genome;
-    let n_sa_kept: usize = genome.sequence[..n2.min(genome.sequence.len())]
+    let n_sa_kept: usize = genome.sequence.as_slice()[..n2.min(genome.sequence.len())]
         .par_iter()
         .filter(|&&b| b < 4)
         .count();
@@ -300,7 +300,7 @@ where
     // (1) Count spacer runs so we can pick the narrowest alphabet
     //     width that fits. The build itself is a separate pass through
     //     the genome that emits the typed `Vec<S>` for the chosen S.
-    let n_seg = count_spacer_runs(&genome.sequence[..n2]);
+    let n_seg = count_spacer_runs(&genome.sequence.as_slice()[..n2]);
     let alphabet_max = SENTINEL_BASE as u32 + n_seg;
     log::info!("sa_build: counted {n_seg} per-segment sentinels (alphabet max = {alphabet_max})");
 
@@ -317,7 +317,10 @@ where
         sparse_d, 1,
         "non-default sparse_d isn't wired through this path"
     );
-    let n_sa_kept: usize = genome.sequence[..n2].par_iter().filter(|&&b| b < 4).count();
+    let n_sa_kept: usize = genome.sequence.as_slice()[..n2]
+        .par_iter()
+        .filter(|&&b| b < 4)
+        .count();
     log::info!("sa_build: {n_sa_kept} entries after ACGT + sparse-d={sparse_d} filter");
 
     let n_genome_u64 = n_genome as u64;
@@ -364,15 +367,27 @@ where
             "sa_build: RUSTAR_USE_SENTINEL_TRANSFORM=1, alphabet fits u8 — \
              using sentinel-transform arm"
         );
-        let t_prime: Vec<u8> = build_sentinel_transformed_text(&genome.sequence[..n2], n_seg);
-        dispatch_caps_sa(t_prime, &genome.sequence[..n2], temp_dir, &mut pack_one)?;
+        let t_prime: Vec<u8> =
+            build_sentinel_transformed_text(&genome.sequence.as_slice()[..n2], n_seg);
+        dispatch_caps_sa(
+            t_prime,
+            &genome.sequence.as_slice()[..n2],
+            temp_dir,
+            &mut pack_one,
+        )?;
     } else if force_sentinel && alphabet_max <= <u16 as SaSymbol>::MAX_REPRESENTABLE {
         log::info!(
             "sa_build: RUSTAR_USE_SENTINEL_TRANSFORM=1, alphabet fits u16 — \
              using sentinel-transform arm"
         );
-        let t_prime: Vec<u16> = build_sentinel_transformed_text(&genome.sequence[..n2], n_seg);
-        dispatch_caps_sa(t_prime, &genome.sequence[..n2], temp_dir, &mut pack_one)?;
+        let t_prime: Vec<u16> =
+            build_sentinel_transformed_text(&genome.sequence.as_slice()[..n2], n_seg);
+        dispatch_caps_sa(
+            t_prime,
+            &genome.sequence.as_slice()[..n2],
+            temp_dir,
+            &mut pack_one,
+        )?;
     } else {
         if force_sentinel {
             log::warn!(
@@ -387,7 +402,7 @@ where
                  alphabet_max={alphabet_max}, {n_seg} segments)"
             );
         }
-        dispatch_caps_sa_segmented(&genome.sequence[..n2], temp_dir, &mut pack_one)?;
+        dispatch_caps_sa_segmented(&genome.sequence.as_slice()[..n2], temp_dir, &mut pack_one)?;
     }
 
     debug_assert_eq!(
diff --git a/src/index/sa_index.rs b/src/index/sa_index.rs
index e4df398..9761b10 100644
--- a/src/index/sa_index.rs
+++ b/src/index/sa_index.rs
@@ -167,7 +167,7 @@ impl SaIndex {
             (1u64 << sa_word_length) - 1
         };
         let n_genome = genome.n_genome as usize;
-        let genome_seq: &[u8] = &genome.sequence;
+        let genome_seq: &[u8] = genome.sequence.as_slice();
 
         // Chunk size: 1 M entries per worker. STAR's algorithm
         // visits at most ~chunk_size / isa_step boundaries per chunk
@@ -495,7 +495,7 @@ impl SaIndex {
                 if genome_pos + (k as usize) > genome.sequence.len() {
                     break;
                 }
-                let next_base = genome.sequence[genome_pos + (k - 1) as usize];
+                let next_base = genome.sequence.base(genome_pos + (k - 1) as usize);
                 if next_base >= 4 {
                     break;
                 }
@@ -565,7 +565,7 @@ impl SaIndexBuilder<'_> {
             if genome_pos + (k as usize) > self.genome.sequence.len() {
                 break;
             }
-            let next_base = self.genome.sequence[genome_pos + (k - 1) as usize];
+            let next_base = self.genome.sequence.base(genome_pos + (k - 1) as usize);
             if next_base >= 4 {
                 break;
             }
diff --git a/src/index/suffix_array.rs b/src/index/suffix_array.rs
index 570a7dd..197844d 100644
--- a/src/index/suffix_array.rs
+++ b/src/index/suffix_array.rs
@@ -80,7 +80,7 @@ fn compare_suffixes(
     use std::cmp::Ordering;
 
     let n_genome = genome.n_genome as usize;
-    let sequence = &genome.sequence;
+    let sequence = genome.sequence.as_slice();
 
     // Adjust positions for reverse complement
     let start_a = if reverse_a { pos_a + n_genome } else { pos_a };
@@ -184,12 +184,12 @@ mod tests {
 
         let mut suffixes: Vec<(u64, bool)> = Vec::new();
         for i in 0..n_genome {
-            if genome.sequence[i] < 4 {
+            if genome.sequence.base(i) < 4 {
                 suffixes.push((i as u64, false));
             }
         }
         for i in n_genome..(2 * n_genome) {
-            if genome.sequence[i] < 4 {
+            if genome.sequence.base(i) < 4 {
                 suffixes.push(((i - n_genome) as u64, true));
             }
         }
@@ -272,7 +272,7 @@ mod tests {
         // The lexicographically first suffix should start with the smallest base
         let first_entry = sa.get(0);
         let (first_pos, _) = sa.decode(first_entry);
-        let first_base = genome.sequence[first_pos as usize];
+        let first_base = genome.sequence.base(first_pos as usize);
 
         // In "AAB", the first suffix lexicographically is "A" (from pos 0 or 1)
         assert!(first_base == 0); // A
diff --git a/src/io/bam.rs b/src/io/bam.rs
index e9c9183..4daba2d 100644
--- a/src/io/bam.rs
+++ b/src/io/bam.rs
@@ -465,7 +465,7 @@ mod tests {
 
     fn create_test_genome() -> Genome {
         Genome {
-            sequence: vec![0, 1, 2, 3, 0, 1, 2, 3], // ACGTACGT
+            sequence: vec![0, 1, 2, 3, 0, 1, 2, 3].into(), // ACGTACGT
             n_genome: 8,
             n_genome_real: 8,
             n_chr_real: 1,
diff --git a/src/io/sam.rs b/src/io/sam.rs
index 29716ea..30276be 100644
--- a/src/io/sam.rs
+++ b/src/io/sam.rs
@@ -1404,7 +1404,7 @@ mod tests {
 
     fn make_test_genome() -> Genome {
         Genome {
-            sequence: vec![0, 1, 2, 3, 0, 1, 2, 3], // ACGTACGT
+            sequence: vec![0, 1, 2, 3, 0, 1, 2, 3].into(), // ACGTACGT
             n_genome: 8,
             n_genome_real: 8,
             n_chr_real: 1,
diff --git a/src/junction/gtf.rs b/src/junction/gtf.rs
index 3e3d3be..5a07ded 100644
--- a/src/junction/gtf.rs
+++ b/src/junction/gtf.rs
@@ -310,7 +310,7 @@ mod tests {
     fn test_extract_junctions_single_transcript() {
         // Create a simple genome
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
@@ -362,7 +362,7 @@ mod tests {
     #[test]
     fn test_extract_junctions_multiple_transcripts() {
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
@@ -438,7 +438,7 @@ mod tests {
     #[test]
     fn test_extract_junctions_single_exon_transcript() {
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
@@ -470,7 +470,7 @@ mod tests {
     #[test]
     fn test_extract_junctions_unknown_chromosome() {
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
@@ -517,7 +517,7 @@ mod tests {
     #[test]
     fn test_junction_coordinate_calculation() {
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
@@ -601,7 +601,7 @@ mod tests {
     #[test]
     fn test_extract_junctions_configured_custom_transcript_tag() {
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
diff --git a/src/junction/mod.rs b/src/junction/mod.rs
index a260a2d..a3715f2 100644
--- a/src/junction/mod.rs
+++ b/src/junction/mod.rs
@@ -407,7 +407,7 @@ mod tests {
 
         // Two-chromosome toy genome so chr_start[1] != 0.
         let genome = Genome {
-            sequence: vec![0; 4000],
+            sequence: vec![0; 4000].into(),
             n_genome: 2000,
             n_genome_real: 2000,
             n_chr_real: 2,
diff --git a/src/junction/sj_output.rs b/src/junction/sj_output.rs
index 2b66b3a..1e433ec 100644
--- a/src/junction/sj_output.rs
+++ b/src/junction/sj_output.rs
@@ -228,10 +228,47 @@ impl SpliceJunctionStats {
     ) -> Result<(), Error> {
         let file = File::create(output_path).map_err(|e| Error::io(e, output_path))?;
         let mut writer = BufWriter::new(file);
+        let written = self.write_sj_lines(&mut writer, genome, params)?;
+        writer.flush().map_err(|e| Error::io(e, output_path))?;
+        let filtered = self.junctions.len() as u32 - written;
+        log::info!(
+            "Wrote {} junctions to {} ({} filtered by outSJfilter*)",
+            written,
+            output_path.display(),
+            filtered,
+        );
+        Ok(())
+    }
 
+    /// Surviving junctions sorted by (chr, intron_start, intron_end) — the
+    /// canonical `SJ.out.tab` order, which is also the row order of the `SJ`
+    /// solo-feature matrix. Returns the (intron_start, intron_end) absolute-coord
+    /// keys so the SJ recorder can be mapped to matrix rows.
+    pub(crate) fn sj_feature_order(&self, params: &Parameters) -> Vec<(u64, u64)> {
         let surviving = self.compute_surviving_junctions(params);
+        let mut keys: Vec<(usize, u64, u64)> = self
+            .junctions
+            .iter()
+            .filter(|e| surviving.contains(e.key()))
+            .map(|e| {
+                let k = e.key();
+                (k.chr_idx, k.intron_start, k.intron_end)
+            })
+            .collect();
+        keys.sort_unstable();
+        keys.into_iter().map(|(_, s, e)| (s, e)).collect()
+    }
 
-        // Collect and sort surviving junctions for deterministic output
+    /// Write the 9-column `SJ.out.tab` lines (sorted) to `writer`; returns the
+    /// number written. Shared by `write_output` and the SJ feature's
+    /// `features.tsv`, so both stay in the same order as the SJ matrix rows.
+    pub(crate) fn write_sj_lines(
+        &self,
+        writer: &mut dyn std::io::Write,
+        genome: &Genome,
+        params: &Parameters,
+    ) -> Result<u32, Error> {
+        let surviving = self.compute_surviving_junctions(params);
         let mut output_junctions: Vec<_> = self
             .junctions
             .iter()
@@ -262,11 +299,9 @@ impl SpliceJunctionStats {
                 .chr_name
                 .get(key.chr_idx)
                 .ok_or_else(|| Error::Index("Invalid chromosome index in junction".to_string()))?;
-
             let chr_start_pos = genome.chr_start[key.chr_idx];
             let chr_pos_start = key.intron_start - chr_start_pos + 1;
             let chr_pos_end = key.intron_end - chr_start_pos + 1;
-
             writeln!(
                 writer,
                 "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
@@ -280,21 +315,10 @@ impl SpliceJunctionStats {
                 multi,
                 max_overhang
             )
-            .map_err(|e| Error::io(e, output_path))?;
+            .map_err(|e| Error::Index(format!("SJ write: {e}")))?;
             written += 1;
         }
-
-        writer.flush().map_err(|e| Error::io(e, output_path))?;
-
-        let filtered = self.junctions.len() as u32 - written;
-        log::info!(
-            "Wrote {} junctions to {} ({} filtered by outSJfilter*)",
-            written,
-            output_path.display(),
-            filtered,
-        );
-
-        Ok(())
+        Ok(written)
     }
 
     /// Get the number of unique junctions tracked
@@ -523,7 +547,7 @@ mod tests {
         stats.record_junction(0, 300, 400, 2, SpliceMotif::GcAg, false, 15, true);
 
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
@@ -584,7 +608,7 @@ mod tests {
         stats.record_junction(0, 300, 400, 1, SpliceMotif::GtAg, true, 20, false);
 
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
@@ -619,7 +643,7 @@ mod tests {
         stats.record_junction(0, 100, 200, 1, SpliceMotif::NonCanonical, true, 2, true);
 
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
@@ -697,7 +721,7 @@ mod tests {
         }
 
         let genome = Genome {
-            sequence: vec![0; 1000],
+            sequence: vec![0; 1000].into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
diff --git a/src/junction/sjdb_insert.rs b/src/junction/sjdb_insert.rs
index 85fa625..7014418 100644
--- a/src/junction/sjdb_insert.rs
+++ b/src/junction/sjdb_insert.rs
@@ -222,7 +222,7 @@ const GSJ_SPACING: u8 = 5;
 ///
 /// Stops at genome bounds, on any N-base (code ≥ 4), or at the 255 cap.
 pub fn compute_shifts(genome: &Genome, s: u64, e: u64, n_genome_real: u64) -> (u8, u8) {
-    let forward = &genome.sequence[..n_genome_real as usize];
+    let forward = &genome.sequence.as_slice()[..n_genome_real as usize];
     let si = s as usize;
     let ei = e as usize;
 
@@ -448,7 +448,7 @@ pub fn build_gsj(
 ) -> Result<Vec<u8>, Error> {
     let overhang = sjdb_overhang as usize;
     let sjdb_length = 2 * overhang + 1;
-    let forward = &genome.sequence[..n_genome_real as usize];
+    let forward = &genome.sequence.as_slice()[..n_genome_real as usize];
     let mut gsj = vec![GSJ_SPACING; junctions.len() * sjdb_length];
 
     for (i, pj) in junctions.iter().enumerate() {
@@ -569,7 +569,7 @@ mod tests {
         let mut seq = forward;
         seq.extend(std::iter::repeat_n(5u8, n));
         Genome {
-            sequence: seq,
+            sequence: seq.into(),
             n_genome: n as u64,
             n_genome_real: n as u64,
             n_chr_real: 1,
@@ -974,7 +974,7 @@ mod tests {
         let mut seq = vec![5u8; 4000];
         seq[..2000].copy_from_slice(&vec![0u8; 2000]);
         let genome = Genome {
-            sequence: seq,
+            sequence: seq.into(),
             n_genome: 2000,
             n_genome_real: 2000,
             n_chr_real: 2,
@@ -1113,7 +1113,7 @@ mod tests {
         let mut seq = vec![5u8; 4000];
         seq[..2000].copy_from_slice(&vec![0u8; 2000]);
         let genome = Genome {
-            sequence: seq,
+            sequence: seq.into(),
             n_genome: 2000,
             n_genome_real: 2000,
             n_chr_real: 1,
diff --git a/src/lib.rs b/src/lib.rs
index c9a3aa9..a916f72 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -33,6 +33,7 @@ pub mod io;
 pub mod junction;
 pub mod mapq;
 pub mod quant;
+pub mod solo;
 pub mod stats;
 
 use log::info;
@@ -217,8 +218,8 @@ fn align_reads(params: &Parameters) -> anyhow::Result<()> {
         info!("Using single-threaded mode");
     }
 
-    // Validate read files
-    if params.read_files_in.is_empty() {
+    // Validate read files (SmartSeq supplies reads via --readFilesManifest).
+    if params.read_files_in.is_empty() && params.solo_type != params::SoloType::SmartSeq {
         anyhow::bail!("No read files specified (--readFilesIn)");
     }
 
@@ -278,17 +279,60 @@ fn align_reads(params: &Parameters) -> anyhow::Result<()> {
             None
         };
 
+    // SmartSeq has no barcodes/UMIs — a dedicated manifest-driven path.
+    if params.solo_type == params::SoloType::SmartSeq {
+        let stats = run_smartseq(&index, &params)?;
+        let log_path = params.output_path("Log.final.out");
+        if let Some(parent) = log_path.parent() {
+            std::fs::create_dir_all(parent)?;
+        }
+        stats.write_log_final(
+            &log_path,
+            time_start,
+            chrono::Local::now(),
+            chrono::Local::now(),
+        )?;
+        info!("Alignment complete!");
+        return Ok(());
+    }
+
+    // Build the STARsolo context (whitelist + gene model) if a droplet solo run.
+    let solo_ctx: Option<std::sync::Arc<crate::solo::SoloContext>> = if params.solo_enabled() {
+        info!(
+            "STARsolo: soloType={} — building barcode + gene context",
+            params.solo_type
+        );
+        Some(std::sync::Arc::new(crate::solo::SoloContext::build(
+            &params,
+            &index.genome,
+        )?))
+    } else {
+        None
+    };
+
     let time_map_start = chrono::Local::now();
 
     // 2. Dispatch based on two-pass mode
     let stats = match params.twopass_mode {
         TwopassMode::None => {
             info!("Running single-pass alignment");
-            run_single_pass(&index, &params, quant_ctx.as_ref(), tr_idx.as_ref())?
+            run_single_pass(
+                &index,
+                &params,
+                quant_ctx.as_ref(),
+                tr_idx.as_ref(),
+                solo_ctx.as_ref(),
+            )?
         }
         TwopassMode::Basic => {
             info!("Running two-pass alignment mode");
-            run_two_pass(&index, &params, quant_ctx.as_ref(), tr_idx.as_ref())?
+            run_two_pass(
+                &index,
+                &params,
+                quant_ctx.as_ref(),
+                tr_idx.as_ref(),
+                solo_ctx.as_ref(),
+            )?
         }
     };
 
@@ -329,12 +373,217 @@ fn align_reads(params: &Parameters) -> anyhow::Result<()> {
     Ok(())
 }
 
+/// Log STARsolo barcode/record stats and write the per-cell matrices (raw +
+/// filtered), `Summary.csv`, and the SJ feature matrix. Called from the solo
+/// branch of `run_single_pass`, where `sj_stats` is live.
+fn write_solo_output(
+    sctx: &std::sync::Arc<crate::solo::SoloContext>,
+    params: &Parameters,
+    stats: &std::sync::Arc<crate::stats::AlignmentStats>,
+    sj_stats: &std::sync::Arc<crate::junction::SpliceJunctionStats>,
+    index: &std::sync::Arc<crate::index::GenomeIndex>,
+) -> anyhow::Result<()> {
+    use std::sync::atomic::Ordering;
+    let s = &sctx.stats;
+    info!(
+        "STARsolo barcode stats: exact={} 1MM={} multiMM={} noMatch={} N-in-CB={} multReject={} N-in-UMI={} UMIhomopolymer={}",
+        s.yes_exact.load(Ordering::Relaxed),
+        s.yes_one_mm.load(Ordering::Relaxed),
+        s.yes_mult_mm.load(Ordering::Relaxed),
+        s.no_match.load(Ordering::Relaxed),
+        s.n_in_cb.load(Ordering::Relaxed),
+        s.mult_rejected.load(Ordering::Relaxed),
+        s.n_in_umi.load(Ordering::Relaxed),
+        s.umi_homopolymer.load(Ordering::Relaxed),
+    );
+    for (feature, recorder) in sctx.features.iter().zip(&sctx.recorders) {
+        info!(
+            "STARsolo {}: collected {} resolved (CB,UMI,gene) records ({} deferred 1MM_multi)",
+            feature.dir_name(),
+            recorder.n_records(),
+            recorder.n_multi_records(),
+        );
+    }
+    crate::solo::write_gene_matrix(sctx, params, stats, Some(&**sj_stats), &index.genome)?;
+    Ok(())
+}
+
+/// `--soloType SmartSeq`: align each manifest cell's reads and count reads per
+/// gene (no barcodes, no UMIs). Writes `Solo.out/Gene/raw/` (genes × cells) and
+/// returns the alignment stats.
+fn run_smartseq(
+    index: &std::sync::Arc<crate::index::GenomeIndex>,
+    params: &Parameters,
+) -> anyhow::Result<std::sync::Arc<crate::stats::AlignmentStats>> {
+    use crate::align::read_align::{PairedAlignmentResult, align_paired_read, align_read};
+    use crate::solo::{GeneAssignment, SoloStrand, classify_read};
+    use rayon::prelude::*;
+    use std::sync::Arc;
+
+    let manifest = params
+        .read_files_manifest
+        .as_ref()
+        .ok_or_else(|| anyhow::anyhow!("--soloType SmartSeq requires --readFilesManifest"))?;
+    let cells = crate::solo::smartseq::parse_manifest(manifest)?;
+    info!(
+        "STARsolo SmartSeq: {} cells from {}",
+        cells.len(),
+        manifest.display()
+    );
+
+    let gtf = params.sjdb_gtf_file.as_ref().ok_or_else(|| {
+        anyhow::anyhow!("--soloType SmartSeq Gene counting requires --sjdbGTFfile")
+    })?;
+    let exons = crate::junction::gtf::parse_gtf_configured(
+        gtf,
+        &params.sjdb_gtf_feature_exon,
+        &params.sjdb_gtf_chr_prefix,
+    )?;
+    let gene_ann = crate::quant::GeneAnnotation::from_gtf_exons_configured(
+        &exons,
+        &index.genome,
+        &params.sjdb_gtf_tag_exon_parent_gene,
+    );
+    info!(
+        "STARsolo SmartSeq: {} genes from {}",
+        gene_ann.n_genes(),
+        gtf.display()
+    );
+    let strand: SoloStrand = params.solo_strand.parse().unwrap_or_default();
+    let max_multimaps = params.out_filter_multimap_nmax as usize;
+
+    let stats = Arc::new(crate::stats::AlignmentStats::new());
+    let cell_ids: Vec<String> = cells.iter().map(|c| c.cell_id.clone()).collect();
+    let counts = crate::solo::smartseq::SmartSeqCounts::new(cell_ids, gene_ann.gene_ids.len());
+
+    // Assign a (possibly multi-locus) read/fragment to a gene and count it.
+    let assign_count = |ci: usize, transcripts: &[crate::align::transcript::Transcript]| {
+        if let GeneAssignment::Gene(g) =
+            classify_read(transcripts, &gene_ann, strand, true, false, false).gene
+        {
+            counts.add(ci, g);
+        }
+    };
+    let cmd = params.read_files_command.as_deref();
+
+    for (ci, cell) in cells.iter().enumerate() {
+        match &cell.read2 {
+            // Single-end: count reads.
+            None => {
+                let mut reader = crate::io::fastq::FastqReader::open(&cell.read1, cmd)?;
+                loop {
+                    let batch = reader.read_batch(10_000)?;
+                    if batch.is_empty() {
+                        break;
+                    }
+                    batch.par_iter().for_each(|read| {
+                        stats.record_read_bases(read.sequence.len() as u64);
+                        let Ok((transcripts, _chim, n_for_mapq, reason)) =
+                            align_read(&read.sequence, &read.name, index, params)
+                        else {
+                            return;
+                        };
+                        let n = if transcripts.is_empty() && n_for_mapq > 0 {
+                            n_for_mapq
+                        } else {
+                            transcripts.len()
+                        };
+                        stats.record_alignment(n, max_multimaps);
+                        if transcripts.is_empty() {
+                            stats.record_unmapped_reason(
+                                reason.unwrap_or(crate::stats::UnmappedReason::Other),
+                            );
+                        } else if transcripts.len() == 1 {
+                            stats.record_transcript_stats(&transcripts[0]);
+                        }
+                        assign_count(ci, &transcripts);
+                    });
+                }
+            }
+            // Paired-end: align both mates as a fragment, count the fragment once
+            // (gene from the union of both mates' overlaps).
+            Some(r2) => {
+                let mut reader = crate::io::fastq::PairedFastqReader::open(&cell.read1, r2, cmd)?;
+                loop {
+                    let mut batch = Vec::with_capacity(10_000);
+                    while batch.len() < 10_000 {
+                        match reader.next_paired()? {
+                            Some(p) => batch.push(p),
+                            None => break,
+                        }
+                    }
+                    if batch.is_empty() {
+                        break;
+                    }
+                    batch.par_iter().for_each(|pr| {
+                        stats.record_read_bases(
+                            (pr.mate1.sequence.len() + pr.mate2.sequence.len()) as u64,
+                        );
+                        let Ok((results, _chim, n_for_mapq, reason)) = align_paired_read(
+                            &pr.mate1.sequence,
+                            &pr.mate2.sequence,
+                            &pr.name,
+                            index,
+                            params,
+                        ) else {
+                            return;
+                        };
+                        let n_pairs = results.len();
+                        let mut trs = Vec::with_capacity(n_pairs * 2);
+                        for r in results {
+                            match r {
+                                PairedAlignmentResult::BothMapped(pa) => {
+                                    trs.push(pa.mate1_transcript);
+                                    trs.push(pa.mate2_transcript);
+                                }
+                                PairedAlignmentResult::HalfMapped {
+                                    mapped_transcript, ..
+                                } => trs.push(mapped_transcript),
+                            }
+                        }
+                        let n = if trs.is_empty() && n_for_mapq > 0 {
+                            n_for_mapq
+                        } else {
+                            n_pairs
+                        };
+                        stats.record_alignment(n, max_multimaps);
+                        if trs.is_empty() {
+                            stats.record_unmapped_reason(
+                                reason.unwrap_or(crate::stats::UnmappedReason::Other),
+                            );
+                        }
+                        assign_count(ci, &trs);
+                    });
+                }
+            }
+        }
+    }
+
+    let solo_dir = params
+        .solo_out_file_names
+        .first()
+        .cloned()
+        .unwrap_or_else(|| "Solo.out/".to_string());
+    let raw_dir = params.output_path(&format!("{solo_dir}Gene/raw/"));
+    let gzip = matches!(params.solo_out_gzip.as_str(), "yes" | "Yes" | "true");
+    let nnz = counts.write_matrix(&raw_dir, &gene_ann.gene_ids, gzip)?;
+    info!(
+        "STARsolo SmartSeq: wrote Gene/raw matrix ({} genes × {} cells, {} entries)",
+        gene_ann.n_genes(),
+        cells.len(),
+        nnz,
+    );
+    stats.print_summary();
+    Ok(stats)
+}
+
 /// Run single-pass alignment (original logic)
 fn run_single_pass(
     index: &std::sync::Arc<crate::index::GenomeIndex>,
     params: &Parameters,
     quant_ctx: Option<&std::sync::Arc<crate::quant::QuantContext>>,
     tr_idx: Option<&std::sync::Arc<crate::quant::transcriptome::TranscriptomeIndex>>,
+    solo_ctx: Option<&std::sync::Arc<crate::solo::SoloContext>>,
 ) -> anyhow::Result<std::sync::Arc<crate::stats::AlignmentStats>> {
     use crate::io::bam::{BamWriter, SortedBamWriter};
     use crate::io::sam::SamWriter;
@@ -365,7 +614,7 @@ fn run_single_pass(
     use crate::io::fastq::UnmappedFastqWriter;
     use crate::params::OutReadsUnmapped;
 
-    let is_paired = params.read_files_in.len() == 2;
+    let is_paired = params.read_files_in.len() == 2 && !params.solo_enabled();
     let mut unmapped_w1: Option<UnmappedFastqWriter> =
         if params.out_reads_unmapped == OutReadsUnmapped::Fastx {
             let path = params.output_path("Unmapped.out.mate1");
@@ -442,13 +691,37 @@ fn run_single_pass(
                 }
             }
             OutSamFormat::None => {
-                anyhow::bail!("Output format 'None' not yet implemented");
+                info!("--outSAMtype None: skipping alignment output (count/quant only)");
+                Box::new(NullWriter)
             }
         },
     };
 
     // Align reads through the boxed writer.
-    match params.read_files_in.len() {
+    //
+    // Solo runs supply two `--readFilesIn` files (cDNA read + barcode read) but
+    // are single-end *alignment* runs: only the cDNA read (file 0) is aligned.
+    // The dedicated solo loop reads the barcode read in lockstep, quantifies
+    // per cell, and otherwise emits the cDNA alignments like the SE path.
+    if let Some(sctx) = solo_ctx {
+        align_reads_solo(params, index, writer.as_mut(), &stats, &sj_stats, sctx)?;
+        writer.finish()?;
+        if let Some(ref mut w) = tr_writer {
+            w.finish()?;
+        }
+        let sj_output_path = params.output_path("SJ.out.tab");
+        if !sj_stats.is_empty() {
+            sj_stats.write_output(&sj_output_path, &index.genome, params)?;
+        }
+        // Per-cell count matrices (raw + filtered), Summary.csv, and the SJ
+        // feature matrix — written here where sj_stats is available.
+        write_solo_output(sctx, params, &stats, &sj_stats, index)?;
+        stats.print_summary();
+        return Ok(stats);
+    }
+
+    let n_align_files = params.read_files_in.len();
+    match n_align_files {
         1 => align_reads_single_end(
             params,
             index,
@@ -504,6 +777,7 @@ fn run_two_pass(
     params: &Parameters,
     quant_ctx: Option<&std::sync::Arc<crate::quant::QuantContext>>,
     tr_idx: Option<&std::sync::Arc<crate::quant::transcriptome::TranscriptomeIndex>>,
+    solo_ctx: Option<&std::sync::Arc<crate::solo::SoloContext>>,
 ) -> anyhow::Result<std::sync::Arc<crate::stats::AlignmentStats>> {
     use std::sync::Arc;
 
@@ -534,7 +808,7 @@ fn run_two_pass(
 
     // PASS 2: Re-alignment with merged DB (quant counts happen here)
     info!("Two-pass mode: Pass 2 - Re-alignment");
-    let stats = run_single_pass(&Arc::new(merged_index), params, quant_ctx, tr_idx)?;
+    let stats = run_single_pass(&Arc::new(merged_index), params, quant_ctx, tr_idx, solo_ctx)?;
 
     Ok(stats)
 }
@@ -567,8 +841,14 @@ fn run_pass1(
     // Create NullWriter (discard SAM/BAM output in pass 1)
     let mut null_writer = NullWriter;
 
-    // Align reads (single-end or paired-end); no quant counting in pass 1
-    match params.read_files_in.len() {
+    // Align reads (single-end or paired-end); no quant counting in pass 1.
+    // Solo runs align only the cDNA read (file 0) — route to the SE path.
+    let n_align_files = if params.solo_enabled() {
+        1
+    } else {
+        params.read_files_in.len()
+    };
+    match n_align_files {
         1 => align_reads_single_end(
             &params_pass1,
             index,
@@ -937,7 +1217,9 @@ fn align_reads_single_end<W: AlignmentWriter + ?Sized>(
     let clip5p = params.clip5p_nbases as usize;
     let clip3p = params.clip3p_nbases as usize;
     let max_multimaps = params.out_filter_multimap_nmax as usize;
-    let output_unmapped = params.out_sam_unmapped != params::OutSamUnmapped::None;
+    // `--outSAMtype None` (e.g. quant-only) skips building SAM records.
+    let emit_sam = params.emits_alignments();
+    let output_unmapped = emit_sam && params.out_sam_unmapped != params::OutSamUnmapped::None;
     let write_unmapped_fastq = params.out_reads_unmapped == params::OutReadsUnmapped::Fastx;
     let by_sjout = params.out_filter_type == OutFilterType::BySJout;
 
@@ -1083,36 +1365,39 @@ fn align_reads_single_end<W: AlignmentWriter + ?Sized>(
                         Vec::new()
                     };
 
-                // Build SAM records (no I/O, just construction)
+                // Build SAM records (no I/O, just construction).
+                // Skipped entirely under `--outSAMtype None`.
                 let is_unmapped_se = transcripts.is_empty();
-                if is_unmapped_se {
-                    // Unmapped
-                    if output_unmapped {
-                        let record = SamWriter::build_unmapped_record(
+                if emit_sam {
+                    if is_unmapped_se {
+                        // Unmapped
+                        if output_unmapped {
+                            let record = SamWriter::build_unmapped_record(
+                                &read.name,
+                                &clipped_seq,
+                                &clipped_qual,
+                                params,
+                                unmapped_reason.unwrap_or(crate::stats::UnmappedReason::Other),
+                            )?;
+                            buffer.push(record);
+                        }
+                    } else if transcripts.len() <= max_multimaps {
+                        // Mapped (within multimap limit)
+                        let records = SamWriter::build_alignment_records(
                             &read.name,
                             &clipped_seq,
                             &clipped_qual,
+                            &transcripts,
+                            &index.genome,
                             params,
-                            unmapped_reason.unwrap_or(crate::stats::UnmappedReason::Other),
+                            n_for_mapq,
                         )?;
-                        buffer.push(record);
-                    }
-                } else if transcripts.len() <= max_multimaps {
-                    // Mapped (within multimap limit)
-                    let records = SamWriter::build_alignment_records(
-                        &read.name,
-                        &clipped_seq,
-                        &clipped_qual,
-                        &transcripts,
-                        &index.genome,
-                        params,
-                        n_for_mapq,
-                    )?;
-                    for record in records {
-                        buffer.push(record);
+                        for record in records {
+                            buffer.push(record);
+                        }
                     }
+                    // else: too many loci, skip output
                 }
-                // else: too many loci, skip output
 
                 // Transcriptome SAM projection for --quantMode TranscriptomeSAM.
                 let transcriptome_records: Vec<noodles::sam::alignment::record_buf::RecordBuf> =
@@ -1307,6 +1592,242 @@ fn align_reads_single_end<W: AlignmentWriter + ?Sized>(
     Ok(())
 }
 
+/// Align a STARsolo single-cell run: the cDNA read (file 0) is aligned exactly
+/// like the SE path, while the barcode read (file 1) is read in lockstep and
+/// quantified per cell. Mapped cDNA alignments are written to the SAM/BAM output
+/// just like a normal SE run; the per-cell (CB, UMI, gene) records are collected
+/// into `solo_ctx.recorder` for the matrix output that follows in Phase 14.4.
+///
+/// Solo runs are single-pass and (for now) do not support BySJout / chimeric /
+/// transcriptome-SAM side outputs — those are not part of the STARsolo MVP.
+fn align_reads_solo<W: AlignmentWriter + ?Sized>(
+    params: &Parameters,
+    index: &std::sync::Arc<crate::index::GenomeIndex>,
+    writer: &mut W,
+    stats: &std::sync::Arc<crate::stats::AlignmentStats>,
+    sj_stats: &std::sync::Arc<crate::junction::SpliceJunctionStats>,
+    solo_ctx: &std::sync::Arc<crate::solo::SoloContext>,
+) -> anyhow::Result<()> {
+    use crate::align::read_align::align_read;
+    use crate::io::fastq::clip_read;
+    use crate::io::sam::{BufferedSamRecords, SamWriter};
+    use crate::solo::{SoloCountRecord, SoloMultiRecord};
+    use rayon::prelude::*;
+    use std::sync::Arc;
+
+    let cdna_file = &params.read_files_in[0];
+    let barcode_file = &params.read_files_in[1];
+    info!(
+        "STARsolo: cDNA reads from {}, barcode reads from {}",
+        cdna_file.display(),
+        barcode_file.display()
+    );
+    let mut reader = crate::solo::open_reader(params)?;
+
+    let stats = Arc::clone(stats);
+    let sj_stats = Arc::clone(sj_stats);
+    let solo = Arc::clone(solo_ctx);
+
+    let mut read_count = 0u64;
+    let max_reads = if params.read_map_number < 0 {
+        u64::MAX
+    } else {
+        params.read_map_number as u64
+    };
+    let batch_size = 10000;
+    let clip5p = params.clip5p_nbases as usize;
+    let clip3p = params.clip3p_nbases as usize;
+    let cr4_clip = params.clip_adapter_type == "CellRanger4";
+    let max_multimaps = params.out_filter_multimap_nmax as usize;
+    // With `--outSAMtype None` (count-only) we skip building SAM records entirely
+    // — a large saving for solo runs that only need the count matrix.
+    let emit_sam = params.emits_alignments();
+    let output_unmapped = emit_sam && params.out_sam_unmapped != params::OutSamUnmapped::None;
+
+    /// Per-read result for the solo loop (one outcome per quantified feature).
+    struct SoloReadProduct {
+        sam_records: BufferedSamRecords,
+        per_feature: Vec<crate::solo::FeatureOutcome>,
+        sj: Vec<crate::solo::SjCountRecord>,
+        velocyto: Option<crate::solo::VelocytoRecord>,
+    }
+
+    info!("STARsolo: aligning cDNA reads and quantifying barcodes...");
+    loop {
+        let batch = reader.read_batch(batch_size)?;
+        if batch.is_empty() {
+            break;
+        }
+        let reads_to_process = if read_count + batch.len() as u64 > max_reads {
+            (max_reads - read_count) as usize
+        } else {
+            batch.len()
+        };
+        let batch_to_process = &batch[..reads_to_process];
+
+        let batch_results: Vec<Result<SoloReadProduct, error::Error>> = batch_to_process
+            .par_iter()
+            .map(|sread| {
+                let index = Arc::clone(index);
+                let stats = Arc::clone(&stats);
+                let sj_stats = Arc::clone(&sj_stats);
+                let solo = Arc::clone(&solo);
+
+                let read = &sread.cdna;
+                // CellRanger4 adapter clipping (TSO 5' + polyA 3') runs before
+                // the fixed clip5p/clip3p Nbases trimming.
+                let (cr_seq, cr_qual) = if cr4_clip {
+                    crate::solo::clip_adapter_cr4(&read.sequence, &read.quality)
+                } else {
+                    (read.sequence.clone(), read.quality.clone())
+                };
+                let (clipped_seq, clipped_qual) = clip_read(&cr_seq, &cr_qual, clip5p, clip3p);
+                let mut buffer = BufferedSamRecords::new();
+                stats.record_read_bases(clipped_seq.len() as u64);
+
+                if clipped_seq.is_empty() {
+                    stats.record_alignment(0, max_multimaps);
+                    stats.record_unmapped_reason(crate::stats::UnmappedReason::Other);
+                    // No alignment → barcode still counts toward stats (unmapped → no gene).
+                    let outcome = solo.process_read(&[], sread.barcode.as_ref(), &[]);
+                    return Ok(SoloReadProduct {
+                        sam_records: buffer,
+                        per_feature: outcome.per_feature,
+                        sj: outcome.sj,
+                        velocyto: outcome.velocyto,
+                    });
+                }
+
+                let (transcripts, _chimeric, n_for_mapq, unmapped_reason) =
+                    align_read(&clipped_seq, &read.name, &index, params)?;
+
+                let n_for_stats = if transcripts.is_empty() && n_for_mapq > 0 {
+                    n_for_mapq
+                } else {
+                    transcripts.len()
+                };
+                stats.record_alignment(n_for_stats, max_multimaps);
+                if transcripts.is_empty() && unmapped_reason.is_some() {
+                    stats.record_unmapped_reason(
+                        unmapped_reason.unwrap_or(crate::stats::UnmappedReason::Other),
+                    );
+                } else if transcripts.len() == 1 {
+                    stats.record_transcript_stats(&transcripts[0]);
+                }
+
+                let is_unique = transcripts.len() == 1;
+                for transcript in &transcripts {
+                    record_transcript_junctions(transcript, &index, &sj_stats, is_unique);
+                }
+
+                // SJ feature: the junctions crossed by a uniquely-mapped read
+                // (absolute intron coords), mapped to SJ.out.tab rows at output.
+                let junctions: Vec<(u64, u64)> =
+                    if solo.sj_enabled && is_unique && transcripts[0].n_junction > 0 {
+                        extract_junction_keys(&transcripts[0], &index)
+                            .into_iter()
+                            .map(|k| (k.intron_start, k.intron_end))
+                            .collect()
+                    } else {
+                        Vec::new()
+                    };
+
+                // Solo quantification (CB match + UMI check + gene assignment).
+                let outcome = solo.process_read(&transcripts, sread.barcode.as_ref(), &junctions);
+
+                // Build SAM records for the cDNA alignment (same as SE path).
+                // Skipped entirely under `--outSAMtype None` (count-only).
+                if emit_sam {
+                    if transcripts.is_empty() {
+                        if output_unmapped {
+                            let record = SamWriter::build_unmapped_record(
+                                &read.name,
+                                &clipped_seq,
+                                &clipped_qual,
+                                params,
+                                unmapped_reason.unwrap_or(crate::stats::UnmappedReason::Other),
+                            )?;
+                            buffer.push(record);
+                        }
+                    } else if transcripts.len() <= max_multimaps {
+                        let records = SamWriter::build_alignment_records(
+                            &read.name,
+                            &clipped_seq,
+                            &clipped_qual,
+                            &transcripts,
+                            &index.genome,
+                            params,
+                            n_for_mapq,
+                        )?;
+                        for record in records {
+                            buffer.push(record);
+                        }
+                    }
+                }
+
+                Ok(SoloReadProduct {
+                    sam_records: buffer,
+                    per_feature: outcome.per_feature,
+                    sj: outcome.sj,
+                    velocyto: outcome.velocyto,
+                })
+            })
+            .collect();
+
+        // Sequential write + per-feature record collection.
+        let n_feat = solo.features.len();
+        let mut feat_records: Vec<Vec<SoloCountRecord>> = (0..n_feat).map(|_| Vec::new()).collect();
+        let mut feat_multi: Vec<Vec<SoloMultiRecord>> = (0..n_feat).map(|_| Vec::new()).collect();
+        let mut feat_multi_gene: Vec<Vec<crate::solo::MultiGeneRecord>> =
+            (0..n_feat).map(|_| Vec::new()).collect();
+        let mut sj_batch: Vec<crate::solo::SjCountRecord> = Vec::new();
+        let mut velo_batch: Vec<crate::solo::VelocytoRecord> = Vec::new();
+        for result in batch_results {
+            let product = result?;
+            writer.write_batch(&product.sam_records.records)?;
+            for (fi, fo) in product.per_feature.into_iter().enumerate() {
+                if let Some(r) = fo.record {
+                    feat_records[fi].push(r);
+                }
+                if let Some(m) = fo.multi {
+                    feat_multi[fi].push(m);
+                }
+                if let Some(mg) = fo.multi_gene {
+                    feat_multi_gene[fi].push(mg);
+                }
+            }
+            sj_batch.extend(product.sj);
+            velo_batch.extend(product.velocyto);
+        }
+        for (fi, recorder) in solo.recorders.iter().enumerate() {
+            recorder.extend(
+                std::mem::take(&mut feat_records[fi]),
+                std::mem::take(&mut feat_multi[fi]),
+            );
+            let mg = std::mem::take(&mut feat_multi_gene[fi]);
+            if !mg.is_empty() {
+                recorder.multi_gene.lock().unwrap().extend(mg);
+            }
+        }
+        if !sj_batch.is_empty() {
+            solo.sj_records.lock().unwrap().extend(sj_batch);
+        }
+        if !velo_batch.is_empty() {
+            solo.velocyto_records.lock().unwrap().extend(velo_batch);
+        }
+
+        read_count += reads_to_process as u64;
+        if read_count % 100_000 < batch_size as u64 {
+            info!("STARsolo: processed {read_count} reads...");
+        }
+        if read_count >= max_reads {
+            break;
+        }
+    }
+
+    Ok(())
+}
+
 /// Align paired-end reads
 #[allow(clippy::too_many_arguments)]
 fn align_reads_paired_end<W: AlignmentWriter + ?Sized>(
@@ -1368,7 +1889,9 @@ fn align_reads_paired_end<W: AlignmentWriter + ?Sized>(
     let clip5p = params.clip5p_nbases as usize;
     let clip3p = params.clip3p_nbases as usize;
     let max_multimaps = params.out_filter_multimap_nmax as usize;
-    let output_unmapped = params.out_sam_unmapped != params::OutSamUnmapped::None;
+    // `--outSAMtype None` (e.g. quant-only) skips building SAM records.
+    let emit_sam = params.emits_alignments();
+    let output_unmapped = emit_sam && params.out_sam_unmapped != params::OutSamUnmapped::None;
     let write_unmapped_fastq = params.out_reads_unmapped == params::OutReadsUnmapped::Fastx;
     let by_sjout = params.out_filter_type == OutFilterType::BySJout;
 
@@ -1607,8 +2130,10 @@ fn align_reads_paired_end<W: AlignmentWriter + ?Sized>(
                     Vec::new()
                 };
 
-                // Build SAM records
-                if results.is_empty() {
+                // Build SAM records (skipped entirely under `--outSAMtype None`).
+                if !emit_sam {
+                    // count/quant-only: no SAM record construction
+                } else if results.is_empty() {
                     // Unmapped pair
                     if output_unmapped {
                         let records = SamWriter::build_paired_unmapped_records(
diff --git a/src/params/mod.rs b/src/params/mod.rs
index a63b5e8..d248b28 100644
--- a/src/params/mod.rs
+++ b/src/params/mod.rs
@@ -221,6 +221,62 @@ impl std::str::FromStr for TwopassMode {
     }
 }
 
+// ---------------------------------------------------------------------------
+// STARsolo (single-cell) type
+// ---------------------------------------------------------------------------
+
+/// STAR's `--soloType` — selects the single-cell barcode geometry.
+///
+/// Mirrors STAR's `ParametersSolo::typeStr` values. Only `None` and
+/// `CB_UMI_Simple` (droplet 10x-style) are functional in Phase 14.1; the
+/// remaining variants are parsed so the CLI accepts them and later sub-phases
+/// can fill in behavior.
+#[derive(Debug, Clone, PartialEq, Eq, Default)]
+pub enum SoloType {
+    /// Not a single-cell run (default).
+    #[default]
+    None,
+    /// One cell barcode + one UMI at fixed positions in the barcode read
+    /// (10x Chromium, Drop-seq, inDrops-simple, etc.). STAR alias: `Droplet`.
+    CbUmiSimple,
+    /// Multi-segment cell barcode and/or UMI, optionally adapter-anchored.
+    CbUmiComplex,
+    /// Barcodes passed through as SAM tags only (no collapsing).
+    CbSamTagOut,
+    /// Plate-based Smart-seq: one cell per read-group, no UMI.
+    SmartSeq,
+}
+
+impl std::str::FromStr for SoloType {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "None" => Ok(Self::None),
+            // STAR accepts both the descriptive name and the `Droplet` alias.
+            "CB_UMI_Simple" | "Droplet" => Ok(Self::CbUmiSimple),
+            "CB_UMI_Complex" => Ok(Self::CbUmiComplex),
+            "CB_samTagOut" => Ok(Self::CbSamTagOut),
+            "SmartSeq" => Ok(Self::SmartSeq),
+            _ => Err(format!(
+                "unknown soloType '{s}'; expected None, CB_UMI_Simple, CB_UMI_Complex, CB_samTagOut, or SmartSeq"
+            )),
+        }
+    }
+}
+
+impl std::fmt::Display for SoloType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let s = match self {
+            Self::None => "None",
+            Self::CbUmiSimple => "CB_UMI_Simple",
+            Self::CbUmiComplex => "CB_UMI_Complex",
+            Self::CbSamTagOut => "CB_samTagOut",
+            Self::SmartSeq => "SmartSeq",
+        };
+        write!(f, "{s}")
+    }
+}
+
 // ---------------------------------------------------------------------------
 // Parameters struct
 // ---------------------------------------------------------------------------
@@ -284,6 +340,12 @@ pub struct Parameters {
     #[arg(long = "readFilesCommand")]
     pub read_files_command: Option<String>,
 
+    /// `--soloType SmartSeq` manifest: a TSV with `read1 <TAB> read2 <TAB> cellID`
+    /// per line (`read2` = `-` for single-end). Each line is one plate-well cell;
+    /// reads are counted per gene with no UMI.
+    #[arg(long = "readFilesManifest")]
+    pub read_files_manifest: Option<PathBuf>,
+
     /// Number of reads to map; -1 = all
     #[arg(long = "readMapNumber", default_value_t = -1, allow_hyphen_values = true)]
     pub read_map_number: i64,
@@ -296,6 +358,13 @@ pub struct Parameters {
     #[arg(long = "clip3pNbases", default_value_t = 0)]
     pub clip3p_nbases: u32,
 
+    /// Adapter clipping type applied to the cDNA read: `Hamming` (default,
+    /// adapter-sequence based, no-op when no adapter is configured) or
+    /// `CellRanger4` (clip the 10x TSO from the 5' end and trim the 3' polyA
+    /// tail, to match CellRanger ≥ 4.0).
+    #[arg(long = "clipAdapterType", default_value = "Hamming")]
+    pub clip_adapter_type: String,
+
     // ── Output ──────────────────────────────────────────────────────────
     /// Output file name prefix (including path)
     #[arg(long = "outFileNamePrefix", default_value = "./")]
@@ -659,6 +728,88 @@ pub struct Parameters {
     #[arg(long = "chimOutType", num_args = 1..=2, default_values_t = vec!["Junctions".to_string()])]
     pub chim_out_type: Vec<String>,
 
+    // ── STARsolo (single-cell) ──────────────────────────────────────────
+    /// Single-cell barcode geometry; `None` disables solo processing.
+    #[arg(long = "soloType", default_value = "None")]
+    pub solo_type: SoloType,
+
+    /// Cell-barcode whitelist file (one barcode per line, plain or gzipped).
+    /// The literal `None` means "no whitelist" (all observed barcodes kept).
+    /// Multiple files are allowed for `CB_UMI_Complex` (one per CB segment).
+    #[arg(long = "soloCBwhitelist", num_args = 1.., default_values_t = vec!["None".to_string()])]
+    pub solo_cb_whitelist: Vec<String>,
+
+    /// 1-based start position of the cell barcode in the barcode read.
+    #[arg(long = "soloCBstart", default_value_t = 1)]
+    pub solo_cb_start: u32,
+
+    /// Length of the cell barcode in bases.
+    #[arg(long = "soloCBlen", default_value_t = 16)]
+    pub solo_cb_len: u32,
+
+    /// 1-based start position of the UMI in the barcode read.
+    #[arg(long = "soloUMIstart", default_value_t = 17)]
+    pub solo_umi_start: u32,
+
+    /// Length of the UMI in bases (10x v2 = 10, v3 = 12).
+    #[arg(long = "soloUMIlen", default_value_t = 10)]
+    pub solo_umi_len: u32,
+
+    /// `CB_UMI_Complex` cell-barcode segment positions, one per segment, as
+    /// `startAnchor_startDist_endAnchor_endDist`. Only read-start anchoring
+    /// (`anchor = 0`, fixed positions) is supported, e.g. `0_0_0_7 0_8_0_15`.
+    #[arg(long = "soloCBposition", num_args = 0..)]
+    pub solo_cb_position: Vec<String>,
+
+    /// `CB_UMI_Complex` UMI position as `startAnchor_startDist_endAnchor_endDist`
+    /// (read-start anchoring only), e.g. `0_16_0_25`.
+    #[arg(long = "soloUMIposition", default_value = "")]
+    pub solo_umi_position: String,
+
+    /// Genomic features to quantify per cell: Gene, GeneFull, SJ, Velocyto, …
+    #[arg(long = "soloFeatures", num_args = 1.., default_values_t = vec!["Gene".to_string()])]
+    pub solo_features: Vec<String>,
+
+    /// UMI collapsing strategy: 1MM_All, 1MM_Directional, 1MM_Directional_UMItools,
+    /// Exact, or NoDedup.
+    #[arg(long = "soloUMIdedup", num_args = 1.., default_values_t = vec!["1MM_All".to_string()])]
+    pub solo_umi_dedup: Vec<String>,
+
+    /// Cell-barcode-to-whitelist matching: Exact, 1MM, 1MM_multi,
+    /// 1MM_multi_pseudocounts, 1MM_multi_Nbase_pseudocounts.
+    #[arg(long = "soloCBmatchWLtype", default_value = "1MM_multi")]
+    pub solo_cb_match_wl_type: String,
+
+    /// Cell-calling / matrix filtering: None, CellRanger2.2, EmptyDrops_CR, TopCells.
+    #[arg(long = "soloCellFilter", num_args = 1.., default_values_t = vec!["CellRanger2.2".to_string(), "3000".to_string(), "0.99".to_string(), "10".to_string()])]
+    pub solo_cell_filter: Vec<String>,
+
+    /// Counting method for reads mapping to multiple genes: Unique (default,
+    /// drop), Uniform, Rescue, PropUnique, EM. Non-Unique methods additionally
+    /// write `UniqueAndMult-<method>.mtx` (real-valued) per Gene/GeneFull feature.
+    #[arg(long = "soloMultiMappers", num_args = 1.., default_values_t = vec!["Unique".to_string()])]
+    pub solo_multi_mappers: Vec<String>,
+
+    /// Output directory name for solo matrices (relative to `--outFileNamePrefix`).
+    #[arg(long = "soloOutFileNames", num_args = 1.., default_values_t = vec!["Solo.out/".to_string(), "features.tsv".to_string(), "barcodes.tsv".to_string(), "matrix.mtx".to_string()])]
+    pub solo_out_file_names: Vec<String>,
+
+    /// Gzip the solo `matrix.mtx` / `barcodes.tsv` / `features.tsv` and append a
+    /// `.gz` suffix (CellRanger-style output). Default `no` keeps the plain files
+    /// that STARsolo writes (so the byte-for-byte STARsolo comparison still holds).
+    #[arg(long = "soloOutGzip", default_value = "no")]
+    pub solo_out_gzip: String,
+
+    /// Strand of the read relative to the gene for counting: Forward, Reverse, Unstranded.
+    #[arg(long = "soloStrand", default_value = "Forward")]
+    pub solo_strand: String,
+
+    /// UMI filtering of multi-gene UMIs: `-`/`None` (default, no filtering),
+    /// `MultiGeneUMI`, `MultiGeneUMI_CR`, or `MultiGeneUMI_All`. The `_CR`
+    /// variant matches CellRanger > 3.0.
+    #[arg(long = "soloUMIfiltering", num_args = 1.., default_values_t = vec!["-".to_string()])]
+    pub solo_umi_filtering: Vec<String>,
+
     /// Full command line as invoked, embedded in the BAM `@PG` `CL:` field.
     #[arg(skip)]
     pub command_line: Option<String>,
@@ -670,6 +821,14 @@ impl Parameters {
         PathBuf::from(format!("{}{suffix}", self.out_file_name_prefix))
     }
 
+    /// Whether the run produces per-read alignment records (SAM/BAM). False only
+    /// for `--outSAMtype None` written to a file (no `--outStd`): the alignment
+    /// loops then skip building SAM records entirely, which is a large saving for
+    /// solo / quant-only runs that only need the count matrix.
+    pub fn emits_alignments(&self) -> bool {
+        !matches!(self.out_std, OutStd::None) || self.out_sam_type.format != OutSamFormat::None
+    }
+
     /// Whether `--chimOutType` includes `Junctions` (write Chimeric.out.junction).
     pub fn chim_out_junctions(&self) -> bool {
         self.chim_out_type.iter().any(|s| s == "Junctions")
@@ -856,8 +1015,12 @@ impl Parameters {
             ));
         }
 
-        // alignReads requires read files
-        if params.run_mode == RunMode::AlignReads && params.read_files_in.is_empty() {
+        // alignReads requires read files — except SmartSeq, which gets its reads
+        // from --readFilesManifest instead.
+        if params.run_mode == RunMode::AlignReads
+            && params.read_files_in.is_empty()
+            && params.solo_type != SoloType::SmartSeq
+        {
             return Err(command.error(
                 ErrorKind::MissingRequiredArgument,
                 "--readFilesIn is required when --runMode alignReads",
@@ -917,6 +1080,184 @@ impl Parameters {
             ));
         }
 
+        // ── STARsolo validation ─────────────────────────────────────────
+        if params.run_mode == RunMode::AlignReads && params.solo_enabled() {
+            // CB_UMI_Complex needs one CB position + whitelist per segment.
+            if params.solo_type == SoloType::CbUmiComplex {
+                if params.solo_cb_position.is_empty() {
+                    return Err(command.error(
+                        ErrorKind::MissingRequiredArgument,
+                        "--soloType CB_UMI_Complex requires --soloCBposition (one per CB segment)",
+                    ));
+                }
+                if params.solo_cb_whitelist.len() != params.solo_cb_position.len() {
+                    return Err(command.error(
+                        ErrorKind::InvalidValue,
+                        format!(
+                            "--soloType CB_UMI_Complex: {} --soloCBposition segments but {} --soloCBwhitelist files (must match)",
+                            params.solo_cb_position.len(),
+                            params.solo_cb_whitelist.len()
+                        ),
+                    ));
+                }
+            }
+            // SmartSeq is plate-based (one library per manifest cell, no barcodes).
+            if params.solo_type == SoloType::SmartSeq && params.read_files_manifest.is_none() {
+                return Err(command.error(
+                    ErrorKind::MissingRequiredArgument,
+                    "--soloType SmartSeq requires --readFilesManifest (a TSV of read1<TAB>read2<TAB>cellID per cell)",
+                ));
+            }
+            // CB_UMI_Simple needs exactly two read files: cDNA + barcode read.
+            if matches!(
+                params.solo_type,
+                SoloType::CbUmiSimple | SoloType::CbUmiComplex | SoloType::CbSamTagOut
+            ) && params.read_files_in.len() != 2
+            {
+                return Err(command.error(
+                    ErrorKind::InvalidValue,
+                    format!(
+                        "--soloType {} requires exactly two --readFilesIn files (cDNA read then barcode read); got {}",
+                        params.solo_type,
+                        params.read_files_in.len()
+                    ),
+                ));
+            }
+            // Gene / GeneFull / SJ / Velocyto are implemented.
+            for f in &params.solo_features {
+                if !matches!(f.as_str(), "SJ" | "Velocyto")
+                    && f.parse::<crate::solo::SoloFeature>().is_err()
+                {
+                    return Err(command.error(
+                        ErrorKind::InvalidValue,
+                        format!(
+                            "unsupported --soloFeatures '{f}'; supported: Gene, GeneFull, SJ, Velocyto"
+                        ),
+                    ));
+                }
+            }
+            // soloMultiMappers values.
+            for m in &params.solo_multi_mappers {
+                if !matches!(
+                    m.as_str(),
+                    "Unique" | "Uniform" | "Rescue" | "PropUnique" | "EM"
+                ) {
+                    return Err(command.error(
+                        ErrorKind::InvalidValue,
+                        format!(
+                            "unsupported --soloMultiMappers '{m}'; expected Unique, Uniform, Rescue, PropUnique, or EM"
+                        ),
+                    ));
+                }
+            }
+            // Gene-level features need a gene model (SJ does not — junctions come
+            // from the alignments).
+            let needs_gtf = params
+                .solo_features
+                .iter()
+                .any(|f| f == "Gene" || f == "GeneFull" || f == "Velocyto");
+            if needs_gtf && params.sjdb_gtf_file.is_none() {
+                return Err(command.error(
+                    ErrorKind::MissingRequiredArgument,
+                    "--soloFeatures Gene/GeneFull requires --sjdbGTFfile (a gene model)",
+                ));
+            }
+            // CB length / UMI length sanity.
+            if params.solo_type == SoloType::CbUmiSimple
+                && (params.solo_cb_len == 0 || params.solo_umi_len == 0)
+            {
+                return Err(command.error(
+                    ErrorKind::InvalidValue,
+                    "--soloCBlen and --soloUMIlen must be > 0 for soloType CB_UMI_Simple",
+                ));
+            }
+            // Cell barcode cannot exceed a u64 packing (32 bases).
+            if params.solo_cb_len as usize > crate::solo::whitelist::CB_LEN_MAX {
+                return Err(command.error(
+                    ErrorKind::InvalidValue,
+                    format!(
+                        "--soloCBlen {} exceeds the maximum of {}",
+                        params.solo_cb_len,
+                        crate::solo::whitelist::CB_LEN_MAX
+                    ),
+                ));
+            }
+            // Validate --soloCBmatchWLtype.
+            if params
+                .solo_cb_match_wl_type
+                .parse::<crate::solo::whitelist::CbMatchType>()
+                .is_err()
+            {
+                return Err(command.error(
+                    ErrorKind::InvalidValue,
+                    format!(
+                        "unknown --soloCBmatchWLtype '{}'; expected Exact, 1MM, 1MM_multi, 1MM_multi_pseudocounts, or 1MM_multi_Nbase_pseudocounts",
+                        params.solo_cb_match_wl_type
+                    ),
+                ));
+            }
+            // Validate --soloUMIdedup (each method string).
+            for m in &params.solo_umi_dedup {
+                if m.parse::<crate::solo::UmiDedup>().is_err() {
+                    return Err(command.error(
+                        ErrorKind::InvalidValue,
+                        format!(
+                            "unknown --soloUMIdedup '{m}'; expected Exact, NoDedup, 1MM_All, 1MM_Directional, or 1MM_Directional_UMItools"
+                        ),
+                    ));
+                }
+            }
+            // Validate --soloUMIfiltering (each method string).
+            for f in &params.solo_umi_filtering {
+                if f.parse::<crate::solo::UmiFiltering>().is_err() {
+                    return Err(command.error(
+                        ErrorKind::InvalidValue,
+                        format!(
+                            "unknown --soloUMIfiltering '{f}'; expected -, None, MultiGeneUMI, MultiGeneUMI_CR, or MultiGeneUMI_All"
+                        ),
+                    ));
+                }
+            }
+            // Validate --clipAdapterType.
+            if !matches!(
+                params.clip_adapter_type.as_str(),
+                "Hamming" | "CellRanger4" | "None"
+            ) {
+                return Err(command.error(
+                    ErrorKind::InvalidValue,
+                    format!(
+                        "unknown --clipAdapterType '{}'; expected Hamming, CellRanger4, or None",
+                        params.clip_adapter_type
+                    ),
+                ));
+            }
+            // Validate --soloStrand.
+            if params
+                .solo_strand
+                .parse::<crate::solo::SoloStrand>()
+                .is_err()
+            {
+                return Err(command.error(
+                    ErrorKind::InvalidValue,
+                    format!(
+                        "unknown --soloStrand '{}'; expected Forward, Reverse, or Unstranded",
+                        params.solo_strand
+                    ),
+                ));
+            }
+            // A whitelist is required for any correction beyond None (SmartSeq
+            // has no cell barcodes at all, so the rule does not apply).
+            if params.solo_type != SoloType::SmartSeq
+                && params.solo_cb_whitelist_none()
+                && params.solo_cb_match_wl_type != "Exact"
+            {
+                return Err(command.error(
+                    ErrorKind::InvalidValue,
+                    "--soloCBwhitelist None requires --soloCBmatchWLtype Exact (no correction possible without a whitelist)",
+                ));
+            }
+        }
+
         Ok(params)
     }
 
@@ -929,6 +1270,56 @@ impl Parameters {
     pub fn quant_transcriptome_sam(&self) -> bool {
         self.quant_mode.iter().any(|m| m == "TranscriptomeSAM")
     }
+
+    /// True when a single-cell run is requested (`--soloType` != None).
+    pub fn solo_enabled(&self) -> bool {
+        self.solo_type != SoloType::None
+    }
+
+    /// Path to the cDNA (transcript) read file. For solo runs this is the
+    /// FIRST `--readFilesIn` file (STAR convention: `cDNA_read barcode_read`).
+    /// Returns `None` if no read files are configured.
+    pub fn cdna_read_file(&self) -> Option<&PathBuf> {
+        self.read_files_in.first()
+    }
+
+    /// Path to the barcode (CB+UMI) read file — the SECOND `--readFilesIn`
+    /// file when solo is enabled. `None` if absent.
+    pub fn barcode_read_file(&self) -> Option<&PathBuf> {
+        if self.solo_enabled() {
+            self.read_files_in.get(1)
+        } else {
+            None
+        }
+    }
+
+    /// True when the literal `None` whitelist was given (keep all barcodes).
+    pub fn solo_cb_whitelist_none(&self) -> bool {
+        self.solo_cb_whitelist.len() == 1 && self.solo_cb_whitelist[0] == "None"
+    }
+
+    /// Path to the (first) cell-barcode whitelist file, or `None` for the
+    /// literal `None` whitelist.
+    pub fn solo_cb_whitelist_path(&self) -> Option<PathBuf> {
+        if self.solo_cb_whitelist_none() {
+            None
+        } else {
+            self.solo_cb_whitelist.first().map(PathBuf::from)
+        }
+    }
+
+    /// Parsed `--soloCBmatchWLtype` flags. Falls back to the `1MM_multi`
+    /// default if somehow unset (validation rejects invalid strings).
+    pub fn solo_cb_match_type(&self) -> crate::solo::whitelist::CbMatchType {
+        self.solo_cb_match_wl_type
+            .parse()
+            .unwrap_or(crate::solo::whitelist::CbMatchType {
+                mm1: true,
+                mm1_multi: true,
+                mm1_multi_nbase: false,
+                pseudocounts: false,
+            })
+    }
 }
 
 // ---------------------------------------------------------------------------
diff --git a/src/quant/mod.rs b/src/quant/mod.rs
index 218f0fa..30b4094 100644
--- a/src/quant/mod.rs
+++ b/src/quant/mod.rs
@@ -33,6 +33,16 @@ pub struct GeneAnnotation {
     /// Per-chromosome exon interval list, sorted by (start, end).
     /// Each entry: (start_0based_incl, end_0based_excl, gene_idx).
     pub chr_exons: Vec<Vec<(u64, u64, usize)>>,
+    /// Per-chromosome **gene-body** interval list (one entry per gene: its full
+    /// `[min exon start, max exon end)` span, covering introns), sorted by
+    /// (start, end). Used by the STARsolo `GeneFull` feature, which counts a
+    /// read overlapping the gene locus including purely intronic reads.
+    pub chr_gene_body: Vec<Vec<(u64, u64, usize)>>,
+    /// Per-gene merged, sorted exon intervals `[start, end)` (absolute coords),
+    /// indexed by `gene_idx`. Used by the `Velocyto` feature to tell whether an
+    /// aligned block lies wholly within an exon (mature/ambiguous) or extends
+    /// into an intron (nascent/unspliced).
+    pub gene_exons: Vec<Vec<(u64, u64)>>,
 }
 
 impl GeneAnnotation {
@@ -46,6 +56,9 @@ impl GeneAnnotation {
         let mut gene_id_to_idx: std::collections::HashMap<String, usize> =
             std::collections::HashMap::new();
         let mut chr_exons: Vec<Vec<(u64, u64, usize)>> = vec![Vec::new(); n_chrs];
+        // Per-gene full span: (chr_idx, min_start, max_end). Accumulated over all
+        // of a gene's exons to build the GeneFull gene-body intervals.
+        let mut gene_span: Vec<Option<(usize, u64, u64)>> = Vec::new();
 
         for exon in exons {
             let gene_id = match exon.attributes.get(gene_tag) {
@@ -61,6 +74,7 @@ impl GeneAnnotation {
                 let is_rev = exon.strand == '-';
                 gene_is_reverse.push(is_rev);
                 gene_ids.push(gene_id);
+                gene_span.push(None);
                 idx
             };
 
@@ -78,6 +92,15 @@ impl GeneAnnotation {
             let end = chr_offset + exon.end;
 
             chr_exons[chr_idx].push((start, end, gene_idx));
+
+            // Extend this gene's full span. (A gene's exons share one chr.)
+            match &mut gene_span[gene_idx] {
+                Some((_, s, e)) => {
+                    *s = (*s).min(start);
+                    *e = (*e).max(end);
+                }
+                slot @ None => *slot = Some((chr_idx, start, end)),
+            }
         }
 
         for exons in &mut chr_exons {
@@ -85,13 +108,60 @@ impl GeneAnnotation {
             exons.dedup();
         }
 
+        // Build the per-chromosome gene-body interval list.
+        let mut chr_gene_body: Vec<Vec<(u64, u64, usize)>> = vec![Vec::new(); n_chrs];
+        for (gene_idx, span) in gene_span.iter().enumerate() {
+            if let Some((chr_idx, s, e)) = *span {
+                chr_gene_body[chr_idx].push((s, e, gene_idx));
+            }
+        }
+        for bodies in &mut chr_gene_body {
+            bodies.sort_unstable_by_key(|&(s, e, _)| (s, e));
+        }
+
+        // Per-gene merged exon intervals (for the Velocyto exonic/intronic test).
+        let mut gene_exons: Vec<Vec<(u64, u64)>> = vec![Vec::new(); gene_ids.len()];
+        for chr in &chr_exons {
+            for &(s, e, g) in chr {
+                gene_exons[g].push((s, e));
+            }
+        }
+        for ex in &mut gene_exons {
+            ex.sort_unstable();
+            // Merge overlapping/adjacent exons so a block test is unambiguous.
+            let mut merged: Vec<(u64, u64)> = Vec::with_capacity(ex.len());
+            for &(s, e) in ex.iter() {
+                if let Some(last) = merged.last_mut()
+                    && s <= last.1
+                {
+                    last.1 = last.1.max(e);
+                } else {
+                    merged.push((s, e));
+                }
+            }
+            *ex = merged;
+        }
+
         GeneAnnotation {
             gene_ids,
             gene_is_reverse,
             chr_exons,
+            chr_gene_body,
+            gene_exons,
         }
     }
 
+    /// Whether the aligned block `[start, end)` lies wholly within a single
+    /// (merged) exon of gene `g` — i.e. it is exonic, not intron-spanning.
+    pub fn block_is_exonic(&self, g: usize, start: u64, end: u64) -> bool {
+        let Some(exons) = self.gene_exons.get(g) else {
+            return false;
+        };
+        // First exon with exon_start > start is at `i`; the candidate is `i-1`.
+        let i = exons.partition_point(|&(s, _)| s <= start);
+        i > 0 && exons[i - 1].0 <= start && end <= exons[i - 1].1
+    }
+
     /// Build from GTF exon records using default `"gene_id"` attribute (backward-compatible).
     pub fn from_gtf_exons(exons: &[GtfRecord], genome: &Genome) -> Self {
         Self::from_gtf_exons_configured(exons, genome, "gene_id")
@@ -101,38 +171,68 @@ impl GeneAnnotation {
         self.gene_ids.len()
     }
 
-    /// Return indices of all genes whose exons overlap any exon of `transcript`.
-    /// Result is sorted and deduplicated.
+    /// Return indices of all genes whose exons overlap any exon of `transcript`
+    /// (the `Gene` feature). Result is sorted and deduplicated.
     pub fn overlapping_genes(&self, transcript: &Transcript) -> Vec<usize> {
-        if transcript.chr_idx >= self.chr_exons.len() {
-            return Vec::new();
+        let mut out = Vec::new();
+        self.overlapping_genes_into(transcript, &mut out);
+        out
+    }
+
+    /// Return indices of all genes whose **full body** (exons + introns)
+    /// overlaps any aligned block of `transcript` (the `GeneFull` feature). A
+    /// purely intronic read therefore counts here but not in `overlapping_genes`.
+    pub fn overlapping_genes_full(&self, transcript: &Transcript) -> Vec<usize> {
+        let mut out = Vec::new();
+        self.overlapping_genes_full_into(transcript, &mut out);
+        out
+    }
+
+    /// `overlapping_genes` into a caller-provided buffer (cleared + sorted/deduped
+    /// here). Lets the per-read hot path reuse one scratch `Vec` across reads.
+    pub fn overlapping_genes_into(&self, transcript: &Transcript, out: &mut Vec<usize>) {
+        Self::overlapping_in_into(&self.chr_exons, transcript, out);
+    }
+
+    /// `overlapping_genes_full` into a caller-provided buffer.
+    pub fn overlapping_genes_full_into(&self, transcript: &Transcript, out: &mut Vec<usize>) {
+        Self::overlapping_in_into(&self.chr_gene_body, transcript, out);
+    }
+
+    /// Shared overlap query over a sorted-by-start per-chromosome interval list,
+    /// writing sorted/deduped gene indices into `out` (which is cleared first).
+    fn overlapping_in_into(
+        chr_intervals: &[Vec<(u64, u64, usize)>],
+        transcript: &Transcript,
+        out: &mut Vec<usize>,
+    ) {
+        out.clear();
+        if transcript.chr_idx >= chr_intervals.len() {
+            return;
         }
-        let chr = &self.chr_exons[transcript.chr_idx];
+        let chr = &chr_intervals[transcript.chr_idx];
         if chr.is_empty() {
-            return Vec::new();
+            return;
         }
 
-        let mut genes: Vec<usize> = Vec::new();
-
         for exon in &transcript.exons {
             let rs = exon.genome_start;
             let re = exon.genome_end;
             if re <= rs {
                 continue;
             }
-            // All gene exons with start < re are candidates.
+            // All intervals with start < re are candidates.
             let upper = chr.partition_point(|&(gs, _, _)| gs < re);
             for &(_, ge, gene_idx) in &chr[..upper] {
                 // Overlap condition: ge > rs (start already guaranteed < re by upper bound).
                 if ge > rs {
-                    genes.push(gene_idx);
+                    out.push(gene_idx);
                 }
             }
         }
 
-        genes.sort_unstable();
-        genes.dedup();
-        genes
+        out.sort_unstable();
+        out.dedup();
     }
 }
 
@@ -381,7 +481,7 @@ mod tests {
 
     fn make_genome() -> Genome {
         Genome {
-            sequence: vec![0u8; 2000],
+            sequence: vec![0u8; 2000].into(),
             n_genome: 2000,
             n_genome_real: 2000,
             n_chr_real: 2,
diff --git a/src/quant/transcriptome.rs b/src/quant/transcriptome.rs
index c4c86ae..26df2e0 100644
--- a/src/quant/transcriptome.rs
+++ b/src/quant/transcriptome.rs
@@ -1203,7 +1203,7 @@ fn extend_softclips(
                 break;
             }
             let r1 = read_bases_align_orientation[r_idx];
-            let g1 = genome.sequence[g_idx];
+            let g1 = genome.sequence.base(g_idx);
             if r1 != g1 && r1 < 4 && g1 < 4 {
                 n_mm_extra += 1;
             }
@@ -1223,7 +1223,7 @@ fn extend_softclips(
                 break;
             }
             let r1 = read_bases_align_orientation[r_idx];
-            let g1 = genome.sequence[g_idx];
+            let g1 = genome.sequence.base(g_idx);
             if r1 != g1 && r1 < 4 && g1 < 4 {
                 n_mm_extra += 1;
             }
@@ -1381,7 +1381,7 @@ mod tests {
 
     fn make_genome() -> Genome {
         Genome {
-            sequence: vec![0u8; 3000],
+            sequence: vec![0u8; 3000].into(),
             n_genome: 3000,
             n_genome_real: 3000,
             n_chr_real: 2,
@@ -2296,7 +2296,7 @@ mod tests {
         // Aligned region [104, 144) — fill with zeros (A) so read bases match
         seq[104..144].fill(0);
         let genome = Genome {
-            sequence: seq,
+            sequence: seq.into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
@@ -2349,7 +2349,7 @@ mod tests {
         // Aligned region [104, 144): all zeros
         seq[104..144].fill(0);
         let genome = Genome {
-            sequence: seq,
+            sequence: seq.into(),
             n_genome: 1000,
             n_genome_real: 1000,
             n_chr_real: 1,
diff --git a/src/solo/count.rs b/src/solo/count.rs
new file mode 100644
index 0000000..7ea431e
--- /dev/null
+++ b/src/solo/count.rs
@@ -0,0 +1,1905 @@
+//! UMI deduplication and raw count-matrix output (Phase 14.4).
+//!
+//! Collates the per-read `(cell, UMI, gene)` records produced during alignment
+//! into a sparse per-cell, per-gene count matrix:
+//!   1. resolve deferred 1MM_multi cell barcodes via the count+quality posterior
+//!      (STAR `SoloReadFeature_inputRecords.cpp`: weight = exactCount·10^(−q/10));
+//!   2. group reads by `(cell, gene)` and collapse UMIs per `--soloUMIdedup`
+//!      (STAR `SoloFeature_collapseUMIall.cpp`);
+//!   3. write `Solo.out/Gene/raw/{matrix.mtx, barcodes.tsv, features.tsv}` in
+//!      CellRanger-compatible MatrixMarket layout (features × barcodes, 1-based).
+
+use crate::error::Error;
+use crate::solo::whitelist::CbWhitelist;
+use crate::solo::{SoloContext, SoloCountRecord};
+use flate2::Compression;
+use flate2::write::GzEncoder;
+use std::collections::HashMap;
+use std::io::{BufRead, BufReader, Write as _};
+use std::path::{Path, PathBuf};
+use std::str::FromStr;
+
+/// Open a solo output file, gzipping it (and appending `.gz` to the name) when
+/// `gzip` is set. The body is written by the closure; the gzip stream is
+/// finished explicitly so the trailer is always flushed. Returns the path written.
+pub(crate) fn write_file<F>(path: &Path, gzip: bool, body: F) -> Result<PathBuf, Error>
+where
+    F: FnOnce(&mut dyn std::io::Write) -> Result<(), Error>,
+{
+    let final_path = if gzip {
+        let mut s = path.as_os_str().to_owned();
+        s.push(".gz");
+        PathBuf::from(s)
+    } else {
+        path.to_path_buf()
+    };
+    let file = std::fs::File::create(&final_path).map_err(|e| Error::io(e, &final_path))?;
+    if gzip {
+        let mut enc = GzEncoder::new(file, Compression::default());
+        body(&mut enc)?;
+        enc.finish().map_err(|e| Error::io(e, &final_path))?;
+    } else {
+        let mut w = std::io::BufWriter::new(file);
+        body(&mut w)?;
+        w.flush().map_err(|e| Error::io(e, &final_path))?;
+    }
+    Ok(final_path)
+}
+
+// ---------------------------------------------------------------------------
+// UMI deduplication
+// ---------------------------------------------------------------------------
+
+/// `--soloUMIdedup` method.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum UmiDedup {
+    /// Count distinct UMI sequences (no error correction).
+    Exact,
+    /// No collapsing — count every read.
+    NoDedup,
+    /// Collapse all UMIs within Hamming-1 transitively (connected components).
+    OneMmAll,
+    /// UMI-tools directional, `count_hub >= 2*count_leaf + 0`.
+    OneMmDirectional,
+    /// UMI-tools directional original, `count_hub >= 2*count_leaf - 1`.
+    OneMmDirectionalUmiTools,
+    /// CellRanger 2–4 1MM collapse: each UMI is corrected to a higher-count
+    /// 1MM neighbor (non-transitive); count = distinct corrected UMIs.
+    OneMmCr,
+}
+
+impl FromStr for UmiDedup {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "Exact" => Ok(Self::Exact),
+            "NoDedup" => Ok(Self::NoDedup),
+            "1MM_All" => Ok(Self::OneMmAll),
+            "1MM_Directional" => Ok(Self::OneMmDirectional),
+            "1MM_Directional_UMItools" => Ok(Self::OneMmDirectionalUmiTools),
+            "1MM_CR" => Ok(Self::OneMmCr),
+            _ => Err(format!(
+                "unknown soloUMIdedup '{s}'; expected Exact, NoDedup, 1MM_All, 1MM_Directional, 1MM_Directional_UMItools, or 1MM_CR"
+            )),
+        }
+    }
+}
+
+/// `--soloUMIfiltering`: removal of UMIs that map to multiple genes within a cell.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum UmiFiltering {
+    /// No multi-gene UMI filtering.
+    None,
+    /// Remove lower-count gene assignments of a multi-gene UMI; if every gene
+    /// has a single read, drop the UMI entirely (STAR `MultiGeneUMI`).
+    MultiGeneUmi,
+    /// CellRanger > 3.0 variant: keep only the highest-read-count gene for a
+    /// multi-gene UMI (ties retained), without the all-singletons drop.
+    MultiGeneUmiCr,
+}
+
+impl FromStr for UmiFiltering {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "-" | "None" => Ok(Self::None),
+            // MultiGeneUMI_All behaves like MultiGeneUMI for the count matrix.
+            "MultiGeneUMI" | "MultiGeneUMI_All" => Ok(Self::MultiGeneUmi),
+            "MultiGeneUMI_CR" => Ok(Self::MultiGeneUmiCr),
+            _ => Err(format!(
+                "unknown soloUMIfiltering '{s}'; expected -, None, MultiGeneUMI, MultiGeneUMI_CR, or MultiGeneUMI_All"
+            )),
+        }
+    }
+}
+
+/// True if packed UMIs `a` and `b` (length `len`) differ at exactly one base.
+fn hamming1(a: u64, b: u64, len: usize) -> bool {
+    let x = a ^ b;
+    let mut diff = 0u32;
+    for i in 0..len {
+        if (x >> (2 * i)) & 0b11 != 0 {
+            diff += 1;
+            if diff > 1 {
+                return false;
+            }
+        }
+    }
+    diff == 1
+}
+
+/// Deduplicate the UMIs observed for one `(cell, gene)` pair into a molecule
+/// count. `umis` maps each packed UMI to its read multiplicity.
+#[allow(clippy::implicit_hasher)] // always called with the default hasher
+pub fn dedup_count(umis: &HashMap<u64, u32>, method: UmiDedup, umi_len: usize) -> u64 {
+    match method {
+        UmiDedup::Exact => umis.len() as u64,
+        UmiDedup::NoDedup => umis.values().map(|&c| u64::from(c)).sum(),
+        UmiDedup::OneMmAll => connected_components(umis, umi_len),
+        UmiDedup::OneMmDirectional => directional(umis, umi_len, 0),
+        UmiDedup::OneMmDirectionalUmiTools => directional(umis, umi_len, -1),
+        UmiDedup::OneMmCr => cellranger_1mm(umis, umi_len),
+    }
+}
+
+/// 1MM_CR: CellRanger's 1-mismatch UMI collapse (STAR `umiArrayCorrect_CR`).
+/// UMIs are sorted ascending by `(count, umi)`; each UMI is corrected to the
+/// LAST (highest-count) 1MM neighbor with a strictly later sort position — i.e.
+/// its highest-count 1MM neighbor. Correction is non-transitive (it points to
+/// the neighbor's raw UMI, not its corrected value); the molecule count is the
+/// number of distinct corrected UMIs.
+fn cellranger_1mm(umis: &HashMap<u64, u32>, umi_len: usize) -> u64 {
+    let mut items: Vec<(u64, u32)> = umis.iter().map(|(&u, &c)| (u, c)).collect();
+    // Ascending by count, then by UMI value (mirrors funCompareSolo1 ordering,
+    // so the inner scan from the end meets higher-count neighbors first).
+    items.sort_by(|a, b| a.1.cmp(&b.1).then(a.0.cmp(&b.0)));
+    let n = items.len();
+    let mut corrected: Vec<u64> = Vec::with_capacity(n);
+    for iu in 0..n {
+        let mut corr = items[iu].0;
+        let mut iuu = n;
+        while iuu > iu + 1 {
+            iuu -= 1;
+            if hamming1(items[iu].0, items[iuu].0, umi_len) {
+                corr = items[iuu].0;
+                break;
+            }
+        }
+        corrected.push(corr);
+    }
+    let distinct: std::collections::HashSet<u64> = corrected.into_iter().collect();
+    distinct.len() as u64
+}
+
+/// 1MM_All: number of connected components when UMIs within Hamming-1 are
+/// merged transitively (union-find).
+fn connected_components(umis: &HashMap<u64, u32>, umi_len: usize) -> u64 {
+    let keys: Vec<u64> = umis.keys().copied().collect();
+    let n = keys.len();
+    if n <= 1 {
+        return n as u64;
+    }
+    let mut parent: Vec<usize> = (0..n).collect();
+    fn find(parent: &mut [usize], mut x: usize) -> usize {
+        while parent[x] != x {
+            parent[x] = parent[parent[x]];
+            x = parent[x];
+        }
+        x
+    }
+    for i in 0..n {
+        for j in (i + 1)..n {
+            if hamming1(keys[i], keys[j], umi_len) {
+                let ri = find(&mut parent, i);
+                let rj = find(&mut parent, j);
+                if ri != rj {
+                    parent[ri] = rj;
+                }
+            }
+        }
+    }
+    let mut roots = std::collections::HashSet::new();
+    for i in 0..n {
+        let r = find(&mut parent, i);
+        roots.insert(r);
+    }
+    roots.len() as u64
+}
+
+/// 1MM_Directional: a lower-count UMI within Hamming-1 of a hub whose count
+/// satisfies `count_hub >= 2*count_leaf + dir_count_add` is absorbed; the
+/// molecule count is the number of surviving (non-absorbed) UMIs.
+fn directional(umis: &HashMap<u64, u32>, umi_len: usize, dir_count_add: i64) -> u64 {
+    // Sort by count desc, then by UMI value for determinism.
+    let mut items: Vec<(u64, u32)> = umis.iter().map(|(&u, &c)| (u, c)).collect();
+    items.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
+    let n = items.len();
+    let mut absorbed = vec![false; n];
+    for i in 0..n {
+        if absorbed[i] {
+            continue;
+        }
+        let hub_count = i64::from(items[i].1);
+        for j in 0..n {
+            if i == j || absorbed[j] {
+                continue;
+            }
+            let leaf_count = i64::from(items[j].1);
+            if leaf_count <= hub_count
+                && hub_count >= 2 * leaf_count + dir_count_add
+                && hamming1(items[i].0, items[j].0, umi_len)
+            {
+                absorbed[j] = true;
+            }
+        }
+    }
+    (n - absorbed.iter().filter(|&&a| a).count()) as u64
+}
+
+// ---------------------------------------------------------------------------
+// Cell-barcode multi-match resolution (deferred 1MM_multi)
+// ---------------------------------------------------------------------------
+
+/// Resolve a 1MM_multi cell barcode to a single whitelist index using the
+/// count+quality posterior: weight = `(exactCount[cand] + pseudocount) · 10^(−q/10)`
+/// where `q` is the mismatch-position Phred score. `pseudocount` is 1 for the
+/// `*_pseudocounts` match types (CellRanger ≥ 3.0). Returns the argmax, or
+/// `None` if no candidate has positive weight.
+fn resolve_multi_cb(
+    candidates: &[crate::solo::whitelist::CbCandidate],
+    exact_counts: &[u64],
+    pseudocount: f64,
+) -> Option<u32> {
+    let mut best: Option<(u32, f64)> = None;
+    let mut total = 0.0f64;
+    for c in candidates {
+        let prior = *exact_counts.get(c.wl_index as usize).unwrap_or(&0) as f64 + pseudocount;
+        let q = f64::from(c.mismatch_qual.saturating_sub(33)); // Phred+33 → Phred
+        let weight = prior * 10f64.powf(-q / 10.0);
+        total += weight;
+        match best {
+            Some((_, w)) if w >= weight => {}
+            _ => best = Some((c.wl_index, weight)),
+        }
+    }
+    match best {
+        Some((idx, w)) if total > 0.0 && w > 0.0 => Some(idx),
+        _ => None,
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Matrix assembly + output
+// ---------------------------------------------------------------------------
+
+/// Build and stream the raw count matrix to `matrix_path` in one per-cell pass,
+/// returning the number of non-zero entries written.
+///
+/// Mirrors STAR's `SoloFeature_collapseUMIall.cpp`: the flat record list is
+/// sorted by cell barcode so each cell's reads are contiguous, then **one cell
+/// is processed at a time** (Step 1 — peak build memory is a single cell's
+/// `umi → gene` maps, not a global `cell → umi → gene` nest over all records).
+///
+/// Step 2 (streaming output): each cell's `gene → count` entries are written
+/// straight to a temporary MatrixMarket body as they are produced — the global
+/// `cell → (gene → count)` map is never materialized. `nnz` is counted on the
+/// fly; the final `matrix.mtx` is the header (`rows cols nnz`) followed by the
+/// temp body (the BySJout temp-file pattern). So matrix-output memory is bounded
+/// by one cell regardless of how many cells the raw whitelist matrix spans.
+///
+/// Records are sorted by cb (ascending column), and each cell's genes are
+/// emitted ascending, so entries come out in the same order as before.
+#[allow(clippy::too_many_arguments)]
+/// Per-cell summary collected while streaming the matrix: the whitelist barcode
+/// index, reads (records before UMI dedup), UMIs (deduped column sum), and genes
+/// detected (nonzero entries).
+#[derive(Clone, Copy)]
+pub struct CellStat {
+    pub cb: u32,
+    pub n_reads: u64,
+    pub n_umis: u64,
+    pub n_genes: u32,
+}
+
+/// What `build_matrix_body` returns alongside the temp matrix body.
+pub struct MatrixStats {
+    pub nnz: usize,
+    /// One entry per barcode that received ≥1 UMI (the raw, unfiltered set).
+    pub cells: Vec<CellStat>,
+    /// Distinct genes with a nonzero count anywhere in the raw matrix.
+    pub genes_detected: u32,
+}
+
+/// Stream the per-cell deduplicated counts into a plain temporary MatrixMarket
+/// *body* (`gene+1 cb+1 count`, barcode-ascending) and collect per-cell stats.
+/// The body is finalized into `raw/` (and optionally `filtered/`) by the caller,
+/// which lets the raw + filtered matrices share one streaming pass.
+#[allow(clippy::too_many_arguments)]
+fn build_matrix_body(
+    ctx: &SoloContext,
+    recorder: &crate::solo::SoloRecorder,
+    method: UmiDedup,
+    filtering: UmiFiltering,
+    umi_len: usize,
+    pseudocount: f64,
+    dir: &Path,
+    n_features: usize,
+) -> Result<(tempfile::NamedTempFile, MatrixStats), Error> {
+    let mut body_tmp = tempfile::Builder::new()
+        .prefix(".matrix_body")
+        .tempfile_in(dir)
+        .map_err(|e| Error::io(e, dir))?;
+    let mut nnz = 0usize;
+    let mut cell_stats: Vec<CellStat> = Vec::new();
+    let mut gene_seen = vec![false; n_features];
+
+    {
+        let mut body = std::io::BufWriter::new(body_tmp.as_file_mut());
+
+        // Move records out of the recorder; fold in resolved 1MM_multi cells.
+        let mut records = std::mem::take(&mut *recorder.records.lock().unwrap());
+        let exact_counts = ctx.whitelist.exact_count_snapshot();
+        let multi = std::mem::take(&mut *recorder.multi_records.lock().unwrap());
+        for m in &multi {
+            if let Some(cb) = resolve_multi_cb(&m.candidates, &exact_counts, pseudocount) {
+                records.push(SoloCountRecord {
+                    cb,
+                    umi: m.umi,
+                    gene: m.gene,
+                });
+            }
+        }
+        drop(multi);
+
+        // Group each cell's reads together so we can process + free one at a time.
+        records.sort_unstable_by_key(|r| r.cb);
+
+        let mut i = 0;
+        while i < records.len() {
+            let cb = records[i].cb;
+
+            // umi → gene → read multiplicity, for this cell only.
+            let mut umi_genes: HashMap<u64, HashMap<u32, u32>> = HashMap::new();
+            let mut j = i;
+            while j < records.len() && records[j].cb == cb {
+                let r = &records[j];
+                *umi_genes
+                    .entry(r.umi)
+                    .or_default()
+                    .entry(r.gene)
+                    .or_insert(0) += 1;
+                j += 1;
+            }
+
+            // (gene → (umi → read_count)) after multi-gene UMI filtering.
+            let mut gene_umis: HashMap<u32, HashMap<u64, u32>> = HashMap::new();
+            for (&umi, genes) in &umi_genes {
+                for (&gene, &rc) in filter_multi_gene_umi(genes, filtering) {
+                    *gene_umis.entry(gene).or_default().entry(umi).or_insert(0) += rc;
+                }
+            }
+
+            // Collapse UMIs per gene, then emit this cell's entries gene-ascending.
+            let mut cell_entries: Vec<(u32, u64)> = Vec::with_capacity(gene_umis.len());
+            for (&gene, umis) in &gene_umis {
+                let count = dedup_count(umis, method, umi_len);
+                if count > 0 {
+                    cell_entries.push((gene, count));
+                }
+            }
+            cell_entries.sort_unstable_by_key(|&(g, _)| g);
+            // Per-cell summary: reads = records (j-i), genes = nonzero entries,
+            // UMIs = sum of deduped counts.
+            let n_reads = (j - i) as u64;
+            let n_genes = cell_entries.len() as u32;
+            let mut n_umis = 0u64;
+            for (g, c) in cell_entries {
+                n_umis += c;
+                gene_seen[g as usize] = true;
+                writeln!(body, "{} {} {}", g + 1, cb + 1, c).map_err(|e| Error::io(e, dir))?;
+                nnz += 1;
+            }
+            if n_umis > 0 {
+                cell_stats.push(CellStat {
+                    cb,
+                    n_reads,
+                    n_umis,
+                    n_genes,
+                });
+            }
+
+            i = j;
+        }
+        body.flush().map_err(|e| Error::io(e, dir))?;
+    }
+
+    let genes_detected = gene_seen.iter().filter(|&&s| s).count() as u32;
+    Ok((
+        body_tmp,
+        MatrixStats {
+            nnz,
+            cells: cell_stats,
+            genes_detected,
+        },
+    ))
+}
+
+/// Write a final `matrix.mtx[.gz]` = MatrixMarket header + (optionally
+/// cb-remapped/filtered) body. With `remap = None` the body is copied verbatim
+/// (raw); with `Some(map)` only columns in the map survive, renumbered to the
+/// `n_cols` called cells. Returns the entry count written.
+fn finalize_matrix(
+    body: &tempfile::NamedTempFile,
+    out_path: &Path,
+    gzip: bool,
+    n_features: usize,
+    n_cols: usize,
+    raw_nnz: usize,
+    remap: Option<&HashMap<u32, u32>>,
+) -> Result<usize, Error> {
+    // For the filtered matrix we must know nnz before the header, so first build
+    // the remapped body into a temp and count it; raw reuses the known nnz.
+    let (src, nnz): (PathBuf, usize) = match remap {
+        None => (body.path().to_path_buf(), raw_nnz),
+        Some(map) => {
+            let dir = out_path.parent().unwrap_or_else(|| Path::new("."));
+            let mut ftmp = tempfile::Builder::new()
+                .prefix(".matrix_filt")
+                .tempfile_in(dir)
+                .map_err(|e| Error::io(e, dir))?;
+            let mut kept = 0usize;
+            {
+                let mut w = std::io::BufWriter::new(ftmp.as_file_mut());
+                let reader = BufReader::new(
+                    std::fs::File::open(body.path()).map_err(|e| Error::io(e, body.path()))?,
+                );
+                for line in reader.lines() {
+                    let line = line.map_err(|e| Error::io(e, body.path()))?;
+                    let mut it = line.split(' ');
+                    let (Some(gene), Some(cb1), Some(cnt)) = (it.next(), it.next(), it.next())
+                    else {
+                        continue;
+                    };
+                    let cb0: u32 = cb1.parse::<u32>().unwrap_or(0).saturating_sub(1);
+                    if let Some(&col) = map.get(&cb0) {
+                        writeln!(w, "{gene} {col} {cnt}").map_err(|e| Error::io(e, out_path))?;
+                        kept += 1;
+                    }
+                }
+                w.flush().map_err(|e| Error::io(e, out_path))?;
+            }
+            (
+                ftmp.into_temp_path()
+                    .keep()
+                    .map_err(|e| Error::io(e.error, out_path))?,
+                kept,
+            )
+        }
+    };
+
+    write_file(out_path, gzip, |w| {
+        writeln!(w, "%%MatrixMarket matrix coordinate integer general")
+            .map_err(|e| Error::io(e, out_path))?;
+        writeln!(w, "%").map_err(|e| Error::io(e, out_path))?;
+        writeln!(w, "{n_features} {n_cols} {nnz}").map_err(|e| Error::io(e, out_path))?;
+        let mut r = std::fs::File::open(&src).map_err(|e| Error::io(e, &src))?;
+        std::io::copy(&mut r, w).map_err(|e| Error::io(e, out_path))?;
+        Ok(())
+    })?;
+    if remap.is_some() {
+        let _ = std::fs::remove_file(&src); // best-effort cleanup of the filtered temp
+    }
+    Ok(nnz)
+}
+
+/// `--soloMultiMappers` method (non-`Unique` ones produce a `UniqueAndMult-*.mtx`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum MultiMethod {
+    Uniform,
+    Rescue,
+    PropUnique,
+    Em,
+}
+
+impl MultiMethod {
+    fn name(self) -> &'static str {
+        match self {
+            MultiMethod::Uniform => "Uniform",
+            MultiMethod::Rescue => "Rescue",
+            MultiMethod::PropUnique => "PropUnique",
+            MultiMethod::Em => "EM",
+        }
+    }
+
+    /// Parse `--soloMultiMappers` values, dropping `Unique` (no extra matrix).
+    pub fn parse_list(vals: &[String]) -> Vec<MultiMethod> {
+        vals.iter()
+            .filter_map(|v| match v.as_str() {
+                "Uniform" => Some(MultiMethod::Uniform),
+                "Rescue" => Some(MultiMethod::Rescue),
+                "PropUnique" => Some(MultiMethod::PropUnique),
+                "EM" => Some(MultiMethod::Em),
+                _ => None,
+            })
+            .collect()
+    }
+}
+
+/// Distribute one cell's gene-ambiguous molecules across their gene sets and add
+/// to the unique counts `u`, returning the combined (unique + multi) per-gene
+/// counts. `molecules` is one gene set per deduplicated multi-gene UMI.
+fn distribute_multi(
+    method: MultiMethod,
+    u: &HashMap<u32, f64>,
+    molecules: &[Vec<u32>],
+) -> HashMap<u32, f64> {
+    let mut out = u.clone();
+    let unit = |s: &[u32]| 1.0 / s.len() as f64;
+    let get = |m: &HashMap<u32, f64>, g: u32| m.get(&g).copied().unwrap_or(0.0);
+    match method {
+        MultiMethod::Uniform => {
+            for s in molecules {
+                let w = unit(s);
+                for &g in s {
+                    *out.entry(g).or_insert(0.0) += w;
+                }
+            }
+        }
+        MultiMethod::PropUnique => {
+            for s in molecules {
+                let total: f64 = s.iter().map(|&g| get(u, g)).sum();
+                for &g in s {
+                    let w = if total > 0.0 {
+                        get(u, g) / total
+                    } else {
+                        unit(s)
+                    };
+                    *out.entry(g).or_insert(0.0) += w;
+                }
+            }
+        }
+        MultiMethod::Rescue => {
+            // Weights = unique counts + a uniform spread of the multi molecules.
+            let mut unif: HashMap<u32, f64> = HashMap::new();
+            for s in molecules {
+                let w = unit(s);
+                for &g in s {
+                    *unif.entry(g).or_insert(0.0) += w;
+                }
+            }
+            for s in molecules {
+                let total: f64 = s.iter().map(|&g| get(u, g) + get(&unif, g)).sum();
+                for &g in s {
+                    let w = if total > 0.0 {
+                        (get(u, g) + get(&unif, g)) / total
+                    } else {
+                        unit(s)
+                    };
+                    *out.entry(g).or_insert(0.0) += w;
+                }
+            }
+        }
+        MultiMethod::Em => {
+            // theta_g = u_g + (multi distributed proportional to theta), iterated.
+            let mut theta = u.clone();
+            for s in molecules {
+                for &g in s {
+                    theta.entry(g).or_insert(0.0);
+                }
+            }
+            for _ in 0..100 {
+                let mut next = u.clone();
+                for s in molecules {
+                    for &g in s {
+                        next.entry(g).or_insert(0.0);
+                    }
+                }
+                for s in molecules {
+                    let total: f64 = s.iter().map(|&g| get(&theta, g)).sum();
+                    for &g in s {
+                        let w = if total > 0.0 {
+                            get(&theta, g) / total
+                        } else {
+                            unit(s)
+                        };
+                        *next.get_mut(&g).unwrap() += w;
+                    }
+                }
+                let delta: f64 = next.iter().map(|(g, v)| (v - get(&theta, *g)).abs()).sum();
+                theta = next;
+                if delta < 1e-6 {
+                    break;
+                }
+            }
+            out = theta;
+        }
+    }
+    out
+}
+
+/// Format a real matrix value compactly (integers without a decimal point).
+fn fmt_real(v: f64) -> String {
+    if v.fract().abs() < 1e-9 {
+        format!("{}", v.round() as i64)
+    } else {
+        format!("{v:.5}")
+    }
+}
+
+/// Write the `UniqueAndMult-<method>.mtx` matrices (real-valued) for the
+/// `--soloMultiMappers` methods. Re-reads the raw matrix body (per-cell unique
+/// counts, cb-ascending) and merges each cell with its gene-ambiguous molecules
+/// (deduplicated by UMI, gene set = union). Cells present only in multi records
+/// (no unique gene) are skipped.
+#[allow(clippy::too_many_arguments)]
+fn build_multi_matrices(
+    raw_body: &tempfile::NamedTempFile,
+    multi_records: &[crate::solo::MultiGeneRecord],
+    methods: &[MultiMethod],
+    dir: &Path,
+    matrix_name: &str,
+    n_features: usize,
+    n_barcodes: usize,
+    gzip: bool,
+) -> Result<(), Error> {
+    if methods.is_empty() {
+        return Ok(());
+    }
+    let mut multi: Vec<&crate::solo::MultiGeneRecord> = multi_records.iter().collect();
+    multi.sort_unstable_by_key(|r| r.cb);
+
+    // Per-method temp body + entry count.
+    let mut bodies: Vec<tempfile::NamedTempFile> = Vec::new();
+    for _ in methods {
+        bodies.push(
+            tempfile::Builder::new()
+                .prefix(".um_body")
+                .tempfile_in(dir)
+                .map_err(|e| Error::io(e, dir))?,
+        );
+    }
+    let mut nnz = vec![0usize; methods.len()];
+
+    // Gather one cell's multi molecules (gene sets, one per deduped UMI).
+    let cell_molecules = |cb: u32, mptr: &mut usize| -> Vec<Vec<u32>> {
+        while *mptr < multi.len() && multi[*mptr].cb < cb {
+            *mptr += 1; // skip multi-only cells (no unique gene)
+        }
+        let mut by_umi: HashMap<u64, std::collections::BTreeSet<u32>> = HashMap::new();
+        while *mptr < multi.len() && multi[*mptr].cb == cb {
+            let r = multi[*mptr];
+            by_umi
+                .entry(r.umi)
+                .or_default()
+                .extend(r.genes.iter().copied());
+            *mptr += 1;
+        }
+        by_umi
+            .into_values()
+            .map(|s| s.into_iter().collect())
+            .collect()
+    };
+
+    {
+        let mut writers: Vec<std::io::BufWriter<&mut std::fs::File>> = bodies
+            .iter_mut()
+            .map(|t| std::io::BufWriter::new(t.as_file_mut()))
+            .collect();
+        let reader = BufReader::new(
+            std::fs::File::open(raw_body.path()).map_err(|e| Error::io(e, raw_body.path()))?,
+        );
+        let mut mptr = 0usize;
+        let mut cur_cb: Option<u32> = None;
+        let mut u_map: HashMap<u32, f64> = HashMap::new();
+
+        let mut flush = |cb: u32,
+                         u: &HashMap<u32, f64>,
+                         mptr: &mut usize,
+                         nnz: &mut [usize]|
+         -> Result<(), Error> {
+            let mols = cell_molecules(cb, mptr);
+            for (k, &m) in methods.iter().enumerate() {
+                let counts = distribute_multi(m, u, &mols);
+                let mut entries: Vec<(u32, f64)> =
+                    counts.into_iter().filter(|&(_, v)| v > 1e-9).collect();
+                entries.sort_unstable_by_key(|&(g, _)| g);
+                for (g, v) in entries {
+                    writeln!(writers[k], "{} {} {}", g + 1, cb + 1, fmt_real(v))
+                        .map_err(|e| Error::io(e, dir))?;
+                    nnz[k] += 1;
+                }
+            }
+            Ok(())
+        };
+
+        for line in reader.lines() {
+            let line = line.map_err(|e| Error::io(e, raw_body.path()))?;
+            let mut it = line.split(' ');
+            let (Some(gt), Some(ct), Some(vt)) = (it.next(), it.next(), it.next()) else {
+                continue;
+            };
+            let g: u32 = gt.parse::<u32>().unwrap_or(1) - 1;
+            let cb: u32 = ct.parse::<u32>().unwrap_or(1) - 1;
+            let v: f64 = vt.parse().unwrap_or(0.0);
+            if cur_cb != Some(cb) {
+                if let Some(prev) = cur_cb {
+                    flush(prev, &u_map, &mut mptr, &mut nnz)?;
+                }
+                cur_cb = Some(cb);
+                u_map.clear();
+            }
+            *u_map.entry(g).or_insert(0.0) += v;
+        }
+        if let Some(prev) = cur_cb {
+            flush(prev, &u_map, &mut mptr, &mut nnz)?;
+        }
+        for w in &mut writers {
+            w.flush().map_err(|e| Error::io(e, dir))?;
+        }
+    }
+
+    // Finalize each UniqueAndMult-<method>.mtx (real-valued MatrixMarket).
+    for ((m, body), &n) in methods.iter().zip(&bodies).zip(&nnz) {
+        let path = dir.join(format!("UniqueAndMult-{}.mtx", m.name()));
+        write_file(&path, gzip, |w| {
+            writeln!(w, "%%MatrixMarket matrix coordinate real general")
+                .map_err(|e| Error::io(e, &path))?;
+            writeln!(w, "%").map_err(|e| Error::io(e, &path))?;
+            writeln!(w, "{n_features} {n_barcodes} {n}").map_err(|e| Error::io(e, &path))?;
+            let mut r = std::fs::File::open(body.path()).map_err(|e| Error::io(e, body.path()))?;
+            std::io::copy(&mut r, w).map_err(|e| Error::io(e, &path))?;
+            Ok(())
+        })?;
+    }
+    let _ = matrix_name; // UniqueAndMult uses a fixed name scheme
+    Ok(())
+}
+
+/// Apply `--soloUMIfiltering` to the gene→read_count map of a single UMI,
+/// returning the surviving (gene, read_count) entries.
+fn filter_multi_gene_umi(genes: &HashMap<u32, u32>, filtering: UmiFiltering) -> Vec<(&u32, &u32)> {
+    if filtering == UmiFiltering::None || genes.len() <= 1 {
+        return genes.iter().collect();
+    }
+    let max = genes.values().copied().max().unwrap_or(0);
+    match filtering {
+        // STAR MultiGeneUMI: threshold = max (or 2 if max==1, dropping all
+        // single-read multi-gene UMIs); keep genes with read_count >= threshold.
+        UmiFiltering::MultiGeneUmi => {
+            let thresh = if max == 1 { 2 } else { max };
+            genes.iter().filter(|&(_, &rc)| rc >= thresh).collect()
+        }
+        // CellRanger > 3.0: keep the highest-read-count gene(s); no singleton drop.
+        UmiFiltering::MultiGeneUmiCr => genes.iter().filter(|&(_, &rc)| rc >= max).collect(),
+        UmiFiltering::None => unreachable!(),
+    }
+}
+
+/// CellRanger-2.2 knee threshold on per-barcode UMI totals (STARsolo's default
+/// `--soloCellFilter CellRanger2.2 3000 0.99 10`). Returns the minimum UMI count
+/// for a barcode to be called a cell.
+fn knee_cr22(umis_desc: &[u64], n_expected: usize, max_pct: f64, max_min_ratio: f64) -> u64 {
+    if umis_desc.is_empty() {
+        return 0;
+    }
+    let idx = ((n_expected as f64 * (1.0 - max_pct)).round() as usize).min(umis_desc.len() - 1);
+    let robust_max = umis_desc[idx] as f64;
+    (robust_max / max_min_ratio).ceil() as u64
+}
+
+/// Whitelist indices of called cells (sorted ascending) per `--soloCellFilter`.
+/// `None` → no filtered/ output. `EmptyDrops_CR` writes only the knee-guaranteed
+/// cells here (the Monte-Carlo rescue is the standalone `emptydrops` binary).
+fn called_cells(cells: &[CellStat], filter: &[String]) -> Option<Vec<u32>> {
+    let method = filter.first().map_or("CellRanger2.2", String::as_str);
+    let arg = |i: usize, d: f64| filter.get(i).and_then(|s| s.parse().ok()).unwrap_or(d);
+    let mut cbs: Vec<u32> = match method {
+        "None" => return None,
+        "TopCells" => {
+            let n = arg(1, 0.0) as usize;
+            let mut idx: Vec<&CellStat> = cells.iter().collect();
+            idx.sort_by(|a, b| b.n_umis.cmp(&a.n_umis).then(a.cb.cmp(&b.cb)));
+            idx.into_iter().take(n).map(|c| c.cb).collect()
+        }
+        // EmptyDrops_CR is handled by `emptydrops_called`; the knee here is the
+        // fallback / guaranteed-cell base.
+        "CellRanger2.2" | "EmptyDrops_CR" => {
+            let mut umis: Vec<u64> = cells.iter().map(|c| c.n_umis).collect();
+            umis.sort_unstable_by(|a, b| b.cmp(a));
+            let thr = knee_cr22(&umis, arg(1, 3000.0) as usize, arg(2, 0.99), arg(3, 10.0));
+            cells
+                .iter()
+                .filter(|c| c.n_umis >= thr)
+                .map(|c| c.cb)
+                .collect()
+        }
+        other => {
+            log::warn!("--soloCellFilter '{other}' not supported; skipping filtered/ output");
+            return None;
+        }
+    };
+    cbs.sort_unstable();
+    Some(cbs)
+}
+
+/// `--soloCellFilter EmptyDrops_CR`: the CR2.2-knee guaranteed cells PLUS cells
+/// rescued by the EmptyDrops multinomial Monte-Carlo test (STAR
+/// `SoloFeature_emptyDrops_CR.cpp`). Per-cell gene profiles for the ambient +
+/// candidate cells are read back from the raw matrix body. `filter` is the
+/// `EmptyDrops_CR nExpected maxPct maxMinRatio indMin indMax umiMin
+/// umiMinFracMedian candMaxN FDR [simN]` argument list.
+fn emptydrops_called(
+    cells: &[CellStat],
+    body: &tempfile::NamedTempFile,
+    n_features: usize,
+    filter: &[String],
+) -> Result<Vec<u32>, Error> {
+    use rand::SeedableRng;
+    use rand::distr::{Distribution, weighted::WeightedIndex};
+    let arg = |i: usize, d: f64| {
+        filter
+            .get(i)
+            .and_then(|s| s.parse::<f64>().ok())
+            .unwrap_or(d)
+    };
+    let (n_expected, max_pct, ratio) = (arg(1, 3000.0) as usize, arg(2, 0.99), arg(3, 10.0));
+    let (ind_min, ind_max) = (arg(4, 45000.0) as usize, arg(5, 90000.0) as usize);
+    let umi_min = arg(6, 500.0) as u64;
+    let umi_min_frac = arg(7, 0.01);
+    let cand_max = arg(8, 20000.0) as usize;
+    let fdr = arg(9, 0.01);
+    let sim_n = arg(10, 10000.0).max(1.0) as usize;
+
+    // Rank by total UMI (descending, cb tie-break).
+    let mut order: Vec<&CellStat> = cells.iter().collect();
+    order.sort_by(|a, b| b.n_umis.cmp(&a.n_umis).then(a.cb.cmp(&b.cb)));
+    let totals_desc: Vec<u64> = order.iter().map(|c| c.n_umis).collect();
+    let thr = knee_cr22(&totals_desc, n_expected, max_pct, ratio);
+    let n_simple = totals_desc.iter().take_while(|&&u| u >= thr).count();
+    let mut called: Vec<u32> = order.iter().take(n_simple).map(|c| c.cb).collect();
+
+    // Candidate cells: rank ≥ nSimple, total ≥ minUMI, up to candMaxN.
+    let median_top = totals_desc.get(n_simple / 2).copied().unwrap_or(0);
+    let min_umi = umi_min.max((umi_min_frac * median_top as f64) as u64);
+    let mut cand_cbs: Vec<u32> = Vec::new();
+    for c in order.iter().skip(n_simple).take(cand_max) {
+        if c.n_umis < min_umi {
+            break;
+        }
+        cand_cbs.push(c.cb);
+    }
+    if cand_cbs.is_empty() {
+        called.sort_unstable();
+        return Ok(called);
+    }
+    let cand_set: std::collections::HashSet<u32> = cand_cbs.iter().copied().collect();
+    let ambient_set: std::collections::HashSet<u32> = order
+        .iter()
+        .skip(ind_min)
+        .take(ind_max.saturating_sub(ind_min))
+        .map(|c| c.cb)
+        .collect();
+
+    // Re-read the raw body for ambient (summed) + per-candidate profiles.
+    let mut ambient = vec![0f64; n_features];
+    let mut amb_total = 0f64;
+    let mut cand_profiles: HashMap<u32, Vec<(u32, u32)>> = HashMap::new();
+    let reader =
+        BufReader::new(std::fs::File::open(body.path()).map_err(|e| Error::io(e, body.path()))?);
+    for line in reader.lines() {
+        let line = line.map_err(|e| Error::io(e, body.path()))?;
+        let mut it = line.split(' ');
+        let (Some(gt), Some(ct), Some(vt)) = (it.next(), it.next(), it.next()) else {
+            continue;
+        };
+        let g = gt.parse::<u32>().unwrap_or(1) - 1;
+        let cb = ct.parse::<u32>().unwrap_or(1) - 1;
+        let v = vt.parse::<u32>().unwrap_or(0);
+        if ambient_set.contains(&cb) {
+            ambient[g as usize] += v as f64;
+            amb_total += v as f64;
+        }
+        if cand_set.contains(&cb) {
+            cand_profiles.entry(cb).or_default().push((g, v));
+        }
+    }
+    if amb_total == 0.0 {
+        called.sort_unstable();
+        return Ok(called);
+    }
+
+    // Ambient probabilities with a Good-Turing P0 unseen-mass correction.
+    let n1 = ambient.iter().filter(|&&x| (x - 1.0).abs() < 0.5).count() as f64;
+    let p0 = (n1 / amb_total).clamp(1e-12, 0.5);
+    let n_zero = ambient.iter().filter(|&&x| x == 0.0).count().max(1) as f64;
+    let amb_p: Vec<f64> = ambient
+        .iter()
+        .map(|&x| {
+            if x > 0.0 {
+                (1.0 - p0) * x / amb_total
+            } else {
+                p0 / n_zero
+            }
+        })
+        .collect();
+    let amb_logp: Vec<f64> = amb_p.iter().map(|&p| p.max(1e-300).ln()).collect();
+
+    // Observed multinomial log-prob per candidate.
+    let max_count = cand_cbs
+        .iter()
+        .filter_map(|cb| cand_profiles.get(cb))
+        .map(|p| p.iter().map(|&(_, c)| c as usize).sum::<usize>())
+        .max()
+        .unwrap_or(0);
+    let mut log_fac = vec![0f64; max_count + 1];
+    for i in 2..=max_count {
+        log_fac[i] = log_fac[i - 1] + (i as f64).ln();
+    }
+    let obs: Vec<(u32, usize, f64)> = cand_cbs
+        .iter()
+        .filter_map(|&cb| {
+            let prof = cand_profiles.get(&cb)?;
+            let total: usize = prof.iter().map(|&(_, c)| c as usize).sum();
+            let mut s = log_fac[total];
+            for &(g, c) in prof {
+                s -= log_fac[c as usize];
+                s += c as f64 * amb_logp[g as usize];
+            }
+            Some((cb, total, s))
+        })
+        .collect();
+
+    // Monte-Carlo: simulate sim_n ambient barcodes, recording the running
+    // log-prob at each count; compare each candidate against sim[*][its total].
+    let nonzero: Vec<usize> = (0..n_features).filter(|&g| amb_p[g] > 0.0).collect();
+    let weights: Vec<f64> = nonzero.iter().map(|&g| amb_p[g]).collect();
+    let dist = WeightedIndex::new(&weights).map_err(|e| {
+        Error::from(std::io::Error::new(
+            std::io::ErrorKind::InvalidData,
+            e.to_string(),
+        ))
+    })?;
+    let mut rng = rand::rngs::StdRng::seed_from_u64(19_760_110);
+    let mut sim_at: Vec<Vec<f64>> = vec![Vec::with_capacity(sim_n); max_count + 1];
+    let mut curr = vec![0u32; n_features];
+    for _ in 0..sim_n {
+        curr.fill(0);
+        let mut lp = 0f64;
+        sim_at[0].push(0.0);
+        #[allow(clippy::needless_range_loop)] // ic is both index and multinomial term
+        for ic in 1..=max_count {
+            let gi = nonzero[dist.sample(&mut rng)];
+            curr[gi] += 1;
+            lp += amb_logp[gi] + (ic as f64).ln() - (curr[gi] as f64).ln();
+            sim_at[ic].push(lp);
+        }
+    }
+
+    // p-values + Benjamini-Hochberg.
+    let mut pvals: Vec<(u32, f64)> = obs
+        .iter()
+        .map(|&(cb, total, o)| {
+            let lower = sim_at[total].iter().filter(|&&sp| sp < o).count();
+            (cb, (1 + lower) as f64 / (1 + sim_n) as f64)
+        })
+        .collect();
+    pvals.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
+    let n = pvals.len() as f64;
+    let mut padj = vec![0f64; pvals.len()];
+    for (rank, &(_, p)) in pvals.iter().enumerate() {
+        padj[rank] = (p * n / (rank + 1) as f64).min(1.0);
+    }
+    for i in (0..padj.len().saturating_sub(1)).rev() {
+        padj[i] = padj[i].min(padj[i + 1]);
+    }
+    let mut rescued = 0usize;
+    for (rank, &(cb, _)) in pvals.iter().enumerate() {
+        if padj[rank] <= fdr {
+            called.push(cb);
+            rescued += 1;
+        }
+    }
+    log::info!(
+        "EmptyDrops_CR: {n_simple} knee cells + {rescued} rescued (of {} candidates, FDR<={fdr})",
+        cand_cbs.len()
+    );
+    called.sort_unstable();
+    Ok(called)
+}
+
+/// Median of an ascending-sorted slice (0 if empty).
+fn median_sorted(sorted: &[u64]) -> u64 {
+    let n = sorted.len();
+    if n == 0 {
+        0
+    } else if n % 2 == 1 {
+        sorted[n / 2]
+    } else {
+        u64::midpoint(sorted[n / 2 - 1], sorted[n / 2])
+    }
+}
+
+/// Write the raw gene-count matrix + `Summary.csv` for a finished solo run.
+/// No-op (with a warning) when there is no explicit whitelist.
+pub fn write_gene_matrix(
+    ctx: &SoloContext,
+    params: &crate::params::Parameters,
+    align_stats: &crate::stats::AlignmentStats,
+    sj_stats: Option<&crate::junction::SpliceJunctionStats>,
+    genome: &crate::genome::Genome,
+) -> Result<(), Error> {
+    let CbWhitelist::List { sorted, .. } = &ctx.whitelist else {
+        log::warn!(
+            "STARsolo: --soloCBwhitelist None matrix output is not yet supported (Phase 14.4); skipping matrix"
+        );
+        return Ok(());
+    };
+
+    let method: UmiDedup = params
+        .solo_umi_dedup
+        .first()
+        .map_or("1MM_All", String::as_str)
+        .parse()
+        .unwrap_or(UmiDedup::OneMmAll);
+    let filtering: UmiFiltering = params
+        .solo_umi_filtering
+        .first()
+        .map_or("-", String::as_str)
+        .parse()
+        .unwrap_or(UmiFiltering::None);
+    // `*_pseudocounts` CB-match types add 1 to the posterior prior.
+    let pseudocount = if params.solo_cb_match_wl_type.contains("pseudocounts") {
+        1.0
+    } else {
+        0.0
+    };
+    let umi_len = params.solo_umi_len as usize;
+
+    let solo_dir = params
+        .solo_out_file_names
+        .first()
+        .cloned()
+        .unwrap_or_else(|| "Solo.out/".to_string());
+    let features_name = params
+        .solo_out_file_names
+        .get(1)
+        .cloned()
+        .unwrap_or_else(|| "features.tsv".to_string());
+    let barcodes_name = params
+        .solo_out_file_names
+        .get(2)
+        .cloned()
+        .unwrap_or_else(|| "barcodes.tsv".to_string());
+    let matrix_name = params
+        .solo_out_file_names
+        .get(3)
+        .cloned()
+        .unwrap_or_else(|| "matrix.mtx".to_string());
+
+    // Global mapping funnel (shared across features). The region tallies are
+    // CellRanger-style positional bins over uniquely-mapped reads, populated only
+    // when both Gene and GeneFull run (otherwise the split is unavailable).
+    use std::sync::atomic::Ordering;
+    let total_reads = align_stats.total_reads.load(Ordering::Relaxed);
+    let mapped_unique = align_stats.uniquely_mapped.load(Ordering::Relaxed);
+    let mapped_multi = align_stats.multi_mapped.load(Ordering::Relaxed);
+    let valid_barcodes = ctx.stats.yes_exact.load(Ordering::Relaxed)
+        + ctx.stats.yes_one_mm.load(Ordering::Relaxed)
+        + ctx.stats.yes_mult_mm.load(Ordering::Relaxed);
+    let reads_of = |f: crate::solo::SoloFeature| -> u64 {
+        ctx.features
+            .iter()
+            .position(|&x| x == f)
+            .map_or(0, |i| ctx.feature_reads[i].load(Ordering::Relaxed))
+    };
+    let have_funnel = ctx.features.contains(&crate::solo::SoloFeature::Gene)
+        && ctx.features.contains(&crate::solo::SoloFeature::GeneFull);
+    let region = have_funnel.then(|| RegionFunnel {
+        exonic: ctx.region_stats.exonic.load(Ordering::Relaxed),
+        intronic: ctx.region_stats.intronic.load(Ordering::Relaxed),
+        intergenic: ctx.region_stats.intergenic.load(Ordering::Relaxed),
+        antisense: ctx.region_stats.antisense.load(Ordering::Relaxed),
+    });
+
+    let gzip = matches!(params.solo_out_gzip.as_str(), "yes" | "Yes" | "true");
+    let n_genes = ctx.gene_ann.gene_ids.len();
+    let multi_methods = MultiMethod::parse_list(&params.solo_multi_mappers);
+
+    // One {prefix}{soloOutFileNames[0]}<feature>/{raw,filtered}/ per feature.
+    for (feature, recorder) in ctx.features.iter().zip(&ctx.recorders) {
+        let feature_dir = params.output_path(&format!("{solo_dir}{}/", feature.dir_name()));
+        let raw_dir = feature_dir.join("raw");
+        std::fs::create_dir_all(&raw_dir).map_err(|e| Error::io(e, &raw_dir))?;
+
+        // Stream the deduplicated counts into a shared temp body, then finalize
+        // the raw matrix (and the filtered one below) from it.
+        let (body, mstats) = build_matrix_body(
+            ctx,
+            recorder,
+            method,
+            filtering,
+            umi_len,
+            pseudocount,
+            &raw_dir,
+            n_genes,
+        )?;
+        write_features(&raw_dir.join(&features_name), &ctx.gene_ann.gene_ids, gzip)?;
+        write_barcodes(
+            &raw_dir.join(&barcodes_name),
+            &ctx.whitelist,
+            sorted.len(),
+            gzip,
+        )?;
+        finalize_matrix(
+            &body,
+            &raw_dir.join(&matrix_name),
+            gzip,
+            n_genes,
+            sorted.len(),
+            mstats.nnz,
+            None,
+        )?;
+        log::info!(
+            "STARsolo: wrote {}/raw matrix ({} genes × {} barcodes, {} entries){}",
+            feature.dir_name(),
+            n_genes,
+            sorted.len(),
+            mstats.nnz,
+            if gzip { " [gzip]" } else { "" },
+        );
+
+        // Filtered (cell-called) matrix per --soloCellFilter. EmptyDrops_CR runs
+        // the Monte-Carlo rescue (needs the per-cell profiles in the body).
+        let called = if params
+            .solo_cell_filter
+            .first()
+            .is_some_and(|m| m == "EmptyDrops_CR")
+        {
+            Some(emptydrops_called(
+                &mstats.cells,
+                &body,
+                n_genes,
+                &params.solo_cell_filter,
+            )?)
+        } else {
+            called_cells(&mstats.cells, &params.solo_cell_filter)
+        };
+        if let Some(cbs) = called
+            && !cbs.is_empty()
+        {
+            let filt_dir = feature_dir.join("filtered");
+            std::fs::create_dir_all(&filt_dir).map_err(|e| Error::io(e, &filt_dir))?;
+            let remap: HashMap<u32, u32> = cbs
+                .iter()
+                .enumerate()
+                .map(|(i, &cb)| (cb, i as u32 + 1))
+                .collect();
+            write_features(&filt_dir.join(&features_name), &ctx.gene_ann.gene_ids, gzip)?;
+            write_barcodes_subset(&filt_dir.join(&barcodes_name), &ctx.whitelist, &cbs, gzip)?;
+            let fnnz = finalize_matrix(
+                &body,
+                &filt_dir.join(&matrix_name),
+                gzip,
+                n_genes,
+                cbs.len(),
+                0,
+                Some(&remap),
+            )?;
+            log::info!(
+                "STARsolo: wrote {}/filtered matrix ({} cells, {} entries)",
+                feature.dir_name(),
+                cbs.len(),
+                fnnz,
+            );
+        }
+
+        // --soloMultiMappers: UniqueAndMult-<method>.mtx alongside raw.
+        if !multi_methods.is_empty() {
+            let mg = recorder.multi_gene.lock().unwrap();
+            build_multi_matrices(
+                &body,
+                &mg,
+                &multi_methods,
+                &raw_dir,
+                &matrix_name,
+                n_genes,
+                sorted.len(),
+                gzip,
+            )?;
+            log::info!(
+                "STARsolo: wrote {} UniqueAndMult matrices for {} ({} ambiguous reads)",
+                multi_methods.len(),
+                feature.dir_name(),
+                mg.len(),
+            );
+        }
+
+        write_summary(
+            &feature_dir.join("Summary.csv"),
+            feature.dir_name(),
+            &mstats,
+            total_reads,
+            valid_barcodes,
+            mapped_unique,
+            mapped_multi,
+            reads_of(*feature),
+            region,
+        )?;
+        log::info!("STARsolo: wrote {}/Summary.csv", feature.dir_name());
+    }
+
+    // SJ (splice-junction) feature: rows are the SJ.out.tab junctions.
+    if ctx.sj_enabled
+        && let Some(sjs) = sj_stats
+    {
+        let sj_dir = params.output_path(&format!("{solo_dir}SJ/raw/"));
+        std::fs::create_dir_all(&sj_dir).map_err(|e| Error::io(e, &sj_dir))?;
+        let order = sjs.sj_feature_order(params); // (intron_start, intron_end), row order
+        let row: HashMap<(u64, u64), u32> = order
+            .iter()
+            .enumerate()
+            .map(|(i, &k)| (k, i as u32))
+            .collect();
+        // features.tsv = the SJ.out.tab lines (same sorted order as the rows).
+        write_file(&sj_dir.join(&features_name), gzip, |w| {
+            sjs.write_sj_lines(w, genome, params).map(|_| ())
+        })?;
+        write_barcodes(
+            &sj_dir.join(&barcodes_name),
+            &ctx.whitelist,
+            sorted.len(),
+            gzip,
+        )?;
+        let umi_len = params.solo_umi_len as usize;
+        let nnz = build_sj_matrix(
+            &ctx.sj_records.lock().unwrap(),
+            &row,
+            method,
+            umi_len,
+            &sj_dir.join(&matrix_name),
+            order.len(),
+            sorted.len(),
+            gzip,
+        )?;
+        log::info!(
+            "STARsolo: wrote SJ/raw matrix ({} junctions × {} barcodes, {} entries)",
+            order.len(),
+            sorted.len(),
+            nnz,
+        );
+    }
+
+    // Velocyto feature: spliced / unspliced / ambiguous gene×cell matrices.
+    if ctx.velocyto_enabled {
+        let velo_dir = params.output_path(&format!("{solo_dir}Velocyto/raw/"));
+        std::fs::create_dir_all(&velo_dir).map_err(|e| Error::io(e, &velo_dir))?;
+        write_features(&velo_dir.join(&features_name), &ctx.gene_ann.gene_ids, gzip)?;
+        write_barcodes(
+            &velo_dir.join(&barcodes_name),
+            &ctx.whitelist,
+            sorted.len(),
+            gzip,
+        )?;
+        let umi_len = params.solo_umi_len as usize;
+        let nnz = build_velocyto_matrices(
+            &ctx.velocyto_records.lock().unwrap(),
+            method,
+            umi_len,
+            &velo_dir,
+            n_genes,
+            sorted.len(),
+            gzip,
+        )?;
+        log::info!(
+            "STARsolo: wrote Velocyto/raw matrices (spliced={} unspliced={} ambiguous={} entries)",
+            nnz[0],
+            nnz[1],
+            nnz[2],
+        );
+    }
+    Ok(())
+}
+
+/// Build the SJ feature matrix from (cell, UMI, junction) records, mapping each
+/// junction's absolute intron coords to its `SJ.out.tab` row and UMI-collapsing
+/// per (cell, junction). Junctions not in `row` (filtered out of SJ.out.tab) are
+/// dropped. Same MatrixMarket layout as the gene matrix (junctions are rows).
+#[allow(clippy::too_many_arguments)]
+fn build_sj_matrix(
+    records: &[crate::solo::SjCountRecord],
+    row: &HashMap<(u64, u64), u32>,
+    method: UmiDedup,
+    umi_len: usize,
+    matrix_path: &Path,
+    n_junctions: usize,
+    n_barcodes: usize,
+    gzip: bool,
+) -> Result<usize, Error> {
+    // Group by cell barcode (ascending column order).
+    let mut recs: Vec<&crate::solo::SjCountRecord> = records.iter().collect();
+    recs.sort_unstable_by_key(|r| r.cb);
+
+    let dir = matrix_path.parent().unwrap_or_else(|| Path::new("."));
+    let mut body_tmp = tempfile::Builder::new()
+        .prefix(".sj_body")
+        .tempfile_in(dir)
+        .map_err(|e| Error::io(e, dir))?;
+    let mut nnz = 0usize;
+    {
+        let mut body = std::io::BufWriter::new(body_tmp.as_file_mut());
+        let mut i = 0;
+        while i < recs.len() {
+            let cb = recs[i].cb;
+            // junction row → (umi → read count) for this cell.
+            let mut sj_umis: HashMap<u32, HashMap<u64, u32>> = HashMap::new();
+            while i < recs.len() && recs[i].cb == cb {
+                let r = recs[i];
+                if let Some(&rw) = row.get(&(r.intron_start, r.intron_end)) {
+                    *sj_umis.entry(rw).or_default().entry(r.umi).or_insert(0) += 1;
+                }
+                i += 1;
+            }
+            let mut entries: Vec<(u32, u64)> = sj_umis
+                .into_iter()
+                .map(|(rw, umis)| (rw, dedup_count(&umis, method, umi_len)))
+                .filter(|&(_, c)| c > 0)
+                .collect();
+            entries.sort_unstable_by_key(|&(rw, _)| rw);
+            for (rw, c) in entries {
+                writeln!(body, "{} {} {}", rw + 1, cb + 1, c).map_err(|e| Error::io(e, dir))?;
+                nnz += 1;
+            }
+        }
+        body.flush().map_err(|e| Error::io(e, dir))?;
+    }
+
+    write_file(matrix_path, gzip, |w| {
+        writeln!(w, "%%MatrixMarket matrix coordinate integer general")
+            .map_err(|e| Error::io(e, matrix_path))?;
+        writeln!(w, "%").map_err(|e| Error::io(e, matrix_path))?;
+        writeln!(w, "{n_junctions} {n_barcodes} {nnz}").map_err(|e| Error::io(e, matrix_path))?;
+        let mut r =
+            std::fs::File::open(body_tmp.path()).map_err(|e| Error::io(e, body_tmp.path()))?;
+        std::io::copy(&mut r, w).map_err(|e| Error::io(e, matrix_path))?;
+        Ok(())
+    })?;
+    Ok(nnz)
+}
+
+/// Build the three `Velocyto` matrices (`spliced`/`unspliced`/`ambiguous`) from
+/// (cell, UMI, gene, category) records. Per (cell, gene) each UMI is resolved to
+/// one category (priority unspliced > spliced > ambiguous — any intron evidence
+/// makes the molecule nascent), then UMI-deduplicated per category. Genes are
+/// rows, cells columns — same layout as the Gene matrix, written as three files
+/// scVelo/dynamo ingest directly.
+#[allow(clippy::too_many_arguments)]
+fn build_velocyto_matrices(
+    records: &[crate::solo::VelocytoRecord],
+    method: UmiDedup,
+    umi_len: usize,
+    dir: &Path,
+    n_genes: usize,
+    n_barcodes: usize,
+    gzip: bool,
+) -> Result<[usize; 3], Error> {
+    use crate::solo::VelocytoCategory;
+    // Category → matrix index (file order) and resolution priority.
+    let cat_idx = |c: VelocytoCategory| match c {
+        VelocytoCategory::Spliced => 0usize,
+        VelocytoCategory::Unspliced => 1,
+        VelocytoCategory::Ambiguous => 2,
+    };
+    let priority = |c: VelocytoCategory| match c {
+        VelocytoCategory::Unspliced => 2u8,
+        VelocytoCategory::Spliced => 1,
+        VelocytoCategory::Ambiguous => 0,
+    };
+    let names = ["spliced.mtx", "unspliced.mtx", "ambiguous.mtx"];
+
+    let mut recs: Vec<&crate::solo::VelocytoRecord> = records.iter().collect();
+    recs.sort_unstable_by_key(|r| r.cb);
+
+    let mut bodies: Vec<tempfile::NamedTempFile> = Vec::new();
+    for _ in 0..3 {
+        bodies.push(
+            tempfile::Builder::new()
+                .prefix(".velo_body")
+                .tempfile_in(dir)
+                .map_err(|e| Error::io(e, dir))?,
+        );
+    }
+    let mut nnz = [0usize; 3];
+    {
+        let mut writers: Vec<std::io::BufWriter<&mut std::fs::File>> = bodies
+            .iter_mut()
+            .map(|t| std::io::BufWriter::new(t.as_file_mut()))
+            .collect();
+        let mut i = 0;
+        while i < recs.len() {
+            let cb = recs[i].cb;
+            // gene → umi → (resolved category, read count)
+            let mut gene_umi: HashMap<u32, HashMap<u64, (VelocytoCategory, u32)>> = HashMap::new();
+            while i < recs.len() && recs[i].cb == cb {
+                let r = recs[i];
+                let e = gene_umi
+                    .entry(r.gene)
+                    .or_default()
+                    .entry(r.umi)
+                    .or_insert((r.category, 0));
+                e.1 += 1;
+                if priority(r.category) > priority(e.0) {
+                    e.0 = r.category;
+                }
+                i += 1;
+            }
+            // Per gene, dedup UMIs within each resolved category, emit entries.
+            let mut genes: Vec<&u32> = gene_umi.keys().collect();
+            genes.sort_unstable();
+            for &g in &genes {
+                let umis = &gene_umi[g];
+                let mut by_cat: [HashMap<u64, u32>; 3] =
+                    [HashMap::new(), HashMap::new(), HashMap::new()];
+                for (&umi, &(cat, rc)) in umis {
+                    by_cat[cat_idx(cat)].insert(umi, rc);
+                }
+                for (k, w) in writers.iter_mut().enumerate() {
+                    let c = dedup_count(&by_cat[k], method, umi_len);
+                    if c > 0 {
+                        writeln!(w, "{} {} {}", g + 1, cb + 1, c).map_err(|e| Error::io(e, dir))?;
+                        nnz[k] += 1;
+                    }
+                }
+            }
+        }
+        for w in &mut writers {
+            w.flush().map_err(|e| Error::io(e, dir))?;
+        }
+    }
+
+    for (k, body) in bodies.iter().enumerate() {
+        let path = dir.join(names[k]);
+        write_file(&path, gzip, |w| {
+            writeln!(w, "%%MatrixMarket matrix coordinate integer general")
+                .map_err(|e| Error::io(e, &path))?;
+            writeln!(w, "%").map_err(|e| Error::io(e, &path))?;
+            writeln!(w, "{n_genes} {n_barcodes} {}", nnz[k]).map_err(|e| Error::io(e, &path))?;
+            let mut r = std::fs::File::open(body.path()).map_err(|e| Error::io(e, body.path()))?;
+            std::io::copy(&mut r, w).map_err(|e| Error::io(e, &path))?;
+            Ok(())
+        })?;
+    }
+    Ok(nnz)
+}
+
+/// CellRanger-style positional mapping bins over uniquely-mapped reads.
+#[derive(Clone, Copy)]
+struct RegionFunnel {
+    exonic: u64,
+    intronic: u64,
+    intergenic: u64,
+    antisense: u64,
+}
+
+/// Write a CellRanger/STARsolo-style `Summary.csv` for one feature: the
+/// sequencing/mapping funnel (genome → exonic → intronic → intergenic, antisense)
+/// plus per-cell UMI/gene statistics over the CR2.2-knee-called cells.
+#[allow(clippy::too_many_arguments)]
+fn write_summary(
+    path: &Path,
+    feature_name: &str,
+    mstats: &MatrixStats,
+    total_reads: u64,
+    valid_barcodes: u64,
+    mapped_unique: u64,
+    mapped_multi: u64,
+    feature_mapped: u64,
+    region: Option<RegionFunnel>,
+) -> Result<(), Error> {
+    let frac = |num: u64| -> f64 {
+        if total_reads == 0 {
+            0.0
+        } else {
+            num as f64 / total_reads as f64
+        }
+    };
+
+    // Cell calling: CR2.2 knee on per-barcode UMI totals.
+    let mut umis_desc: Vec<u64> = mstats.cells.iter().map(|c| c.n_umis).collect();
+    umis_desc.sort_unstable_by(|a, b| b.cmp(a));
+    let thr = knee_cr22(&umis_desc, 3000, 0.99, 10.0);
+    let cells: Vec<&CellStat> = mstats.cells.iter().filter(|c| c.n_umis >= thr).collect();
+    let n_cells = cells.len();
+
+    // Totals across all barcodes (for sequencing saturation + fraction-in-cells).
+    let total_reads_counted: u64 = mstats.cells.iter().map(|c| c.n_reads).sum();
+    let total_umis_all: u64 = mstats.cells.iter().map(|c| c.n_umis).sum();
+    let saturation = if total_reads_counted > 0 {
+        1.0 - total_umis_all as f64 / total_reads_counted as f64
+    } else {
+        0.0
+    };
+
+    // Per-cell aggregates over called cells.
+    let reads_in_cells: u64 = cells.iter().map(|c| c.n_reads).sum();
+    let umis_in_cells: u64 = cells.iter().map(|c| c.n_umis).sum();
+    let mut reads_sorted: Vec<u64> = cells.iter().map(|c| c.n_reads).collect();
+    let mut umis_sorted: Vec<u64> = cells.iter().map(|c| c.n_umis).collect();
+    let mut genes_sorted: Vec<u64> = cells.iter().map(|c| c.n_genes as u64).collect();
+    reads_sorted.sort_unstable();
+    umis_sorted.sort_unstable();
+    genes_sorted.sort_unstable();
+    let mean = |sum: u64| -> u64 {
+        if n_cells == 0 {
+            0
+        } else {
+            sum / n_cells as u64
+        }
+    };
+
+    use std::fmt::Write as _;
+    let mut out = String::new();
+    let mut row = |k: &str, v: String| {
+        let _ = writeln!(out, "{k},{v}");
+    };
+    row("Number of Reads", total_reads.to_string());
+    row(
+        "Reads With Valid Barcodes",
+        format!("{:.6}", frac(valid_barcodes)),
+    );
+    row("Sequencing Saturation", format!("{saturation:.6}"));
+    row(
+        "Reads Mapped to Genome: Unique+Multiple",
+        format!("{:.6}", frac(mapped_unique + mapped_multi)),
+    );
+    row(
+        "Reads Mapped to Genome: Unique",
+        format!("{:.6}", frac(mapped_unique)),
+    );
+    row(
+        &format!("Reads Mapped to {feature_name}: Unique {feature_name}"),
+        format!("{:.6}", frac(feature_mapped)),
+    );
+    // CellRanger-style positional funnel over uniquely-mapped reads (each region
+    // counted by where the read falls, independent of strand; antisense is a
+    // separate orientation metric). Available only with Gene + GeneFull.
+    if let Some(r) = region {
+        row(
+            "Reads Mapped Confidently to Exonic Regions",
+            format!("{:.6}", frac(r.exonic)),
+        );
+        row(
+            "Reads Mapped Confidently to Intronic Regions",
+            format!("{:.6}", frac(r.intronic)),
+        );
+        row(
+            "Reads Mapped Confidently to Intergenic Regions",
+            format!("{:.6}", frac(r.intergenic)),
+        );
+        row(
+            "Reads Mapped Antisense to Gene",
+            format!("{:.6}", frac(r.antisense)),
+        );
+    }
+    row("Estimated Number of Cells", n_cells.to_string());
+    row(
+        &format!("Unique Reads in Cells Mapped to {feature_name}"),
+        reads_in_cells.to_string(),
+    );
+    row(
+        "Fraction of Unique Reads in Cells",
+        format!(
+            "{:.6}",
+            if total_reads_counted > 0 {
+                reads_in_cells as f64 / total_reads_counted as f64
+            } else {
+                0.0
+            }
+        ),
+    );
+    row("Mean Reads per Cell", mean(reads_in_cells).to_string());
+    row(
+        "Median Reads per Cell",
+        median_sorted(&reads_sorted).to_string(),
+    );
+    row("UMIs in Cells", umis_in_cells.to_string());
+    row("Mean UMI per Cell", mean(umis_in_cells).to_string());
+    row(
+        "Median UMI per Cell",
+        median_sorted(&umis_sorted).to_string(),
+    );
+    row(
+        &format!("Mean {feature_name} per Cell"),
+        mean(genes_sorted.iter().sum()).to_string(),
+    );
+    row(
+        &format!("Median {feature_name} per Cell"),
+        median_sorted(&genes_sorted).to_string(),
+    );
+    row(
+        &format!("Total {feature_name} Detected"),
+        mstats.genes_detected.to_string(),
+    );
+
+    std::fs::write(path, out).map_err(|e| Error::io(e, path))?;
+    Ok(())
+}
+
+/// `features.tsv`: `gene_id <TAB> gene_name <TAB> "Gene Expression"` (CellRanger
+/// v3 layout). We have no gene names, so the id is repeated.
+fn write_features(path: &Path, gene_ids: &[String], gzip: bool) -> Result<(), Error> {
+    write_file(path, gzip, |w| {
+        for id in gene_ids {
+            writeln!(w, "{id}\t{id}\tGene Expression").map_err(|e| Error::io(e, path))?;
+        }
+        Ok(())
+    })?;
+    Ok(())
+}
+
+/// Unpack `cb` into `line` (with trailing newline) and write it.
+fn write_one_barcode(
+    w: &mut dyn std::io::Write,
+    whitelist: &CbWhitelist,
+    cb: u32,
+    line: &mut Vec<u8>,
+    path: &Path,
+) -> Result<(), Error> {
+    line.clear();
+    whitelist.unpack_barcode_into(cb, line);
+    line.push(b'\n');
+    w.write_all(line).map_err(|e| Error::io(e, path))
+}
+
+/// `barcodes.tsv`: full whitelist in sorted order (matches the raw matrix
+/// columns). Lists millions of lines, so the writer is buffered and the barcode
+/// is unpacked into a reused scratch buffer (no per-line allocation).
+fn write_barcodes(path: &Path, whitelist: &CbWhitelist, n: usize, gzip: bool) -> Result<(), Error> {
+    let len = whitelist.barcode_len();
+    write_file(path, gzip, |w| {
+        let mut line: Vec<u8> = Vec::with_capacity(len + 1);
+        for i in 0..n {
+            write_one_barcode(w, whitelist, i as u32, &mut line, path)?;
+        }
+        Ok(())
+    })?;
+    Ok(())
+}
+
+/// `barcodes.tsv` for the filtered matrix: only the called-cell barcodes, in the
+/// same (cb-ascending) order as the filtered matrix columns.
+fn write_barcodes_subset(
+    path: &Path,
+    whitelist: &CbWhitelist,
+    cbs: &[u32],
+    gzip: bool,
+) -> Result<(), Error> {
+    let len = whitelist.barcode_len();
+    write_file(path, gzip, |w| {
+        let mut line: Vec<u8> = Vec::with_capacity(len + 1);
+        for &cb in cbs {
+            write_one_barcode(w, whitelist, cb, &mut line, path)?;
+        }
+        Ok(())
+    })?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::io::fastq::encode_base;
+    use crate::solo::whitelist::pack_barcode;
+
+    #[test]
+    fn median_sorted_odd_even_empty() {
+        assert_eq!(median_sorted(&[]), 0);
+        assert_eq!(median_sorted(&[5]), 5);
+        assert_eq!(median_sorted(&[1, 2, 3]), 2);
+        assert_eq!(median_sorted(&[10, 20, 30, 40]), 25); // midpoint(20,30)
+    }
+
+    #[test]
+    fn distribute_multi_methods() {
+        // Unique counts: gene 0 has 4, gene 1 has none. One ambiguous molecule
+        // maps to {0,1}.
+        let u: HashMap<u32, f64> = [(0u32, 4.0)].into_iter().collect();
+        let mols = vec![vec![0u32, 1u32]];
+
+        // Uniform: +0.5 to each gene in the set.
+        let uni = distribute_multi(MultiMethod::Uniform, &u, &mols);
+        assert!((uni[&0] - 4.5).abs() < 1e-9);
+        assert!((uni[&1] - 0.5).abs() < 1e-9);
+
+        // PropUnique: all weight to gene 0 (gene 1 has 0 unique) → 5 / 0.
+        let pu = distribute_multi(MultiMethod::PropUnique, &u, &mols);
+        assert!((pu[&0] - 5.0).abs() < 1e-9);
+        assert!(pu.get(&1).copied().unwrap_or(0.0).abs() < 1e-9);
+
+        // EM converges to all weight on gene 0 as well.
+        let em = distribute_multi(MultiMethod::Em, &u, &mols);
+        assert!((em[&0] - 5.0).abs() < 1e-6);
+        assert!(em.get(&1).copied().unwrap_or(0.0).abs() < 1e-6);
+
+        // With no unique evidence, PropUnique falls back to uniform.
+        let empty: HashMap<u32, f64> = HashMap::new();
+        let pu0 = distribute_multi(MultiMethod::PropUnique, &empty, &mols);
+        assert!((pu0[&0] - 0.5).abs() < 1e-9 && (pu0[&1] - 0.5).abs() < 1e-9);
+    }
+
+    #[test]
+    fn called_cells_methods() {
+        let mk = |cb, u| CellStat {
+            cb,
+            n_reads: u,
+            n_umis: u,
+            n_genes: 1,
+        };
+        let cells = vec![mk(5, 1000), mk(2, 900), mk(8, 50), mk(1, 40)];
+        let s = |v: &[&str]| v.iter().map(ToString::to_string).collect::<Vec<_>>();
+
+        // TopCells 2: the two highest-UMI cells (cb 5, 2), returned cb-ascending.
+        assert_eq!(
+            called_cells(&cells, &s(&["TopCells", "2"])).unwrap(),
+            vec![2, 5]
+        );
+        // None: no filtered output.
+        assert!(called_cells(&cells, &s(&["None"])).is_none());
+        // CellRanger2.2: called cbs are sorted ascending.
+        let cr = called_cells(&cells, &s(&["CellRanger2.2", "3000", "0.99", "10"])).unwrap();
+        assert!(cr.windows(2).all(|w| w[0] < w[1]));
+        // EmptyDrops_CR falls back to the same knee here.
+        assert_eq!(
+            called_cells(&cells, &s(&["EmptyDrops_CR", "3000", "0.99", "10"])),
+            Some(cr)
+        );
+    }
+
+    #[test]
+    fn knee_cr22_threshold() {
+        // 100 cells at 1000 UMI, then a long ambient tail at 10.
+        let mut umis: Vec<u64> = vec![1000; 100];
+        umis.extend(std::iter::repeat_n(10u64, 5000));
+        umis.sort_unstable_by(|a, b| b.cmp(a));
+        // robust max = umis[round(3000*0.01)] = umis[30] = 1000; thr = 1000/10 = 100.
+        let thr = knee_cr22(&umis, 3000, 0.99, 10.0);
+        assert_eq!(thr, 100);
+        let cells = umis.iter().filter(|&&u| u >= thr).count();
+        assert_eq!(cells, 100); // the 100 real cells, none of the ambient tail
+    }
+
+    fn umi(s: &str) -> u64 {
+        match pack_barcode(&s.bytes().map(encode_base).collect::<Vec<_>>()) {
+            crate::solo::whitelist::PackResult::NoN(p) => p,
+            _ => panic!("N in test UMI"),
+        }
+    }
+
+    fn counts(pairs: &[(&str, u32)]) -> HashMap<u64, u32> {
+        pairs.iter().map(|&(s, c)| (umi(s), c)).collect()
+    }
+
+    #[test]
+    fn dedup_method_parsing() {
+        assert_eq!("1MM_All".parse::<UmiDedup>().unwrap(), UmiDedup::OneMmAll);
+        assert_eq!("Exact".parse::<UmiDedup>().unwrap(), UmiDedup::Exact);
+        assert_eq!("NoDedup".parse::<UmiDedup>().unwrap(), UmiDedup::NoDedup);
+        assert!("bogus".parse::<UmiDedup>().is_err());
+    }
+
+    #[test]
+    fn exact_counts_distinct_umis() {
+        let c = counts(&[("AAAA", 3), ("AAAC", 1), ("TTTT", 5)]);
+        assert_eq!(dedup_count(&c, UmiDedup::Exact, 4), 3);
+    }
+
+    #[test]
+    fn nodedup_sums_reads() {
+        let c = counts(&[("AAAA", 3), ("AAAC", 1), ("TTTT", 5)]);
+        assert_eq!(dedup_count(&c, UmiDedup::NoDedup, 4), 9);
+    }
+
+    #[test]
+    fn one_mm_all_merges_neighbors() {
+        // AAAA–AAAC are Hamming-1 (one component); TTTT separate → 2 molecules.
+        let c = counts(&[("AAAA", 3), ("AAAC", 1), ("TTTT", 5)]);
+        assert_eq!(dedup_count(&c, UmiDedup::OneMmAll, 4), 2);
+    }
+
+    #[test]
+    fn one_mm_all_transitive_chain() {
+        // AAAA–AAAC–AACC chain: all one component even though AAAA/AACC are 2 apart.
+        let c = counts(&[("AAAA", 1), ("AAAC", 1), ("AACC", 1)]);
+        assert_eq!(dedup_count(&c, UmiDedup::OneMmAll, 4), 1);
+    }
+
+    #[test]
+    fn directional_absorbs_low_count_neighbor() {
+        // hub AAAA count 5 absorbs AAAC count 1 (5 >= 2*1+0); TTTT survives.
+        let c = counts(&[("AAAA", 5), ("AAAC", 1), ("TTTT", 5)]);
+        assert_eq!(dedup_count(&c, UmiDedup::OneMmDirectional, 4), 2);
+        // Equal counts are NOT absorbed (5 >= 2*5 is false).
+        let c2 = counts(&[("AAAA", 5), ("AAAC", 5)]);
+        assert_eq!(dedup_count(&c2, UmiDedup::OneMmDirectional, 4), 2);
+    }
+
+    #[test]
+    fn directional_umitools_threshold() {
+        // count_hub >= 2*leaf - 1: hub 3 absorbs leaf 2 (3 >= 3). Directional(0)
+        // would not (3 >= 4 false).
+        let c = counts(&[("AAAA", 3), ("AAAC", 2)]);
+        assert_eq!(dedup_count(&c, UmiDedup::OneMmDirectionalUmiTools, 4), 1);
+        assert_eq!(dedup_count(&c, UmiDedup::OneMmDirectional, 4), 2);
+    }
+
+    #[test]
+    fn cellranger_1mm_collapses_neighbor() {
+        // AAAA (5) and AAAC (1) are 1MM → low-count corrected to high-count →
+        // 1 molecule. TTTT separate → 2 total.
+        let c = counts(&[("AAAA", 5), ("AAAC", 1), ("TTTT", 5)]);
+        assert_eq!(dedup_count(&c, UmiDedup::OneMmCr, 4), 2);
+        assert_eq!("1MM_CR".parse::<UmiDedup>().unwrap(), UmiDedup::OneMmCr);
+    }
+
+    #[test]
+    fn cellranger_1mm_non_transitive() {
+        // Chain AAAA(1)–AAAC(2)–AACC(4): each corrects to its highest-count 1MM
+        // neighbor. AAAA→AAAC (only neighbor), AAAC→AACC, AACC→self. Corrected
+        // set {AAAC, AACC, AACC} → 2 molecules (NOT 1 like the transitive All).
+        let c = counts(&[("AAAA", 1), ("AAAC", 2), ("AACC", 4)]);
+        assert_eq!(dedup_count(&c, UmiDedup::OneMmCr, 4), 2);
+        assert_eq!(dedup_count(&c, UmiDedup::OneMmAll, 4), 1);
+    }
+
+    #[test]
+    fn umi_filtering_parsing() {
+        assert_eq!("-".parse::<UmiFiltering>().unwrap(), UmiFiltering::None);
+        assert_eq!(
+            "MultiGeneUMI_CR".parse::<UmiFiltering>().unwrap(),
+            UmiFiltering::MultiGeneUmiCr
+        );
+        assert!("bogus".parse::<UmiFiltering>().is_err());
+    }
+
+    #[test]
+    fn multi_gene_umi_cr_keeps_top_gene() {
+        // UMI maps to gene 0 (3 reads) and gene 1 (1 read). CR keeps only gene 0.
+        let mut genes = HashMap::new();
+        genes.insert(0u32, 3u32);
+        genes.insert(1u32, 1u32);
+        let kept = filter_multi_gene_umi(&genes, UmiFiltering::MultiGeneUmiCr);
+        assert_eq!(kept.len(), 1);
+        assert_eq!(*kept[0].0, 0);
+        // Plain MultiGeneUMI with all-singletons drops the UMI entirely.
+        let mut single = HashMap::new();
+        single.insert(0u32, 1u32);
+        single.insert(1u32, 1u32);
+        assert_eq!(
+            filter_multi_gene_umi(&single, UmiFiltering::MultiGeneUmi).len(),
+            0
+        );
+    }
+
+    #[test]
+    fn resolve_multi_prefers_higher_prior() {
+        use crate::solo::whitelist::CbCandidate;
+        let cands = vec![
+            CbCandidate {
+                wl_index: 0,
+                mismatch_pos: 1,
+                mismatch_qual: b'I',
+            },
+            CbCandidate {
+                wl_index: 1,
+                mismatch_pos: 2,
+                mismatch_qual: b'I',
+            },
+        ];
+        // Same quality → higher exact-count prior wins.
+        assert_eq!(resolve_multi_cb(&cands, &[10, 3], 0.0), Some(0));
+        assert_eq!(resolve_multi_cb(&cands, &[3, 10], 0.0), Some(1));
+        // No prior signal and no pseudocount → rejected.
+        assert_eq!(resolve_multi_cb(&cands, &[0, 0], 0.0), None);
+        // Pseudocount gives every candidate positive weight → argmax accepted.
+        assert!(resolve_multi_cb(&cands, &[0, 0], 1.0).is_some());
+    }
+}
diff --git a/src/solo/gene.rs b/src/solo/gene.rs
new file mode 100644
index 0000000..c4d45b2
--- /dev/null
+++ b/src/solo/gene.rs
@@ -0,0 +1,529 @@
+//! Per-read gene assignment for the STARsolo `Gene` feature (Phase 14.3).
+//!
+//! A read is assigned to a gene by intersecting the gene model with the read's
+//! alignment(s). Following STARsolo's `Gene` feature under the default
+//! `--soloMultiMappers Unique`, the read's gene set is the UNION of genes
+//! concordant with any of its alignments (strand-filtered by `--soloStrand`):
+//! exactly one gene → assigned; zero → no feature; more than one → ambiguous.
+//! A multi-locus read whose loci all fall in the same gene is therefore still
+//! gene-unique, unlike `--quantMode GeneCounts` which drops all multimappers.
+
+use crate::align::transcript::Transcript;
+use crate::quant::GeneAnnotation;
+use std::cell::RefCell;
+use std::str::FromStr;
+
+/// `--soloStrand`: orientation of the cDNA read relative to its gene.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
+pub enum SoloStrand {
+    /// Read maps to the sense (same) strand as the gene (10x 3'/5', default).
+    #[default]
+    Forward,
+    /// Read maps to the antisense (opposite) strand.
+    Reverse,
+    /// Strand is ignored.
+    Unstranded,
+}
+
+impl FromStr for SoloStrand {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "Forward" => Ok(Self::Forward),
+            "Reverse" => Ok(Self::Reverse),
+            "Unstranded" => Ok(Self::Unstranded),
+            _ => Err(format!(
+                "unknown soloStrand '{s}'; expected Forward, Reverse, or Unstranded"
+            )),
+        }
+    }
+}
+
+/// A STARsolo `--soloFeatures` value that quantifies genes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum SoloFeature {
+    /// Exonic counting: a read counts toward a gene only if it overlaps an exon.
+    Gene,
+    /// Full gene-body counting (CellRanger `include-introns`): a read counts if
+    /// it overlaps the gene locus, including purely intronic reads.
+    GeneFull,
+}
+
+impl SoloFeature {
+    /// Output sub-directory name (`Solo.out/<dir>/raw/`).
+    pub fn dir_name(self) -> &'static str {
+        match self {
+            SoloFeature::Gene => "Gene",
+            SoloFeature::GeneFull => "GeneFull",
+        }
+    }
+}
+
+impl FromStr for SoloFeature {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "Gene" => Ok(Self::Gene),
+            "GeneFull" => Ok(Self::GeneFull),
+            _ => Err(format!(
+                "unsupported soloFeature '{s}'; supported: Gene, GeneFull"
+            )),
+        }
+    }
+}
+
+/// Outcome of assigning a read to a gene.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum GeneAssignment {
+    /// Concordant with exactly one gene (the assigned gene index).
+    Gene(u32),
+    /// Mapped but overlaps no gene on the selected strand.
+    NoFeature,
+    /// Overlaps more than one gene → not uniquely assignable.
+    Ambiguous,
+    /// Read did not map (no transcripts / too many loci).
+    Unmapped,
+}
+
+/// Whether gene `g` is kept for read alignment `tr` under `strand`.
+#[inline]
+fn strand_keeps(strand: SoloStrand, gene_is_reverse: bool, read_is_reverse: bool) -> bool {
+    match strand {
+        SoloStrand::Unstranded => true,
+        SoloStrand::Forward => gene_is_reverse == read_is_reverse,
+        SoloStrand::Reverse => gene_is_reverse != read_is_reverse,
+    }
+}
+
+/// RNA-velocity read category (Sullivan et al. 2025 mature/nascent/ambiguous,
+/// reported as scVelo's spliced/unspliced/ambiguous).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum VelocytoCategory {
+    /// Spans an exon–exon junction → processed (mature) mRNA.
+    Spliced,
+    /// No junction, but a block extends into an intron → nascent mRNA.
+    Unspliced,
+    /// No junction, all blocks wholly within exons → origin indistinguishable.
+    Ambiguous,
+}
+
+/// Classify a uniquely-mapped read (assigned to gene `g` by gene-body overlap)
+/// into its velocity category from the alignment: a splice in the CIGAR means
+/// the read is mature; otherwise an aligned block that leaves the exons (into an
+/// intron) means nascent; a wholly-exonic block is ambiguous.
+pub fn velocyto_category(
+    transcripts: &[Transcript],
+    gene_ann: &GeneAnnotation,
+    g: u32,
+) -> VelocytoCategory {
+    if transcripts.iter().any(|t| t.n_junction > 0) {
+        return VelocytoCategory::Spliced;
+    }
+    let g = g as usize;
+    let all_exonic = transcripts.iter().all(|t| {
+        t.exons
+            .iter()
+            .all(|e| gene_ann.block_is_exonic(g, e.genome_start, e.genome_end))
+    });
+    if all_exonic {
+        VelocytoCategory::Ambiguous
+    } else {
+        VelocytoCategory::Unspliced
+    }
+}
+
+/// CellRanger-style positional region of a uniquely-mapped read (independent of
+/// strand): which genomic region the read falls in.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Region {
+    /// Overlaps ≥1 annotated exon.
+    Exonic,
+    /// Overlaps a gene body but no exon (purely intronic).
+    Intronic,
+    /// Overlaps no gene body.
+    Intergenic,
+}
+
+/// Everything one read's alignment set tells us, computed in a single pass over
+/// the gene model (the two overlap queries are shared between the per-feature
+/// gene assignment and the region classification, so this costs no more than the
+/// old two `assign_gene_se` calls).
+#[derive(Debug, Clone)]
+pub struct ReadClass {
+    /// Sense-strand exonic gene assignment (the `Gene` feature). `Unmapped` if
+    /// exon overlap was not requested.
+    pub gene: GeneAssignment,
+    /// Sense-strand gene-body assignment (the `GeneFull` feature). `Unmapped` if
+    /// body overlap was not requested.
+    pub gene_full: GeneAssignment,
+    /// Positional region (only when both exon + body overlap were computed).
+    pub region: Option<Region>,
+    /// Read maps to a gene body on the antisense strand and to none on the sense
+    /// strand (CellRanger's "Reads Mapped Antisense to Gene").
+    pub antisense: bool,
+    /// Multi-gene set for the `Gene` feature (the sense exon genes), populated
+    /// only when `want_multi` and the read is gene-ambiguous (>1 gene). Used by
+    /// `--soloMultiMappers` to distribute the read; empty otherwise.
+    pub gene_multi: Vec<u32>,
+    /// Multi-gene set for the `GeneFull` feature (sense body genes).
+    pub gene_full_multi: Vec<u32>,
+}
+
+fn assignment_of(sense_genes: &[usize]) -> GeneAssignment {
+    match sense_genes.len() {
+        0 => GeneAssignment::NoFeature,
+        1 => GeneAssignment::Gene(sense_genes[0] as u32),
+        _ => GeneAssignment::Ambiguous,
+    }
+}
+
+/// Classify a read in one pass: sense-strand `Gene`/`GeneFull` assignments plus
+/// the CellRanger-style positional region + antisense flag. `want_exon` /
+/// `want_body` skip the corresponding overlap query when a feature is not needed.
+pub fn classify_read(
+    transcripts: &[Transcript],
+    gene_ann: &GeneAnnotation,
+    strand: SoloStrand,
+    want_exon: bool,
+    want_body: bool,
+    want_multi: bool,
+) -> ReadClass {
+    if transcripts.is_empty() {
+        return ReadClass {
+            gene: GeneAssignment::Unmapped,
+            gene_full: GeneAssignment::Unmapped,
+            region: None,
+            antisense: false,
+            gene_multi: Vec::new(),
+            gene_full_multi: Vec::new(),
+        };
+    }
+
+    thread_local! {
+        static RAW: RefCell<Vec<usize>> = const { RefCell::new(Vec::new()) };
+        static EXON_S: RefCell<Vec<usize>> = const { RefCell::new(Vec::new()) };
+        static BODY_S: RefCell<Vec<usize>> = const { RefCell::new(Vec::new()) };
+    }
+
+    RAW.with(|rb| {
+        EXON_S.with(|eb| {
+            BODY_S.with(|bb| {
+                let mut raw = rb.borrow_mut();
+                let mut exon_s = eb.borrow_mut();
+                let mut body_s = bb.borrow_mut();
+                exon_s.clear();
+                body_s.clear();
+                // `*_any` track positional (either-strand) overlap for the region;
+                // `body_anti_any` tracks an antisense-only body hit.
+                let (mut exon_any, mut body_any, mut body_anti_any) = (false, false, false);
+
+                for tr in transcripts {
+                    if want_exon {
+                        gene_ann.overlapping_genes_into(tr, &mut raw);
+                        for &g in raw.iter() {
+                            exon_any = true;
+                            if strand_keeps(strand, gene_ann.gene_is_reverse[g], tr.is_reverse) {
+                                exon_s.push(g);
+                            }
+                        }
+                    }
+                    if want_body {
+                        gene_ann.overlapping_genes_full_into(tr, &mut raw);
+                        for &g in raw.iter() {
+                            body_any = true;
+                            if strand_keeps(strand, gene_ann.gene_is_reverse[g], tr.is_reverse) {
+                                body_s.push(g);
+                            } else {
+                                body_anti_any = true;
+                            }
+                        }
+                    }
+                }
+                exon_s.sort_unstable();
+                exon_s.dedup();
+                body_s.sort_unstable();
+                body_s.dedup();
+
+                let region = if want_exon && want_body {
+                    Some(if exon_any {
+                        Region::Exonic
+                    } else if body_any {
+                        Region::Intronic
+                    } else {
+                        Region::Intergenic
+                    })
+                } else {
+                    None
+                };
+
+                // Capture the multi-gene sets only when requested and ambiguous,
+                // for --soloMultiMappers distribution.
+                let gene_multi = if want_multi && want_exon && exon_s.len() > 1 {
+                    exon_s.iter().map(|&g| g as u32).collect()
+                } else {
+                    Vec::new()
+                };
+                let gene_full_multi = if want_multi && want_body && body_s.len() > 1 {
+                    body_s.iter().map(|&g| g as u32).collect()
+                } else {
+                    Vec::new()
+                };
+
+                ReadClass {
+                    gene: if want_exon {
+                        assignment_of(&exon_s)
+                    } else {
+                        GeneAssignment::Unmapped
+                    },
+                    gene_full: if want_body {
+                        assignment_of(&body_s)
+                    } else {
+                        GeneAssignment::Unmapped
+                    },
+                    region,
+                    antisense: body_anti_any && body_s.is_empty(),
+                    gene_multi,
+                    gene_full_multi,
+                }
+            })
+        })
+    })
+}
+
+/// Assign a single-end (cDNA) read to a gene from its alignment set, using the
+/// `Gene` (exonic) or `GeneFull` (gene-body, intron-inclusive) overlap basis.
+/// Thin wrapper over [`classify_read`] for the single-feature case (and tests).
+pub fn assign_gene_se(
+    transcripts: &[Transcript],
+    gene_ann: &GeneAnnotation,
+    strand: SoloStrand,
+    feature: SoloFeature,
+) -> GeneAssignment {
+    let want_exon = feature == SoloFeature::Gene;
+    let class = classify_read(transcripts, gene_ann, strand, want_exon, !want_exon, false);
+    match feature {
+        SoloFeature::Gene => class.gene,
+        SoloFeature::GeneFull => class.gene_full,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::align::transcript::{Exon, Transcript};
+    use crate::genome::Genome;
+    use crate::junction::gtf::GtfRecord;
+    use std::collections::HashMap;
+
+    fn genome() -> Genome {
+        Genome {
+            sequence: vec![0u8; 2000].into(),
+            n_genome: 2000,
+            n_genome_real: 2000,
+            n_chr_real: 1,
+            chr_start: vec![0, 1000],
+            chr_length: vec![1000],
+            chr_name: vec!["chr1".to_string()],
+        }
+    }
+
+    fn gtf_exon(start: u64, end: u64, strand: char, gene: &str) -> GtfRecord {
+        let mut attrs = HashMap::new();
+        attrs.insert("gene_id".to_string(), gene.to_string());
+        attrs.insert("transcript_id".to_string(), format!("{gene}_t1"));
+        GtfRecord {
+            seqname: "chr1".to_string(),
+            feature: "exon".to_string(),
+            start,
+            end,
+            strand,
+            attributes: attrs,
+        }
+    }
+
+    /// G1 (+) at 100-200, G2 (-) at 300-400.
+    fn annotation() -> GeneAnnotation {
+        let exons = vec![gtf_exon(100, 200, '+', "G1"), gtf_exon(300, 400, '-', "G2")];
+        GeneAnnotation::from_gtf_exons(&exons, &genome())
+    }
+
+    fn read_at(start: u64, end: u64, is_reverse: bool) -> Transcript {
+        Transcript {
+            chr_idx: 0,
+            genome_start: start,
+            genome_end: end,
+            is_reverse,
+            exons: vec![Exon {
+                genome_start: start,
+                genome_end: end,
+                read_start: 0,
+                read_end: (end - start) as usize,
+                i_frag: 0,
+            }],
+            cigar: Vec::new(),
+            score: 0,
+            n_mismatch: 0,
+            n_gap: 0,
+            n_junction: 0,
+            junction_motifs: Vec::new(),
+            junction_annotated: Vec::new(),
+            read_seq: Vec::new(),
+        }
+    }
+
+    #[test]
+    fn unmapped_when_no_transcripts() {
+        let ann = annotation();
+        assert_eq!(
+            assign_gene_se(&[], &ann, SoloStrand::Forward, SoloFeature::Gene),
+            GeneAssignment::Unmapped
+        );
+    }
+
+    #[test]
+    fn forward_sense_assigns_g1() {
+        let ann = annotation();
+        // Read on + strand overlapping G1 (a + gene).
+        let tr = read_at(120, 180, false);
+        match assign_gene_se(&[tr], &ann, SoloStrand::Forward, SoloFeature::Gene) {
+            GeneAssignment::Gene(g) => assert_eq!(ann.gene_ids[g as usize], "G1"),
+            other => panic!("expected G1, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn forward_antisense_is_no_feature() {
+        let ann = annotation();
+        // Read on - strand overlapping G1 (+): wrong strand under Forward.
+        let tr = read_at(120, 180, true);
+        assert_eq!(
+            assign_gene_se(&[tr], &ann, SoloStrand::Forward, SoloFeature::Gene),
+            GeneAssignment::NoFeature
+        );
+    }
+
+    #[test]
+    fn reverse_strand_picks_antisense() {
+        let ann = annotation();
+        // Read on - strand overlapping G1 (+): kept under Reverse.
+        let tr = read_at(120, 180, true);
+        match assign_gene_se(&[tr], &ann, SoloStrand::Reverse, SoloFeature::Gene) {
+            GeneAssignment::Gene(g) => assert_eq!(ann.gene_ids[g as usize], "G1"),
+            other => panic!("expected G1 under Reverse, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn no_overlap_is_no_feature() {
+        let ann = annotation();
+        let tr = read_at(500, 600, false);
+        assert_eq!(
+            assign_gene_se(&[tr], &ann, SoloStrand::Unstranded, SoloFeature::Gene),
+            GeneAssignment::NoFeature
+        );
+    }
+
+    #[test]
+    fn multilocus_same_gene_is_unique() {
+        let ann = annotation();
+        // Two loci both inside G1 → still gene-unique.
+        let a = read_at(110, 150, false);
+        let b = read_at(150, 190, false);
+        match assign_gene_se(&[a, b], &ann, SoloStrand::Forward, SoloFeature::Gene) {
+            GeneAssignment::Gene(g) => assert_eq!(ann.gene_ids[g as usize], "G1"),
+            other => panic!("expected G1, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn two_genes_unstranded_is_ambiguous() {
+        let ann = annotation();
+        // One locus in G1 (+), one in G2 (-); unstranded sees both.
+        let a = read_at(120, 180, false);
+        let b = read_at(320, 380, true);
+        assert_eq!(
+            assign_gene_se(&[a, b], &ann, SoloStrand::Unstranded, SoloFeature::Gene),
+            GeneAssignment::Ambiguous
+        );
+    }
+
+    #[test]
+    fn genefull_counts_intronic_read() {
+        // Two-exon gene G3 (+): exons [500,600) and [800,900) → gene body
+        // [500,900) with an intron at [600,800).
+        let g = genome();
+        let exons = vec![gtf_exon(501, 600, '+', "G3"), gtf_exon(801, 900, '+', "G3")];
+        let ann = GeneAnnotation::from_gtf_exons(&exons, &g);
+        // A read entirely inside the intron overlaps no exon...
+        assert_eq!(
+            assign_gene_se(
+                &[read_at(650, 700, false)],
+                &ann,
+                SoloStrand::Forward,
+                SoloFeature::Gene
+            ),
+            GeneAssignment::NoFeature
+        );
+        // ...but does overlap the gene body, so GeneFull counts it.
+        match assign_gene_se(
+            &[read_at(650, 700, false)],
+            &ann,
+            SoloStrand::Forward,
+            SoloFeature::GeneFull,
+        ) {
+            GeneAssignment::Gene(gi) => assert_eq!(ann.gene_ids[gi as usize], "G3"),
+            other => panic!("expected G3 under GeneFull, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn classify_read_regions_and_antisense() {
+        // Ga (+): exons [100,200) and [400,500) → body [100,500), intron [200,400).
+        let g = genome();
+        let exons = vec![gtf_exon(101, 200, '+', "Ga"), gtf_exon(401, 500, '+', "Ga")];
+        let ann = GeneAnnotation::from_gtf_exons(&exons, &g);
+        let cls = |start, end, rev| {
+            classify_read(
+                &[read_at(start, end, rev)],
+                &ann,
+                SoloStrand::Forward,
+                true,
+                true,
+                false,
+            )
+        };
+
+        // In an exon, sense strand → Exonic, not antisense.
+        let c = cls(120, 180, false);
+        assert_eq!(c.region, Some(Region::Exonic));
+        assert!(!c.antisense);
+        assert!(matches!(c.gene, GeneAssignment::Gene(_)));
+
+        // Entirely within the intron → Intronic (body but no exon).
+        assert_eq!(cls(250, 350, false).region, Some(Region::Intronic));
+
+        // Outside the gene → Intergenic.
+        assert_eq!(cls(700, 800, false).region, Some(Region::Intergenic));
+
+        // Exonic position but read on the opposite strand of a (+) gene:
+        // positionally Exonic, flagged antisense, no sense gene assignment.
+        let c = cls(120, 180, true);
+        assert_eq!(c.region, Some(Region::Exonic));
+        assert!(c.antisense);
+        assert_eq!(c.gene, GeneAssignment::NoFeature);
+
+        // No region computed when only one side requested.
+        assert_eq!(
+            classify_read(
+                &[read_at(120, 180, false)],
+                &ann,
+                SoloStrand::Forward,
+                true,
+                false,
+                false
+            )
+            .region,
+            None
+        );
+    }
+}
diff --git a/src/solo/mod.rs b/src/solo/mod.rs
new file mode 100644
index 0000000..91fee76
--- /dev/null
+++ b/src/solo/mod.rs
@@ -0,0 +1,957 @@
+//! STARsolo single-cell support (Phase 14).
+//!
+//! Phase 14.1 covers barcode-read input plumbing: parsing the cell barcode (CB)
+//! and unique molecular identifier (UMI) out of the barcode read for
+//! `--soloType CB_UMI_Simple` (droplet 10x-style geometry). Whitelist
+//! correction (14.2), gene assignment (14.3), UMI deduplication and matrix
+//! output (14.4+) build on the structures defined here.
+//!
+//! The barcode read is the SECOND `--readFilesIn` file (STAR convention:
+//! `--readFilesIn cDNA_read barcode_read`). It is never aligned — only parsed.
+
+pub mod count;
+pub mod gene;
+pub mod smartseq;
+pub mod whitelist;
+
+pub use count::{UmiDedup, UmiFiltering, write_gene_matrix};
+pub use gene::{
+    GeneAssignment, Region, SoloFeature, SoloStrand, VelocytoCategory, assign_gene_se,
+    classify_read, velocyto_category,
+};
+pub use whitelist::{
+    CbCandidate, CbMatch, CbMatchStats, CbMatchType, CbWhitelist, UmiCheck, check_umi, pack_barcode,
+};
+
+use crate::align::transcript::Transcript;
+use crate::error::Error;
+use crate::io::fastq::{EncodedRead, FastqReader, decode_base};
+use crate::params::{Parameters, SoloType};
+use crate::quant::GeneAnnotation;
+use std::path::Path;
+use std::sync::Mutex;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// Cell-barcode + UMI read geometry. `Simple` is a single fixed-position CB +
+/// UMI (`CB_UMI_Simple`); `Complex` assembles the CB from several fixed-position
+/// segments (`CB_UMI_Complex`). All offsets are 0-based.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum SoloBarcodeLayout {
+    Simple {
+        cb_start: usize,
+        cb_len: usize,
+        umi_start: usize,
+        umi_len: usize,
+    },
+    /// Multi-segment CB: each `(start, len)` is one segment, concatenated in
+    /// order to form the cell barcode; `umi = (start, len)`.
+    Complex {
+        cb_segments: Vec<(usize, usize)>,
+        umi: (usize, usize),
+    },
+}
+
+/// Parse a `--soloCBposition`/`--soloUMIposition` spec
+/// (`startAnchor_startDist_endAnchor_endDist`) into a 0-based `(start, len)`.
+/// Only read-start anchoring (`anchor = 0`) is supported.
+fn parse_position(spec: &str) -> Result<(usize, usize), Error> {
+    let f: Vec<&str> = spec.split('_').collect();
+    if f.len() != 4 {
+        return Err(invalid_pos(
+            spec,
+            "expected startAnchor_startDist_endAnchor_endDist",
+        ));
+    }
+    let (sa, sd, ea, ed) = (
+        f[0].parse::<i64>().ok(),
+        f[1].parse::<i64>().ok(),
+        f[2].parse::<i64>().ok(),
+        f[3].parse::<i64>().ok(),
+    );
+    match (sa, sd, ea, ed) {
+        (Some(0), Some(sd), Some(0), Some(ed)) if sd >= 0 && ed >= sd => {
+            Ok((sd as usize, (ed - sd + 1) as usize))
+        }
+        (Some(0), _, Some(0), _) => Err(invalid_pos(spec, "end < start")),
+        _ => Err(invalid_pos(
+            spec,
+            "only read-start anchoring (anchor=0) is supported",
+        )),
+    }
+}
+
+fn invalid_pos(spec: &str, why: &str) -> Error {
+    Error::from(std::io::Error::new(
+        std::io::ErrorKind::InvalidInput,
+        format!("invalid position spec '{spec}': {why}"),
+    ))
+}
+
+impl SoloBarcodeLayout {
+    /// Build the layout from CLI parameters. `CB_UMI_Complex` parses
+    /// `--soloCBposition`/`--soloUMIposition`; otherwise fixed Simple geometry.
+    pub fn from_params(params: &Parameters) -> Self {
+        if params.solo_type == SoloType::CbUmiComplex && !params.solo_cb_position.is_empty() {
+            let cb_segments = params
+                .solo_cb_position
+                .iter()
+                .filter_map(|s| parse_position(s).ok())
+                .collect();
+            let umi = parse_position(&params.solo_umi_position).unwrap_or((0, 0));
+            return Self::Complex { cb_segments, umi };
+        }
+        Self::Simple {
+            cb_start: (params.solo_cb_start.max(1) - 1) as usize,
+            cb_len: params.solo_cb_len as usize,
+            umi_start: (params.solo_umi_start.max(1) - 1) as usize,
+            umi_len: params.solo_umi_len as usize,
+        }
+    }
+
+    /// Minimum barcode-read length required to extract the CB and UMI.
+    pub fn min_read_len(&self) -> usize {
+        match self {
+            Self::Simple {
+                cb_start,
+                cb_len,
+                umi_start,
+                umi_len,
+            } => (cb_start + cb_len).max(umi_start + umi_len),
+            Self::Complex { cb_segments, umi } => cb_segments
+                .iter()
+                .map(|&(s, l)| s + l)
+                .chain(std::iter::once(umi.0 + umi.1))
+                .max()
+                .unwrap_or(0),
+        }
+    }
+
+    /// Extract the CB (concatenating segments for `Complex`) and UMI from one
+    /// barcode read. `None` if the read is shorter than [`Self::min_read_len`].
+    pub fn extract(&self, barcode_read: &EncodedRead) -> Option<CellBarcode> {
+        let seq = &barcode_read.sequence;
+        let qual = &barcode_read.quality;
+        if seq.len() < self.min_read_len() {
+            return None;
+        }
+        match self {
+            Self::Simple {
+                cb_start,
+                cb_len,
+                umi_start,
+                umi_len,
+            } => Some(CellBarcode {
+                cb_seq: seq[*cb_start..cb_start + cb_len].to_vec(),
+                cb_qual: slice_or_empty(qual, *cb_start, *cb_len),
+                umi_seq: seq[*umi_start..umi_start + umi_len].to_vec(),
+                umi_qual: slice_or_empty(qual, *umi_start, *umi_len),
+            }),
+            Self::Complex { cb_segments, umi } => {
+                let mut cb_seq = Vec::new();
+                let mut cb_qual = Vec::new();
+                for &(s, l) in cb_segments {
+                    cb_seq.extend_from_slice(&seq[s..s + l]);
+                    cb_qual.extend_from_slice(&slice_or_empty(qual, s, l));
+                }
+                Some(CellBarcode {
+                    cb_seq,
+                    cb_qual,
+                    umi_seq: seq[umi.0..umi.0 + umi.1].to_vec(),
+                    umi_qual: slice_or_empty(qual, umi.0, umi.1),
+                })
+            }
+        }
+    }
+}
+
+fn slice_or_empty(data: &[u8], start: usize, len: usize) -> Vec<u8> {
+    if start + len <= data.len() {
+        data[start..start + len].to_vec()
+    } else {
+        Vec::new()
+    }
+}
+
+/// A cell barcode + UMI extracted from one barcode read.
+///
+/// Sequences are stored in genome encoding (0=A, 1=C, 2=G, 3=T, 4=N) to match
+/// the rest of the pipeline; qualities are raw Phred+33 ASCII bytes.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct CellBarcode {
+    pub cb_seq: Vec<u8>,
+    pub cb_qual: Vec<u8>,
+    pub umi_seq: Vec<u8>,
+    pub umi_qual: Vec<u8>,
+}
+
+impl CellBarcode {
+    /// True if the cell barcode contains an `N` (encoded 4) — such barcodes
+    /// cannot match a whitelist exactly.
+    pub fn cb_has_n(&self) -> bool {
+        self.cb_seq.contains(&4)
+    }
+
+    /// True if the UMI contains an `N`. STARsolo discards reads whose UMI has
+    /// any ambiguous base.
+    pub fn umi_has_n(&self) -> bool {
+        self.umi_seq.contains(&4)
+    }
+
+    /// Decode the cell barcode to an ASCII `ACGTN` string (for CB SAM tags and
+    /// `barcodes.tsv`).
+    pub fn cb_string(&self) -> String {
+        decode_seq(&self.cb_seq)
+    }
+
+    /// Decode the UMI to an ASCII `ACGTN` string (for UB SAM tags).
+    pub fn umi_string(&self) -> String {
+        decode_seq(&self.umi_seq)
+    }
+}
+
+fn decode_seq(encoded: &[u8]) -> String {
+    encoded.iter().map(|&b| decode_base(b) as char).collect()
+}
+
+/// Reads cDNA reads and their paired barcode reads in lockstep from two FASTQ
+/// files. The cDNA read flows into the normal alignment path; the barcode read
+/// is parsed into a [`CellBarcode`] (or `None` when too short).
+pub struct SoloReadReader {
+    cdna: FastqReader,
+    barcode: FastqReader,
+    layout: SoloBarcodeLayout,
+}
+
+/// One cDNA read paired with its (optional) extracted barcode.
+pub struct SoloRead {
+    pub cdna: EncodedRead,
+    /// `None` when the barcode read was too short to extract CB+UMI.
+    pub barcode: Option<CellBarcode>,
+}
+
+impl SoloReadReader {
+    /// Open the cDNA and barcode FASTQ files for a solo run.
+    pub fn open(
+        cdna_path: &Path,
+        barcode_path: &Path,
+        layout: SoloBarcodeLayout,
+        decompress_cmd: Option<&str>,
+    ) -> Result<Self, Error> {
+        Ok(Self {
+            cdna: FastqReader::open(cdna_path, decompress_cmd)?,
+            barcode: FastqReader::open(barcode_path, decompress_cmd)?,
+            layout,
+        })
+    }
+
+    /// Fetch the next paired (cDNA, barcode) read. Errors if the two files
+    /// have different lengths.
+    pub fn next_read(&mut self) -> Result<Option<SoloRead>, Error> {
+        let cdna_opt = self.cdna.next_encoded()?;
+        let barcode_opt = self.barcode.next_encoded()?;
+        match (cdna_opt, barcode_opt) {
+            (Some(cdna), Some(bc)) => {
+                let barcode = self.layout.extract(&bc);
+                Ok(Some(SoloRead { cdna, barcode }))
+            }
+            (None, None) => Ok(None),
+            (Some(_), None) => Err(Error::from(std::io::Error::new(
+                std::io::ErrorKind::UnexpectedEof,
+                "solo: cDNA read file has more reads than the barcode read file",
+            ))),
+            (None, Some(_)) => Err(Error::from(std::io::Error::new(
+                std::io::ErrorKind::UnexpectedEof,
+                "solo: barcode read file has more reads than the cDNA read file",
+            ))),
+        }
+    }
+
+    /// Read up to `batch_size` paired reads for parallel processing.
+    pub fn read_batch(&mut self, batch_size: usize) -> Result<Vec<SoloRead>, Error> {
+        let mut batch = Vec::with_capacity(batch_size);
+        for _ in 0..batch_size {
+            match self.next_read()? {
+                Some(read) => batch.push(read),
+                None => break,
+            }
+        }
+        Ok(batch)
+    }
+}
+
+/// Build a [`SoloReadReader`] from parameters, resolving the cDNA/barcode files
+/// from `--readFilesIn`. Returns an error if solo is enabled but the read files
+/// are missing (validation should have caught this earlier).
+pub fn open_reader(params: &Parameters) -> Result<SoloReadReader, Error> {
+    debug_assert!(matches!(
+        params.solo_type,
+        SoloType::CbUmiSimple | SoloType::CbUmiComplex
+    ));
+    let cdna = params.cdna_read_file().ok_or_else(|| {
+        Error::from(std::io::Error::new(
+            std::io::ErrorKind::InvalidInput,
+            "solo: missing cDNA read file",
+        ))
+    })?;
+    let barcode = params.barcode_read_file().ok_or_else(|| {
+        Error::from(std::io::Error::new(
+            std::io::ErrorKind::InvalidInput,
+            "solo: missing barcode read file",
+        ))
+    })?;
+    let layout = SoloBarcodeLayout::from_params(params);
+    SoloReadReader::open(cdna, barcode, layout, params.read_files_command.as_deref())
+}
+
+// ---------------------------------------------------------------------------
+// CellRanger4 adapter clipping (--clipAdapterType CellRanger4)
+// ---------------------------------------------------------------------------
+
+/// The 10x template-switch oligo (TSO), clipped from the 5' of the cDNA read
+/// under `--clipAdapterType CellRanger4`. Encoded 0=A,1=C,2=G,3=T.
+const TSO_SEQ: &[u8] = b"AAGCAGTGGTATCAACGCAGAGTACATGGG";
+
+/// Clip the 10x TSO from the 5' end and trim a 3' polyA tail of the cDNA read,
+/// matching `--clipAdapterType CellRanger4`. Operates on encoded bases
+/// (0=A..3=T,4=N) with parallel quality bytes. Returns the clipped read.
+///
+/// Conservative thresholds (full-length TSO match ≤ 3 mismatches at the 5'
+/// anchor; trailing polyA run ≥ 8) keep this a no-op on adapter-free reads.
+pub fn clip_adapter_cr4(seq: &[u8], qual: &[u8]) -> (Vec<u8>, Vec<u8>) {
+    let mut start = 0usize;
+    let mut end = seq.len();
+
+    // 5' TSO: compare the read prefix against the full TSO; clip on a match.
+    if seq.len() >= TSO_SEQ.len() {
+        let tso: Vec<u8> = TSO_SEQ
+            .iter()
+            .map(|&b| crate::io::fastq::encode_base(b))
+            .collect();
+        let mismatches = seq[..tso.len()]
+            .iter()
+            .zip(&tso)
+            .filter(|(a, b)| a != b)
+            .count();
+        if mismatches <= 3 {
+            start = tso.len();
+        }
+    }
+
+    // 3' polyA: trim a trailing run of A (encoded 0) of length >= 8.
+    let mut run = 0usize;
+    while end > start && seq[end - 1] == 0 {
+        run += 1;
+        end -= 1;
+    }
+    if run < 8 {
+        end += run; // not a real polyA tail; keep those bases
+    }
+
+    if start == 0 && end == seq.len() {
+        return (seq.to_vec(), qual.to_vec());
+    }
+    (
+        seq[start..end].to_vec(),
+        qual.get(start..end.min(qual.len()))
+            .map(<[u8]>::to_vec)
+            .unwrap_or_default(),
+    )
+}
+
+// ---------------------------------------------------------------------------
+// Solo counting context + per-read processing (Phase 14.3)
+// ---------------------------------------------------------------------------
+
+/// A fully-resolved per-read count record: one (cell, UMI, gene) observation.
+/// These are collapsed by UMI per (cell, gene) into the count matrix (14.4).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct SoloCountRecord {
+    /// Sorted whitelist index of the cell barcode.
+    pub cb: u32,
+    /// 2-bit packed UMI.
+    pub umi: u64,
+    /// Assigned gene index.
+    pub gene: u32,
+}
+
+/// One (cell, UMI, splice-junction) observation for the `SJ` feature. The
+/// junction is identified by its absolute intron coordinates; it is mapped to a
+/// matrix row (the `SJ.out.tab` order) at output time.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct SjCountRecord {
+    pub cb: u32,
+    pub umi: u64,
+    pub intron_start: u64,
+    pub intron_end: u64,
+}
+
+/// One (cell, UMI, gene) observation for the `Velocyto` feature, tagged with the
+/// read's spliced/unspliced/ambiguous category.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct VelocytoRecord {
+    pub cb: u32,
+    pub umi: u64,
+    pub gene: u32,
+    pub category: VelocytoCategory,
+}
+
+/// A read whose cell barcode matched multiple whitelist entries by 1MM
+/// (`1MM_multi`). Resolution to a single CB needs the global exact-count table
+/// and is deferred to the collation stage (Phase 14.4).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SoloMultiRecord {
+    /// Candidate whitelist barcodes + mismatch quality.
+    pub candidates: Vec<CbCandidate>,
+    pub umi: u64,
+    pub gene: u32,
+}
+
+/// A read that mapped to multiple genes (gene-ambiguous). Distributed across its
+/// gene set by `--soloMultiMappers` into the `UniqueAndMult-*.mtx` matrices.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct MultiGeneRecord {
+    pub cb: u32,
+    pub umi: u64,
+    pub genes: Vec<u32>,
+}
+
+/// Thread-safe sink for the records produced during alignment.
+#[derive(Default)]
+pub struct SoloRecorder {
+    pub records: Mutex<Vec<SoloCountRecord>>,
+    pub multi_records: Mutex<Vec<SoloMultiRecord>>,
+    /// Gene-ambiguous reads for `--soloMultiMappers` (resolved CB only).
+    pub multi_gene: Mutex<Vec<MultiGeneRecord>>,
+}
+
+impl SoloRecorder {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Append a batch's records (called from the sequential write phase).
+    pub fn extend(&self, recs: Vec<SoloCountRecord>, multi: Vec<SoloMultiRecord>) {
+        if !recs.is_empty() {
+            self.records.lock().unwrap().extend(recs);
+        }
+        if !multi.is_empty() {
+            self.multi_records.lock().unwrap().extend(multi);
+        }
+    }
+
+    /// Number of fully-resolved count records collected so far.
+    pub fn n_records(&self) -> usize {
+        self.records.lock().unwrap().len()
+    }
+
+    /// Number of deferred multi-CB records collected so far.
+    pub fn n_multi_records(&self) -> usize {
+        self.multi_records.lock().unwrap().len()
+    }
+}
+
+/// Everything the alignment loop needs to quantify a solo run, shared as an
+/// `Arc` across rayon threads. The gene model is built from `--sjdbGTFfile`;
+/// the whitelist and stats are read concurrently (interior atomics).
+pub struct SoloContext {
+    pub layout: SoloBarcodeLayout,
+    pub whitelist: CbWhitelist,
+    pub match_type: CbMatchType,
+    pub strand: SoloStrand,
+    pub gene_ann: GeneAnnotation,
+    pub stats: CbMatchStats,
+    /// Quantified features (`Gene`, `GeneFull`, …), each with its own recorder
+    /// and `Solo.out/<feature>/raw/` output. Parallel to `recorders`.
+    pub features: Vec<SoloFeature>,
+    pub recorders: Vec<SoloRecorder>,
+    /// Reads uniquely assigned to a gene per feature (parallel to `features`),
+    /// among valid-barcode reads — the STARsolo "Reads Mapped to <feature>:
+    /// Unique" metric.
+    pub feature_reads: Vec<AtomicU64>,
+    /// CellRanger-style positional mapping funnel over uniquely-mapped reads
+    /// (independent of barcode), populated only when both `Gene` and `GeneFull`
+    /// features run.
+    pub region_stats: RegionStats,
+    /// `--soloFeatures SJ`: collect per-cell splice-junction counts.
+    pub sj_enabled: bool,
+    /// (cell, UMI, junction) observations for the SJ feature.
+    pub sj_records: Mutex<Vec<SjCountRecord>>,
+    /// `--soloFeatures Velocyto`: collect spliced/unspliced/ambiguous counts.
+    pub velocyto_enabled: bool,
+    /// (cell, UMI, gene, category) observations for the Velocyto feature.
+    pub velocyto_records: Mutex<Vec<VelocytoRecord>>,
+    /// `--soloMultiMappers` includes a non-`Unique` method → capture gene-
+    /// ambiguous reads for distribution into `UniqueAndMult-*.mtx`.
+    pub want_multi: bool,
+}
+
+/// Per-region read tallies for the `Summary.csv` mapping funnel (uniquely-mapped
+/// reads, mirroring CellRanger's "confidently mapped to ... regions").
+#[derive(Default)]
+pub struct RegionStats {
+    pub exonic: AtomicU64,
+    pub intronic: AtomicU64,
+    pub intergenic: AtomicU64,
+    pub antisense: AtomicU64,
+}
+
+/// What happened to one solo read — one `(record, multi)` per quantified
+/// feature, parallel to [`SoloContext::features`].
+#[derive(Debug, Default)]
+pub struct SoloReadOutcome {
+    pub per_feature: Vec<FeatureOutcome>,
+    /// SJ-feature records for this read (one per crossed junction); empty unless
+    /// `--soloFeatures SJ` and the read is uniquely mapped with a resolved CB.
+    pub sj: Vec<SjCountRecord>,
+    /// Velocyto record for this read (resolved CB, gene-assigned), if enabled.
+    pub velocyto: Option<VelocytoRecord>,
+}
+
+/// The record(s) one read produces for a single feature.
+#[derive(Debug, Default)]
+pub struct FeatureOutcome {
+    /// A resolved count record, if the read was fully assignable.
+    pub record: Option<SoloCountRecord>,
+    /// A deferred multi-CB record, if the CB was an unresolved 1MM_multi.
+    pub multi: Option<SoloMultiRecord>,
+    /// A gene-ambiguous record (resolved CB), for `--soloMultiMappers`.
+    pub multi_gene: Option<MultiGeneRecord>,
+}
+
+impl SoloContext {
+    /// Build the solo context from parameters: load the whitelist and build the
+    /// gene model from `--sjdbGTFfile`. Call once before alignment.
+    pub fn build(params: &Parameters, genome: &crate::genome::Genome) -> Result<Self, Error> {
+        let whitelist = if params.solo_type == SoloType::CbUmiComplex {
+            // One whitelist per CB segment → combined cartesian-product whitelist.
+            let paths: Vec<std::path::PathBuf> = params
+                .solo_cb_whitelist
+                .iter()
+                .map(std::path::PathBuf::from)
+                .collect();
+            log::info!(
+                "STARsolo CB_UMI_Complex: combining {} segment whitelists",
+                paths.len()
+            );
+            let wl = CbWhitelist::load_complex(&paths)?;
+            log::info!("STARsolo: {} combined whitelist barcodes", wl.len());
+            wl
+        } else {
+            match params.solo_cb_whitelist_path() {
+                Some(path) => {
+                    log::info!(
+                        "STARsolo: loading cell-barcode whitelist from {}",
+                        path.display()
+                    );
+                    let wl = CbWhitelist::load(&path)?;
+                    log::info!("STARsolo: {} whitelist barcodes loaded", wl.len());
+                    wl
+                }
+                None => CbWhitelist::NoWhitelist {
+                    len: params.solo_cb_len as usize,
+                },
+            }
+        };
+
+        // Gene model from the GTF (validated to be present for Gene/GeneFull).
+        let gtf_path = params.sjdb_gtf_file.as_ref().ok_or_else(|| {
+            Error::from(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                "STARsolo Gene feature requires --sjdbGTFfile",
+            ))
+        })?;
+        let exons = crate::junction::gtf::parse_gtf_configured(
+            gtf_path,
+            &params.sjdb_gtf_feature_exon,
+            &params.sjdb_gtf_chr_prefix,
+        )?;
+        let gene_ann = GeneAnnotation::from_gtf_exons_configured(
+            &exons,
+            genome,
+            &params.sjdb_gtf_tag_exon_parent_gene,
+        );
+        log::info!(
+            "STARsolo: {} genes loaded from {}",
+            gene_ann.n_genes(),
+            gtf_path.display()
+        );
+
+        let strand: SoloStrand = params.solo_strand.parse().map_err(|e: String| {
+            Error::from(std::io::Error::new(std::io::ErrorKind::InvalidInput, e))
+        })?;
+
+        // Quantified gene features (Gene, GeneFull). Validation guarantees these
+        // parse; default to Gene if somehow empty.
+        let features: Vec<SoloFeature> = params
+            .solo_features
+            .iter()
+            .filter_map(|f| f.parse().ok())
+            .collect();
+        let features = if features.is_empty() {
+            vec![SoloFeature::Gene]
+        } else {
+            features
+        };
+        let recorders = features.iter().map(|_| SoloRecorder::new()).collect();
+        let feature_reads = features.iter().map(|_| AtomicU64::new(0)).collect();
+        let sj_enabled = params.solo_features.iter().any(|f| f == "SJ");
+        let velocyto_enabled = params.solo_features.iter().any(|f| f == "Velocyto");
+        let want_multi = params.solo_multi_mappers.iter().any(|m| m != "Unique");
+
+        Ok(Self {
+            layout: SoloBarcodeLayout::from_params(params),
+            whitelist,
+            match_type: params.solo_cb_match_type(),
+            strand,
+            gene_ann,
+            stats: CbMatchStats::new(),
+            features,
+            recorders,
+            feature_reads,
+            region_stats: RegionStats::default(),
+            sj_enabled,
+            sj_records: Mutex::new(Vec::new()),
+            velocyto_enabled,
+            velocyto_records: Mutex::new(Vec::new()),
+            want_multi,
+        })
+    }
+
+    /// Process one solo read: match the cell barcode, validate the UMI, assign
+    /// a gene, and (on success) produce a count record. Stats are recorded
+    /// here; the returned records are appended to the recorder by the caller.
+    pub fn process_read(
+        &self,
+        cdna_transcripts: &[Transcript],
+        barcode: Option<&CellBarcode>,
+        junctions: &[(u64, u64)],
+    ) -> SoloReadOutcome {
+        let mut out = SoloReadOutcome::default();
+
+        // One-pass classification: the two overlap queries are shared between the
+        // per-feature gene assignment and the CellRanger-style mapping funnel, so
+        // this is no more work than the old per-feature `assign_gene_se` calls.
+        let want_exon = self.features.contains(&SoloFeature::Gene);
+        // Velocyto assigns its gene by gene-body overlap, so it needs `want_body`.
+        let want_body = self.features.contains(&SoloFeature::GeneFull) || self.velocyto_enabled;
+        let class = classify_read(
+            cdna_transcripts,
+            &self.gene_ann,
+            self.strand,
+            want_exon,
+            want_body,
+            self.want_multi,
+        );
+
+        // Mapping funnel: count uniquely-mapped reads by region (CellRanger's
+        // "confidently mapped" = MAPQ 255 ≈ a single alignment), independent of
+        // barcode validity.
+        if cdna_transcripts.len() == 1 {
+            match class.region {
+                Some(Region::Exonic) => {
+                    self.region_stats.exonic.fetch_add(1, Ordering::Relaxed);
+                }
+                Some(Region::Intronic) => {
+                    self.region_stats.intronic.fetch_add(1, Ordering::Relaxed);
+                }
+                Some(Region::Intergenic) => {
+                    self.region_stats.intergenic.fetch_add(1, Ordering::Relaxed);
+                }
+                None => {}
+            }
+            if class.antisense {
+                self.region_stats.antisense.fetch_add(1, Ordering::Relaxed);
+            }
+        }
+
+        // No barcode read (too short) → nothing to count (region already tallied).
+        let Some(bc) = barcode else {
+            return out;
+        };
+
+        // Cell-barcode match.
+        let cb_match = self
+            .whitelist
+            .match_cb(&bc.cb_seq, &bc.cb_qual, self.match_type);
+        self.stats.record_cb(&cb_match);
+
+        let cb_resolved: Option<u32> = match &cb_match {
+            CbMatch::Exact(idx) | CbMatch::Corrected(idx) => Some(*idx),
+            CbMatch::Multi(_) => None, // deferred to collation
+            CbMatch::NoMatch | CbMatch::NinCb | CbMatch::MultMatchRejected => return out,
+        };
+
+        // UMI validity.
+        let umi = match check_umi(&bc.umi_seq) {
+            UmiCheck::Ok(packed) => {
+                self.stats.record_umi(&UmiCheck::Ok(packed));
+                packed
+            }
+            rejected => {
+                self.stats.record_umi(&rejected);
+                return out;
+            }
+        };
+
+        // SJ feature: record (cell, UMI, junction) for each crossed junction.
+        // Only for resolved CBs (1MM_multi deferral is not applied to SJ).
+        if self.sj_enabled
+            && !junctions.is_empty()
+            && let Some(cb) = cb_resolved
+        {
+            out.sj = junctions
+                .iter()
+                .map(|&(intron_start, intron_end)| SjCountRecord {
+                    cb,
+                    umi,
+                    intron_start,
+                    intron_end,
+                })
+                .collect();
+        }
+
+        // Velocyto feature: gene from gene-body overlap, then classify the read
+        // spliced/unspliced/ambiguous. Resolved CB only.
+        if self.velocyto_enabled
+            && let Some(cb) = cb_resolved
+            && let GeneAssignment::Gene(gene) = class.gene_full
+        {
+            out.velocyto = Some(VelocytoRecord {
+                cb,
+                umi,
+                gene,
+                category: velocyto_category(cdna_transcripts, &self.gene_ann, gene),
+            });
+        }
+
+        // The CB match + UMI are shared across features; reuse the cached
+        // per-feature gene assignment from `classify_read`. One outcome/feature.
+        out.per_feature = self
+            .features
+            .iter()
+            .enumerate()
+            .map(|(fi, &feature)| {
+                let mut fo = FeatureOutcome::default();
+                let assignment = match feature {
+                    SoloFeature::Gene => class.gene,
+                    SoloFeature::GeneFull => class.gene_full,
+                };
+                let gene = match assignment {
+                    GeneAssignment::Gene(g) => g,
+                    GeneAssignment::Ambiguous => {
+                        // Gene-ambiguous read: record its gene set for
+                        // --soloMultiMappers distribution (resolved CB only).
+                        if let Some(cb) = cb_resolved {
+                            let genes = match feature {
+                                SoloFeature::Gene => &class.gene_multi,
+                                SoloFeature::GeneFull => &class.gene_full_multi,
+                            };
+                            if !genes.is_empty() {
+                                fo.multi_gene = Some(MultiGeneRecord {
+                                    cb,
+                                    umi,
+                                    genes: genes.clone(),
+                                });
+                            }
+                        }
+                        return fo;
+                    }
+                    GeneAssignment::NoFeature | GeneAssignment::Unmapped => return fo,
+                };
+                // Reads uniquely mapped to a gene under this feature, among
+                // valid-barcode reads (STARsolo "Reads Mapped to <feature>").
+                self.feature_reads[fi].fetch_add(1, Ordering::Relaxed);
+                match (cb_resolved, &cb_match) {
+                    (Some(cb), _) => fo.record = Some(SoloCountRecord { cb, umi, gene }),
+                    (None, CbMatch::Multi(cands)) => {
+                        fo.multi = Some(SoloMultiRecord {
+                            candidates: cands.clone(),
+                            umi,
+                            gene,
+                        });
+                    }
+                    (None, _) => unreachable!("non-multi unresolved CB returned early"),
+                }
+                fo
+            })
+            .collect();
+        out
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::io::fastq::encode_base;
+
+    fn encoded_read(name: &str, seq: &str, qual: &str) -> EncodedRead {
+        EncodedRead {
+            name: name.to_string(),
+            sequence: seq.bytes().map(encode_base).collect(),
+            quality: qual.bytes().collect(),
+        }
+    }
+
+    fn v2_layout() -> SoloBarcodeLayout {
+        // 10x v2: CB at 1..16 (16 bp), UMI at 17..26 (10 bp).
+        SoloBarcodeLayout::Simple {
+            cb_start: 0,
+            cb_len: 16,
+            umi_start: 16,
+            umi_len: 10,
+        }
+    }
+
+    #[test]
+    fn layout_from_params_converts_to_zero_based() {
+        let params = Parameters::try_parse_from([
+            "rustar-aligner",
+            "--soloType",
+            "CB_UMI_Simple",
+            "--readFilesIn",
+            "cdna.fq",
+            "bc.fq",
+            "--sjdbGTFfile",
+            "genes.gtf",
+            "--soloCBwhitelist",
+            "wl.txt",
+        ])
+        .unwrap();
+        let layout = SoloBarcodeLayout::from_params(&params);
+        assert_eq!(
+            layout,
+            SoloBarcodeLayout::Simple {
+                cb_start: 0,
+                cb_len: 16,
+                umi_start: 16,
+                umi_len: 10,
+            }
+        );
+        assert_eq!(layout.min_read_len(), 26);
+    }
+
+    #[test]
+    fn complex_layout_assembles_segments() {
+        // Two CB segments [0..2] + [4..6] (skipping a 2bp linker), UMI [6..8].
+        let layout = SoloBarcodeLayout::Complex {
+            cb_segments: vec![(0, 2), (4, 2)],
+            umi: (6, 2),
+        };
+        let read = encoded_read("r", "AACCGGTT", "IIIIIIII");
+        let bc = layout.extract(&read).unwrap();
+        // CB = bases [0,1] ++ [4,5] = "AA" ++ "GG"; UMI = [6,7] = "TT".
+        assert_eq!(
+            bc.cb_seq,
+            "AAGG".bytes().map(encode_base).collect::<Vec<_>>()
+        );
+        assert_eq!(
+            bc.umi_seq,
+            "TT".bytes().map(encode_base).collect::<Vec<_>>()
+        );
+    }
+
+    #[test]
+    fn parse_position_read_start() {
+        assert_eq!(parse_position("0_0_0_7").unwrap(), (0, 8));
+        assert_eq!(parse_position("0_8_0_15").unwrap(), (8, 8));
+        assert!(parse_position("2_0_2_7").is_err()); // adapter anchor unsupported
+        assert!(parse_position("0_5_0_2").is_err()); // end < start
+    }
+
+    #[test]
+    fn extract_v2_barcode() {
+        let layout = v2_layout();
+        // 16bp CB = AAAAAAAACCCCCCCC, 10bp UMI = GGGGGTTTTT.
+        let read = encoded_read(
+            "bc1",
+            "AAAAAAAACCCCCCCCGGGGGTTTTT",
+            "IIIIIIIIIIIIIIIIJJJJJJJJJJ",
+        );
+        let bc = layout.extract(&read).expect("should extract");
+        assert_eq!(bc.cb_string(), "AAAAAAAACCCCCCCC");
+        assert_eq!(bc.umi_string(), "GGGGGTTTTT");
+        assert_eq!(bc.cb_qual.len(), 16);
+        assert_eq!(bc.umi_qual.len(), 10);
+        assert!(!bc.cb_has_n());
+        assert!(!bc.umi_has_n());
+    }
+
+    #[test]
+    fn extract_too_short_returns_none() {
+        let layout = v2_layout();
+        let read = encoded_read("short", "AAAAAAAACCCC", "IIIIIIIIIIII");
+        assert!(layout.extract(&read).is_none());
+    }
+
+    #[test]
+    fn detects_n_in_cb_and_umi() {
+        let layout = v2_layout();
+        let read = encoded_read(
+            "bcN",
+            "AAAAAAAANCCCCCCCGGGGGTTTTN",
+            "IIIIIIIIIIIIIIIIJJJJJJJJJJ",
+        );
+        let bc = layout.extract(&read).unwrap();
+        assert!(bc.cb_has_n());
+        assert!(bc.umi_has_n());
+    }
+
+    #[test]
+    fn reader_pairs_cdna_and_barcode() {
+        use std::io::Write;
+        use tempfile::NamedTempFile;
+
+        let mut cdna = NamedTempFile::new().unwrap();
+        writeln!(cdna, "@r1\nACGTACGTAC\n+\nIIIIIIIIII").unwrap();
+        writeln!(cdna, "@r2\nTTTTGGGGCC\n+\nIIIIIIIIII").unwrap();
+        cdna.flush().unwrap();
+
+        let mut bc = NamedTempFile::new().unwrap();
+        writeln!(
+            bc,
+            "@r1\nAAAAAAAACCCCCCCCGGGGGTTTTT\n+\nIIIIIIIIIIIIIIIIJJJJJJJJJJ"
+        )
+        .unwrap();
+        writeln!(
+            bc,
+            "@r2\nGGGGGGGGTTTTTTTTACGTACGTAC\n+\nIIIIIIIIIIIIIIIIJJJJJJJJJJ"
+        )
+        .unwrap();
+        bc.flush().unwrap();
+
+        let mut reader = SoloReadReader::open(cdna.path(), bc.path(), v2_layout(), None).unwrap();
+        let batch = reader.read_batch(10).unwrap();
+        assert_eq!(batch.len(), 2);
+        assert_eq!(batch[0].cdna.name, "r1");
+        assert_eq!(
+            batch[0].barcode.as_ref().unwrap().cb_string(),
+            "AAAAAAAACCCCCCCC"
+        );
+        assert_eq!(
+            batch[1].barcode.as_ref().unwrap().umi_string(),
+            "ACGTACGTAC"
+        );
+    }
+
+    #[test]
+    fn reader_length_mismatch_errors() {
+        use std::io::Write;
+        use tempfile::NamedTempFile;
+
+        let mut cdna = NamedTempFile::new().unwrap();
+        writeln!(cdna, "@r1\nACGT\n+\nIIII").unwrap();
+        writeln!(cdna, "@r2\nTTTT\n+\nIIII").unwrap();
+        cdna.flush().unwrap();
+
+        let mut bc = NamedTempFile::new().unwrap();
+        writeln!(
+            bc,
+            "@r1\nAAAAAAAACCCCCCCCGGGGGTTTTT\n+\nIIIIIIIIIIIIIIIIJJJJJJJJJJ"
+        )
+        .unwrap();
+        bc.flush().unwrap();
+
+        let mut reader = SoloReadReader::open(cdna.path(), bc.path(), v2_layout(), None).unwrap();
+        assert!(reader.read_batch(10).is_err());
+    }
+}
diff --git a/src/solo/smartseq.rs b/src/solo/smartseq.rs
new file mode 100644
index 0000000..207468b
--- /dev/null
+++ b/src/solo/smartseq.rs
@@ -0,0 +1,138 @@
+//! `--soloType SmartSeq` — plate-based full-length protocols (Smart-seq2).
+//!
+//! There are no cell barcodes or UMIs in the reads. Each plate well is a
+//! separate library given by a `--readFilesManifest` line
+//! (`read1 <TAB> read2 <TAB> cellID`); the cell identity is the manifest cellID,
+//! and a gene's count for a cell is the number of its uniquely-gene-assigned
+//! reads (no UMI deduplication). Output mirrors the droplet path:
+//! `Solo.out/Gene/raw/{matrix.mtx, barcodes.tsv (cell IDs), features.tsv}`.
+//!
+//! Supports both single-end manifests (`read2 = -`, read counts) and paired-end
+//! (`read2` = mate-2 file, fragment counts via paired alignment).
+
+use crate::error::Error;
+use std::path::{Path, PathBuf};
+use std::sync::Mutex;
+
+/// One plate-well cell from the manifest.
+pub struct SmartSeqCell {
+    pub read1: PathBuf,
+    /// Mate-2 file for paired-end SmartSeq; `None` for single-end (`read2 = -`).
+    pub read2: Option<PathBuf>,
+    pub cell_id: String,
+}
+
+/// Parse a `--readFilesManifest` TSV into per-cell entries. Lines are
+/// `read1 <TAB> read2 <TAB> cellID`; blank lines and `#` comments are skipped.
+/// `read2 = -` is single-end; any other value is the mate-2 file (paired-end).
+pub fn parse_manifest(path: &Path) -> Result<Vec<SmartSeqCell>, Error> {
+    let text = std::fs::read_to_string(path).map_err(|e| Error::io(e, path))?;
+    let mut cells = Vec::new();
+    for (lineno, line) in text.lines().enumerate() {
+        let line = line.trim();
+        if line.is_empty() || line.starts_with('#') {
+            continue;
+        }
+        let f: Vec<&str> = line.split('\t').collect();
+        if f.len() < 3 {
+            return Err(invalid(format!(
+                "readFilesManifest line {}: expected 'read1<TAB>read2<TAB>cellID', got {:?}",
+                lineno + 1,
+                line
+            )));
+        }
+        cells.push(SmartSeqCell {
+            read1: PathBuf::from(f[0]),
+            read2: (f[1] != "-").then(|| PathBuf::from(f[1])),
+            cell_id: f[2].to_string(),
+        });
+    }
+    if cells.is_empty() {
+        return Err(invalid(format!(
+            "readFilesManifest {} has no cell entries",
+            path.display()
+        )));
+    }
+    Ok(cells)
+}
+
+fn invalid(msg: String) -> Error {
+    Error::from(std::io::Error::new(std::io::ErrorKind::InvalidInput, msg))
+}
+
+/// Per-cell, per-gene read counts for a SmartSeq run. `cells` is the manifest
+/// order (the matrix column order); `counts[cell]` maps gene → read count.
+pub struct SmartSeqCounts {
+    pub cell_ids: Vec<String>,
+    pub counts: Vec<Mutex<std::collections::HashMap<u32, u64>>>,
+    pub n_genes: usize,
+}
+
+impl SmartSeqCounts {
+    pub fn new(cell_ids: Vec<String>, n_genes: usize) -> Self {
+        let counts = (0..cell_ids.len())
+            .map(|_| Mutex::new(std::collections::HashMap::new()))
+            .collect();
+        Self {
+            cell_ids,
+            counts,
+            n_genes,
+        }
+    }
+
+    /// Add `+1` to (cell, gene) for one uniquely-assigned read.
+    pub fn add(&self, cell: usize, gene: u32) {
+        *self.counts[cell].lock().unwrap().entry(gene).or_insert(0) += 1;
+    }
+
+    /// Write `Solo.out/Gene/raw/{matrix.mtx, barcodes.tsv, features.tsv}` —
+    /// genes × cells, integer read counts. `gzip` appends `.gz`.
+    pub fn write_matrix(
+        &self,
+        raw_dir: &Path,
+        gene_ids: &[String],
+        gzip: bool,
+    ) -> Result<usize, Error> {
+        std::fs::create_dir_all(raw_dir).map_err(|e| Error::io(e, raw_dir))?;
+
+        // features.tsv (CellRanger v3 layout: id, name, "Gene Expression").
+        crate::solo::count::write_file(&raw_dir.join("features.tsv"), gzip, |w| {
+            for id in gene_ids {
+                writeln!(w, "{id}\t{id}\tGene Expression").map_err(|e| Error::io(e, raw_dir))?;
+            }
+            Ok(())
+        })?;
+        // barcodes.tsv = the manifest cell IDs (one per matrix column).
+        crate::solo::count::write_file(&raw_dir.join("barcodes.tsv"), gzip, |w| {
+            for cid in &self.cell_ids {
+                writeln!(w, "{cid}").map_err(|e| Error::io(e, raw_dir))?;
+            }
+            Ok(())
+        })?;
+
+        // matrix.mtx — collect entries cell-ascending, gene-ascending.
+        let mut nnz = 0usize;
+        let path = raw_dir.join("matrix.mtx");
+        // Pre-count nnz.
+        for c in &self.counts {
+            nnz += c.lock().unwrap().len();
+        }
+        crate::solo::count::write_file(&path, gzip, |w| {
+            writeln!(w, "%%MatrixMarket matrix coordinate integer general")
+                .map_err(|e| Error::io(e, &path))?;
+            writeln!(w, "%").map_err(|e| Error::io(e, &path))?;
+            writeln!(w, "{} {} {}", self.n_genes, self.cell_ids.len(), nnz)
+                .map_err(|e| Error::io(e, &path))?;
+            for (ci, cell) in self.counts.iter().enumerate() {
+                let map = cell.lock().unwrap();
+                let mut entries: Vec<(u32, u64)> = map.iter().map(|(&g, &c)| (g, c)).collect();
+                entries.sort_unstable_by_key(|&(g, _)| g);
+                for (g, c) in entries {
+                    writeln!(w, "{} {} {}", g + 1, ci + 1, c).map_err(|e| Error::io(e, &path))?;
+                }
+            }
+            Ok(())
+        })?;
+        Ok(nnz)
+    }
+}
diff --git a/src/solo/whitelist.rs b/src/solo/whitelist.rs
new file mode 100644
index 0000000..4023836
--- /dev/null
+++ b/src/solo/whitelist.rs
@@ -0,0 +1,740 @@
+//! Cell-barcode whitelist loading and read-stage CB/UMI matching (Phase 14.2).
+//!
+//! Faithful port of STAR's `SoloReadBarcode_getCBandUMI.cpp` read stage:
+//! barcodes are 2-bit packed (seq[0] in the high bits) into a `u64` and the
+//! whitelist is a sorted array searched by binary search. Exact match,
+//! single-N correction, and 1-mismatch (1MM / 1MM_multi) correction follow
+//! STAR's enumeration exactly.
+//!
+//! The 1MM_multi *posterior* resolution (count + quality weighted) is a
+//! collation-stage concern and is deferred to Phase 14.4 — here a multi-match
+//! read records all candidate whitelist indices plus the mismatch-position
+//! quality, exactly as STAR's `cbMatchString`.
+
+use crate::error::Error;
+use crate::io::fastq::{decode_base, encode_base};
+use flate2::read::GzDecoder;
+use std::fs::File;
+use std::io::{BufRead, BufReader};
+use std::path::Path;
+use std::str::FromStr;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+/// Maximum barcode length representable in a `u64` (32 × 2-bit bases).
+pub const CB_LEN_MAX: usize = 32;
+
+// ---------------------------------------------------------------------------
+// Barcode packing
+// ---------------------------------------------------------------------------
+
+/// Result of packing an encoded barcode into a `u64`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum PackResult {
+    /// No ambiguous bases; the packed value.
+    NoN(u64),
+    /// Exactly one `N`; `packed` has `A` (0) at the N position.
+    OneN { packed: u64, pos: usize },
+    /// More than one `N` — uncorrectable.
+    ManyN,
+}
+
+/// 2-bit pack an encoded barcode (`0=A,1=C,2=G,3=T,4=N`) with `seq[0]` in the
+/// high bits, matching STAR's `convertNuclStrToInt64`.
+pub fn pack_barcode(seq: &[u8]) -> PackResult {
+    let len = seq.len();
+    let mut packed: u64 = 0;
+    let mut n_pos: Option<usize> = None;
+    let mut n_count = 0usize;
+    for (i, &b) in seq.iter().enumerate() {
+        let shift = 2 * (len - 1 - i);
+        if b >= 4 {
+            n_count += 1;
+            if n_count > 1 {
+                return PackResult::ManyN;
+            }
+            n_pos = Some(i);
+            // leave 0 (A) at this position; correction substitutes all 4 bases
+        } else {
+            packed |= (b as u64) << shift;
+        }
+    }
+    match n_pos {
+        None => PackResult::NoN(packed),
+        Some(pos) => PackResult::OneN { packed, pos },
+    }
+}
+
+/// Unpack a `u64` of `len` 2-bit bases back to an ASCII `ACGT` string
+/// (`seq[0]` from the high bits).
+pub fn unpack_barcode(packed: u64, len: usize) -> String {
+    (0..len)
+        .map(|i| {
+            let shift = 2 * (len - 1 - i);
+            decode_base(((packed >> shift) & 0b11) as u8) as char
+        })
+        .collect()
+}
+
+/// Bit shift for the base at sequence index `pos` in a `len`-base packing.
+#[inline]
+fn shift_for(pos: usize, len: usize) -> u32 {
+    (2 * (len - 1 - pos)) as u32
+}
+
+// ---------------------------------------------------------------------------
+// Match-type configuration (--soloCBmatchWLtype)
+// ---------------------------------------------------------------------------
+
+/// Flags decoded from `--soloCBmatchWLtype`. Mirrors STAR's `CBmatchWL`
+/// boolean fields one-for-one, so the multiple bools are intentional.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[allow(clippy::struct_excessive_bools)]
+pub struct CbMatchType {
+    /// Allow a single mismatch to the whitelist.
+    pub mm1: bool,
+    /// Keep multiple 1MM candidates for posterior resolution.
+    pub mm1_multi: bool,
+    /// Allow multiple matches for the N-substitution path.
+    pub mm1_multi_nbase: bool,
+    /// Add pseudocounts in posterior resolution (collation stage).
+    pub pseudocounts: bool,
+}
+
+impl FromStr for CbMatchType {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "Exact" => Ok(Self {
+                mm1: false,
+                mm1_multi: false,
+                mm1_multi_nbase: false,
+                pseudocounts: false,
+            }),
+            "1MM" => Ok(Self {
+                mm1: true,
+                mm1_multi: false,
+                mm1_multi_nbase: false,
+                pseudocounts: false,
+            }),
+            "1MM_multi" => Ok(Self {
+                mm1: true,
+                mm1_multi: true,
+                mm1_multi_nbase: false,
+                pseudocounts: false,
+            }),
+            "1MM_multi_pseudocounts" => Ok(Self {
+                mm1: true,
+                mm1_multi: true,
+                mm1_multi_nbase: false,
+                pseudocounts: true,
+            }),
+            "1MM_multi_Nbase_pseudocounts" => Ok(Self {
+                mm1: true,
+                mm1_multi: true,
+                mm1_multi_nbase: true,
+                pseudocounts: true,
+            }),
+            _ => Err(format!(
+                "unknown soloCBmatchWLtype '{s}'; expected Exact, 1MM, 1MM_multi, 1MM_multi_pseudocounts, or 1MM_multi_Nbase_pseudocounts"
+            )),
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Match result
+// ---------------------------------------------------------------------------
+
+/// One candidate whitelist barcode reachable by a single edit, plus the quality
+/// of the mismatched base (for posterior resolution at collation).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct CbCandidate {
+    /// Index into the sorted whitelist.
+    pub wl_index: u32,
+    /// 0-based mismatch position in the read barcode.
+    pub mismatch_pos: usize,
+    /// Raw Phred+33 quality byte at the mismatch position.
+    pub mismatch_qual: u8,
+}
+
+/// Outcome of matching one cell barcode to the whitelist. The negative STAR
+/// `cbMatch` codes map to the rejection variants.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum CbMatch {
+    /// Exact whitelist hit (cbMatch=0); carries the sorted whitelist index.
+    Exact(u32),
+    /// Unambiguous single-edit correction (cbMatch=1).
+    Corrected(u32),
+    /// Multiple 1MM candidates kept for later posterior resolution (cbMatch>1).
+    Multi(Vec<CbCandidate>),
+    /// No whitelist match within one edit (cbMatch=-1).
+    NoMatch,
+    /// More than one `N` in the barcode (cbMatch=-2).
+    NinCb,
+    /// >1 whitelist match but `mm1_multi` not enabled (cbMatch=-3).
+    MultMatchRejected,
+}
+
+// ---------------------------------------------------------------------------
+// UMI validity (matches STAR umiCheck=-23 / -24)
+// ---------------------------------------------------------------------------
+
+/// Outcome of validating a UMI.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum UmiCheck {
+    /// Valid UMI; carries the packed value.
+    Ok(u64),
+    /// Contains an `N` (cbMatch=-23).
+    NinUmi,
+    /// Exact homopolymer, e.g. all-A (cbMatch=-24).
+    Homopolymer,
+}
+
+/// Validate a UMI: reject any `N`, then reject exact homopolymers.
+pub fn check_umi(umi_seq: &[u8]) -> UmiCheck {
+    match pack_barcode(umi_seq) {
+        PackResult::ManyN | PackResult::OneN { .. } => UmiCheck::NinUmi,
+        PackResult::NoN(packed) => {
+            if is_homopolymer(umi_seq) {
+                UmiCheck::Homopolymer
+            } else {
+                UmiCheck::Ok(packed)
+            }
+        }
+    }
+}
+
+fn is_homopolymer(seq: &[u8]) -> bool {
+    match seq.first() {
+        None => false,
+        Some(&first) => seq.iter().all(|&b| b == first),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Whitelist
+// ---------------------------------------------------------------------------
+
+/// Cell-barcode whitelist. `List` is an explicit, sorted, de-duplicated set of
+/// packed barcodes; `NoWhitelist` corresponds to `--soloCBwhitelist None`.
+pub enum CbWhitelist {
+    List {
+        /// Sorted unique packed barcodes (binary-search target).
+        sorted: Vec<u64>,
+        /// `orig_index[k]` = line number of `sorted[k]` in the whitelist file,
+        /// for `barcodes.tsv` column ordering (Phase 14.4).
+        orig_index: Vec<u32>,
+        /// Per-whitelist-index exact-match read counts (posterior prior).
+        exact_counts: Vec<AtomicU64>,
+        /// Barcode length in bases.
+        len: usize,
+    },
+    /// `--soloCBwhitelist None`: keep every valid (N-free) barcode as observed.
+    NoWhitelist { len: usize },
+}
+
+impl CbWhitelist {
+    /// Number of whitelist barcodes (0 for `NoWhitelist`).
+    pub fn len(&self) -> usize {
+        match self {
+            Self::List { sorted, .. } => sorted.len(),
+            Self::NoWhitelist { .. } => 0,
+        }
+    }
+
+    /// True if the whitelist has no barcodes.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Barcode length in bases.
+    pub fn barcode_len(&self) -> usize {
+        match self {
+            Self::List { len, .. } | Self::NoWhitelist { len } => *len,
+        }
+    }
+
+    /// Decode the whitelist barcode at sorted index `idx` to an ASCII string.
+    pub fn barcode_string(&self, idx: u32) -> Option<String> {
+        match self {
+            Self::List { sorted, len, .. } => {
+                sorted.get(idx as usize).map(|&p| unpack_barcode(p, *len))
+            }
+            Self::NoWhitelist { .. } => None,
+        }
+    }
+
+    /// Append the ASCII `ACGT` barcode at sorted index `idx` to `out` without
+    /// allocating a `String` — used when writing the full whitelist to
+    /// `barcodes.tsv` (millions of lines). Appends nothing for an out-of-range
+    /// index or `NoWhitelist`.
+    pub fn unpack_barcode_into(&self, idx: u32, out: &mut Vec<u8>) {
+        if let Self::List { sorted, len, .. } = self
+            && let Some(&packed) = sorted.get(idx as usize)
+        {
+            for i in 0..*len {
+                let shift = 2 * (*len - 1 - i);
+                out.push(decode_base(((packed >> shift) & 0b11) as u8));
+            }
+        }
+    }
+
+    /// Load a whitelist from a file (plain or gzip). One barcode per line;
+    /// blank lines ignored. Barcodes are encoded, packed, sorted, de-duplicated.
+    pub fn load(path: &Path) -> Result<Self, Error> {
+        let (packed, len) = Self::load_packed(path)?;
+        Ok(Self::from_packed_list(packed, len))
+    }
+
+    /// Build a `List` whitelist from packed barcodes (sort + dedup + index).
+    pub fn from_packed_list(packed: Vec<u64>, len: usize) -> Self {
+        let mut indexed: Vec<(u64, u32)> = packed
+            .into_iter()
+            .enumerate()
+            .map(|(i, p)| (p, i as u32))
+            .collect();
+        indexed.sort_unstable_by_key(|&(p, _)| p);
+        indexed.dedup_by_key(|&mut (p, _)| p);
+        let sorted: Vec<u64> = indexed.iter().map(|&(p, _)| p).collect();
+        let orig_index: Vec<u32> = indexed.iter().map(|&(_, i)| i).collect();
+        let exact_counts = (0..sorted.len()).map(|_| AtomicU64::new(0)).collect();
+        Self::List {
+            sorted,
+            orig_index,
+            exact_counts,
+            len,
+        }
+    }
+
+    /// `CB_UMI_Complex`: combine per-segment whitelists into one whitelist of
+    /// concatenated barcodes (the cartesian product, segment order = file order).
+    /// Matching the assembled CB against this is equivalent to STARsolo's
+    /// per-segment matching for both Exact and 1MM (a 1MM in the concatenation is
+    /// a 1MM in exactly one segment). Errors if the combined length exceeds 32.
+    pub fn load_complex(paths: &[std::path::PathBuf]) -> Result<Self, Error> {
+        let segs: Vec<(Vec<u64>, usize)> = paths
+            .iter()
+            .map(|p| Self::load_packed(p))
+            .collect::<Result<_, _>>()?;
+        let total_len: usize = segs.iter().map(|(_, l)| l).sum();
+        if total_len == 0 || total_len > CB_LEN_MAX {
+            return Err(Error::from(std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!("combined CB length {total_len} out of range (1..={CB_LEN_MAX})"),
+            )));
+        }
+        let n_combos: usize = segs.iter().map(|(p, _)| p.len()).product();
+        if n_combos > 100_000_000 {
+            return Err(Error::from(std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!("CB_UMI_Complex whitelist product is {n_combos} barcodes (too large)"),
+            )));
+        }
+        let mut combined: Vec<u64> = vec![0];
+        for (packed, len) in &segs {
+            let mut next = Vec::with_capacity(combined.len() * packed.len());
+            for &c in &combined {
+                for &p in packed {
+                    next.push((c << (2 * len)) | p);
+                }
+            }
+            combined = next;
+        }
+        Ok(Self::from_packed_list(combined, total_len))
+    }
+
+    /// Read a whitelist file into raw packed barcodes + barcode length.
+    fn load_packed(path: &Path) -> Result<(Vec<u64>, usize), Error> {
+        let reader = open_maybe_gzip(path)?;
+        let mut packed: Vec<u64> = Vec::new();
+        let mut len: usize = 0;
+        for (lineno, line) in reader.lines().enumerate() {
+            let line = line.map_err(Error::from)?;
+            let bc = line.trim();
+            if bc.is_empty() {
+                continue;
+            }
+            // STARsolo whitelists may carry a second column (e.g. translated
+            // barcodes for multi-ome); take the first whitespace token.
+            let bc = bc.split_whitespace().next().unwrap_or("");
+            if bc.is_empty() {
+                continue;
+            }
+            if len == 0 {
+                len = bc.len();
+                if len == 0 || len > CB_LEN_MAX {
+                    return Err(Error::from(std::io::Error::new(
+                        std::io::ErrorKind::InvalidData,
+                        format!("whitelist barcode length {len} out of range (1..={CB_LEN_MAX})"),
+                    )));
+                }
+            } else if bc.len() != len {
+                return Err(Error::from(std::io::Error::new(
+                    std::io::ErrorKind::InvalidData,
+                    format!(
+                        "whitelist barcode on line {} has length {} (expected {len})",
+                        lineno + 1,
+                        bc.len()
+                    ),
+                )));
+            }
+            let encoded: Vec<u8> = bc.bytes().map(encode_base).collect();
+            match pack_barcode(&encoded) {
+                PackResult::NoN(p) => packed.push(p),
+                _ => {
+                    return Err(Error::from(std::io::Error::new(
+                        std::io::ErrorKind::InvalidData,
+                        format!("whitelist barcode '{bc}' on line {} contains N", lineno + 1),
+                    )));
+                }
+            }
+        }
+        if packed.is_empty() {
+            return Err(Error::from(std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                "whitelist is empty",
+            )));
+        }
+        Ok((packed, len))
+    }
+
+    /// Binary-search the sorted whitelist for `packed`; returns the sorted index.
+    fn search(&self, packed: u64) -> Option<u32> {
+        match self {
+            Self::List { sorted, .. } => sorted.binary_search(&packed).ok().map(|i| i as u32),
+            Self::NoWhitelist { .. } => None,
+        }
+    }
+
+    /// Increment the exact-match count for sorted whitelist index `idx`.
+    fn bump_exact(&self, idx: u32) {
+        if let Self::List { exact_counts, .. } = self {
+            exact_counts[idx as usize].fetch_add(1, Ordering::Relaxed);
+        }
+    }
+
+    /// Snapshot of exact-match counts per sorted whitelist index (for the
+    /// Phase 14.4 posterior). Empty for `NoWhitelist`.
+    pub fn exact_count_snapshot(&self) -> Vec<u64> {
+        match self {
+            Self::List { exact_counts, .. } => exact_counts
+                .iter()
+                .map(|c| c.load(Ordering::Relaxed))
+                .collect(),
+            Self::NoWhitelist { .. } => Vec::new(),
+        }
+    }
+
+    /// Match one cell barcode against the whitelist following STAR's read stage.
+    ///
+    /// `cb_seq` is encoded (`0..=4`); `cb_qual` is raw Phred+33 (parallel to
+    /// `cb_seq`). On an exact hit the whitelist's exact-count is incremented.
+    pub fn match_cb(&self, cb_seq: &[u8], cb_qual: &[u8], match_type: CbMatchType) -> CbMatch {
+        let len = cb_seq.len();
+        match self {
+            Self::NoWhitelist { .. } => match pack_barcode(cb_seq) {
+                // No whitelist: every N-free barcode is its own "cell". We
+                // cannot return a stable index without a whitelist, so callers
+                // treat NoWhitelist specially; report NoMatch for N-containing.
+                PackResult::NoN(_) => CbMatch::Exact(0),
+                _ => CbMatch::NinCb,
+            },
+            Self::List { .. } => match pack_barcode(cb_seq) {
+                PackResult::ManyN => CbMatch::NinCb,
+                PackResult::NoN(packed) => {
+                    if let Some(idx) = self.search(packed) {
+                        self.bump_exact(idx);
+                        return CbMatch::Exact(idx);
+                    }
+                    if !match_type.mm1 {
+                        return CbMatch::NoMatch;
+                    }
+                    // 1MM: every position × the 3 alternate bases.
+                    let mut candidates: Vec<CbCandidate> = Vec::new();
+                    for pos in 0..len {
+                        let shift = shift_for(pos, len);
+                        let orig = (packed >> shift) & 0b11;
+                        for alt in 0u64..4 {
+                            if alt == orig {
+                                continue;
+                            }
+                            let cand = (packed & !(0b11 << shift)) | (alt << shift);
+                            if let Some(idx) = self.search(cand) {
+                                candidates.push(CbCandidate {
+                                    wl_index: idx,
+                                    mismatch_pos: pos,
+                                    mismatch_qual: qual_at(cb_qual, pos),
+                                });
+                            }
+                        }
+                    }
+                    Self::resolve(candidates, match_type.mm1_multi)
+                }
+                PackResult::OneN { packed, pos } => {
+                    if !match_type.mm1 {
+                        return CbMatch::NoMatch;
+                    }
+                    // Substitute all 4 bases at the single N position.
+                    let shift = shift_for(pos, len);
+                    let mut candidates: Vec<CbCandidate> = Vec::new();
+                    for base in 0u64..4 {
+                        let cand = (packed & !(0b11 << shift)) | (base << shift);
+                        if let Some(idx) = self.search(cand) {
+                            candidates.push(CbCandidate {
+                                wl_index: idx,
+                                mismatch_pos: pos,
+                                mismatch_qual: qual_at(cb_qual, pos),
+                            });
+                        }
+                    }
+                    Self::resolve(candidates, match_type.mm1_multi_nbase)
+                }
+            },
+        }
+    }
+
+    /// Turn a candidate list into a [`CbMatch`], honoring the multi flag.
+    fn resolve(candidates: Vec<CbCandidate>, allow_multi: bool) -> CbMatch {
+        match candidates.len() {
+            0 => CbMatch::NoMatch,
+            1 => CbMatch::Corrected(candidates[0].wl_index),
+            _ => {
+                if allow_multi {
+                    CbMatch::Multi(candidates)
+                } else {
+                    CbMatch::MultMatchRejected
+                }
+            }
+        }
+    }
+}
+
+#[inline]
+fn qual_at(qual: &[u8], pos: usize) -> u8 {
+    qual.get(pos).copied().unwrap_or(b'!') // '!' = Phred 0
+}
+
+/// Open a file, transparently decompressing `.gz`.
+fn open_maybe_gzip(path: &Path) -> Result<Box<dyn BufRead>, Error> {
+    let file = File::open(path).map_err(|e| Error::io(e, path))?;
+    let is_gz = path
+        .extension()
+        .is_some_and(|e| e.eq_ignore_ascii_case("gz"));
+    if is_gz {
+        Ok(Box::new(BufReader::new(GzDecoder::new(file))))
+    } else {
+        Ok(Box::new(BufReader::new(file)))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Stats (STAR cbMatch categories)
+// ---------------------------------------------------------------------------
+
+/// Per-run barcode-matching statistics, mirroring STAR's `SoloReadBarcodeStats`.
+#[derive(Debug, Default)]
+pub struct CbMatchStats {
+    pub yes_exact: AtomicU64,
+    pub yes_one_mm: AtomicU64,
+    pub yes_mult_mm: AtomicU64,
+    pub no_match: AtomicU64,
+    pub n_in_cb: AtomicU64,
+    pub mult_rejected: AtomicU64,
+    pub n_in_umi: AtomicU64,
+    pub umi_homopolymer: AtomicU64,
+}
+
+impl CbMatchStats {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Record one CB match outcome.
+    pub fn record_cb(&self, m: &CbMatch) {
+        let c = match m {
+            CbMatch::Exact(_) => &self.yes_exact,
+            CbMatch::Corrected(_) => &self.yes_one_mm,
+            CbMatch::Multi(_) => &self.yes_mult_mm,
+            CbMatch::NoMatch => &self.no_match,
+            CbMatch::NinCb => &self.n_in_cb,
+            CbMatch::MultMatchRejected => &self.mult_rejected,
+        };
+        c.fetch_add(1, Ordering::Relaxed);
+    }
+
+    /// Record one UMI check outcome (only the rejection cases are counted).
+    pub fn record_umi(&self, u: &UmiCheck) {
+        match u {
+            UmiCheck::NinUmi => {
+                self.n_in_umi.fetch_add(1, Ordering::Relaxed);
+            }
+            UmiCheck::Homopolymer => {
+                self.umi_homopolymer.fetch_add(1, Ordering::Relaxed);
+            }
+            UmiCheck::Ok(_) => {}
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::io::Write;
+    use tempfile::NamedTempFile;
+
+    fn enc(s: &str) -> Vec<u8> {
+        s.bytes().map(encode_base).collect()
+    }
+
+    fn write_wl(barcodes: &[&str]) -> NamedTempFile {
+        let mut f = NamedTempFile::new().unwrap();
+        for b in barcodes {
+            writeln!(f, "{b}").unwrap();
+        }
+        f.flush().unwrap();
+        f
+    }
+
+    #[test]
+    fn pack_roundtrip() {
+        let s = "ACGTACGT";
+        match pack_barcode(&enc(s)) {
+            PackResult::NoN(p) => assert_eq!(unpack_barcode(p, 8), s),
+            _ => panic!("should pack cleanly"),
+        }
+    }
+
+    #[test]
+    fn pack_detects_one_and_many_n() {
+        assert!(matches!(
+            pack_barcode(&enc("ACNT")),
+            PackResult::OneN { pos: 2, .. }
+        ));
+        assert_eq!(pack_barcode(&enc("ANNT")), PackResult::ManyN);
+    }
+
+    #[test]
+    fn exact_match_and_count() {
+        let f = write_wl(&["AAAA", "ACGT", "TTTT"]);
+        let wl = CbWhitelist::load(f.path()).unwrap();
+        let t = CbMatchType::from_str("1MM_multi").unwrap();
+        let m = wl.match_cb(&enc("ACGT"), b"IIII", t);
+        match m {
+            CbMatch::Exact(idx) => assert_eq!(wl.barcode_string(idx).unwrap(), "ACGT"),
+            other => panic!("expected exact, got {other:?}"),
+        }
+        let counts = wl.exact_count_snapshot();
+        assert_eq!(counts.iter().sum::<u64>(), 1);
+    }
+
+    #[test]
+    fn single_mismatch_correction() {
+        let f = write_wl(&["AAAA", "ACGT", "TTTT"]);
+        let wl = CbWhitelist::load(f.path()).unwrap();
+        let t = CbMatchType::from_str("1MM").unwrap();
+        // ACGA differs from ACGT at last position only.
+        let m = wl.match_cb(&enc("ACGA"), b"IIII", t);
+        match m {
+            CbMatch::Corrected(idx) => assert_eq!(wl.barcode_string(idx).unwrap(), "ACGT"),
+            other => panic!("expected corrected, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn ambiguous_multi_match_behavior() {
+        // AAAA and CAAA both within 1MM of NAAA-ish read "GAAA"? Use TAAA read:
+        // candidates AAAA (pos0 T->A) and CAAA (pos0 T->C). Both in WL.
+        let f = write_wl(&["AAAA", "CAAA"]);
+        let wl = CbWhitelist::load(f.path()).unwrap();
+
+        // 1MM (no multi): rejected as ambiguous.
+        let rej = wl.match_cb(&enc("TAAA"), b"IIII", CbMatchType::from_str("1MM").unwrap());
+        assert_eq!(rej, CbMatch::MultMatchRejected);
+
+        // 1MM_multi: both candidates kept for later resolution.
+        let multi = wl.match_cb(
+            &enc("TAAA"),
+            b"IIII",
+            CbMatchType::from_str("1MM_multi").unwrap(),
+        );
+        match multi {
+            CbMatch::Multi(c) => assert_eq!(c.len(), 2),
+            other => panic!("expected multi, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn no_match_when_too_far() {
+        let f = write_wl(&["AAAA", "TTTT"]);
+        let wl = CbWhitelist::load(f.path()).unwrap();
+        let t = CbMatchType::from_str("1MM_multi").unwrap();
+        // GGGG is >1 edit from both.
+        assert_eq!(wl.match_cb(&enc("GGGG"), b"IIII", t), CbMatch::NoMatch);
+    }
+
+    #[test]
+    fn n_correction_single() {
+        let f = write_wl(&["AAAA", "ACGT"]);
+        let wl = CbWhitelist::load(f.path()).unwrap();
+        let t = CbMatchType::from_str("1MM_multi").unwrap();
+        // ACGN → only ACGT matches among the 4 substitutions.
+        let m = wl.match_cb(&enc("ACGN"), b"IIII", t);
+        match m {
+            CbMatch::Corrected(idx) => assert_eq!(wl.barcode_string(idx).unwrap(), "ACGT"),
+            other => panic!("expected corrected, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn many_n_rejected() {
+        let f = write_wl(&["AAAA"]);
+        let wl = CbWhitelist::load(f.path()).unwrap();
+        let t = CbMatchType::from_str("1MM_multi").unwrap();
+        assert_eq!(wl.match_cb(&enc("NNAA"), b"IIII", t), CbMatch::NinCb);
+    }
+
+    #[test]
+    fn exact_only_mode_no_correction() {
+        let f = write_wl(&["ACGT"]);
+        let wl = CbWhitelist::load(f.path()).unwrap();
+        let t = CbMatchType::from_str("Exact").unwrap();
+        assert_eq!(wl.match_cb(&enc("ACGA"), b"IIII", t), CbMatch::NoMatch);
+    }
+
+    #[test]
+    fn umi_checks() {
+        assert!(matches!(check_umi(&enc("ACGTAC")), UmiCheck::Ok(_)));
+        assert_eq!(check_umi(&enc("ACGTNC")), UmiCheck::NinUmi);
+        assert_eq!(check_umi(&enc("AAAAAA")), UmiCheck::Homopolymer);
+        assert_eq!(check_umi(&enc("TTTTTT")), UmiCheck::Homopolymer);
+    }
+
+    #[test]
+    fn whitelist_length_mismatch_errors() {
+        let f = write_wl(&["AAAA", "TTT"]);
+        assert!(CbWhitelist::load(f.path()).is_err());
+    }
+
+    #[test]
+    fn whitelist_gzip_load() {
+        use flate2::Compression;
+        use flate2::write::GzEncoder;
+        let f = tempfile::Builder::new().suffix(".gz").tempfile().unwrap();
+        let mut enc = GzEncoder::new(f.as_file(), Compression::default());
+        writeln!(enc, "AAAA\nACGT\nTTTT").unwrap();
+        enc.finish().unwrap();
+        let wl = CbWhitelist::load(f.path()).unwrap();
+        assert_eq!(wl.len(), 3);
+    }
+
+    #[test]
+    fn match_type_parsing() {
+        assert!(!CbMatchType::from_str("Exact").unwrap().mm1);
+        assert!(CbMatchType::from_str("1MM").unwrap().mm1);
+        assert!(!CbMatchType::from_str("1MM").unwrap().mm1_multi);
+        assert!(CbMatchType::from_str("1MM_multi").unwrap().mm1_multi);
+        let n = CbMatchType::from_str("1MM_multi_Nbase_pseudocounts").unwrap();
+        assert!(n.mm1_multi_nbase && n.pseudocounts);
+        assert!(CbMatchType::from_str("bogus").is_err());
+    }
+}
diff --git a/test/Dockerfile.bench b/test/Dockerfile.bench
new file mode 100644
index 0000000..f6397e0
--- /dev/null
+++ b/test/Dockerfile.bench
@@ -0,0 +1,15 @@
+# amd64 Linux image to benchmark CellRanger vs STARsolo vs rustar-aligner in a
+# consistent environment. CellRanger is x86_64-only, so everything runs under
+# linux/amd64 (Rosetta-accelerated on Apple Silicon) for a fair comparison.
+#
+# CellRanger itself is mounted at runtime (not baked in) from the extracted
+# cellranger-10.0.0/ directory.
+FROM --platform=linux/amd64 rust:1-bookworm
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+      rna-star python3 samtools procps time pigz ca-certificates \
+ && rm -rf /var/lib/apt/lists/*
+
+RUN STAR --version && cargo --version && python3 --version
+WORKDIR /work
diff --git a/test/Dockerfile.solodiff b/test/Dockerfile.solodiff
new file mode 100644
index 0000000..f6a5822
--- /dev/null
+++ b/test/Dockerfile.solodiff
@@ -0,0 +1,19 @@
+# Linux environment to run the STARsolo CellRanger differential test in a
+# consistent way (real STAR works on Linux; the macOS build has a read bug).
+#
+# Build:  docker build -f test/Dockerfile.solodiff -t rustar-solodiff .
+# Run:    docker run --rm -v "$PWD":/work -w /work \
+#             -e CARGO_TARGET_DIR=/tmp/ct rustar-solodiff \
+#             bash -c "cargo build --release && \
+#                      python3 test/solo_cellranger_diff.py \
+#                        --star \$(which STAR) --rustar /tmp/ct/release/rustar-aligner"
+FROM rust:1-bookworm
+
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends rna-star python3 ca-certificates \
+ && rm -rf /var/lib/apt/lists/*
+
+# Report tool versions at build time for the record.
+RUN STAR --version && cargo --version && python3 --version
+
+WORKDIR /work
diff --git a/test/solo_bench.py b/test/solo_bench.py
new file mode 100644
index 0000000..7f35c07
--- /dev/null
+++ b/test/solo_bench.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python3
+"""Runtime + output-stats benchmark: CellRanger vs STARsolo vs rustar-aligner.
+
+Runs inside the amd64 benchmark container (test/Dockerfile.bench) so all three
+tools run in one consistent Linux/x86_64 environment. Mouse GRCm39-2024-A
+reference (built from the CellRanger refdata fasta+gtf for STAR/rust; CellRanger
+uses the refdata directly), 5' GEM-X chemistry.
+
+Each step is wall-clock + peak-RSS timed via /usr/bin/time -v. Output stats are
+read from each tool's raw matrix (+ CellRanger metrics_summary.csv).
+
+Usage (inside container):
+  python3 test/solo_bench.py \
+     --fasta REF/genome.fa --gtf REF/genes.gtf \
+     --whitelist WL.txt --r1 R1.fq --r2 R2.fq \
+     --cellranger /work/bench/cellranger-10.0.0/cellranger \
+     --transcriptome /work/bench/refdata-gex-GRCm39-2024-A \
+     --sample 5k_Mouse_PBMCs_5p_gem-x_GEX --fastqdir /work/bench/gex \
+     --rustar /work/target-linux/release/rustar-aligner \
+     --star $(which STAR) --threads 14 --mem-gb 36 --out /work/bench/results
+"""
+import argparse
+import csv
+import gzip
+import json
+import os
+import re
+import subprocess
+import sys
+import time
+
+# CellRanger 4/5-matching solo flags (3' clip omitted; 5' chemistry).
+SOLO_COMMON = [
+    "--soloType", "CB_UMI_Simple",
+    "--soloCBstart", "1", "--soloCBlen", "16",
+    "--soloUMIstart", "17", "--soloUMIlen", "12",
+    "--soloFeatures", "Gene",
+    "--soloStrand", "Reverse",                 # 5' GEX (SC5P-R2 strandedness "-")
+    "--soloCBmatchWLtype", "1MM_multi_Nbase_pseudocounts",
+    "--soloUMIfiltering", "MultiGeneUMI_CR",
+    "--soloUMIdedup", "1MM_CR",
+]
+
+TIME = ["/usr/bin/time", "-v"]
+
+
+def timed(cmd, logpath, env=None, cwd=None):
+    """Run cmd under /usr/bin/time -v; return (seconds, peak_rss_gb, ok)."""
+    print("  $", " ".join(str(c) for c in cmd), flush=True)
+    t0 = time.time()
+    with open(logpath, "w") as lf:
+        r = subprocess.run(TIME + list(map(str, cmd)), stdout=lf, stderr=subprocess.STDOUT, env=env, cwd=cwd)
+    wall = time.time() - t0
+    peak = None
+    with open(logpath) as lf:
+        txt = lf.read()
+    m = re.search(r"Maximum resident set size \(kbytes\):\s*(\d+)", txt)
+    if m:
+        peak = int(m.group(1)) / 1024 / 1024  # KB -> GB (GNU time reports KB)
+    if r.returncode != 0:
+        print(f"    !! exit {r.returncode}; tail:\n" + "\n".join(txt.splitlines()[-15:]))
+    return wall, peak, r.returncode == 0
+
+
+def index_built(idx_dir):
+    """True if a genome index already exists in idx_dir (skip rebuild/reuse)."""
+    return os.path.exists(os.path.join(idx_dir, "Genome")) or os.path.exists(
+        os.path.join(idx_dir, "SA")
+    )
+
+
+def opener(path):
+    return gzip.open(path, "rt") if path.endswith(".gz") else open(path)
+
+
+def matrix_stats(raw_dir):
+    """Read a MatrixMarket raw dir -> {n_barcodes_with_counts, total_umi, n_genes_detected}."""
+    mtx = None
+    for name in ("matrix.mtx.gz", "matrix.mtx"):
+        p = os.path.join(raw_dir, name)
+        if os.path.exists(p):
+            mtx = p
+            break
+    if not mtx:
+        return None
+    cells, genes, total = set(), set(), 0
+    with opener(mtx) as f:
+        header_done = False
+        for line in f:
+            if line.startswith("%"):
+                continue
+            if not header_done:
+                header_done = True  # dims line
+                continue
+            parts = line.split()
+            if len(parts) < 3:
+                continue
+            g, c, v = int(parts[0]), int(parts[1]), int(float(parts[2]))
+            if v > 0:
+                genes.add(g)
+                cells.add(c)
+                total += v
+    return {"n_barcodes_with_counts": len(cells), "n_genes_detected": len(genes), "total_umi": total}
+
+
+def cellranger_metrics(outs_dir):
+    p = os.path.join(outs_dir, "metrics_summary.csv")
+    if not os.path.exists(p):
+        return {}
+    with open(p) as f:
+        rows = list(csv.reader(f))
+    if len(rows) >= 2:
+        return dict(zip(rows[0], rows[1]))
+    return {}
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--fasta", required=True)
+    ap.add_argument("--gtf", required=True)
+    ap.add_argument("--whitelist", required=True)
+    ap.add_argument("--r1", required=True)
+    ap.add_argument("--r2", required=True)
+    ap.add_argument("--cellranger", required=True)
+    ap.add_argument("--transcriptome", required=True)
+    ap.add_argument("--sample", required=True)
+    ap.add_argument("--fastqdir", required=True)
+    ap.add_argument("--rustar", required=True)
+    ap.add_argument("--star", default="STAR")
+    ap.add_argument("--threads", type=int, default=14)
+    ap.add_argument("--mem-gb", type=int, default=36)
+    ap.add_argument("--out", required=True)
+    ap.add_argument("--sa-nbases", default="14")
+    ap.add_argument("--chemistry", default="auto", help="CellRanger --chemistry")
+    ap.add_argument("--rust-temp-dir", default=None,
+                    help="rustar --tempDir (caps-sa scratch; point at a disk with space)")
+    ap.add_argument("--skip", default="", help="comma list: cellranger,star,rustar")
+    args = ap.parse_args()
+
+    os.makedirs(args.out, exist_ok=True)
+    logs = os.path.join(args.out, "logs")
+    os.makedirs(logs, exist_ok=True)
+    skip = set(s.strip() for s in args.skip.split(",") if s.strip())
+    results = {}
+
+    # ---- STARsolo -------------------------------------------------------
+    if "star" not in skip:
+        print("\n===== STARsolo =====")
+        star_idx = os.path.join(args.out, "star_idx")
+        os.makedirs(star_idx, exist_ok=True)
+        if index_built(star_idx):
+            print("  (STAR index already present — skipping genomeGenerate)")
+            s_gen, s_gen_rss, ok = 0.0, 0.0, True
+        else:
+            s_gen, s_gen_rss, ok = timed(
+                [args.star, "--runMode", "genomeGenerate", "--genomeDir", star_idx,
+                 "--genomeFastaFiles", args.fasta, "--sjdbGTFfile", args.gtf,
+                 "--sjdbOverhang", "89", "--genomeSAindexNbases", args.sa_nbases,
+                 "--runThreadN", args.threads],
+                os.path.join(logs, "star_genomeGenerate.log"))
+        star_out = os.path.join(args.out, "star_out") + "/"
+        os.makedirs(star_out, exist_ok=True)
+        gz = ["--readFilesCommand", "zcat"] if args.r1.endswith(".gz") else []
+        s_run, s_run_rss, ok2 = timed(
+            [args.star, "--genomeDir", star_idx, "--readFilesIn", args.r2, args.r1,
+             "--runThreadN", args.threads, "--outSAMtype", "None"] + gz
+            + ["--soloCBwhitelist", args.whitelist, "--outFileNamePrefix", star_out]
+            + SOLO_COMMON,
+            os.path.join(logs, "star_solo.log"))
+        raw = os.path.join(star_out, "Solo.out", "Gene", "raw")
+        results["STARsolo"] = {
+            "index_build_s": round(s_gen, 1), "index_build_rss_gb": round(s_gen_rss or 0, 2),
+            "count_s": round(s_run, 1), "count_rss_gb": round(s_run_rss or 0, 2),
+            "stats": matrix_stats(raw), "ok": ok and ok2,
+        }
+
+    # ---- rustar-aligner -------------------------------------------------
+    if "rustar" not in skip:
+        print("\n===== rustar-aligner =====")
+        rust_idx = os.path.join(args.out, "rust_idx")
+        os.makedirs(rust_idx, exist_ok=True)
+        if index_built(rust_idx):
+            print("  (rustar index already present — skipping genomeGenerate)")
+            r_gen, r_gen_rss, ok = 0.0, 0.0, True
+        else:
+            tmp = ["--tempDir", args.rust_temp_dir] if args.rust_temp_dir else []
+            r_gen, r_gen_rss, ok = timed(
+                [args.rustar, "--runMode", "genomeGenerate", "--genomeDir", rust_idx,
+                 "--genomeFastaFiles", args.fasta, "--sjdbGTFfile", args.gtf,
+                 "--sjdbOverhang", "89", "--genomeSAindexNbases", args.sa_nbases,
+                 "--runThreadN", args.threads] + tmp,
+                os.path.join(logs, "rustar_genomeGenerate.log"))
+        rust_out = os.path.join(args.out, "rust_out") + "/"
+        os.makedirs(rust_out, exist_ok=True)
+        r_run, r_run_rss, ok2 = timed(
+            [args.rustar, "--genomeDir", rust_idx, "--readFilesIn", args.r2, args.r1,
+             "--sjdbGTFfile", args.gtf, "--runThreadN", args.threads,
+             "--outSAMtype", "SAM",
+             "--soloCBwhitelist", args.whitelist, "--outFileNamePrefix", rust_out]
+            + SOLO_COMMON,
+            os.path.join(logs, "rustar_solo.log"))
+        raw = os.path.join(rust_out, "Solo.out", "Gene", "raw")
+        results["rustar-aligner"] = {
+            "index_build_s": round(r_gen, 1), "index_build_rss_gb": round(r_gen_rss or 0, 2),
+            "count_s": round(r_run, 1), "count_rss_gb": round(r_run_rss or 0, 2),
+            "stats": matrix_stats(raw), "ok": ok and ok2,
+        }
+
+    # ---- CellRanger -----------------------------------------------------
+    if "cellranger" not in skip:
+        print("\n===== CellRanger =====")
+        cr_dir = os.path.join(args.out, "cr")
+        # cellranger count writes to ./<id>; run in args.out
+        if os.path.exists(os.path.join(args.out, "cr_run")):
+            subprocess.run(["rm", "-rf", os.path.join(args.out, "cr_run")])
+        c_run, c_rss, ok = timed(
+            [args.cellranger, "count", "--id", "cr_run",
+             "--transcriptome", args.transcriptome,
+             "--fastqs", args.fastqdir, "--sample", args.sample,
+             "--chemistry", args.chemistry,
+             "--create-bam", "false", "--nosecondary",
+             "--localcores", str(args.threads), "--localmem", str(args.mem_gb)],
+            os.path.join(logs, "cellranger_count.log"),
+            env={**os.environ}, cwd=args.out)
+        outs = os.path.join(args.out, "cr_run", "outs")
+        raw = os.path.join(outs, "raw_feature_bc_matrix")
+        results["CellRanger"] = {
+            "count_s": round(c_run, 1), "count_rss_gb": round(c_rss or 0, 2),
+            "stats": matrix_stats(raw),
+            "metrics": cellranger_metrics(outs), "ok": ok,
+        }
+
+    # ---- report ---------------------------------------------------------
+    with open(os.path.join(args.out, "benchmark.json"), "w") as f:
+        json.dump(results, f, indent=2)
+
+    print("\n================ BENCHMARK SUMMARY ================")
+    hdr = f"{'tool':<16}{'idx build(s)':>14}{'count(s)':>11}{'peak RSS(GB)':>14}{'barcodes':>10}{'genes':>8}{'total UMI':>12}"
+    print(hdr)
+    print("-" * len(hdr))
+    for tool, r in results.items():
+        st = r.get("stats") or {}
+        idx = r.get("index_build_s", "-")
+        peak = max(r.get("index_build_rss_gb", 0) or 0, r.get("count_rss_gb", 0) or 0)
+        print(f"{tool:<16}{str(idx):>14}{str(r.get('count_s','-')):>11}{peak:>14.2f}"
+              f"{str(st.get('n_barcodes_with_counts','-')):>10}"
+              f"{str(st.get('n_genes_detected','-')):>8}{str(st.get('total_umi','-')):>12}")
+    if "CellRanger" in results and results["CellRanger"].get("metrics"):
+        m = results["CellRanger"]["metrics"]
+        keys = ["Estimated Number of Cells", "Mean Reads per Cell", "Median Genes per Cell",
+                "Median UMI Counts per Cell", "Reads Mapped Confidently to Transcriptome"]
+        print("\nCellRanger reported metrics:")
+        for k in keys:
+            if k in m:
+                print(f"  {k}: {m[k]}")
+    print(f"\nFull results: {os.path.join(args.out, 'benchmark.json')}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/solo_cellranger_diff.py b/test/solo_cellranger_diff.py
new file mode 100644
index 0000000..c66270e
--- /dev/null
+++ b/test/solo_cellranger_diff.py
@@ -0,0 +1,305 @@
+#!/usr/bin/env python3
+"""Differential test: rustar-aligner STARsolo vs real STAR, CellRanger-style run.
+
+Generates a small synthetic 10x-style dataset (genome + GTF + whitelist + cDNA
+read + barcode read), runs BOTH STAR and rustar-aligner with the
+CellRanger-4/5-matching solo flags from
+https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#matching-cellranger-4xx-and-5xx-results
+and compares the raw Gene count matrices decoded to {(barcode, gene_id): count}.
+
+Usage:
+    python3 test/solo_cellranger_diff.py [--star /path/to/STAR] [--rustar /path/to/rustar-aligner] [--keep]
+
+Exit code 0 = matrices match, 1 = mismatch / error.
+"""
+import argparse
+import os
+import random
+import shutil
+import subprocess
+import sys
+import tempfile
+
+# CellRanger 4.x/5.x matching flags (STARsolo.md).
+CELLRANGER_FLAGS = [
+    "--clipAdapterType", "CellRanger4",
+    "--outFilterScoreMin", "30",
+    "--soloCBmatchWLtype", "1MM_multi_Nbase_pseudocounts",
+    "--soloUMIfiltering", "MultiGeneUMI_CR",
+    "--soloUMIdedup", "1MM_CR",
+]
+
+CB_LEN = 16
+UMI_LEN = 12
+READ_LEN = 90
+BASES = "ACGT"
+
+
+def rand_seq(rng, n):
+    return "".join(rng.choice(BASES) for _ in range(n))
+
+
+# Two-exon gene layout (0-based): exon1 [s, s+150), intron [s+150, s+400) with
+# canonical GT..AG, exon2 [s+400, s+550). Multi-exon genes give STAR a non-empty
+# splice-junction DB, which it needs to set up the solo Transcriptome directory.
+GENE_A_START = 10000
+GENE_B_START = 30000
+
+
+def _plant_gene(g, s, rng):
+    g[s : s + 150] = list(rand_seq(rng, 150))          # exon1
+    g[s + 150 : s + 400] = list(rand_seq(rng, 250))    # intron body
+    g[s + 150], g[s + 151] = "G", "T"                  # donor
+    g[s + 398], g[s + 399] = "A", "G"                  # acceptor
+    g[s + 400 : s + 550] = list(rand_seq(rng, 150))    # exon2
+
+
+def build_genome(rng, length=50000):
+    g = list(rand_seq(rng, length))
+    _plant_gene(g, GENE_A_START, rng)
+    _plant_gene(g, GENE_B_START, rng)
+    return "".join(g)
+
+
+def pick_window(genome, exon_start):
+    """Pick a READ_LEN window inside exon1 ending in a non-A base (so the
+    CellRanger4 polyA trim is a guaranteed no-op for both tools). The window
+    stays inside the 150 bp exon1, so reads never span the junction."""
+    a = exon_start + 20
+    while genome[a + READ_LEN - 1] == "A":
+        a += 1
+    return genome[a : a + READ_LEN]
+
+
+def write_files(d, genome):
+    fa = os.path.join(d, "genome.fa")
+    with open(fa, "w") as f:
+        f.write(">chr1\n")
+        for i in range(0, len(genome), 70):
+            f.write(genome[i : i + 70] + "\n")
+
+    gtf = os.path.join(d, "genes.gtf")
+    with open(gtf, "w") as f:
+        # Two exons per gene (1-based inclusive), matching the planted layout.
+        f.write('chr1\tsrc\texon\t10001\t10150\t.\t+\t.\tgene_id "GENEA"; transcript_id "GENEA.1"; gene_name "GeneA";\n')
+        f.write('chr1\tsrc\texon\t10401\t10550\t.\t+\t.\tgene_id "GENEA"; transcript_id "GENEA.1"; gene_name "GeneA";\n')
+        f.write('chr1\tsrc\texon\t30001\t30150\t.\t+\t.\tgene_id "GENEB"; transcript_id "GENEB.1"; gene_name "GeneB";\n')
+        f.write('chr1\tsrc\texon\t30401\t30550\t.\t+\t.\tgene_id "GENEB"; transcript_id "GENEB.1"; gene_name "GeneB";\n')
+
+    wl = os.path.join(d, "whitelist.txt")
+    cbs = ["AAAACCCCGGGGTTTT", "ACACACACGTGTGTGT", "TTTTGGGGCCCCAAAA", "GTGTGTGTACACACAC"]
+    with open(wl, "w") as f:
+        f.write("\n".join(cbs) + "\n")
+
+    readA = pick_window(genome, 10000)
+    readB = pick_window(genome, 30000)
+
+    # (cell, gene-read, umi, n_reads). Designed to exercise:
+    #  - exact CB match (all CBs in whitelist)
+    #  - 1MM_CR UMI collapse: ACGTACGTACGT (5) + ACGTACGTACGA (1) -> 1 molecule
+    #  - distinct molecules counted, two genes, two cells.
+    plan = [
+        (cbs[0], readA, "ACGTACGTACGT", 5),
+        (cbs[0], readA, "ACGTACGTACGA", 1),   # 1MM neighbor of the above
+        (cbs[0], readA, "TGCATGCATGCA", 3),   # separate molecule
+        (cbs[0], readB, "GGGGTTTTAACC", 2),   # GeneB, cell0
+        (cbs[1], readA, "CATGCATGCATG", 4),   # GeneA, cell1
+    ]
+    # Expected decoded matrix.
+    expected = {
+        ("AAAACCCCGGGGTTTT", "GENEA"): 2,  # two molecules (1MM pair collapses)
+        ("AAAACCCCGGGGTTTT", "GENEB"): 1,
+        ("ACACACACGTGTGTGT", "GENEA"): 1,
+    }
+
+    cdna = os.path.join(d, "cdna.fq")
+    bc = os.path.join(d, "barcode.fq")
+    ci = 0
+    with open(cdna, "w") as cf, open(bc, "w") as bf:
+        for (cb, read, umi, n) in plan:
+            for _ in range(n):
+                name = f"read{ci}"
+                ci += 1
+                cf.write(f"@{name}\n{read}\n+\n{'I' * READ_LEN}\n")
+                barcode = cb + umi
+                bf.write(f"@{name}\n{barcode}\n+\n{'I' * len(barcode)}\n")
+    return fa, gtf, wl, cdna, bc, expected
+
+
+def run(cmd, **kw):
+    print("  $", " ".join(str(c) for c in cmd))
+    r = subprocess.run(cmd, capture_output=True, text=True, **kw)
+    if r.returncode != 0:
+        print(r.stdout[-2000:])
+        print(r.stderr[-4000:])
+        raise SystemExit(f"command failed ({r.returncode}): {cmd[0]}")
+    return r
+
+
+def run_star(star, d, fa, gtf, wl, cdna, bc):
+    # Generate WITH the GTF so geneInfo.tab lands in the index, then reset the
+    # recorded sjdbGTFfile to "-" in genomeParameters.txt. STAR's solo
+    # Transcriptome uses `trInfoDir = sjdbGTFfile=="-" ? genomeDir : sjdbInsert.outDir`
+    # (Transcriptome.cpp:18); with the path still recorded it points at an empty
+    # insert dir and fails with "/geneInfo.tab". Resetting to "-" makes it read
+    # geneInfo.tab from the genome dir. (The gene model is intact in the index.)
+    idx = os.path.join(d, "star_index")
+    os.makedirs(idx, exist_ok=True)
+    run([star, "--runMode", "genomeGenerate", "--genomeDir", idx,
+         "--genomeFastaFiles", fa, "--sjdbGTFfile", gtf,
+         "--genomeSAindexNbases", "7", "--sjdbOverhang", "89"])
+    gp = os.path.join(idx, "genomeParameters.txt")
+    lines = open(gp).read().splitlines()
+    with open(gp, "w") as f:
+        for ln in lines:
+            if ln.startswith("sjdbGTFfile\t"):
+                f.write("sjdbGTFfile\t-\n")
+            else:
+                f.write(ln + "\n")
+
+    out = os.path.join(d, "star_out") + os.sep
+    run([star, "--genomeDir", idx, "--readFilesIn", cdna, bc,
+         "--soloType", "CB_UMI_Simple", "--soloCBwhitelist", wl,
+         "--soloCBstart", "1", "--soloCBlen", str(CB_LEN),
+         "--soloUMIstart", str(CB_LEN + 1), "--soloUMIlen", str(UMI_LEN),
+         "--soloFeatures", "Gene", "--outSAMtype", "SAM",
+         "--outFileNamePrefix", out] + CELLRANGER_FLAGS)
+    # Guard against a STAR binary that silently reads 0 reads (broken bottle).
+    log = os.path.join(out, "Log.final.out")
+    if os.path.exists(log):
+        for ln in open(log):
+            if "Number of input reads" in ln and ln.strip().endswith("0"):
+                raise SystemExit(
+                    "STAR processed 0 input reads — the STAR binary appears broken "
+                    "on this machine (immediate EOF on FASTQ input). Install a working "
+                    "STAR and re-run with --star /path/to/STAR."
+                )
+    return os.path.join(out, "Solo.out", "Gene", "raw")
+
+
+def run_rustar(rustar, d, fa, gtf, wl, cdna, bc):
+    idx = os.path.join(d, "rustar_index")
+    os.makedirs(idx, exist_ok=True)
+    run([rustar, "--runMode", "genomeGenerate", "--genomeDir", idx,
+         "--genomeFastaFiles", fa, "--sjdbGTFfile", gtf,
+         "--genomeSAindexNbases", "7", "--sjdbOverhang", "89"])
+    out = os.path.join(d, "rustar_out") + os.sep
+    run([rustar, "--genomeDir", idx, "--readFilesIn", cdna, bc,
+         "--soloType", "CB_UMI_Simple", "--soloCBwhitelist", wl,
+         "--soloCBstart", "1", "--soloCBlen", str(CB_LEN),
+         "--soloUMIstart", str(CB_LEN + 1), "--soloUMIlen", str(UMI_LEN),
+         "--soloFeatures", "Gene", "--sjdbGTFfile", gtf,
+         "--outSAMtype", "SAM",
+         "--outFileNamePrefix", out] + CELLRANGER_FLAGS)
+    return os.path.join(out, "Solo.out", "Gene", "raw")
+
+
+def decode_matrix(raw_dir):
+    """Decode raw/{matrix.mtx,barcodes.tsv,features.tsv} -> {(barcode, gene_id): count}."""
+    feats = []
+    with open(os.path.join(raw_dir, "features.tsv")) as f:
+        for line in f:
+            feats.append(line.rstrip("\n").split("\t")[0])
+    barcodes = []
+    with open(os.path.join(raw_dir, "barcodes.tsv")) as f:
+        for line in f:
+            barcodes.append(line.strip())
+    out = {}
+    with open(os.path.join(raw_dir, "matrix.mtx")) as f:
+        lines = [l for l in f if not l.startswith("%")]
+    # first non-% line is dims
+    for entry in lines[1:]:
+        parts = entry.split()
+        if len(parts) < 3:
+            continue
+        row, col, cnt = int(parts[0]), int(parts[1]), int(float(parts[2]))
+        out[(barcodes[col - 1], feats[row - 1])] = cnt
+    return out
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--star", default=shutil.which("STAR") or "/opt/homebrew/bin/STAR")
+    ap.add_argument("--rustar", default=None)
+    ap.add_argument("--keep", action="store_true")
+    ap.add_argument("--seed", type=int, default=20260612)
+    args = ap.parse_args()
+
+    repo = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    if args.rustar:
+        # Honor an explicit path exactly — never silently fall back to a
+        # different (possibly foreign-arch) binary.
+        rustar = args.rustar
+        if not os.path.exists(rustar):
+            raise SystemExit(f"--rustar binary not found: {rustar}")
+    else:
+        rustar = os.path.join(repo, "target", "release", "rustar-aligner")
+        if not os.path.exists(rustar):
+            rustar = os.path.join(repo, "target", "debug", "rustar-aligner")
+        if not os.path.exists(rustar):
+            raise SystemExit(
+                "rustar-aligner binary not found — build it first (cargo build [--release]) "
+                "or pass --rustar /path/to/rustar-aligner"
+            )
+    if not (args.star and os.path.exists(args.star)):
+        raise SystemExit(f"STAR binary not found: {args.star}")
+
+    d = tempfile.mkdtemp(prefix="solo_diff_")
+    print(f"workdir: {d}")
+    print(f"STAR:   {args.star}")
+    print(f"rustar: {rustar}")
+    rng = random.Random(args.seed)
+    try:
+        genome = build_genome(rng)
+        fa, gtf, wl, cdna, bc, expected = write_files(d, genome)
+
+        print("\n== rustar-aligner ==")
+        rustar_raw = run_rustar(rustar, d, fa, gtf, wl, cdna, bc)
+        rustar_m = decode_matrix(rustar_raw)
+
+        print("\n== expected (hand-computed CellRanger result) ==")
+        for k, v in sorted(expected.items()):
+            print(f"   {k} = {v}")
+        print("== rustar matrix ==")
+        for k, v in sorted(rustar_m.items()):
+            print(f"   {k} = {v}")
+
+        # Core guarantee: rustar's CellRanger-style matrix matches the expectation.
+        if rustar_m != expected:
+            print("\nFAIL: rustar matrix does not match the expected CellRanger result:")
+            for k in sorted(set(rustar_m) | set(expected)):
+                if rustar_m.get(k) != expected.get(k):
+                    print(f"   {k}: rustar={rustar_m.get(k)} expected={expected.get(k)}")
+            return 1
+        print("\nrustar matrix matches the expected CellRanger result.")
+
+        # Live comparison against the real STAR binary, when it works on this host.
+        print("\n== STAR ==")
+        try:
+            star_raw = run_star(args.star, d, fa, gtf, wl, cdna, bc)
+            star_m = decode_matrix(star_raw)
+        except SystemExit as e:
+            print(f"\nSTAR could not run a live comparison on this host: {e}")
+            print("PASS (rustar validated against the CellRanger expectation; "
+                  "run on a host with a working STAR for the live diff).")
+            return 0
+        print("== STAR matrix ==")
+        for k, v in sorted(star_m.items()):
+            print(f"   {k} = {v}")
+        if star_m == rustar_m:
+            print("\nPASS: rustar-aligner matrix matches real STARsolo exactly.")
+            return 0
+        print("\nFAIL: rustar vs STAR mismatch:")
+        for k in sorted(set(star_m) | set(rustar_m)):
+            if star_m.get(k) != rustar_m.get(k):
+                print(f"   {k}: STAR={star_m.get(k)} rustar={rustar_m.get(k)}")
+        return 1
+    finally:
+        if args.keep:
+            print(f"(kept workdir {d})")
+        else:
+            shutil.rmtree(d, ignore_errors=True)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/solo_compare_h5ad.py b/test/solo_compare_h5ad.py
new file mode 100644
index 0000000..99b3b9f
--- /dev/null
+++ b/test/solo_compare_h5ad.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python3
+"""Knee-call + compare CellRanger / STARsolo / rustar-aligner raw matrices.
+
+For a fair comparison that isolates *counting* differences from *cell-calling*
+differences, the SAME knee filter (CellRanger 2.2 — STARsolo's default
+--soloCellFilter) is applied to each tool's RAW matrix. Each filtered result is
+written as an .h5ad (AnnData, cells x genes) and the three are compared:
+n cells, median UMI/genes per cell, barcode overlap, per-cell UMI correlation on
+shared barcodes, and gene-level pseudobulk correlation.
+
+Usage:
+    .venv/bin/python test/solo_compare_h5ad.py \
+        --cellranger <outs/raw_feature_bc_matrix> \
+        --starsolo   <Solo.out/Gene/raw> \
+        --rustar     <Solo.out/Gene/raw> \
+        --out <dir>
+"""
+import argparse
+import gzip
+import json
+import os
+import sys
+
+import anndata as ad
+import numpy as np
+import pandas as pd
+import scipy.io
+import scipy.sparse as sp
+
+
+def _find(d, base):
+    for c in (base, base + ".gz"):
+        p = os.path.join(d, c)
+        if os.path.exists(p):
+            return p
+    raise FileNotFoundError(f"{base}[.gz] not found in {d}")
+
+
+def _open_text(p):
+    return gzip.open(p, "rt") if p.endswith(".gz") else open(p)
+
+
+def load_raw(d):
+    """Load a 10x/STARsolo raw matrix dir -> (X cells x genes CSR, barcodes, gene_ids)."""
+    mp = _find(d, "matrix.mtx")
+    handle = gzip.open(mp, "rb") if mp.endswith(".gz") else open(mp, "rb")
+    with handle:
+        m = scipy.io.mmread(handle)  # features x barcodes
+    X = sp.csr_matrix(m).T.tocsr()  # -> barcodes (cells) x features (genes)
+    barcodes = np.array([l.split("\t")[0].strip() for l in _open_text(_find(d, "barcodes.tsv"))])
+    genes = np.array([l.split("\t")[0].strip() for l in _open_text(_find(d, "features.tsv"))])
+    return X, barcodes, genes
+
+
+def norm_bc(bc):
+    """Strip 10x '-1' gem-group suffix so barcodes are comparable across tools."""
+    return np.array([b.split("-")[0] for b in bc])
+
+
+def revcomp(s):
+    t = str.maketrans("ACGT", "TGCA")
+    return s.translate(t)[::-1]
+
+
+def knee_cr22(totals, n_expected=3000, max_pct=0.99, max_min_ratio=10):
+    """CellRanger-2.2 knee threshold on per-barcode totals (STARsolo default)."""
+    counts = np.sort(totals[totals > 0])[::-1]
+    if counts.size == 0:
+        return 0.0
+    idx = min(int(round(n_expected * (1 - max_pct))), counts.size - 1)
+    robust_max = counts[idx]
+    return robust_max / max_min_ratio
+
+
+def load_cell_set(path):
+    """Load an EmptyDrops cells.txt (one barcode/line) -> normalized set, or None."""
+    if not path or not os.path.exists(path):
+        return None
+    with _open_text(path) as fh:
+        return set(l.split("\t")[0].split("-")[0].strip() for l in fh if l.strip())
+
+
+def build_filtered(name, raw_dir, rc_barcodes=False, cells=None):
+    """Filter a raw matrix to called cells. If `cells` (a normalized barcode set,
+    e.g. from EmptyDrops) is given, keep exactly those; otherwise CR2.2 knee."""
+    X, bc, genes = load_raw(raw_dir)
+    bc = norm_bc(bc)
+    if rc_barcodes:
+        bc = np.array([revcomp(b) for b in bc])
+    totals = np.asarray(X.sum(axis=1)).ravel()
+    if cells is not None:
+        thr = -1.0
+        keep = np.array([b in cells for b in bc])
+    else:
+        thr = knee_cr22(totals)
+        keep = totals >= thr
+    Xf = X[keep]
+    bcf = bc[keep]
+    A = ad.AnnData(X=Xf, obs=pd.DataFrame(index=bcf), var=pd.DataFrame(index=genes))
+    A.obs["n_umi"] = np.asarray(Xf.sum(axis=1)).ravel()
+    A.obs["n_genes"] = np.asarray((Xf > 0).sum(axis=1)).ravel()
+    return A, thr
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--cellranger", required=True)
+    ap.add_argument("--starsolo", required=True)
+    ap.add_argument("--rustar", required=True)
+    ap.add_argument("--out", required=True)
+    # Optional EmptyDrops cells.txt per tool; when given, filter by these calls
+    # instead of the CR2.2 knee (CellRanger uses its own filtered barcodes).
+    ap.add_argument("--rustar-cells")
+    ap.add_argument("--starsolo-cells")
+    ap.add_argument("--cellranger-cells")
+    args = ap.parse_args()
+    os.makedirs(args.out, exist_ok=True)
+
+    r_cells = load_cell_set(args.rustar_cells)
+    s_cells = load_cell_set(args.starsolo_cells)
+    c_cells = load_cell_set(args.cellranger_cells)
+
+    # Build STARsolo / rustar first; detect whether CellRanger barcodes need RC
+    # (some 5' chemistries report the reverse complement).
+    star, star_thr = build_filtered("STARsolo", args.starsolo, cells=s_cells)
+    rust, rust_thr = build_filtered("rustar", args.rustar, cells=r_cells)
+
+    cr_plain, _ = build_filtered("CellRanger", args.cellranger, rc_barcodes=False, cells=c_cells)
+    ov_plain = len(set(cr_plain.obs_names) & set(star.obs_names))
+    cr_rc, _ = build_filtered("CellRanger", args.cellranger, rc_barcodes=True, cells=c_cells)
+    ov_rc = len(set(cr_rc.obs_names) & set(star.obs_names))
+    cr = cr_rc if ov_rc > ov_plain else cr_plain
+    cr_orient = "reverse-complement" if ov_rc > ov_plain else "as-reported"
+
+    objs = {"CellRanger": cr, "STARsolo": star, "rustar-aligner": rust}
+    for name, A in objs.items():
+        path = os.path.join(args.out, f"{name.replace('-aligner','')}.filtered.h5ad")
+        A.write_h5ad(path)
+
+    print(f"\nCellRanger barcode orientation vs STARsolo: {cr_orient} "
+          f"(overlap as-reported={ov_plain}, rc={ov_rc})")
+
+    # ---- per-tool summary ----
+    print("\n================ filtered (CR2.2 knee) summary ================")
+    hdr = f"{'tool':<16}{'cells':>8}{'median UMI/cell':>17}{'median genes/cell':>19}{'genes detected':>16}{'total UMI':>12}"
+    print(hdr); print("-" * len(hdr))
+    rows = {}
+    for name, A in objs.items():
+        med_umi = int(np.median(A.obs["n_umi"])) if A.n_obs else 0
+        med_g = int(np.median(A.obs["n_genes"])) if A.n_obs else 0
+        genes_det = int((np.asarray(A.X.sum(axis=0)).ravel() > 0).sum())
+        tot = int(A.X.sum())
+        rows[name] = dict(cells=A.n_obs, median_umi=med_umi, median_genes=med_g,
+                          genes_detected=genes_det, total_umi=tot)
+        print(f"{name:<16}{A.n_obs:>8}{med_umi:>17}{med_g:>19}{genes_det:>16}{tot:>12}")
+
+    # ---- barcode overlap (called-cell sets) ----
+    sets = {n: set(A.obs_names) for n, A in objs.items()}
+    names = list(objs)
+    print("\n================ called-cell barcode overlap ================")
+    allc = sets[names[0]] & sets[names[1]] & sets[names[2]]
+    print(f"shared by all 3: {len(allc)}")
+    for i in range(len(names)):
+        for j in range(i + 1, len(names)):
+            a, b = names[i], names[j]
+            inter = len(sets[a] & sets[b]); uni = len(sets[a] | sets[b])
+            print(f"  {a} ∩ {b}: {inter}  (Jaccard {inter/uni:.3f})")
+
+    # ---- correlations on shared cells & genes ----
+    print("\n================ agreement on shared cells/genes ================")
+    shared_genes = list(set(cr.var_names) & set(star.var_names) & set(rust.var_names))
+    common_cells = sorted(allc)
+    corr = {}
+    if common_cells and shared_genes:
+        # per-cell total UMI vectors (aligned to common cells)
+        def cell_totals(A):
+            idx = [A.obs_names.get_loc(c) for c in common_cells]
+            return np.asarray(A[idx].X.sum(axis=1)).ravel()
+        tot = {n: cell_totals(A) for n, A in objs.items()}
+        # pseudobulk per gene (sum over shared cells), aligned to shared genes
+        def pseudobulk(A):
+            idx = [A.obs_names.get_loc(c) for c in common_cells]
+            gi = [A.var_names.get_loc(g) for g in shared_genes]
+            return np.asarray(A[idx][:, gi].X.sum(axis=0)).ravel()
+        pb = {n: pseudobulk(A) for n, A in objs.items()}
+        for i in range(len(names)):
+            for j in range(i + 1, len(names)):
+                a, b = names[i], names[j]
+                rc_cell = np.corrcoef(tot[a], tot[b])[0, 1]
+                rc_gene = np.corrcoef(pb[a], pb[b])[0, 1]
+                corr[f"{a} vs {b}"] = dict(per_cell_umi_r=round(float(rc_cell), 4),
+                                           pseudobulk_gene_r=round(float(rc_gene), 4))
+                print(f"  {a} vs {b}: per-cell UMI r={rc_cell:.4f}, gene pseudobulk r={rc_gene:.4f}  "
+                      f"(n_cells={len(common_cells)}, n_genes={len(shared_genes)})")
+
+    out = dict(threshold=dict(STARsolo=star_thr, rustar=rust_thr),
+               cellranger_orientation=cr_orient, summary=rows, correlations=corr,
+               shared_all3_cells=len(allc))
+    with open(os.path.join(args.out, "compare.json"), "w") as f:
+        json.dump(out, f, indent=2)
+    print(f"\nWrote {len(objs)} h5ad files + compare.json to {args.out}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/solo_diff_docker.sh b/test/solo_diff_docker.sh
new file mode 100755
index 0000000..dc0c921
--- /dev/null
+++ b/test/solo_diff_docker.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Run the STARsolo CellRanger differential test (rustar-aligner vs real STAR) in
+# a consistent Linux container, so the comparison works regardless of the host
+# (the macOS STAR build has a FASTQ-read bug; Linux STAR works).
+#
+# Requires a Docker-compatible runtime. On macOS without Docker Desktop:
+#   brew install colima docker && colima start
+#
+# Usage:  test/solo_diff_docker.sh [N_RUNS]
+set -euo pipefail
+
+cd "$(dirname "$0")/.."
+RUNS="${1:-1}"
+IMAGE=rustar-solodiff
+
+docker build -f test/Dockerfile.solodiff -t "$IMAGE" . >/dev/null
+
+# Build rustar for Linux into a host-mounted dir (persisted across runs), then
+# run the harness against the Linux STAR + Linux rustar binary.
+docker run --rm -v "$PWD":/work -w /work -e CARGO_TARGET_DIR=/work/target-linux "$IMAGE" bash -c '
+  set -e
+  cargo build --release 2>&1 | tail -1
+  RUSTAR=/work/target-linux/release/rustar-aligner
+  STARBIN=$(which STAR)
+  for i in $(seq 1 '"$RUNS"'); do
+    echo "===== differential run $i ====="
+    python3 test/solo_cellranger_diff.py --star "$STARBIN" --rustar "$RUSTAR"
+  done
+'
diff --git a/test/solo_genefull_compare.py b/test/solo_genefull_compare.py
new file mode 100644
index 0000000..ff1b5c7
--- /dev/null
+++ b/test/solo_genefull_compare.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+"""Compare GeneFull (intron-inclusive) quantification across rustar / STARsolo /
+CellRanger, plus the EmptyDrops-filtered cell sets.
+
+Part A (raw count parity): load each tool's raw matrix, report total UMIs, genes
+detected, cells with >0 UMI, and the per-cell UMI-total correlation between
+rustar-GeneFull and STARsolo-GeneFull (they should match closely) and each vs
+CellRanger (whose default raw matrix is intron-inclusive).
+
+Part B (filtered h5 parity): given EmptyDrops-called barcode lists for each tool
+(from the `emptydrops` Rust binary) and CellRanger's own filtered barcodes,
+report cell-set overlap (Jaccard) and per-cell UMI agreement on shared cells.
+
+Usage:
+  solo_genefull_compare.py \
+    --rustar  <rustar genefull raw dir> \
+    --starsolo <starsolo genefull raw dir> \
+    --cellranger <cellranger raw dir> \
+    [--rustar-cells f.txt --starsolo-cells f.txt --cr-cells f.txt] \
+    --out compare_genefull.json
+"""
+import argparse
+import gzip
+import json
+import os
+import sys
+
+
+def _open(p):
+    return gzip.open(p, "rt") if p.endswith(".gz") else open(p)
+
+
+def _find(d, base):
+    for c in (base, base + ".gz"):
+        p = os.path.join(d, c)
+        if os.path.exists(p):
+            return p
+    raise FileNotFoundError(f"{base}[.gz] not in {d}")
+
+
+def load_raw(d):
+    """Return (barcodes list, dict cell_idx->total_umi, n_genes, total_umi).
+
+    Barcodes are normalized (10x '-1' gem-group suffix stripped) so they are
+    comparable across tools (CellRanger appends '-1', STARsolo/rustar do not)."""
+    bcs = [l.split("\t")[0].split("-")[0].strip() for l in _open(_find(d, "barcodes.tsv"))]
+    genes = [l.split("\t")[0].strip() for l in _open(_find(d, "features.tsv"))]
+    totals = {}
+    total_umi = 0
+    genes_seen = set()
+    with _open(_find(d, "matrix.mtx")) as fh:
+        for line in fh:
+            if line.startswith("%"):
+                continue
+            break  # first non-% line is the dims header; skip it
+        for line in fh:
+            g, c, v = line.split()[:3]
+            v = int(float(v))
+            if v == 0:
+                continue
+            ci = int(c) - 1
+            totals[ci] = totals.get(ci, 0) + v
+            total_umi += v
+            genes_seen.add(int(g) - 1)
+    return bcs, totals, len(genes), total_umi, len(genes_seen)
+
+
+def summarize(name, d):
+    bcs, totals, n_genes, total_umi, genes_detected = load_raw(d)
+    cells = sum(1 for v in totals.values() if v > 0)
+    print(f"[{name}] cells>0={cells:,}  total_UMI={total_umi:,}  "
+          f"genes_detected={genes_detected:,}/{n_genes:,}")
+    return {"name": name, "barcodes": bcs, "totals": totals,
+            "cells_gt0": cells, "total_umi": total_umi,
+            "genes_detected": genes_detected, "n_genes": n_genes}
+
+
+def pearson(xs, ys):
+    n = len(xs)
+    if n < 2:
+        return float("nan")
+    mx = sum(xs) / n
+    my = sum(ys) / n
+    sxy = sum((x - mx) * (y - my) for x, y in zip(xs, ys))
+    sxx = sum((x - mx) ** 2 for x in xs)
+    syy = sum((y - my) ** 2 for y in ys)
+    if sxx == 0 or syy == 0:
+        return float("nan")
+    return sxy / (sxx ** 0.5 * syy ** 0.5)
+
+
+def per_cell_corr(a, b):
+    """Per-cell UMI-total correlation over the shared barcode set."""
+    a_by_bc = {a["barcodes"][i]: t for i, t in a["totals"].items()}
+    b_by_bc = {b["barcodes"][i]: t for i, t in b["totals"].items()}
+    shared = sorted(set(a_by_bc) & set(b_by_bc))
+    xs = [a_by_bc[bc] for bc in shared]
+    ys = [b_by_bc[bc] for bc in shared]
+    r = pearson(xs, ys)
+    exact = sum(1 for x, y in zip(xs, ys) if x == y)
+    return {"shared_cells": len(shared), "pearson_r": r,
+            "exact_total_match": exact,
+            "exact_frac": exact / len(shared) if shared else float("nan")}
+
+
+def read_cells(p):
+    if not p or not os.path.exists(p):
+        return None
+    return set(l.split("\t")[0].strip() for l in _open(p))
+
+
+def jaccard(a, b):
+    if a is None or b is None:
+        return None
+    inter = len(a & b)
+    union = len(a | b)
+    return {"a": len(a), "b": len(b), "intersection": inter,
+            "jaccard": inter / union if union else float("nan"),
+            "a_only": len(a - b), "b_only": len(b - a)}
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--rustar", required=True)
+    ap.add_argument("--starsolo", required=True)
+    ap.add_argument("--cellranger", required=True)
+    ap.add_argument("--rustar-cells")
+    ap.add_argument("--starsolo-cells")
+    ap.add_argument("--cr-cells")
+    ap.add_argument("--out", default="compare_genefull.json")
+    a = ap.parse_args()
+
+    print("=== Part A: GeneFull raw count parity ===")
+    R = summarize("rustar-GeneFull", a.rustar)
+    S = summarize("STARsolo-GeneFull", a.starsolo)
+    C = summarize("CellRanger-raw", a.cellranger)
+
+    print("\n=== per-cell UMI-total correlation ===")
+    rs = per_cell_corr(R, S)
+    print(f"rustar vs STARsolo : shared={rs['shared_cells']:,}  r={rs['pearson_r']:.6f}  "
+          f"exact_total={rs['exact_frac']:.4%}")
+    rc = per_cell_corr(R, C)
+    print(f"rustar vs CellRgr  : shared={rc['shared_cells']:,}  r={rc['pearson_r']:.6f}")
+    sc = per_cell_corr(S, C)
+    print(f"STAR   vs CellRgr  : shared={sc['shared_cells']:,}  r={sc['pearson_r']:.6f}")
+
+    out = {
+        "raw": {k: {kk: v[kk] for kk in ("cells_gt0", "total_umi",
+                                          "genes_detected", "n_genes")}
+                for k, v in (("rustar", R), ("starsolo", S), ("cellranger", C))},
+        "corr": {"rustar_vs_starsolo": rs, "rustar_vs_cr": rc, "starsolo_vs_cr": sc},
+    }
+
+    rcells = read_cells(a.rustar_cells)
+    scells = read_cells(a.starsolo_cells)
+    ccells = read_cells(a.cr_cells)
+    if rcells or ccells:
+        print("\n=== Part B: filtered cell-set overlap (EmptyDrops / CellRanger) ===")
+        out["filtered"] = {}
+        if rcells and ccells:
+            j = jaccard(rcells, ccells)
+            print(f"rustar-ED vs CR-filtered : rustar={j['a']:,} CR={j['b']:,} "
+                  f"shared={j['intersection']:,} jaccard={j['jaccard']:.4f}")
+            out["filtered"]["rustar_vs_cr"] = j
+        if scells and ccells:
+            j = jaccard(scells, ccells)
+            print(f"STAR-ED   vs CR-filtered : star={j['a']:,} CR={j['b']:,} "
+                  f"shared={j['intersection']:,} jaccard={j['jaccard']:.4f}")
+            out["filtered"]["starsolo_vs_cr"] = j
+        if rcells and scells:
+            j = jaccard(rcells, scells)
+            print(f"rustar-ED vs STAR-ED     : jaccard={j['jaccard']:.4f}")
+            out["filtered"]["rustar_vs_starsolo"] = j
+
+    with open(a.out, "w") as fh:
+        json.dump(out, fh, indent=2, default=str)
+    print(f"\nwrote {a.out}")
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/solo_genefull_h5_compare.py b/test/solo_genefull_h5_compare.py
new file mode 100644
index 0000000..d75e60b
--- /dev/null
+++ b/test/solo_genefull_h5_compare.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+"""GeneFull intron-gap + EmptyDrops-filtered h5 comparison (rustar vs CellRanger).
+
+Loads matrices one at a time (memory-careful), reports:
+  A. intron effect — rustar Gene vs GeneFull total UMI (same cells);
+  B. raw-count parity — rustar GeneFull vs CellRanger raw total UMI / genes;
+  C. cell-set agreement — rustar EmptyDrops cells vs CellRanger native filtered,
+     and rustar-ED vs CellRanger-raw+same-EmptyDrops (isolates algorithm);
+  D. per-cell UMI correlation on the shared filtered cells;
+  writes rustar.GeneFull.filtered.h5ad + CellRanger.filtered.h5ad.
+"""
+import argparse, gzip, json, os, sys
+import numpy as np, scipy.io, scipy.sparse as sp, anndata as ad, pandas as pd
+
+
+def _find(d, base):
+    for c in (base, base + ".gz"):
+        p = os.path.join(d, c)
+        if os.path.exists(p):
+            return p
+    raise FileNotFoundError(f"{base}[.gz] in {d}")
+
+
+def _open(p):
+    return gzip.open(p, "rt") if p.endswith(".gz") else open(p)
+
+
+def load(d):
+    mp = _find(d, "matrix.mtx")
+    h = gzip.open(mp, "rb") if mp.endswith(".gz") else open(mp, "rb")
+    with h:
+        X = sp.csr_matrix(scipy.io.mmread(h)).T.tocsr()  # cells x genes
+    bc = np.array([l.split("\t")[0].split("-")[0].strip() for l in _open(_find(d, "barcodes.tsv"))])
+    genes = np.array([l.split("\t")[0].strip() for l in _open(_find(d, "features.tsv"))])
+    return X, bc, genes
+
+
+def cellset(p):
+    with _open(p) as fh:
+        return set(l.split("\t")[0].split("-")[0].strip() for l in fh if l.strip())
+
+
+def revcomp(s):
+    return s.translate(str.maketrans("ACGT", "TGCA"))[::-1]
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--rustar-gene", required=True)
+    ap.add_argument("--rustar-genefull", required=True)
+    ap.add_argument("--cellranger-raw", required=True)
+    ap.add_argument("--rustar-ed-cells", required=True)
+    ap.add_argument("--cr-ed-cells", required=True)
+    ap.add_argument("--cr-native-cells", required=True)
+    ap.add_argument("--out", required=True)
+    a = ap.parse_args()
+    os.makedirs(a.out, exist_ok=True)
+    out = {}
+
+    # ---- A. intron effect: rustar Gene vs GeneFull ----
+    Xg, bcg, _ = load(a.rustar_gene)
+    tot_gene = int(Xg.sum())
+    g_by_bc = dict(zip(bcg, np.asarray(Xg.sum(1)).ravel()))
+    del Xg
+    Xf, bcf, genes_f = load(a.rustar_genefull)
+    tot_genefull = int(Xf.sum())
+    f_by_bc = dict(zip(bcf, np.asarray(Xf.sum(1)).ravel()))
+    print(f"[A] intron effect (all barcodes):")
+    print(f"    rustar Gene     total UMI = {tot_gene:,}")
+    print(f"    rustar GeneFull total UMI = {tot_genefull:,}  "
+          f"(+{100*(tot_genefull-tot_gene)/tot_gene:.1f}%)")
+    out["intron_effect"] = {"gene_total_umi": tot_gene, "genefull_total_umi": tot_genefull,
+                            "pct_increase": round(100*(tot_genefull-tot_gene)/tot_gene, 2)}
+
+    # ---- B. raw parity: rustar GeneFull vs CellRanger raw ----
+    Xc, bcc, genes_c = load(a.cellranger_raw)
+    # 5' chemistry: CellRanger may report RC barcodes — detect against rustar.
+    rust_set = set(bcf)
+    ov_plain = len(set(bcc) & rust_set)
+    bcc_rc = np.array([revcomp(b) for b in bcc])
+    ov_rc = len(set(bcc_rc) & rust_set)
+    if ov_rc > ov_plain:
+        bcc = bcc_rc
+        cr_orient = "reverse-complement"
+    else:
+        cr_orient = "as-reported"
+    print(f"\n[B] CellRanger barcode orientation vs rustar: {cr_orient} "
+          f"(overlap plain={ov_plain:,} rc={ov_rc:,})")
+    tot_cr = int(Xc.sum())
+    print(f"    rustar GeneFull raw total UMI = {tot_genefull:,}, genes={ (np.asarray(Xf.sum(0)).ravel()>0).sum():,}")
+    print(f"    CellRanger      raw total UMI = {tot_cr:,}, genes={ (np.asarray(Xc.sum(0)).ravel()>0).sum():,}")
+    c_by_bc = dict(zip(bcc, np.asarray(Xc.sum(1)).ravel()))
+    out["raw_parity"] = {"rustar_genefull_total_umi": tot_genefull, "cellranger_total_umi": tot_cr,
+                         "cr_orientation": cr_orient}
+
+    # ---- C. cell-set agreement ----
+    r_ed = cellset(a.rustar_ed_cells)
+    cr_ed = cellset(a.cr_ed_cells)
+    cr_nat = cellset(a.cr_native_cells)
+    if cr_orient == "reverse-complement":
+        cr_ed = {revcomp(b) for b in cr_ed}
+        cr_nat = {revcomp(b) for b in cr_nat}
+
+    def jac(x, y):
+        i, u = len(x & y), len(x | y)
+        return {"a": len(x), "b": len(y), "shared": i, "jaccard": round(i/u, 4) if u else None,
+                "a_only": len(x - y), "b_only": len(y - x)}
+
+    print("\n[C] cell-set agreement:")
+    out["cell_sets"] = {}
+    for label, x, y in [("rustar-ED vs CR-raw-ED (same algo)", r_ed, cr_ed),
+                        ("rustar-ED vs CR-native-filtered", r_ed, cr_nat),
+                        ("CR-raw-ED vs CR-native-filtered", cr_ed, cr_nat)]:
+        j = jac(x, y)
+        print(f"    {label:<38}: a={j['a']:,} b={j['b']:,} shared={j['shared']:,} "
+              f"jaccard={j['jaccard']}")
+        out["cell_sets"][label] = j
+
+    # ---- D. per-cell UMI correlation on shared (rustar-ED ∩ CR-native) ----
+    shared = sorted(r_ed & cr_nat)
+    xs = [f_by_bc.get(b, 0) for b in shared]
+    ys = [c_by_bc.get(b, 0) for b in shared]
+    if len(shared) > 2:
+        r = float(np.corrcoef(xs, ys)[0, 1])
+        print(f"\n[D] per-cell UMI corr (rustar GeneFull vs CR raw) on {len(shared):,} shared "
+              f"filtered cells: r={r:.4f}")
+        out["per_cell_corr"] = {"shared_cells": len(shared), "pearson_r": round(r, 4)}
+
+    # ---- write filtered h5ad ----
+    def write_h5ad(name, X, bc, genes, keep_set):
+        keep = np.array([b in keep_set for b in bc])
+        Xk = X[keep]
+        A = ad.AnnData(X=Xk, obs=pd.DataFrame(index=bc[keep]), var=pd.DataFrame(index=genes))
+        A.obs["n_umi"] = np.asarray(Xk.sum(1)).ravel()
+        A.obs["n_genes"] = np.asarray((Xk > 0).sum(1)).ravel()
+        p = os.path.join(a.out, f"{name}.h5ad")
+        A.write_h5ad(p)
+        print(f"    wrote {p}  ({A.n_obs:,} cells)")
+        return A.n_obs
+
+    print("\n[E] writing EmptyDrops-filtered h5ad:")
+    out["h5ad"] = {
+        "rustar_genefull_ed": write_h5ad("rustar.GeneFull.emptydrops", Xf, bcf, genes_f, r_ed),
+        "cellranger_native": write_h5ad("CellRanger.filtered", Xc, bcc, genes_c, cr_nat),
+    }
+
+    with open(os.path.join(a.out, "genefull_h5_compare.json"), "w") as fh:
+        json.dump(out, fh, indent=2, default=str)
+    print(f"\nwrote {a.out}/genefull_h5_compare.json")
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/solo_sj_multi_compare.py b/test/solo_sj_multi_compare.py
new file mode 100644
index 0000000..75fd9b5
--- /dev/null
+++ b/test/solo_sj_multi_compare.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+"""Diff rustar vs STARsolo SJ-feature and --soloMultiMappers matrices.
+
+Both tools index barcodes by the same sorted whitelist (columns align directly)
+and genes by the same GTF order (Gene rows align). SJ junctions differ per tool
+(each has its own SJ.out.tab), so SJ rows are matched by (chr,start,end).
+
+Reports, per matrix: shared rows/cols, total counts, Pearson r over shared
+entries, and the fraction of shared entries that match exactly.
+
+Usage:
+  solo_sj_multi_compare.py --rustar <Solo.out> --starsolo <Solo.out>
+"""
+import argparse
+import gzip
+import os
+import sys
+
+import numpy as np
+import scipy.io
+import scipy.sparse as sp
+
+
+def _open(p):
+    return gzip.open(p, "rb") if p.endswith(".gz") else open(p, "rb")
+
+
+def _find(d, base):
+    for c in (base, base + ".gz"):
+        p = os.path.join(d, c)
+        if os.path.exists(p):
+            return p
+    return None
+
+
+def load_mtx(d, name="matrix.mtx"):
+    p = _find(d, name)
+    if p is None:
+        return None
+    with _open(p) as fh:
+        m = scipy.io.mmread(fh).tocsr()  # features x barcodes
+    return m
+
+
+def load_features_keys(d):
+    """SJ features.tsv → list of (chr, start, end) per row. STARsolo symlinks
+    features.tsv → SJ.out.tab (run root); fall back to that if the symlink is
+    broken (it points at the in-container path)."""
+    p = _find(d, "features.tsv")
+    if p is None or not os.path.exists(p):
+        # d = .../Solo.out/SJ/raw → run root is three levels up.
+        alt = os.path.join(d, "..", "..", "..", "SJ.out.tab")
+        p = alt if os.path.exists(alt) else None
+    keys = []
+    op = gzip.open(p, "rt") if p.endswith(".gz") else open(p)
+    with op as fh:
+        for line in fh:
+            f = line.rstrip("\n").split("\t")
+            keys.append((f[0], f[1], f[2]))
+    return keys
+
+
+def compare_aligned(name, A, B):
+    """A, B are features×barcodes with identical row+col indexing."""
+    if A is None or B is None:
+        print(f"[{name}] missing matrix"); return
+    r = min(A.shape[0], B.shape[0])
+    c = min(A.shape[1], B.shape[1])
+    A = A[:r, :c]
+    B = B[:r, :c]
+    da = np.asarray(A.sum()); db = np.asarray(B.sum())
+    # union of nonzero coords
+    U = (A != 0).astype(np.int8) + (B != 0).astype(np.int8)
+    coo = U.tocoo()
+    av = np.asarray(A[coo.row, coo.col]).ravel()
+    bv = np.asarray(B[coo.row, coo.col]).ravel()
+    rr = np.corrcoef(av, bv)[0, 1] if len(av) > 1 else float("nan")
+    exact = np.mean(np.isclose(av, bv, atol=1e-4)) if len(av) else float("nan")
+    print(f"[{name}] rustar_total={float(da):,.1f} star_total={float(db):,.1f} "
+          f"shared_entries={len(av):,} r={rr:.5f} exact={exact:.4%}")
+
+
+def compare_sj(rdir, sdir):
+    ra = load_mtx(rdir); sa = load_mtx(sdir)
+    if ra is None or sa is None:
+        print("[SJ] missing matrix"); return
+    rk = load_features_keys(rdir); sk = load_features_keys(sdir)
+    print(f"[SJ] rustar junctions={len(rk):,} star junctions={len(sk):,}")
+    sidx = {k: i for i, k in enumerate(sk)}
+    shared = [(i, sidx[k]) for i, k in enumerate(rk) if k in sidx]
+    print(f"[SJ] shared junctions (by chr/start/end) = {len(shared):,} "
+          f"({len(shared)/max(len(rk),1):.1%} of rustar)")
+    if not shared:
+        return
+    rrows = [i for i, _ in shared]
+    srows = [j for _, j in shared]
+    c = min(ra.shape[1], sa.shape[1])
+    Rm = ra[rrows, :c]
+    Sm = sa[srows, :c]
+    U = (Rm != 0).astype(np.int8) + (Sm != 0).astype(np.int8)
+    coo = U.tocoo()
+    av = np.asarray(Rm[coo.row, coo.col]).ravel()
+    bv = np.asarray(Sm[coo.row, coo.col]).ravel()
+    rr = np.corrcoef(av, bv)[0, 1] if len(av) > 1 else float("nan")
+    exact = np.mean(av == bv) if len(av) else float("nan")
+    print(f"[SJ] on shared junctions: rustar_total={float(Rm.sum()):,} "
+          f"star_total={float(Sm.sum()):,} shared_entries={len(av):,} "
+          f"r={rr:.5f} exact={exact:.4%}")
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--rustar", required=True, help="rustar Solo.out dir")
+    ap.add_argument("--starsolo", required=True, help="STARsolo Solo.out dir")
+    a = ap.parse_args()
+
+    rg = os.path.join(a.rustar, "Gene", "raw")
+    sg = os.path.join(a.starsolo, "Gene", "raw")
+    print("=== Gene (unique) matrix sanity ===")
+    compare_aligned("Gene", load_mtx(rg), load_mtx(sg))
+
+    print("\n=== UniqueAndMult (--soloMultiMappers) ===")
+    for method in ("Uniform", "PropUnique", "Rescue", "EM"):
+        fn = f"UniqueAndMult-{method}.mtx"
+        compare_aligned(method, load_mtx(rg, fn), load_mtx(sg, fn))
+
+    print("\n=== SJ feature ===")
+    compare_sj(os.path.join(a.rustar, "SJ", "raw"),
+               os.path.join(a.starsolo, "SJ", "raw"))
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/test/solo_summary_compare.py b/test/solo_summary_compare.py
new file mode 100644
index 0000000..4b880b0
--- /dev/null
+++ b/test/solo_summary_compare.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""Cross-compare CellRanger-style summary metrics across rustar / STARsolo /
+CellRanger.
+
+rustar and STARsolo emit `Solo.out/<feature>/Summary.csv` (key,value with
+fractions in [0,1]); CellRanger emits `metrics_summary.csv` (one header row + one
+value row, percentages like "53.5%" and comma-grouped integers). This pulls the
+shared metrics into one table — genome/exon/intron/intergenic mapping rates plus
+per-cell UMI/gene stats.
+
+Usage:
+  solo_summary_compare.py \
+    --rustar    <rustar Solo.out/GeneFull/Summary.csv> \
+    --starsolo  <STARsolo Solo.out/GeneFull/Summary.csv> \
+    --cellranger <outs/metrics_summary.csv>
+"""
+import argparse
+import csv
+import sys
+
+
+def load_summary_csv(path):
+    """rustar/STARsolo Summary.csv -> {key: float-or-int}."""
+    d = {}
+    with open(path) as fh:
+        for line in fh:
+            if "," not in line:
+                continue
+            k, v = line.rstrip("\n").split(",", 1)
+            try:
+                d[k] = float(v) if "." in v else int(v)
+            except ValueError:
+                d[k] = v
+    return d
+
+
+def load_cr_metrics(path):
+    """CellRanger metrics_summary.csv -> {key: float} (percents -> fraction)."""
+    with open(path) as fh:
+        rows = list(csv.reader(fh))
+    keys, vals = rows[0], rows[1]
+    out = {}
+    for k, v in zip(keys, vals):
+        v = v.strip()
+        if v.endswith("%"):
+            out[k] = float(v[:-1]) / 100.0
+        else:
+            try:
+                out[k] = float(v.replace(",", ""))
+            except ValueError:
+                out[k] = v
+    return out
+
+
+def fmt_pct(x):
+    return f"{x*100:.1f}%" if isinstance(x, (int, float)) else str(x)
+
+
+def fmt_int(x):
+    return f"{int(x):,}" if isinstance(x, (int, float)) else str(x)
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--rustar", required=True, help="rustar GeneFull Summary.csv")
+    ap.add_argument("--starsolo", required=True, help="STARsolo GeneFull Summary.csv")
+    ap.add_argument("--cellranger", required=True, help="CellRanger metrics_summary.csv")
+    ap.add_argument("--feature", default="GeneFull")
+    a = ap.parse_args()
+
+    R = load_summary_csv(a.rustar)
+    S = load_summary_csv(a.starsolo)
+    C = load_cr_metrics(a.cellranger)
+    f = a.feature
+
+    # (label, rustar key, starsolo key, cellranger key, formatter)
+    pct = fmt_pct
+    intg = fmt_int
+    rows = [
+        ("Valid barcodes", "Reads With Valid Barcodes", "Reads With Valid Barcodes", "Valid Barcodes", pct),
+        ("Sequencing saturation", "Sequencing Saturation", "Sequencing Saturation", "Sequencing Saturation", pct),
+        ("Reads mapped to genome (U+M)", "Reads Mapped to Genome: Unique+Multiple", "Reads Mapped to Genome: Unique+Multiple", "Reads Mapped to Genome", pct),
+        ("  ... exonic", "Reads Mapped Confidently to Exonic Regions", None, "Reads Mapped Confidently to Exonic Regions", pct),
+        ("  ... intronic", "Reads Mapped Confidently to Intronic Regions", None, "Reads Mapped Confidently to Intronic Regions", pct),
+        ("  ... intergenic", "Reads Mapped Confidently to Intergenic Regions", None, "Reads Mapped Confidently to Intergenic Regions", pct),
+        ("Reads antisense to gene", "Reads Mapped Antisense to Gene", None, "Reads Mapped Antisense to Gene", pct),
+        ("Estimated number of cells", "Estimated Number of Cells", "Estimated Number of Cells", "Estimated Number of Cells", intg),
+        ("Mean reads / cell", "Mean Reads per Cell", "Mean Reads per Cell", "Mean Reads per Cell", intg),
+        (f"Median genes / cell", f"Median {f} per Cell", f"Median {f} per Cell", "Median Genes per Cell", intg),
+        ("Median UMI / cell", "Median UMI per Cell", "Median UMI per Cell", "Median UMI Counts per Cell", intg),
+        ("Total genes detected", f"Total {f} Detected", f"Total {f} Detected", "Total Genes Detected", intg),
+        ("Fraction reads in cells", "Fraction of Unique Reads in Cells", "Fraction of Unique Reads in Cells", "Fraction Reads in Cells", pct),
+    ]
+
+    w = 34
+    print(f"\nCross-tool summary ({f} for rustar/STARsolo; CellRanger raw is intron-inclusive)\n")
+    print(f"{'metric':<{w}}{'rustar':>14}{'STARsolo':>14}{'CellRanger':>14}")
+    print("-" * (w + 42))
+    for label, rk, sk, ck, fn in rows:
+        rv = fn(R.get(rk)) if rk and rk in R else "—"
+        sv = fn(S.get(sk)) if sk and sk in S else "—"
+        cv = fn(C.get(ck)) if ck and ck in C else "—"
+        print(f"{label:<{w}}{rv:>14}{sv:>14}{cv:>14}")
+    print()
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tests/alignment_features.rs b/tests/alignment_features.rs
index 27e3a8d..28525c6 100644
--- a/tests/alignment_features.rs
+++ b/tests/alignment_features.rs
@@ -879,3 +879,792 @@ fn test_bare_dot_prefix_is_literal_string() {
     }
     assert!(count >= 1, "expected at least 1 BAM record, got {count}");
 }
+
+// ---------------------------------------------------------------------------
+// Test 9 — STARsolo (Phase 14.1–14.4): barcode parse, CB match, gene assign,
+// UMI dedup, raw count-matrix output
+// ---------------------------------------------------------------------------
+
+#[test]
+fn test_starsolo_gene_matrix() {
+    let tmpdir = TempDir::new().unwrap();
+    let genome = build_genome();
+    let fasta = write_fasta(&tmpdir, &genome);
+    let gtf = write_gtf(&tmpdir);
+
+    let genome_dir = tmpdir.path().join("genome");
+    build_index(&fasta, &genome_dir, "7", Some(&gtf));
+
+    // cDNA reads (R2): 50 bp from Exon1 of gene G1 (genome[10000..10050]),
+    // so each maps uniquely on the + strand inside G1 → Forward sense.
+    let cdna_path = tmpdir.path().join("cdna.fq");
+    let barcode_path = tmpdir.path().join("barcode.fq");
+    let wl_path = tmpdir.path().join("whitelist.txt");
+
+    let cb = "AAAACCCCGGGGTTTT"; // 16 bp, sorts first in the whitelist
+    // 8 reads, one cell, two well-separated UMI clouds (Hamming distance 10
+    // apart, 4 reads each) → 1MM_All collapses each cloud to 1 molecule → 2.
+    let umi_a = "ACGTACGTAC";
+    let umi_b = "TGCATGCATG";
+    let n_reads = 8usize;
+    {
+        let mut cf = fs::File::create(&cdna_path).unwrap();
+        let mut bf = fs::File::create(&barcode_path).unwrap();
+        let exon1 = &genome[10000..10050];
+        for i in 0..n_reads {
+            writeln!(cf, "@read{i}").unwrap();
+            cf.write_all(exon1).unwrap();
+            writeln!(cf, "\n+\n{}", "I".repeat(50)).unwrap();
+
+            let umi = if i < 4 { umi_a } else { umi_b };
+            writeln!(bf, "@read{i}").unwrap();
+            writeln!(bf, "{cb}{umi}").unwrap();
+            writeln!(bf, "+\n{}", "I".repeat(26)).unwrap();
+        }
+    }
+    {
+        let mut wf = fs::File::create(&wl_path).unwrap();
+        writeln!(wf, "{cb}").unwrap();
+        writeln!(wf, "CCCCGGGGTTTTAAAA").unwrap(); // decoys
+        writeln!(wf, "GGGGTTTTAAAACCCC").unwrap();
+    }
+
+    let output_dir = tmpdir.path().join("out_solo");
+    fs::create_dir_all(&output_dir).unwrap();
+    let prefix = format!("{}/", output_dir.display());
+
+    let assert = cargo_bin_cmd!("rustar-aligner")
+        .env("RUST_LOG", "info")
+        .args([
+            "--runMode",
+            "alignReads",
+            "--genomeDir",
+            genome_dir.to_str().unwrap(),
+            "--readFilesIn",
+            cdna_path.to_str().unwrap(),
+            barcode_path.to_str().unwrap(),
+            "--soloType",
+            "CB_UMI_Simple",
+            "--soloCBwhitelist",
+            wl_path.to_str().unwrap(),
+            "--soloFeatures",
+            "Gene",
+            "--sjdbGTFfile",
+            gtf.to_str().unwrap(),
+            "--outFileNamePrefix",
+            &prefix,
+        ])
+        .assert()
+        .success();
+
+    // cDNA alignments are emitted like a normal SE run.
+    let sam_path = output_dir.join("Aligned.out.sam");
+    assert!(sam_path.exists(), "Aligned.out.sam not found");
+    assert!(
+        count_sam_records(&sam_path) >= n_reads,
+        "expected >= {n_reads} cDNA alignment records"
+    );
+
+    // 8 reads collected, all exact CB matches.
+    let stderr = String::from_utf8_lossy(&assert.get_output().stderr).to_string();
+    assert!(
+        stderr.contains("collected 8 resolved"),
+        "expected 8 resolved solo records in log, stderr was:\n{stderr}"
+    );
+    assert!(
+        stderr.contains("exact=8"),
+        "expected 8 exact CB matches in log, stderr was:\n{stderr}"
+    );
+
+    // Raw matrix output.
+    let raw = output_dir.join("Solo.out").join("Gene").join("raw");
+    let features = fs::read_to_string(raw.join("features.tsv")).unwrap();
+    let barcodes = fs::read_to_string(raw.join("barcodes.tsv")).unwrap();
+    let matrix = fs::read_to_string(raw.join("matrix.mtx")).unwrap();
+
+    // One gene G1 with a name column + feature type.
+    assert_eq!(features.lines().count(), 1);
+    assert!(
+        features.starts_with("G1\tG1\tGene Expression"),
+        "unexpected features.tsv:\n{features}"
+    );
+    // Three whitelist barcodes; the assayed CB sorts first.
+    assert_eq!(barcodes.lines().count(), 3);
+    assert_eq!(barcodes.lines().next().unwrap(), cb);
+
+    // MatrixMarket: header, dims "1 3 1" (1 gene × 3 barcodes, 1 entry),
+    // single entry "1 1 2" (gene 1, cell 1, 2 deduped molecules).
+    let mtx_lines: Vec<&str> = matrix.lines().collect();
+    assert!(
+        mtx_lines[0].starts_with("%%MatrixMarket matrix coordinate integer general"),
+        "unexpected mtx banner: {}",
+        mtx_lines[0]
+    );
+    let dims = mtx_lines.iter().find(|l| !l.starts_with('%')).unwrap();
+    assert_eq!(*dims, "1 3 1", "unexpected matrix dimensions");
+    let entry = mtx_lines.last().unwrap();
+    assert_eq!(
+        *entry, "1 1 2",
+        "expected 2 deduped molecules for G1 in cell 1"
+    );
+
+    // The default --soloCellFilter (CellRanger2.2) also writes a filtered/ matrix
+    // containing only the called cell (the one assayed barcode), column-renumbered.
+    let filt = output_dir.join("Solo.out").join("Gene").join("filtered");
+    let f_barcodes = fs::read_to_string(filt.join("barcodes.tsv")).unwrap();
+    assert_eq!(f_barcodes.lines().count(), 1, "expected 1 filtered cell");
+    assert_eq!(f_barcodes.lines().next().unwrap(), cb);
+    let f_matrix = fs::read_to_string(filt.join("matrix.mtx")).unwrap();
+    let f_dims = f_matrix.lines().find(|l| !l.starts_with('%')).unwrap();
+    assert_eq!(f_dims, "1 1 1", "unexpected filtered matrix dimensions");
+    assert_eq!(f_matrix.lines().last().unwrap(), "1 1 2");
+
+    // A CellRanger-style summary is written per feature.
+    let summary =
+        fs::read_to_string(output_dir.join("Solo.out").join("Gene").join("Summary.csv")).unwrap();
+    assert!(
+        summary.contains("Estimated Number of Cells,1"),
+        "summary:\n{summary}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Test 9b — STARsolo SJ (splice-junction) feature
+//
+// Spliced cDNA reads (last 25 bp of Exon1 + first 25 bp of Exon2) cross the
+// planted GT-AG intron, producing one junction. --soloFeatures SJ must write a
+// Solo.out/SJ/raw matrix whose features.tsv equals SJ.out.tab and whose single
+// junction row carries the deduped molecule count for the one cell.
+// ---------------------------------------------------------------------------
+#[test]
+fn test_starsolo_sj_feature() {
+    let tmpdir = TempDir::new().unwrap();
+    let genome = build_genome();
+    let fasta = write_fasta(&tmpdir, &genome);
+    let gtf = write_gtf(&tmpdir);
+    let genome_dir = tmpdir.path().join("genome");
+    build_index(&fasta, &genome_dir, "7", Some(&gtf));
+
+    let cdna_path = tmpdir.path().join("cdna.fq");
+    let barcode_path = tmpdir.path().join("barcode.fq");
+    let wl_path = tmpdir.path().join("whitelist.txt");
+    let cb = "AAAACCCCGGGGTTTT";
+    let umi = "ACGTACGTAC";
+    // Spliced read: 25 bp from end of Exon1 + 25 bp from start of Exon2, which
+    // aligns across the intron [10050,10250) → one GT-AG junction.
+    let mut spliced = genome[10025..10050].to_vec();
+    spliced.extend_from_slice(&genome[10250..10275]);
+    {
+        let mut cf = fs::File::create(&cdna_path).unwrap();
+        let mut bf = fs::File::create(&barcode_path).unwrap();
+        for i in 0..6 {
+            writeln!(cf, "@r{i}").unwrap();
+            cf.write_all(&spliced).unwrap();
+            writeln!(cf, "\n+\n{}", "I".repeat(50)).unwrap();
+            writeln!(bf, "@r{i}\n{cb}{umi}\n+\n{}", "I".repeat(26)).unwrap();
+        }
+        let mut wf = fs::File::create(&wl_path).unwrap();
+        writeln!(wf, "{cb}\nCCCCGGGGTTTTAAAA\nGGGGTTTTAAAACCCC").unwrap();
+    }
+
+    let output_dir = tmpdir.path().join("out_sj");
+    fs::create_dir_all(&output_dir).unwrap();
+    let prefix = format!("{}/", output_dir.display());
+    cargo_bin_cmd!("rustar-aligner")
+        .args([
+            "--runMode",
+            "alignReads",
+            "--genomeDir",
+            genome_dir.to_str().unwrap(),
+            "--readFilesIn",
+            cdna_path.to_str().unwrap(),
+            barcode_path.to_str().unwrap(),
+            "--soloType",
+            "CB_UMI_Simple",
+            "--soloCBwhitelist",
+            wl_path.to_str().unwrap(),
+            "--soloFeatures",
+            "Gene",
+            "SJ",
+            "--soloStrand",
+            "Forward",
+            "--sjdbGTFfile",
+            gtf.to_str().unwrap(),
+            "--outFileNamePrefix",
+            &prefix,
+        ])
+        .assert()
+        .success();
+
+    let sj_raw = output_dir.join("Solo.out").join("SJ").join("raw");
+    let features = fs::read_to_string(sj_raw.join("features.tsv")).unwrap();
+    let sj_tab = fs::read_to_string(output_dir.join("SJ.out.tab")).unwrap();
+    // SJ feature file mirrors SJ.out.tab and contains exactly the one junction.
+    assert_eq!(features, sj_tab, "SJ features.tsv must equal SJ.out.tab");
+    assert_eq!(features.lines().count(), 1, "expected one junction");
+    assert!(
+        features.starts_with("chr1\t10051\t10250\t"),
+        "unexpected junction: {features}"
+    );
+    // Matrix: 1 junction × 3 barcodes, single entry "1 1 1" (one deduped molecule
+    // — all 6 reads share one UMI in one cell).
+    let matrix = fs::read_to_string(sj_raw.join("matrix.mtx")).unwrap();
+    let dims = matrix.lines().find(|l| !l.starts_with('%')).unwrap();
+    assert_eq!(dims, "1 3 1", "unexpected SJ matrix dims");
+    assert_eq!(matrix.lines().last().unwrap(), "1 1 1");
+}
+
+// ---------------------------------------------------------------------------
+// Test 9c — STARsolo --soloMultiMappers (gene-ambiguous distribution)
+//
+// G1 and G3 share Exon1 (so a read there is ambiguous {G1,G3}); G2 has Exon2.
+// One cell has a unique G2 molecule + one ambiguous {G1,G3} molecule. The unique
+// matrix counts only G2; UniqueAndMult-Uniform spreads the ambiguous molecule
+// 0.5/0.5 to G1 and G3 while keeping G2 at 1.
+// ---------------------------------------------------------------------------
+#[test]
+fn test_starsolo_multimappers() {
+    let tmpdir = TempDir::new().unwrap();
+    let genome = build_genome();
+    let fasta = write_fasta(&tmpdir, &genome);
+    // GTF order: G1, G3 (both Exon1), G2 (Exon2) → gene indices 0,1,2.
+    let gtf = tmpdir.path().join("multi.gtf");
+    {
+        let mut f = fs::File::create(&gtf).unwrap();
+        for g in ["G1", "G3"] {
+            writeln!(
+                f,
+                "chr1\tt\texon\t10001\t10050\t.\t+\t.\tgene_id \"{g}\"; transcript_id \"{g}t\";"
+            )
+            .unwrap();
+        }
+        writeln!(
+            f,
+            "chr1\tt\texon\t10251\t10300\t.\t+\t.\tgene_id \"G2\"; transcript_id \"G2t\";"
+        )
+        .unwrap();
+    }
+    let genome_dir = tmpdir.path().join("genome");
+    build_index(&fasta, &genome_dir, "7", Some(&gtf));
+
+    let cdna_path = tmpdir.path().join("cdna.fq");
+    let barcode_path = tmpdir.path().join("barcode.fq");
+    let wl_path = tmpdir.path().join("whitelist.txt");
+    let cb = "AAAACCCCGGGGTTTT";
+    {
+        let mut cf = fs::File::create(&cdna_path).unwrap();
+        let mut bf = fs::File::create(&barcode_path).unwrap();
+        // 4 reads in Exon2 → unique G2 (UMI a); 4 reads in Exon1 → ambiguous (UMI b).
+        let exon2 = &genome[10250..10300];
+        let exon1 = &genome[10000..10050];
+        for (i, (seq, umi)) in [(exon2, "ACGTACGTAC"), (exon1, "TGCATGCATG")]
+            .iter()
+            .flat_map(|x| std::iter::repeat_n(*x, 4))
+            .enumerate()
+        {
+            writeln!(cf, "@r{i}").unwrap();
+            cf.write_all(seq).unwrap();
+            writeln!(cf, "\n+\n{}", "I".repeat(50)).unwrap();
+            writeln!(bf, "@r{i}\n{cb}{umi}\n+\n{}", "I".repeat(26)).unwrap();
+        }
+        let mut wf = fs::File::create(&wl_path).unwrap();
+        writeln!(wf, "{cb}\nCCCCGGGGTTTTAAAA\nGGGGTTTTAAAACCCC").unwrap();
+    }
+
+    let output_dir = tmpdir.path().join("out_mm");
+    fs::create_dir_all(&output_dir).unwrap();
+    let prefix = format!("{}/", output_dir.display());
+    cargo_bin_cmd!("rustar-aligner")
+        .args([
+            "--runMode",
+            "alignReads",
+            "--genomeDir",
+            genome_dir.to_str().unwrap(),
+            "--readFilesIn",
+            cdna_path.to_str().unwrap(),
+            barcode_path.to_str().unwrap(),
+            "--soloType",
+            "CB_UMI_Simple",
+            "--soloCBwhitelist",
+            wl_path.to_str().unwrap(),
+            "--soloFeatures",
+            "Gene",
+            "--soloStrand",
+            "Forward",
+            "--soloMultiMappers",
+            "Uniform",
+            "--sjdbGTFfile",
+            gtf.to_str().unwrap(),
+            "--outFileNamePrefix",
+            &prefix,
+        ])
+        .assert()
+        .success();
+
+    let raw = output_dir.join("Solo.out").join("Gene").join("raw");
+    // Unique matrix: only G2 (gene index 2 → row 3), count 1.
+    let matrix = fs::read_to_string(raw.join("matrix.mtx")).unwrap();
+    assert_eq!(
+        matrix.lines().last().unwrap(),
+        "3 1 1",
+        "unique matrix:\n{matrix}"
+    );
+    // UniqueAndMult-Uniform: G1=0.5, G3=0.5, G2=1.
+    let um = fs::read_to_string(raw.join("UniqueAndMult-Uniform.mtx")).unwrap();
+    assert!(um.contains("coordinate real general"), "um header:\n{um}");
+    let rows: Vec<&str> = um.lines().filter(|l| !l.starts_with('%')).skip(1).collect();
+    assert!(rows.contains(&"1 1 0.50000"), "expected G1 0.5, got:\n{um}");
+    assert!(rows.contains(&"2 1 0.50000"), "expected G3 0.5, got:\n{um}");
+    assert!(rows.contains(&"3 1 1"), "expected G2 1, got:\n{um}");
+}
+
+// ---------------------------------------------------------------------------
+// Test 9d — STARsolo SmartSeq (plate-based, manifest, no UMI)
+//
+// Two "cells" (manifest entries) of Exon1 reads → gene G1. With no UMIs each read
+// is a count, so the matrix is G1 × {CellA,CellB} = read counts (5, 3).
+// ---------------------------------------------------------------------------
+#[test]
+fn test_starsolo_smartseq() {
+    let tmpdir = TempDir::new().unwrap();
+    let genome = build_genome();
+    let fasta = write_fasta(&tmpdir, &genome);
+    let gtf = write_gtf(&tmpdir);
+    let genome_dir = tmpdir.path().join("genome");
+    build_index(&fasta, &genome_dir, "7", Some(&gtf));
+
+    let exon1 = &genome[10000..10050];
+    let write_cell = |name: &str, n: usize| -> PathBuf {
+        let p = tmpdir.path().join(name);
+        let mut f = fs::File::create(&p).unwrap();
+        for i in 0..n {
+            writeln!(f, "@{name}_{i}").unwrap();
+            f.write_all(exon1).unwrap();
+            writeln!(f, "\n+\n{}", "I".repeat(50)).unwrap();
+        }
+        p
+    };
+    let a = write_cell("cellA.fq", 5);
+    let b = write_cell("cellB.fq", 3);
+    let manifest = tmpdir.path().join("manifest.tsv");
+    fs::write(
+        &manifest,
+        format!("{}\t-\tCellA\n{}\t-\tCellB\n", a.display(), b.display()),
+    )
+    .unwrap();
+
+    let output_dir = tmpdir.path().join("out_ss");
+    fs::create_dir_all(&output_dir).unwrap();
+    let prefix = format!("{}/", output_dir.display());
+    cargo_bin_cmd!("rustar-aligner")
+        .args([
+            "--runMode",
+            "alignReads",
+            "--genomeDir",
+            genome_dir.to_str().unwrap(),
+            "--soloType",
+            "SmartSeq",
+            "--readFilesManifest",
+            manifest.to_str().unwrap(),
+            "--soloStrand",
+            "Forward",
+            "--sjdbGTFfile",
+            gtf.to_str().unwrap(),
+            "--outFileNamePrefix",
+            &prefix,
+        ])
+        .assert()
+        .success();
+
+    let raw = output_dir.join("Solo.out").join("Gene").join("raw");
+    let barcodes = fs::read_to_string(raw.join("barcodes.tsv")).unwrap();
+    assert_eq!(barcodes, "CellA\nCellB\n");
+    let matrix = fs::read_to_string(raw.join("matrix.mtx")).unwrap();
+    let dims = matrix.lines().find(|l| !l.starts_with('%')).unwrap();
+    assert_eq!(dims, "1 2 2", "SmartSeq matrix dims:\n{matrix}");
+    let entries: Vec<&str> = matrix
+        .lines()
+        .filter(|l| !l.starts_with('%'))
+        .skip(1)
+        .collect();
+    assert!(entries.contains(&"1 1 5"), "expected CellA G1=5:\n{matrix}");
+    assert!(entries.contains(&"1 2 3"), "expected CellB G1=3:\n{matrix}");
+}
+
+// ---------------------------------------------------------------------------
+// Test 9d-PE — STARsolo SmartSeq paired-end (fragment counts)
+//
+// One cell, 4 read pairs: mate1 in Exon1, mate2 in (reverse-complement) Exon2 →
+// a proper FR pair on gene G1. Each fragment is counted once (no UMI) → G1 = 4.
+// ---------------------------------------------------------------------------
+#[test]
+fn test_starsolo_smartseq_paired() {
+    let tmpdir = TempDir::new().unwrap();
+    let genome = build_genome();
+    let fasta = write_fasta(&tmpdir, &genome);
+    let gtf = write_gtf(&tmpdir);
+    let genome_dir = tmpdir.path().join("genome");
+    build_index(&fasta, &genome_dir, "7", Some(&gtf));
+
+    let r1_path = tmpdir.path().join("r1.fq");
+    let r2_path = tmpdir.path().join("r2.fq");
+    let mate1 = &genome[10000..10050]; // Exon1, forward
+    let mate2 = rc(&genome[10250..10300]); // Exon2, reverse-complement (FR mate)
+    {
+        let mut f1 = fs::File::create(&r1_path).unwrap();
+        let mut f2 = fs::File::create(&r2_path).unwrap();
+        for i in 0..4 {
+            writeln!(f1, "@p{i}").unwrap();
+            f1.write_all(mate1).unwrap();
+            writeln!(f1, "\n+\n{}", "I".repeat(50)).unwrap();
+            writeln!(f2, "@p{i}").unwrap();
+            f2.write_all(&mate2).unwrap();
+            writeln!(f2, "\n+\n{}", "I".repeat(50)).unwrap();
+        }
+    }
+    let manifest = tmpdir.path().join("manifest.tsv");
+    fs::write(
+        &manifest,
+        format!("{}\t{}\tCellPE\n", r1_path.display(), r2_path.display()),
+    )
+    .unwrap();
+
+    let output_dir = tmpdir.path().join("out_sspe");
+    fs::create_dir_all(&output_dir).unwrap();
+    let prefix = format!("{}/", output_dir.display());
+    cargo_bin_cmd!("rustar-aligner")
+        .args([
+            "--runMode",
+            "alignReads",
+            "--genomeDir",
+            genome_dir.to_str().unwrap(),
+            "--soloType",
+            "SmartSeq",
+            "--readFilesManifest",
+            manifest.to_str().unwrap(),
+            "--soloStrand",
+            "Unstranded",
+            "--sjdbGTFfile",
+            gtf.to_str().unwrap(),
+            "--outFileNamePrefix",
+            &prefix,
+        ])
+        .assert()
+        .success();
+
+    let raw = output_dir.join("Solo.out").join("Gene").join("raw");
+    let matrix = fs::read_to_string(raw.join("matrix.mtx")).unwrap();
+    let dims = matrix.lines().find(|l| !l.starts_with('%')).unwrap();
+    // One gene (G1) × one cell; 4 fragments counted.
+    assert_eq!(dims, "1 1 1", "PE SmartSeq matrix dims:\n{matrix}");
+    assert_eq!(
+        matrix.lines().last().unwrap(),
+        "1 1 4",
+        "expected G1=4 fragments:\n{matrix}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Test 9f — STARsolo Velocyto (spliced / unspliced / ambiguous)
+//
+// Three reads on gene G1, one per category: a junction-spanning read (spliced),
+// a purely intronic read (unspliced), and a wholly-exonic read with no junction
+// (ambiguous, per Sullivan 2025). Distinct UMIs → one molecule in each matrix.
+// ---------------------------------------------------------------------------
+#[test]
+fn test_starsolo_velocyto() {
+    let tmpdir = TempDir::new().unwrap();
+    let genome = build_genome();
+    let fasta = write_fasta(&tmpdir, &genome);
+    let gtf = write_gtf(&tmpdir);
+    let genome_dir = tmpdir.path().join("genome");
+    build_index(&fasta, &genome_dir, "7", Some(&gtf));
+
+    let cdna_path = tmpdir.path().join("cdna.fq");
+    let bc_path = tmpdir.path().join("bc.fq");
+    let wl_path = tmpdir.path().join("whitelist.txt");
+    let cb = "AAAACCCCGGGGTTTT";
+    // category → cDNA read + a distinct (non-homopolymer) 12 bp UMI.
+    let mut spliced = genome[10025..10050].to_vec(); // Exon1 end ...
+    spliced.extend_from_slice(&genome[10250..10275]); // ... + Exon2 start → junction
+    let reads: [(Vec<u8>, &str); 3] = [
+        (spliced, "ACGTACGTACGT"),                       // spliced
+        (genome[10100..10150].to_vec(), "TGCATGCATGCA"), // intronic → unspliced
+        (genome[10000..10050].to_vec(), "GATCGATCGATC"), // exonic, no junction → ambiguous
+    ];
+    {
+        let mut cf = fs::File::create(&cdna_path).unwrap();
+        let mut bf = fs::File::create(&bc_path).unwrap();
+        for (i, (seq, umi)) in reads.iter().enumerate() {
+            writeln!(cf, "@r{i}").unwrap();
+            cf.write_all(seq).unwrap();
+            writeln!(cf, "\n+\n{}", "I".repeat(seq.len())).unwrap();
+            writeln!(bf, "@r{i}\n{cb}{umi}\n+\n{}", "I".repeat(28)).unwrap();
+        }
+        fs::write(&wl_path, format!("{cb}\nCCCCGGGGTTTTAAAA\n")).unwrap();
+    }
+
+    let output_dir = tmpdir.path().join("out_velo");
+    fs::create_dir_all(&output_dir).unwrap();
+    let prefix = format!("{}/", output_dir.display());
+    cargo_bin_cmd!("rustar-aligner")
+        .args([
+            "--runMode",
+            "alignReads",
+            "--genomeDir",
+            genome_dir.to_str().unwrap(),
+            "--readFilesIn",
+            cdna_path.to_str().unwrap(),
+            bc_path.to_str().unwrap(),
+            "--soloType",
+            "CB_UMI_Simple",
+            "--soloCBwhitelist",
+            wl_path.to_str().unwrap(),
+            "--soloCBstart",
+            "1",
+            "--soloCBlen",
+            "16",
+            "--soloUMIstart",
+            "17",
+            "--soloUMIlen",
+            "12",
+            "--soloFeatures",
+            "Velocyto",
+            "--soloStrand",
+            "Forward",
+            "--sjdbGTFfile",
+            gtf.to_str().unwrap(),
+            "--outFileNamePrefix",
+            &prefix,
+        ])
+        .assert()
+        .success();
+
+    let raw = output_dir.join("Solo.out").join("Velocyto").join("raw");
+    // Each category matrix holds exactly its one molecule for G1 (row 1, col 1).
+    for name in ["spliced", "unspliced", "ambiguous"] {
+        let m = fs::read_to_string(raw.join(format!("{name}.mtx"))).unwrap();
+        assert_eq!(
+            m.lines().last().unwrap(),
+            "1 1 1",
+            "{name}.mtx should have G1=1:\n{m}"
+        );
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Test 9e — STARsolo CB_UMI_Complex (multi-segment barcode)
+//
+// Barcode read layout: seg1(2bp) + linker(2bp) + seg2(2bp) + UMI(2bp). The cell
+// barcode is seg1++seg2 matched against the cartesian product of two segment
+// whitelists. All reads share CB=AAGG / UMI=AT → one molecule for gene G1.
+// ---------------------------------------------------------------------------
+#[test]
+fn test_starsolo_cb_umi_complex() {
+    let tmpdir = TempDir::new().unwrap();
+    let genome = build_genome();
+    let fasta = write_fasta(&tmpdir, &genome);
+    let gtf = write_gtf(&tmpdir);
+    let genome_dir = tmpdir.path().join("genome");
+    build_index(&fasta, &genome_dir, "7", Some(&gtf));
+
+    let cdna_path = tmpdir.path().join("cdna.fq");
+    let bc_path = tmpdir.path().join("bc.fq");
+    let wl1 = tmpdir.path().join("wl1.txt");
+    let wl2 = tmpdir.path().join("wl2.txt");
+    fs::write(&wl1, "AA\nCC\n").unwrap(); // seg1 whitelist
+    fs::write(&wl2, "GG\nTT\n").unwrap(); // seg2 whitelist
+    {
+        let mut cf = fs::File::create(&cdna_path).unwrap();
+        let mut bf = fs::File::create(&bc_path).unwrap();
+        let exon1 = &genome[10000..10050];
+        for i in 0..4 {
+            writeln!(cf, "@r{i}").unwrap();
+            cf.write_all(exon1).unwrap();
+            writeln!(cf, "\n+\n{}", "I".repeat(50)).unwrap();
+            // seg1=AA, linker=CC, seg2=GG, UMI=AT → CB "AAGG", UMI "AT".
+            writeln!(bf, "@r{i}\nAACCGGAT\n+\nIIIIIIII").unwrap();
+        }
+    }
+
+    let output_dir = tmpdir.path().join("out_cx");
+    fs::create_dir_all(&output_dir).unwrap();
+    let prefix = format!("{}/", output_dir.display());
+    cargo_bin_cmd!("rustar-aligner")
+        .args([
+            "--runMode",
+            "alignReads",
+            "--genomeDir",
+            genome_dir.to_str().unwrap(),
+            "--readFilesIn",
+            cdna_path.to_str().unwrap(),
+            bc_path.to_str().unwrap(),
+            "--soloType",
+            "CB_UMI_Complex",
+            "--soloCBwhitelist",
+            wl1.to_str().unwrap(),
+            wl2.to_str().unwrap(),
+            "--soloCBposition",
+            "0_0_0_1",
+            "0_4_0_5",
+            "--soloUMIposition",
+            "0_6_0_7",
+            "--soloUMIlen",
+            "2",
+            "--soloCBmatchWLtype",
+            "Exact",
+            "--soloFeatures",
+            "Gene",
+            "--soloStrand",
+            "Forward",
+            "--sjdbGTFfile",
+            gtf.to_str().unwrap(),
+            "--outFileNamePrefix",
+            &prefix,
+        ])
+        .assert()
+        .success();
+
+    let raw = output_dir.join("Solo.out").join("Gene").join("raw");
+    // Combined whitelist = {AA,CC}×{GG,TT} = 4 barcodes. The matched cell is AAGG;
+    // all 4 reads share UMI AT → one molecule for G1.
+    let matrix = fs::read_to_string(raw.join("matrix.mtx")).unwrap();
+    let dims = matrix.lines().find(|l| !l.starts_with('%')).unwrap();
+    let parts: Vec<&str> = dims.split_whitespace().collect();
+    assert_eq!(
+        parts[1], "4",
+        "expected 4 combined-whitelist cells, dims={dims}"
+    );
+    assert_eq!(matrix.lines().last().unwrap(), "1 1 1", "matrix:\n{matrix}");
+}
+
+// ---------------------------------------------------------------------------
+// Test 10 — CellRanger-style STARsolo run (Phase 14.5)
+//
+// Exercises the full CellRanger 4.x/5.x flag set from STARsolo.md:
+//   --clipAdapterType CellRanger4 --outFilterScoreMin 30
+//   --soloCBmatchWLtype 1MM_multi_Nbase_pseudocounts
+//   --soloUMIfiltering MultiGeneUMI_CR --soloUMIdedup 1MM_CR
+// and asserts the raw Gene matrix. The 1MM_CR UMI collapse is the key
+// CellRanger-specific behavior verified here. A live differential comparison
+// against the real STAR binary is in test/solo_cellranger_diff.py.
+// ---------------------------------------------------------------------------
+
+#[test]
+fn test_starsolo_cellranger_style_matrix() {
+    let tmpdir = TempDir::new().unwrap();
+    let genome = build_genome();
+    let fasta = write_fasta(&tmpdir, &genome);
+    let gtf = write_gtf(&tmpdir);
+
+    let genome_dir = tmpdir.path().join("genome");
+    build_index(&fasta, &genome_dir, "7", Some(&gtf));
+
+    let cdna_path = tmpdir.path().join("cdna.fq");
+    let barcode_path = tmpdir.path().join("barcode.fq");
+    let wl_path = tmpdir.path().join("whitelist.txt");
+
+    // One cell (CB sorts first), 8 reads in Exon1 of G1. UMIs: M x5 + a 1MM
+    // neighbor of M x1 (1MM_CR collapses these to ONE molecule) + N x2 (a second
+    // molecule) => 2 deduped molecules for (CB, G1).
+    let cb = "AAAACCCCGGGGTTTT";
+    let umi_m = "ACGTACGTAC"; // 10 bp (default soloUMIlen)
+    let umi_m_1mm = "ACGTACGTAG"; // 1 mismatch from umi_m (last base)
+    let umi_n = "TGCATGCATG";
+    let plan = [(umi_m, 5usize), (umi_m_1mm, 1), (umi_n, 2)];
+    {
+        let mut cf = fs::File::create(&cdna_path).unwrap();
+        let mut bf = fs::File::create(&barcode_path).unwrap();
+        let exon1 = &genome[10000..10050];
+        let mut i = 0;
+        for (umi, n) in plan {
+            for _ in 0..n {
+                writeln!(cf, "@read{i}").unwrap();
+                cf.write_all(exon1).unwrap();
+                writeln!(cf, "\n+\n{}", "I".repeat(50)).unwrap();
+                writeln!(
+                    bf,
+                    "@read{i}\n{cb}{umi}\n+\n{}",
+                    "I".repeat(cb.len() + umi.len())
+                )
+                .unwrap();
+                i += 1;
+            }
+        }
+    }
+    {
+        let mut wf = fs::File::create(&wl_path).unwrap();
+        writeln!(wf, "{cb}").unwrap();
+        writeln!(wf, "TTTTGGGGCCCCAAAA").unwrap(); // decoy (sorts after cb)
+    }
+
+    let output_dir = tmpdir.path().join("out_cr");
+    fs::create_dir_all(&output_dir).unwrap();
+    let prefix = format!("{}/", output_dir.display());
+
+    cargo_bin_cmd!("rustar-aligner")
+        .args([
+            "--runMode",
+            "alignReads",
+            "--genomeDir",
+            genome_dir.to_str().unwrap(),
+            "--readFilesIn",
+            cdna_path.to_str().unwrap(),
+            barcode_path.to_str().unwrap(),
+            "--soloType",
+            "CB_UMI_Simple",
+            "--soloCBwhitelist",
+            wl_path.to_str().unwrap(),
+            "--soloCBstart",
+            "1",
+            "--soloCBlen",
+            "16",
+            "--soloUMIstart",
+            "17",
+            "--soloUMIlen",
+            "10",
+            "--soloFeatures",
+            "Gene",
+            "--sjdbGTFfile",
+            gtf.to_str().unwrap(),
+            // CellRanger 4.x/5.x matching flags:
+            "--clipAdapterType",
+            "CellRanger4",
+            "--outFilterScoreMin",
+            "30",
+            "--soloCBmatchWLtype",
+            "1MM_multi_Nbase_pseudocounts",
+            "--soloUMIfiltering",
+            "MultiGeneUMI_CR",
+            "--soloUMIdedup",
+            "1MM_CR",
+            "--outSAMtype",
+            "SAM",
+            "--outFileNamePrefix",
+            &prefix,
+        ])
+        .assert()
+        .success();
+
+    let raw = output_dir.join("Solo.out").join("Gene").join("raw");
+    let features = fs::read_to_string(raw.join("features.tsv")).unwrap();
+    let barcodes = fs::read_to_string(raw.join("barcodes.tsv")).unwrap();
+    let matrix = fs::read_to_string(raw.join("matrix.mtx")).unwrap();
+
+    assert!(features.starts_with("G1\t"), "features.tsv: {features}");
+    assert_eq!(barcodes.lines().count(), 2);
+    assert_eq!(barcodes.lines().next().unwrap(), cb); // CB sorts first
+
+    let lines: Vec<&str> = matrix.lines().collect();
+    let dims = lines.iter().find(|l| !l.starts_with('%')).unwrap();
+    assert_eq!(
+        *dims, "1 2 1",
+        "matrix dims (1 gene x 2 barcodes x 1 entry)"
+    );
+    // 1MM_CR: M(5)+M_1mm(1) collapse to 1 molecule, N(2) is another => 2.
+    assert_eq!(
+        *lines.last().unwrap(),
+        "1 1 2",
+        "expected 2 deduped molecules"
+    );
+}