mcvickerlab · Jaureguy760 · Mar 15, 2026 · Mar 15, 2026
@@ -146,8 +146,8 @@ jobs:
 
       - name: Run cargo test
         working-directory: rust
-        run: cargo test
+        run: PYO3_PYTHON=$(command -v python3) cargo test
 
       - name: Run clippy
         working-directory: rust
-        run: cargo clippy -- -W warnings
+        run: PYO3_PYTHON=$(command -v python3) cargo clippy -- -W warnings
diff --git a/.gitignore b/.gitignore
@@ -154,6 +154,11 @@ benchmark_figures/
 # Sanity test data (downloaded from GitHub releases)
 tests/sanity/data/
 
+# Real data test files (downloaded from 1000 Genomes, ~2-3 GB)
+tests/real_data/data/
+tests/real_data/samplesheets/
+tests/real_data/configs/
+
 # Nextflow runtime
 .nextflow/
 .nextflow.log*
@@ -175,7 +180,29 @@ test-output/
 results_stub/
 pipelines/*/test-output/
 pipelines/*/results_stub/
+pipelines/*/results_*/
+pipelines/*/artifacts/
+
+# Artifacts directory
+artifacts/
+
+# Benchmark infrastructure (large data/envs/results)
+test_benchmarks/
+
+# Claude Code local state
+.claude/
+
+# Nextflow pipeline-level logs
+pipelines/*/.nextflow.log*
+pipelines/*/.nf-test.log
+
+# Nextflow reports and visualizations
+trace.txt
+timeline.html
+report.html
+dag.svg
+dag.dot
 
 # Claude Code memory files (per-directory)
 **/CLAUDE.md
-!./CLAUDE.md
+!/CLAUDE.md
@@ -82,6 +82,8 @@ LABEL maintainer="Jeff Jaureguy <jeffpjaureguy@gmail.com>"
 # Install runtime deps + temporary build deps for pybedtools C++ extension
 # Combined into one RUN to minimize layers; build tools purged at the end
 RUN apt-get update && apt-get install -y --no-install-recommends \
+    # PID 1 init for proper signal handling (Nextflow/HPC)
+    tini \
     # Bioinformatics tools
     samtools \
     bcftools \
@@ -106,14 +108,16 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     pip install /tmp/*.whl \
     && rm -rf /tmp/*.whl \
     && apt-get purge -y --auto-remove g++ zlib1g-dev \
-    && rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/* \
+    && ! command -v g++
 
 WORKDIR /app
 
 # Verify non-Python tools are available (Python tools skipped during build
 # because Polars uses AVX2 instructions that fail under QEMU emulation
 # on ARM64 CI runners building linux/amd64 images)
-RUN samtools --version && bcftools --version && bedtools --version
+RUN samtools --version && bcftools --version && bedtools --version \
+    && wasp2-ipscore --help > /dev/null 2>&1
 
 # Create non-root user for security
 RUN groupadd -g 1000 wasp2 && \
@@ -147,5 +151,7 @@ WORKDIR /data
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
     CMD wasp2-count --version || exit 1
 
+ENTRYPOINT ["tini", "--"]
+
 # Default command
 CMD ["wasp2-count", "--help"]
diff --git a/Makefile b/Makefile
@@ -3,6 +3,7 @@
 
 .PHONY: all build install test test-quick test-sanity lint format clean help
 .PHONY: download-sanity-data sanity-data-local rust-build rust-test
+.PHONY: test-mapping-parity
 
 # Configuration
 PYTHON ?= python
@@ -48,7 +49,7 @@ rust-dev:  ## Build Rust extension in debug mode (faster compile)
 	$(MATURIN) develop -m $(RUST_DIR)/Cargo.toml
 
 rust-test:  ## Run Rust unit tests
-	cd $(RUST_DIR) && $(CARGO) test
+	cd $(RUST_DIR) && PYO3_PYTHON=$$($(PYTHON) -c "import sys; print(sys.executable)") $(CARGO) test
 
 rust-bench:  ## Run Rust benchmarks
-	cd $(RUST_DIR) && $(CARGO) bench
+	cd $(RUST_DIR) && PYO3_PYTHON=$$($(PYTHON) -c "import sys; print(sys.executable)") $(CARGO) bench
@@ -68,6 +69,9 @@ test-quick:  ## Run quick validation tests only
 test-rust:  ## Run Rust-specific tests
 	$(PYTEST) $(TESTS_DIR) -v --tb=short -m "rust"
 
+test-mapping-parity:  ## Run mapping parity tests against legacy and unified paths
+	$(PYTEST) $(TESTS_DIR)/regression/test_mapping_stage_parity.py -v --tb=short
+
 test-integration:  ## Run integration tests
 	$(PYTEST) $(TESTS_DIR) -v --tb=short -m "integration"
 

@@ -15,114 +15,100 @@
   <a href="https://mcvickerlab.github.io/WASP2/">
     <img src="https://img.shields.io/badge/docs-GitHub%20Pages-blue" alt="Documentation">
   </a>
-  <a href="https://github.com/mcvickerlab/WASP2/blob/master/LICENSE">
-    <img src="https://img.shields.io/badge/license-MIT-green" alt="License">
-  </a>
-</p>
-
-<p align="center">
-  <a href="https://mcvickerlab.github.io/WASP2/">Documentation</a> •
-  <a href="https://mcvicker.salk.edu/">McVicker Lab</a> •
-  <a href="https://github.com/bmvdgeijn/WASP">Original WASP</a>
 </p>
 
----
-
 ## Installation
 
-### Recommended: Bioconda
+### Bioconda
 
 ```bash
 mamba install -c conda-forge -c bioconda wasp2
 ```
 
-Installs WASP2 and all dependencies (samtools, bcftools, bedtools, htslib) automatically. Available for Linux (x86_64, aarch64) and macOS (Intel, Apple Silicon). Requires [miniforge](https://github.com/conda-forge/miniforge).
-
-### Via PyPI
+### PyPI
 
 ```bash
 pip install wasp2
 ```
 
-Pre-built wheels for Linux (x86_64, aarch64) and macOS (Intel, Apple Silicon) with Python 3.10-3.13. The Rust extension and htslib are bundled in the wheel. Requires samtools, bcftools, and bedtools installed separately.
+The PyPI package does not install external tools such as `samtools`,
+`bcftools`, or `bedtools`; install those separately.
 
-### For development
-
-```bash
-git clone https://github.com/mcvickerlab/WASP2.git
-cd WASP2
-pixi install        # resolves all dependencies including Rust toolchain
-pixi run verify     # build + test
-```
-
-### Via Docker
+### Docker
 
 ```bash
 docker pull ghcr.io/mcvickerlab/wasp2:1.4.0
-docker run --rm -v $PWD:/data ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-count --help
+docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-count --help
 ```
 
-Multi-platform image (linux/amd64 + linux/arm64) with all dependencies included.
-
-### Via Singularity/Apptainer (HPC)
+### Singularity/Apptainer
 
 ```bash
 singularity pull wasp2.sif docker://ghcr.io/mcvickerlab/wasp2:1.4.0
 singularity exec wasp2.sif wasp2-count --help
 ```
 
-### Reproducible Environment (conda-lock)
+## CLI Tools
 
-For fully pinned, reproducible installs (HPC clusters, CI, shared lab environments):
+WASP2 installs four command-line entry points:
 
-```bash
-# Recommended: mamba (fastest)
-mamba create -n WASP2 --file conda-lock.yml
+- `wasp2-map`
+- `wasp2-count`
+- `wasp2-analyze`
+- `wasp2-ipscore`
 
-# Or with conda
-conda-lock install -n WASP2 conda-lock.yml
-```
+## Quick Start
 
-`conda-lock.yml` pins every package to exact versions with checksums for `linux-64` and `osx-64`. To regenerate after updating `environment.lock.yml`:
+### 1. Correct mapping bias
 
 ```bash
-conda-lock lock -f environment.lock.yml --lockfile conda-lock.yml
-```
-
-See the [documentation](https://mcvickerlab.github.io/WASP2/) for detailed install options and development setup.
+wasp2-map make-reads input.bam variants.vcf.gz -s sample1 -o remap_dir
 
-## Quick Start
+# Realign remap_dir/*_swapped_alleles_r1.fq and r2.fq with the same aligner
+# and settings used for the original BAM, then:
 
-WASP2 has three steps that run in order:
+wasp2-map filter-remapped remapped.bam \
+  -j remap_dir/input_wasp_data_files.json \
+  -o filtered.bam
+```
 
-**Step 1: Remap reads** to correct mapping bias
+### 2. Count alleles
 
 ```bash
-wasp2-map make-reads input.bam variants.vcf.gz -s sample1 -o remap_dir/
-# Realign the swapped-allele reads with your aligner, then:
-wasp2-map filter-remapped remapped.bam -j remap_dir/sample1_wasp_data_files.json -o filtered.bam
+wasp2-count count-variants filtered.bam variants.vcf.gz -s sample1 -o counts.tsv
 ```
 
-**Step 2: Count alleles** at heterozygous SNPs
+### 3. Test for imbalance
 
 ```bash
-wasp2-count count-variants filtered.bam variants.vcf.gz -s sample1
+wasp2-analyze find-imbalance counts.tsv -o ai_results.tsv
 ```
 
-**Step 3: Test for allelic imbalance**
+## Single-Cell Example
 
 ```bash
-wasp2-analyze find-imbalance counts.tsv -o results.tsv
+wasp2-count count-variants-sc \
+  cellranger.bam \
+  variants.vcf.gz \
+  barcodes.tsv \
+  --samples sample1 \
+  --feature genes.gtf \
+  --out_file allele_counts.h5ad
+
+wasp2-analyze find-imbalance-sc \
+  allele_counts.h5ad \
+  barcode_groups.tsv \
+  --sample sample1 \
+  -o ai_results.tsv
 ```
 
-See the [documentation](https://mcvickerlab.github.io/WASP2/) for detailed usage, single-cell workflows, and supported variant formats (VCF, BCF, PGEN).
-
-## Authors
-
-- **Aaron Ho** — Creator of WASP2
-- **Jeff Jaureguy** — Developer and maintainer
-- **[McVicker Lab](https://mcvicker.salk.edu/)**, Salk Institute
+## iPSCORE Utilities
 
-## Citation
+```bash
+wasp2-ipscore inventory --output inventory.tsv
+wasp2-ipscore manifest --output manifest.csv
+wasp2-ipscore validate
+```
 
-If you use WASP2 in your research, please cite our paper (coming soon).
+See the [documentation](https://mcvickerlab.github.io/WASP2/) for complete
+usage, tutorials, and API details.