Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ jobs:

- name: Run cargo test
working-directory: rust
run: cargo test
run: PYO3_PYTHON=$(command -v python3) cargo test

- name: Run clippy
working-directory: rust
run: cargo clippy -- -W warnings
run: PYO3_PYTHON=$(command -v python3) cargo clippy -- -W warnings
29 changes: 28 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,11 @@ benchmark_figures/
# Sanity test data (downloaded from GitHub releases)
tests/sanity/data/

# Real data test files (downloaded from 1000 Genomes, ~2-3 GB)
tests/real_data/data/
tests/real_data/samplesheets/
tests/real_data/configs/

# Nextflow runtime
.nextflow/
.nextflow.log*
Expand All @@ -175,7 +180,29 @@ test-output/
results_stub/
pipelines/*/test-output/
pipelines/*/results_stub/
pipelines/*/results_*/
pipelines/*/artifacts/

# Artifacts directory
artifacts/

# Benchmark infrastructure (large data/envs/results)
test_benchmarks/

# Claude Code local state
.claude/

# Nextflow pipeline-level logs
pipelines/*/.nextflow.log*
pipelines/*/.nf-test.log

# Nextflow reports and visualizations
trace.txt
timeline.html
report.html
dag.svg
dag.dot

# Claude Code memory files (per-directory). The repository-root CLAUDE.md
# stays tracked: a leading "/" anchors the negation to this .gitignore's
# directory (gitignore has no "./" prefix form, so "!./CLAUDE.md" never matched).
**/CLAUDE.md
!/CLAUDE.md
10 changes: 8 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ LABEL maintainer="Jeff Jaureguy <jeffpjaureguy@gmail.com>"
# Install runtime deps + temporary build deps for pybedtools C++ extension
# Combined into one RUN to minimize layers; build tools purged at the end
RUN apt-get update && apt-get install -y --no-install-recommends \
# PID 1 init for proper signal handling (Nextflow/HPC)
tini \
# Bioinformatics tools
samtools \
bcftools \
Expand All @@ -106,14 +108,16 @@ RUN --mount=type=cache,target=/root/.cache/pip \
pip install /tmp/*.whl \
&& rm -rf /tmp/*.whl \
&& apt-get purge -y --auto-remove g++ zlib1g-dev \
&& rm -rf /var/lib/apt/lists/*
&& rm -rf /var/lib/apt/lists/* \
&& ! command -v g++

WORKDIR /app

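# Verify the bundled command-line tools are available. Of the Python entry
# points only `wasp2-ipscore --help` is exercised; the others are skipped
# during build because Polars uses AVX2 instructions that fail under QEMU
# emulation on ARM64 CI runners building linux/amd64 images.
# NOTE(review): confirm wasp2-ipscore does not import Polars, otherwise this
# check hits the same emulation failure (its output is sent to /dev/null).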
RUN samtools --version && bcftools --version && bedtools --version
RUN samtools --version && bcftools --version && bedtools --version \
&& wasp2-ipscore --help > /dev/null 2>&1

# Create non-root user for security
RUN groupadd -g 1000 wasp2 && \
Expand Down Expand Up @@ -147,5 +151,7 @@ WORKDIR /data
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD wasp2-count --version || exit 1

ENTRYPOINT ["tini", "--"]

# Default command
CMD ["wasp2-count", "--help"]
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

.PHONY: all build install test test-quick test-sanity lint format clean help
.PHONY: download-sanity-data sanity-data-local rust-build rust-test
# Remaining command-style targets: declaring them phony keeps a same-named
# file or directory from making them look up to date.
.PHONY: rust-dev rust-bench test-rust test-integration test-mapping-parity

# Configuration
PYTHON ?= python
Expand Down Expand Up @@ -48,7 +49,7 @@ rust-dev: ## Build Rust extension in debug mode (faster compile)
$(MATURIN) develop -m $(RUST_DIR)/Cargo.toml

rust-test: ## Run Rust unit tests
cd $(RUST_DIR) && $(CARGO) test
cd $(RUST_DIR) && PYO3_PYTHON=$$($(PYTHON) -c "import sys; print(sys.executable)") $(CARGO) test

rust-bench: ## Run Rust benchmarks
cd $(RUST_DIR) && $(CARGO) bench
Expand All @@ -68,6 +69,9 @@ test-quick: ## Run quick validation tests only
test-rust: ## Run Rust-specific tests
$(PYTEST) $(TESTS_DIR) -v --tb=short -m "rust"

test-mapping-parity: ## Run mapping parity tests against legacy and unified paths
$(PYTEST) $(TESTS_DIR)/regression/test_mapping_stage_parity.py -v --tb=short

test-integration: ## Run integration tests
$(PYTEST) $(TESTS_DIR) -v --tb=short -m "integration"

Expand Down
110 changes: 48 additions & 62 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,114 +15,100 @@
<a href="https://mcvickerlab.github.io/WASP2/">
<img src="https://img.shields.io/badge/docs-GitHub%20Pages-blue" alt="Documentation">
</a>
<a href="https://github.com/mcvickerlab/WASP2/blob/master/LICENSE">
<img src="https://img.shields.io/badge/license-MIT-green" alt="License">
</a>
</p>

<p align="center">
<a href="https://mcvickerlab.github.io/WASP2/">Documentation</a> •
<a href="https://mcvicker.salk.edu/">McVicker Lab</a> •
<a href="https://github.com/bmvdgeijn/WASP">Original WASP</a>
</p>

---

## Installation

### Recommended: Bioconda
### Bioconda

```bash
mamba install -c conda-forge -c bioconda wasp2
```

Installs WASP2 and all dependencies (samtools, bcftools, bedtools, htslib) automatically. Available for Linux (x86_64, aarch64) and macOS (Intel, Apple Silicon). Requires [miniforge](https://github.com/conda-forge/miniforge).

### Via PyPI
### PyPI

```bash
pip install wasp2
```

Pre-built wheels for Linux (x86_64, aarch64) and macOS (Intel, Apple Silicon) with Python 3.10-3.13. The Rust extension and htslib are bundled in the wheel. Requires samtools, bcftools, and bedtools installed separately.
The PyPI package does not install external tools such as `samtools`,
`bcftools`, or `bedtools`; install those separately.

### For development

```bash
git clone https://github.com/mcvickerlab/WASP2.git
cd WASP2
pixi install # resolves all dependencies including Rust toolchain
pixi run verify # build + test
```

### Via Docker
### Docker

```bash
docker pull ghcr.io/mcvickerlab/wasp2:1.4.0
docker run --rm -v $PWD:/data ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-count --help
docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-count --help
```

Multi-platform image (linux/amd64 + linux/arm64) with all dependencies included.

### Via Singularity/Apptainer (HPC)
### Singularity/Apptainer

```bash
singularity pull wasp2.sif docker://ghcr.io/mcvickerlab/wasp2:1.4.0
singularity exec wasp2.sif wasp2-count --help
```

### Reproducible Environment (conda-lock)
## CLI Tools

For fully pinned, reproducible installs (HPC clusters, CI, shared lab environments):
WASP2 installs four command-line entry points:

```bash
# Recommended: mamba (fastest)
mamba create -n WASP2 --file conda-lock.yml
- `wasp2-map`
- `wasp2-count`
- `wasp2-analyze`
- `wasp2-ipscore`

# Or with conda
conda-lock install -n WASP2 conda-lock.yml
```
## Quick Start

`conda-lock.yml` pins every package to exact versions with checksums for `linux-64` and `osx-64`. To regenerate after updating `environment.lock.yml`:
### 1. Correct mapping bias

```bash
conda-lock lock -f environment.lock.yml --lockfile conda-lock.yml
```

See the [documentation](https://mcvickerlab.github.io/WASP2/) for detailed install options and development setup.
wasp2-map make-reads input.bam variants.vcf.gz -s sample1 -o remap_dir

## Quick Start
# Realign remap_dir/*_swapped_alleles_r1.fq and r2.fq with the same aligner
# and settings used for the original BAM, then:

WASP2 has three steps that run in order:
wasp2-map filter-remapped remapped.bam \
-j remap_dir/input_wasp_data_files.json \
-o filtered.bam
```

**Step 1: Remap reads** to correct mapping bias
### 2. Count alleles

```bash
wasp2-map make-reads input.bam variants.vcf.gz -s sample1 -o remap_dir/
# Realign the swapped-allele reads with your aligner, then:
wasp2-map filter-remapped remapped.bam -j remap_dir/sample1_wasp_data_files.json -o filtered.bam
wasp2-count count-variants filtered.bam variants.vcf.gz -s sample1 -o counts.tsv
```

**Step 2: Count alleles** at heterozygous SNPs
### 3. Test for imbalance

```bash
wasp2-count count-variants filtered.bam variants.vcf.gz -s sample1
wasp2-analyze find-imbalance counts.tsv -o ai_results.tsv
```

**Step 3: Test for allelic imbalance**
## Single-Cell Example

```bash
wasp2-analyze find-imbalance counts.tsv -o results.tsv
wasp2-count count-variants-sc \
cellranger.bam \
variants.vcf.gz \
barcodes.tsv \
--samples sample1 \
--feature genes.gtf \
--out_file allele_counts.h5ad

wasp2-analyze find-imbalance-sc \
allele_counts.h5ad \
barcode_groups.tsv \
--sample sample1 \
-o ai_results.tsv
```

See the [documentation](https://mcvickerlab.github.io/WASP2/) for detailed usage, single-cell workflows, and supported variant formats (VCF, BCF, PGEN).

## Authors

- **Aaron Ho** — Creator of WASP2
- **Jeff Jaureguy** — Developer and maintainer
- **[McVicker Lab](https://mcvicker.salk.edu/)**, Salk Institute
## iPSCORE Utilities

## Citation
```bash
wasp2-ipscore inventory --output inventory.tsv
wasp2-ipscore manifest --output manifest.csv
wasp2-ipscore validate
```

If you use WASP2 in your research, please cite our paper (coming soon).
See the [documentation](https://mcvickerlab.github.io/WASP2/) for complete
usage, tutorials, and API details.
Loading
Loading