From df6f52bfda7304c37168d61197f7c62cd2ff788e Mon Sep 17 00:00:00 2001
From: Jeff Jaureguy <67065808+Jaureguy760@users.noreply.github.com>
Date: Thu, 5 Mar 2026 13:38:29 -0800
Subject: [PATCH 1/2] feat: nf-core compliance and HPC executor support for all
4 pipelines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add comprehensive nf-core compliance infrastructure and multi-scheduler
HPC support across nf-rnaseq, nf-atacseq, nf-scatac, and nf-outrider.
nf-core compliance (54-62% → 100%):
- .github/ CI/CD workflows (ci.yml, linting.yml) for all 4 pipelines
- .github/ issue templates (bug_report, feature_request) and PR template
- CODE_OF_CONDUCT.md, .gitignore, .gitattributes, .prettierrc.yml
- modules.json for atacseq and outrider
- multiqc_config.yml for rnaseq
- nf-test.config standardized (testsDir ".", env var workDir, tests/nextflow.config)
- nf_core_version: "3.0" added to all .nf-core.yml
HPC executor support (0 → 5 schedulers × 4 pipelines = 20 configs):
- SLURM, SGE, LSF, PBS Pro, AWS Batch executor profiles
- Consistent resource limits (64 cpu/256 GB on-prem, 96 cpu/768 GB cloud)
- All profiles registered in nextflow.config with includeConfig
Pipeline-specific fixes:
- nf-rnaseq: test_stub/test_full/test_local configs, process_single label,
env block, params.tracedir, nextflowVersion bumped to 23.04.0
- nf-scatac: dag report, container profile mutual exclusion, process_single/
process_long/process_high_memory labels, debug profile
- nf-outrider: resourceLimits raised to 200 GB (matches process_high_memory),
error strategy aligned to range pattern
- nf-atacseq: nf_core_modules section in .nf-core.yml
Co-Authored-By: Claude Opus 4.6
---
.../seqera_ai_integration.md | 0
docs/source/choosing_workflow.rst | 96 +++++++++++
docs/source/conf.py | 4 +-
docs/source/faq.rst | 159 ++++++++++++++++++
docs/source/index.rst | 1 -
docs/source/installation.rst | 7 +-
docs/source/quickstart.rst | 18 +-
pipelines/nf-atacseq/.gitattributes | 2 +
.../.github/ISSUE_TEMPLATE/bug_report.yml | 73 ++++++++
.../ISSUE_TEMPLATE/feature_request.yml | 51 ++++++
.../.github/PULL_REQUEST_TEMPLATE.md | 29 ++++
pipelines/nf-atacseq/.github/workflows/ci.yml | 74 ++++++++
.../nf-atacseq/.github/workflows/linting.yml | 56 ++++++
pipelines/nf-atacseq/.gitignore | 7 +
pipelines/nf-atacseq/.nf-core.yml | 16 ++
pipelines/nf-atacseq/.prettierignore | 9 +
pipelines/nf-atacseq/.prettierrc.yml | 1 +
pipelines/nf-atacseq/CODE_OF_CONDUCT.md | 36 ++++
pipelines/nf-atacseq/conf/awsbatch.config | 64 +++++++
pipelines/nf-atacseq/conf/base.config | 7 +
pipelines/nf-atacseq/conf/lsf.config | 50 ++++++
pipelines/nf-atacseq/conf/pbs.config | 53 ++++++
pipelines/nf-atacseq/conf/sge.config | 57 +++++++
pipelines/nf-atacseq/conf/slurm.config | 57 +++++++
pipelines/nf-atacseq/modules.json | 5 +
pipelines/nf-atacseq/nextflow.config | 35 ++++
pipelines/nf-atacseq/nf-test.config | 9 +-
pipelines/nf-atacseq/tests/nextflow.config | 9 +
pipelines/nf-outrider/.gitattributes | 2 +
.../.github/ISSUE_TEMPLATE/bug_report.yml | 73 ++++++++
.../ISSUE_TEMPLATE/feature_request.yml | 51 ++++++
.../.github/PULL_REQUEST_TEMPLATE.md | 29 ++++
.../nf-outrider/.github/workflows/ci.yml | 74 ++++++++
.../nf-outrider/.github/workflows/linting.yml | 56 ++++++
pipelines/nf-outrider/.gitignore | 7 +
pipelines/nf-outrider/.nf-core.yml | 1 +
pipelines/nf-outrider/.prettierignore | 9 +
pipelines/nf-outrider/.prettierrc.yml | 1 +
pipelines/nf-outrider/CODE_OF_CONDUCT.md | 36 ++++
pipelines/nf-outrider/conf/awsbatch.config | 64 +++++++
pipelines/nf-outrider/conf/base.config | 9 +-
pipelines/nf-outrider/conf/lsf.config | 50 ++++++
pipelines/nf-outrider/conf/pbs.config | 53 ++++++
pipelines/nf-outrider/conf/sge.config | 57 +++++++
pipelines/nf-outrider/conf/slurm.config | 57 +++++++
pipelines/nf-outrider/modules.json | 5 +
pipelines/nf-outrider/nextflow.config | 35 ++++
pipelines/nf-outrider/nf-test.config | 12 +-
pipelines/nf-outrider/tests/nextflow.config | 9 +
pipelines/nf-rnaseq/.gitattributes | 2 +
.../.github/ISSUE_TEMPLATE/bug_report.yml | 73 ++++++++
.../ISSUE_TEMPLATE/feature_request.yml | 51 ++++++
.../.github/PULL_REQUEST_TEMPLATE.md | 29 ++++
pipelines/nf-rnaseq/.github/workflows/ci.yml | 74 ++++++++
.../nf-rnaseq/.github/workflows/linting.yml | 56 ++++++
pipelines/nf-rnaseq/.gitignore | 7 +
pipelines/nf-rnaseq/.nf-core.yml | 1 +
pipelines/nf-rnaseq/.prettierignore | 9 +
pipelines/nf-rnaseq/.prettierrc.yml | 1 +
pipelines/nf-rnaseq/CODE_OF_CONDUCT.md | 36 ++++
pipelines/nf-rnaseq/assets/multiqc_config.yml | 80 +++++++++
pipelines/nf-rnaseq/conf/awsbatch.config | 64 +++++++
pipelines/nf-rnaseq/conf/base.config | 13 ++
pipelines/nf-rnaseq/conf/lsf.config | 49 ++++++
pipelines/nf-rnaseq/conf/pbs.config | 53 ++++++
pipelines/nf-rnaseq/conf/sge.config | 57 +++++++
pipelines/nf-rnaseq/conf/slurm.config | 57 +++++++
pipelines/nf-rnaseq/conf/test_full.config | 24 +++
pipelines/nf-rnaseq/conf/test_local.config | 27 +++
pipelines/nf-rnaseq/conf/test_stub.config | 29 ++++
pipelines/nf-rnaseq/modules.json | 5 +
pipelines/nf-rnaseq/nextflow.config | 65 ++++++-
pipelines/nf-rnaseq/nf-test.config | 14 +-
pipelines/nf-rnaseq/tests/nextflow.config | 9 +
pipelines/nf-scatac/.gitattributes | 2 +
.../.github/ISSUE_TEMPLATE/bug_report.yml | 73 ++++++++
.../ISSUE_TEMPLATE/feature_request.yml | 51 ++++++
.../.github/PULL_REQUEST_TEMPLATE.md | 29 ++++
pipelines/nf-scatac/.github/workflows/ci.yml | 74 ++++++++
.../nf-scatac/.github/workflows/linting.yml | 56 ++++++
pipelines/nf-scatac/.gitignore | 7 +
pipelines/nf-scatac/.nf-core.yml | 1 +
pipelines/nf-scatac/.prettierignore | 9 +
pipelines/nf-scatac/.prettierrc.yml | 1 +
pipelines/nf-scatac/CODE_OF_CONDUCT.md | 36 ++++
pipelines/nf-scatac/conf/awsbatch.config | 64 +++++++
pipelines/nf-scatac/conf/base.config | 34 +++-
pipelines/nf-scatac/conf/lsf.config | 48 ++++++
pipelines/nf-scatac/conf/pbs.config | 53 ++++++
pipelines/nf-scatac/conf/sge.config | 56 ++++++
pipelines/nf-scatac/conf/slurm.config | 55 ++++++
pipelines/nf-scatac/modules.json | 5 +
pipelines/nf-scatac/nextflow.config | 50 ++++++
pipelines/nf-scatac/nf-test.config | 12 +-
pipelines/nf-scatac/tests/nextflow.config | 9 +
src/wasp2/__init__.py | 2 +-
96 files changed, 3232 insertions(+), 55 deletions(-)
rename docs/{source => internal}/seqera_ai_integration.md (100%)
create mode 100644 docs/source/choosing_workflow.rst
create mode 100644 docs/source/faq.rst
create mode 100644 pipelines/nf-atacseq/.gitattributes
create mode 100644 pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/bug_report.yml
create mode 100644 pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/feature_request.yml
create mode 100644 pipelines/nf-atacseq/.github/PULL_REQUEST_TEMPLATE.md
create mode 100644 pipelines/nf-atacseq/.github/workflows/ci.yml
create mode 100644 pipelines/nf-atacseq/.github/workflows/linting.yml
create mode 100644 pipelines/nf-atacseq/.gitignore
create mode 100644 pipelines/nf-atacseq/.prettierignore
create mode 100644 pipelines/nf-atacseq/.prettierrc.yml
create mode 100644 pipelines/nf-atacseq/CODE_OF_CONDUCT.md
create mode 100644 pipelines/nf-atacseq/conf/awsbatch.config
create mode 100644 pipelines/nf-atacseq/conf/lsf.config
create mode 100644 pipelines/nf-atacseq/conf/pbs.config
create mode 100644 pipelines/nf-atacseq/conf/sge.config
create mode 100644 pipelines/nf-atacseq/conf/slurm.config
create mode 100644 pipelines/nf-atacseq/modules.json
create mode 100644 pipelines/nf-atacseq/tests/nextflow.config
create mode 100644 pipelines/nf-outrider/.gitattributes
create mode 100644 pipelines/nf-outrider/.github/ISSUE_TEMPLATE/bug_report.yml
create mode 100644 pipelines/nf-outrider/.github/ISSUE_TEMPLATE/feature_request.yml
create mode 100644 pipelines/nf-outrider/.github/PULL_REQUEST_TEMPLATE.md
create mode 100644 pipelines/nf-outrider/.github/workflows/ci.yml
create mode 100644 pipelines/nf-outrider/.github/workflows/linting.yml
create mode 100644 pipelines/nf-outrider/.gitignore
create mode 100644 pipelines/nf-outrider/.prettierignore
create mode 100644 pipelines/nf-outrider/.prettierrc.yml
create mode 100644 pipelines/nf-outrider/CODE_OF_CONDUCT.md
create mode 100644 pipelines/nf-outrider/conf/awsbatch.config
create mode 100644 pipelines/nf-outrider/conf/lsf.config
create mode 100644 pipelines/nf-outrider/conf/pbs.config
create mode 100644 pipelines/nf-outrider/conf/sge.config
create mode 100644 pipelines/nf-outrider/conf/slurm.config
create mode 100644 pipelines/nf-outrider/modules.json
create mode 100644 pipelines/nf-outrider/tests/nextflow.config
create mode 100644 pipelines/nf-rnaseq/.gitattributes
create mode 100644 pipelines/nf-rnaseq/.github/ISSUE_TEMPLATE/bug_report.yml
create mode 100644 pipelines/nf-rnaseq/.github/ISSUE_TEMPLATE/feature_request.yml
create mode 100644 pipelines/nf-rnaseq/.github/PULL_REQUEST_TEMPLATE.md
create mode 100644 pipelines/nf-rnaseq/.github/workflows/ci.yml
create mode 100644 pipelines/nf-rnaseq/.github/workflows/linting.yml
create mode 100644 pipelines/nf-rnaseq/.gitignore
create mode 100644 pipelines/nf-rnaseq/.prettierignore
create mode 100644 pipelines/nf-rnaseq/.prettierrc.yml
create mode 100644 pipelines/nf-rnaseq/CODE_OF_CONDUCT.md
create mode 100644 pipelines/nf-rnaseq/assets/multiqc_config.yml
create mode 100644 pipelines/nf-rnaseq/conf/awsbatch.config
create mode 100644 pipelines/nf-rnaseq/conf/lsf.config
create mode 100644 pipelines/nf-rnaseq/conf/pbs.config
create mode 100644 pipelines/nf-rnaseq/conf/sge.config
create mode 100644 pipelines/nf-rnaseq/conf/slurm.config
create mode 100644 pipelines/nf-rnaseq/conf/test_full.config
create mode 100644 pipelines/nf-rnaseq/conf/test_local.config
create mode 100644 pipelines/nf-rnaseq/conf/test_stub.config
create mode 100644 pipelines/nf-rnaseq/modules.json
create mode 100644 pipelines/nf-rnaseq/tests/nextflow.config
create mode 100644 pipelines/nf-scatac/.gitattributes
create mode 100644 pipelines/nf-scatac/.github/ISSUE_TEMPLATE/bug_report.yml
create mode 100644 pipelines/nf-scatac/.github/ISSUE_TEMPLATE/feature_request.yml
create mode 100644 pipelines/nf-scatac/.github/PULL_REQUEST_TEMPLATE.md
create mode 100644 pipelines/nf-scatac/.github/workflows/ci.yml
create mode 100644 pipelines/nf-scatac/.github/workflows/linting.yml
create mode 100644 pipelines/nf-scatac/.gitignore
create mode 100644 pipelines/nf-scatac/.prettierignore
create mode 100644 pipelines/nf-scatac/.prettierrc.yml
create mode 100644 pipelines/nf-scatac/CODE_OF_CONDUCT.md
create mode 100644 pipelines/nf-scatac/conf/awsbatch.config
create mode 100644 pipelines/nf-scatac/conf/lsf.config
create mode 100644 pipelines/nf-scatac/conf/pbs.config
create mode 100644 pipelines/nf-scatac/conf/sge.config
create mode 100644 pipelines/nf-scatac/conf/slurm.config
create mode 100644 pipelines/nf-scatac/modules.json
create mode 100644 pipelines/nf-scatac/tests/nextflow.config
diff --git a/docs/source/seqera_ai_integration.md b/docs/internal/seqera_ai_integration.md
similarity index 100%
rename from docs/source/seqera_ai_integration.md
rename to docs/internal/seqera_ai_integration.md
diff --git a/docs/source/choosing_workflow.rst b/docs/source/choosing_workflow.rst
new file mode 100644
index 0000000..d393b98
--- /dev/null
+++ b/docs/source/choosing_workflow.rst
@@ -0,0 +1,96 @@
+Choosing the Right Workflow
+===========================
+
+WASP2 supports four major data types. Use this guide to find your workflow.
+
+.. list-table::
+ :header-rows: 1
+ :widths: 25 25 25 25
+
+ * - Data Type
+ - Input
+ - Goal
+ - Start Here
+ * - **Bulk RNA-seq**
+ - BAM + phased VCF
+ - Allele-specific expression (ASE)
+ - :doc:`tutorials/rna_seq`
+ * - **Bulk ATAC-seq**
+ - BAM + phased VCF
+ - Allele-specific chromatin accessibility
+ - :doc:`tutorials/atac_seq_workflow`
+ * - **scRNA-seq (10x)**
+ - Cell Ranger BAM + VCF + barcodes
+ - Per-cell or per-cell-type ASE
+ - :doc:`tutorials/scrna_seq`
+ * - **scATAC-seq (10x)**
+ - Fragments/BAM + VCF + barcodes
+ - Single-cell allelic imbalance in ATAC peaks
+ - :doc:`tutorials/scatac_workflow`
+
+Decision Flowchart
+------------------
+
+**Step 1: What sequencing assay did you run?**
+
+* RNA-seq → go to Step 2
+* ATAC-seq → go to Step 3
+
+**Step 2: Bulk or single-cell RNA-seq?**
+
+* Bulk RNA-seq → :doc:`tutorials/rna_seq`
+* 10x Chromium scRNA-seq → :doc:`tutorials/scrna_seq`
+* Other single-cell protocol → see :doc:`user_guide/single_cell`
+
+**Step 3: Bulk or single-cell ATAC-seq?**
+
+* Bulk ATAC-seq → :doc:`tutorials/atac_seq_workflow`
+* 10x scATAC-seq (fragments file) → :doc:`tutorials/scatac_workflow`
+* 10x scATAC-seq (BAM with CB tag) → :doc:`tutorials/scatac_workflow` (BAM path)
+
+Do I Need to Run the WASP Remapping Step?
+------------------------------------------
+
+The remapping step (``wasp2-map``) corrects **reference mapping bias** — reads
+carrying the alternative allele are harder to map than reference-allele reads,
+causing false-positive imbalance signals.
+
+**You need remapping if:**
+
+* Your BAM was aligned with a standard aligner (BWA-MEM, STAR, HISAT2, bowtie2)
+* You want the most rigorous allele-specific analysis
+* You are studying regions near known variants (high variant density)
+
+**You can skip remapping if:**
+
+* Your BAM was already produced by an unbiased pipeline
+* You are doing a quick exploratory analysis
+* You are using simulated or controlled data
+
+See :doc:`user_guide/mapping` for the full remapping workflow.
+
+What VCF Do I Need?
+--------------------
+
+WASP2 requires a **phased VCF** with heterozygous variants for the sample(s)
+you are analyzing. Supported formats:
+
+* VCF/BCF (bgzip + tabix indexed)
+* PLINK2 PGEN files (with ``.pvar`` + ``.psam``)
+
+See :doc:`user_guide/counting` for VCF format requirements and examples using
+bcftools to subset and phase.
+
+Nextflow Pipelines
+------------------
+
+If you prefer a managed workflow with automatic parallelization, containerization,
+and output publishing, use the bundled Nextflow pipelines instead of the CLI:
+
+* **nf-rnaseq** — bulk RNA-seq allele-specific expression
+* **nf-atacseq** — bulk ATAC-seq allele-specific chromatin accessibility
+* **nf-scatac** — single-cell ATAC-seq allelic imbalance
+* **nf-outrider** — outlier expression detection with allele-aware correction
+
+See the pipeline-specific documentation for samplesheet format and parameter
+reference.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 923fd41..543451c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -21,9 +21,9 @@
author = "Aaron Ho, Jeff Jaureguy, McVicker Lab"
# The short X.Y version
-version = "1.3"
+version = "1.4"
# The full version, including alpha/beta/rc tags
-release = "1.3.0"
+release = "1.4.0"
# -- General configuration ---------------------------------------------------
diff --git a/docs/source/faq.rst b/docs/source/faq.rst
new file mode 100644
index 0000000..2779a40
--- /dev/null
+++ b/docs/source/faq.rst
@@ -0,0 +1,159 @@
+Frequently Asked Questions
+==========================
+
+Installation
+------------
+
+**Which install method should I use?**
+
+For most users: ``mamba install -c conda-forge -c bioconda wasp2`` (Bioconda).
+This installs WASP2 and all dependencies (samtools, bcftools, bedtools) in one step.
+
+Use ``pip install wasp2`` if you are on a system without conda or want a specific
+Python environment. You will need to install samtools, bcftools, and bedtools separately.
+
+Use Docker or Singularity on HPC clusters or when you need full reproducibility.
+
+**What Python versions are supported?**
+
+Python 3.10, 3.11, 3.12, and 3.13. Pre-built wheels are available for all four
+on Linux (x86_64, aarch64) and macOS (Intel, Apple Silicon).
+
+**Why do I get an error about missing samtools/bcftools/bedtools?**
+
+The PyPI wheel bundles the Rust extension and htslib but not the system binaries.
+Install them via conda (``mamba install -c bioconda samtools bcftools bedtools``)
+or your system package manager.
+
+Input Data
+----------
+
+**Do I need phased genotypes?**
+
+Yes. WASP2 assigns reads to haplotypes using phased heterozygous variants. Without
+phase information, WASP2 cannot distinguish which allele a read came from. Use
+WhatsHap, SHAPEIT4, or Eagle2 to phase your VCF before running WASP2.
+
+**What VCF formats does WASP2 support?**
+
+* VCF or BCF (bgzip-compressed + tabix-indexed: ``.vcf.gz`` + ``.tbi``)
+* PLINK2 PGEN format (``.pgen`` + ``.pvar`` + ``.psam``)
+
+Multi-sample VCFs are supported; use ``-s SAMPLE_ID`` to specify the target sample.
+
+**Can I use an unphased VCF?**
+
+The counting step (``wasp2-count``) will still run but the allele assignments will
+be arbitrary. The statistical results will have reduced power and increased false
+positives. Always use phased genotypes when possible.
+
+**My BAM doesn't have read groups. Will WASP2 work?**
+
+Yes, for counting. Read groups are not required for allele counting. For the
+remapping step (``wasp2-map``), the sample ID is needed to look up variants in
+a multi-sample VCF — pass it explicitly with ``-s SAMPLE_ID``.
+
+Running WASP2
+-------------
+
+**How long does each step take?**
+
+Typical runtimes on a single core for a 30× whole-genome BAM (~100M reads):
+
+* ``wasp2-map make-reads``: 2–4 hours
+* Re-alignment (external): depends on aligner
+* ``wasp2-map filter-remapped``: 1–2 hours
+* ``wasp2-count count-variants``: 30–60 minutes
+* ``wasp2-analyze find-imbalance``: < 5 minutes
+
+Use the Nextflow pipelines for automatic parallelization across chromosomes/samples.
+
+**Can I run WASP2 on multiple samples at once?**
+
+Yes. WASP2 CLI processes one sample at a time; run multiple samples in parallel
+with a job scheduler (SLURM, PBS) or use the Nextflow pipelines which handle
+parallelization automatically.
+
+**What is the ``--region`` flag for?**
+
+Restrict counting to a specific genomic region (e.g., ``chr1:1000000-2000000``).
+Useful for testing on a subset of data or for chromosome-level parallelization.
+
+Single-Cell
+-----------
+
+**What single-cell chemistries are supported?**
+
+All 10x Genomics Chromium chemistries (scRNA v1/v2/v3, scATAC v1/v2) and any
+other protocol with a cell barcode tag in the BAM (CB tag by default). See
+:doc:`user_guide/single_cell` for barcode format details.
+
+**Do I need Cell Ranger output?**
+
+No, but it is the most common input. WASP2 needs:
+
+* A BAM with cell barcodes in a BAM tag (default: ``CB``)
+* A whitelist of valid barcodes (optional but recommended)
+* A phased VCF
+
+Any aligner that produces CB-tagged BAMs will work (STARsolo, Alevin-fry, etc.).
+
+**How do I get per-cell-type results?**
+
+Run WASP2 on the full BAM to get per-cell allele counts, then use the output
+with your cell type annotations in Python (AnnData/Scanpy) to aggregate by
+cell type. See :doc:`tutorials/scrna_seq` for an example.
+
+Output and Results
+------------------
+
+**What does the p-value in the output represent?**
+
+The p-value comes from a likelihood ratio test comparing the beta-binomial model
+under allelic imbalance vs. the null model of balanced expression. The test is
+calibrated for the overdispersion typical of RNA-seq count data.
+
+**What FDR threshold should I use?**
+
+The standard threshold is FDR < 0.05. For discovery analyses you may want
+FDR < 0.1. For validation or follow-up experiments, consider FDR < 0.01.
+See :doc:`methods/fdr_correction` for details on the BH procedure used.
+
+**My output has very few significant sites. What's wrong?**
+
+Common causes:
+
+* Low coverage at heterozygous sites (increase ``--min_count``)
+* Too few heterozygous variants in the VCF
+* VCF and BAM use different chromosome naming conventions (``chr1`` vs ``1``)
+* VCF is not phased
+
+**My output has too many significant sites (inflated FDR).**
+
+This typically means mapping bias is driving the signal. Run the WASP remapping
+step (``wasp2-map``) before counting. See :doc:`user_guide/mapping`.
+
+Troubleshooting
+---------------
+
+**I get "chromosome not found" errors.**
+
+VCF and BAM must use the same chromosome naming convention. If your VCF uses
+``chr1`` and your BAM uses ``1`` (or vice versa), use ``bcftools annotate --rename-chrs``
+to harmonize the VCF.
+
+**The Rust extension fails to load.**
+
+This happens if the wheel was built for a different platform or Python version.
+Try reinstalling: ``pip install --force-reinstall wasp2``. If building from source,
+run ``pixi run verify`` to rebuild.
+
+**WASP2 runs but produces an empty counts file.**
+
+Check that:
+
+* The BAM is coordinate-sorted and indexed (``.bai`` file present)
+* The VCF overlaps the regions in your BAM
+* The sample name passed with ``-s`` matches a sample in the VCF
+
+Use ``bcftools query -l variants.vcf.gz`` to list VCF sample names.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index fb0104e..1a5fc6a 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -98,7 +98,6 @@ Documentation
:caption: Development
development
- seqera_ai_integration
changelog
Indices and tables
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 2e260c3..f039a97 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -19,7 +19,7 @@ Or with conda:
conda install -c conda-forge -c bioconda wasp2
Available for Linux (x86_64, aarch64) and macOS (Intel, Apple Silicon) with
-Python 3.11-3.12.
+Python 3.10-3.13.
Via PyPI
--------
@@ -78,9 +78,8 @@ For contributing or building from source:
git clone https://github.com/mcvickerlab/WASP2.git
cd WASP2
- pixi install # resolves all deps including Rust toolchain
- pixi run build # builds the Rust extension
- pixi run test # runs the test suite
+ pixi install # resolves all deps including Rust toolchain
+ pixi run verify # build + test suite in one step
`pixi <https://pixi.sh/>`_ resolves Python, Rust toolchain, samtools, bcftools,
bedtools, and htslib automatically. No system packages required.
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index f91211a..2973027 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -3,15 +3,16 @@ Quick Start
This 5-minute tutorial demonstrates basic WASP2 usage.
-Example Data
-------------
+Prerequisites
+-------------
-Use the included test data:
+You will need:
-.. code-block:: bash
+* A coordinate-sorted, indexed BAM file (``sample.bam`` + ``sample.bam.bai``)
+* A phased VCF file with heterozygous variants (``variants.vcf.gz`` + ``.tbi``)
- cd WASP2-exp
- ls test_data/
+These are typically produced by your alignment pipeline (BWA-MEM, STAR, etc.)
+followed by variant calling and phasing (GATK, WhatsHap, SHAPEIT4).
Count Alleles
-------------
@@ -21,8 +22,9 @@ Count allele-specific reads from a BAM file:
.. code-block:: bash
wasp2-count count-variants \
- test_data/CD4_ATACseq_Day1_merged_filtered.sort.bam \
- test_data/filter_chr10.vcf \
+ sample.bam \
+ variants.vcf.gz \
+ -s SAMPLE_ID \
--out_file counts.tsv
Output: ``counts.tsv`` with columns:
diff --git a/pipelines/nf-atacseq/.gitattributes b/pipelines/nf-atacseq/.gitattributes
new file mode 100644
index 0000000..ff938c9
--- /dev/null
+++ b/pipelines/nf-atacseq/.gitattributes
@@ -0,0 +1,2 @@
+*.config linguist-language=Nextflow
+*.nf linguist-language=Nextflow
diff --git a/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/bug_report.yml b/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..e3a02d7
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,73 @@
+name: Bug Report
+description: Report a bug in wasp2/nf-atacseq
+labels: ["bug"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to report a bug in **wasp2/nf-atacseq**.
+ Please fill out the sections below to help us diagnose the issue.
+
+ - type: textarea
+ id: description
+ attributes:
+ label: Description of the bug
+ description: A clear and concise description of what the bug is.
+ validations:
+ required: true
+
+ - type: textarea
+ id: command
+ attributes:
+ label: Command used and target workflow
+ description: |
+ Please provide the full command you used to run the pipeline.
+ Include all parameters and profiles.
+ placeholder: |
+ nextflow run main.nf -profile docker,test --outdir results
+ validations:
+ required: true
+
+ - type: textarea
+ id: output
+ attributes:
+ label: Relevant output / error message
+ description: |
+ Please copy and paste the relevant output or error message.
+ This will be automatically formatted into code.
+ render: console
+ validations:
+ required: true
+
+ - type: textarea
+ id: system
+ attributes:
+ label: System information
+ description: |
+ Please provide details about your computing environment.
+ value: |
+ - Nextflow version:
+ - Container engine (Docker/Singularity/Conda):
+ - Container version:
+ - OS:
+ - Hardware (local/HPC/cloud):
+ validations:
+ required: true
+
+ - type: textarea
+ id: nextflow-version
+ attributes:
+ label: Nextflow log
+ description: |
+ Paste the output of `nextflow log last` or the relevant `.nextflow.log` contents.
+ render: console
+ validations:
+ required: false
+
+ - type: textarea
+ id: additional
+ attributes:
+ label: Additional context
+ description: Add any other context about the problem here (screenshots, sample data, config files).
+ validations:
+ required: false
diff --git a/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/feature_request.yml b/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..8bf38d5
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,51 @@
+name: Feature Request
+description: Suggest a new feature or enhancement for wasp2/nf-atacseq
+labels: ["enhancement"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for suggesting a feature for **wasp2/nf-atacseq**.
+ Please describe your idea clearly so we can evaluate it.
+
+ - type: textarea
+ id: description
+ attributes:
+ label: Description
+ description: A clear and concise description of the feature you would like.
+ validations:
+ required: true
+
+ - type: textarea
+ id: problem
+ attributes:
+ label: Is your feature request related to a problem?
+ description: |
+ A clear and concise description of the problem.
+ e.g. "I'm always frustrated when [...]"
+ validations:
+ required: false
+
+ - type: textarea
+ id: solution
+ attributes:
+ label: Proposed solution
+ description: A clear and concise description of what you want to happen.
+ validations:
+ required: false
+
+ - type: textarea
+ id: alternatives
+ attributes:
+ label: Alternatives considered
+ description: A description of any alternative solutions or features you have considered.
+ validations:
+ required: false
+
+ - type: textarea
+ id: additional
+ attributes:
+ label: Additional context
+ description: Add any other context, references, or screenshots about the feature request here.
+ validations:
+ required: false
diff --git a/pipelines/nf-atacseq/.github/PULL_REQUEST_TEMPLATE.md b/pipelines/nf-atacseq/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..80ee771
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,29 @@
+## PR checklist
+
+- [ ] This comment contains a description of changes with context.
+- [ ] Tests pass locally with `nf-test test` using the `test,docker` profile.
+- [ ] If you have added new modules/processes, they include `nf-test` tests.
+- [ ] If applicable, new parameters are documented in `nextflow_schema.json`.
+- [ ] Pipeline runs successfully with `test` and `test_stub` profiles.
+- [ ] `CHANGELOG.md` is updated with noteworthy changes.
+
+## Description
+
+
+
+Fixes # (issue)
+
+## Type of change
+
+- [ ] Bug fix (non-breaking change that fixes an issue)
+- [ ] New feature (non-breaking change that adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] Documentation update
+
+## How has this been tested?
+
+
+
+- [ ] `nf-test test --profile test,docker`
+- [ ] `nf-test test --profile test_stub,docker`
+- [ ] Manual pipeline run with real data
diff --git a/pipelines/nf-atacseq/.github/workflows/ci.yml b/pipelines/nf-atacseq/.github/workflows/ci.yml
new file mode 100644
index 0000000..77705cf
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/workflows/ci.yml
@@ -0,0 +1,74 @@
+name: nf-atacseq CI
+on:
+ push:
+ branches:
+ - dev
+ - main
+ pull_request:
+ branches:
+ - dev
+ - main
+ release:
+ types: [published]
+
+# Cancel in-progress runs for the same branch/PR
+concurrency:
+ group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+ cancel-in-progress: true
+
+env:
+ NXF_ANSI_LOG: false
+ NFT_VER: "0.9.2"
+ NFT_WORKDIR: ".nf-test"
+
+jobs:
+ test:
+ name: "nf-test | ${{ matrix.NXF_VER }} | ${{ matrix.profile }}"
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ NXF_VER:
+ - "23.04.0"
+ - "latest-everything"
+ profile:
+ - "test,docker"
+ - "test_stub,docker"
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "${{ matrix.NXF_VER }}"
+
+ - name: Setup nf-test
+ uses: nf-core/setup-nf-test@v1
+ with:
+ version: "${{ env.NFT_VER }}"
+
+ - name: Cache nf-test
+ uses: actions/cache@v4
+ with:
+ path: |
+ ${{ env.NFT_WORKDIR }}
+ ~/.nextflow/assets
+ key: "${{ runner.os }}-nftest-${{ hashFiles('nf-test.config') }}-${{ hashFiles('**/*.nf') }}"
+ restore-keys: |
+ ${{ runner.os }}-nftest-${{ hashFiles('nf-test.config') }}-
+ ${{ runner.os }}-nftest-
+
+ - name: Run nf-test
+ run: |
+ nf-test test \
+ --profile=${{ matrix.profile }} \
+ --tap=test-output.tap \
+ --verbose
+
+ - name: Upload test output
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: "nf-test-output-${{ matrix.NXF_VER }}-${{ strategy.job-index }}"
+ path: test-output.tap
diff --git a/pipelines/nf-atacseq/.github/workflows/linting.yml b/pipelines/nf-atacseq/.github/workflows/linting.yml
new file mode 100644
index 0000000..a4b0ada
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/workflows/linting.yml
@@ -0,0 +1,56 @@
+name: nf-core linting
+on:
+ push:
+ branches:
+ - dev
+ - main
+ pull_request:
+ branches:
+ - dev
+ - main
+
+# Cancel in-progress runs for the same branch/PR
+concurrency:
+ group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+ cancel-in-progress: true
+
+jobs:
+ nf-core-lint:
+ name: nf-core lint
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "latest-everything"
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.12"
+
+ - name: Install nf-core tools
+ run: pip install nf-core
+
+ - name: Run nf-core lint
+ run: nf-core lint --dir . --fail-warned
+ env:
+ GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+
+ nextflow-config:
+ name: Nextflow config check
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "latest-everything"
+
+ - name: Check Nextflow config
+ run: nextflow config -show-profiles
diff --git a/pipelines/nf-atacseq/.gitignore b/pipelines/nf-atacseq/.gitignore
new file mode 100644
index 0000000..a54086b
--- /dev/null
+++ b/pipelines/nf-atacseq/.gitignore
@@ -0,0 +1,7 @@
+.nextflow*
+work/
+results/
+.nf-test/
+testing/
+testing*
+*.pyc
diff --git a/pipelines/nf-atacseq/.nf-core.yml b/pipelines/nf-atacseq/.nf-core.yml
index ae1b1a5..77c6d73 100644
--- a/pipelines/nf-atacseq/.nf-core.yml
+++ b/pipelines/nf-atacseq/.nf-core.yml
@@ -1,5 +1,16 @@
# nf-core pipeline configuration
repository_type: pipeline
+nf_core_version: "3.0"
+
+# nf-core template version this pipeline is based on
+template:
+ skip:
+ - .github/
+ - .gitignore
+ - CODE_OF_CONDUCT.md
+ - LICENSE
+ - assets/email_template.html
+ - lib/
# Linting configuration - skip checks that don't apply to custom pipelines
lint:
@@ -28,3 +39,8 @@ lint:
modules_structure: false
modules_config: false
modules_json: false
+
+# nf-core modules configuration
+nf_core_modules:
+ https://github.com/nf-core/modules.git:
+ update: true
diff --git a/pipelines/nf-atacseq/.prettierignore b/pipelines/nf-atacseq/.prettierignore
new file mode 100644
index 0000000..9cb26d6
--- /dev/null
+++ b/pipelines/nf-atacseq/.prettierignore
@@ -0,0 +1,9 @@
+email_template.html
+*.config
+.nextflow*
+work/
+results/
+.nf-test/
+nextflow_schema.json
+*.nf
+assets/
diff --git a/pipelines/nf-atacseq/.prettierrc.yml b/pipelines/nf-atacseq/.prettierrc.yml
new file mode 100644
index 0000000..c81f9a7
--- /dev/null
+++ b/pipelines/nf-atacseq/.prettierrc.yml
@@ -0,0 +1 @@
+printWidth: 120
diff --git a/pipelines/nf-atacseq/CODE_OF_CONDUCT.md b/pipelines/nf-atacseq/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..3d60097
--- /dev/null
+++ b/pipelines/nf-atacseq/CODE_OF_CONDUCT.md
@@ -0,0 +1,36 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a welcoming experience for everyone.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment include:
+
+- Using welcoming and inclusive language
+- Being respectful of differing viewpoints and experiences
+- Gracefully accepting constructive criticism
+- Focusing on what is best for the community
+
+Examples of unacceptable behavior include:
+
+- Trolling, insulting or derogatory comments, and personal attacks
+- Publishing others' private information without explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Enforcement
+
+Instances of unacceptable behavior may be reported to the project team via
+[GitHub Issues](https://github.com/mcvickerlab/WASP2/issues).
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct/][v2.1].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct/
diff --git a/pipelines/nf-atacseq/conf/awsbatch.config b/pipelines/nf-atacseq/conf/awsbatch.config
new file mode 100644
index 0000000..b9b26fb
--- /dev/null
+++ b/pipelines/nf-atacseq/conf/awsbatch.config
@@ -0,0 +1,64 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ AWS Batch executor configuration — nf-atacseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to AWS Batch:
+
+ nextflow run main.nf -profile docker,awsbatch \
+ --awsbatch_queue <queue-name> \
+ -work-dir s3://<bucket>/work [options]
+
+ Required runtime parameters:
+ --awsbatch_queue AWS Batch job queue name or ARN (no default — must be provided)
+
+ Optional runtime parameters:
+ --awsbatch_cli_path Path to AWS CLI inside the container (default: /home/ec2-user/miniconda/bin/aws)
+ --aws_region AWS region (default: us-east-1)
+ --max_cpus Max CPUs per process (default: 96)
+ --max_memory Max memory per process (default: 768.GB)
+ --max_time Max time per process (default: 168.h)
+
+ IMPORTANT: Use an S3 work directory (-work-dir s3://bucket/work) to ensure
+ intermediate files are accessible across Batch compute environments.
+
+ Example:
+ nextflow run main.nf \
+ -profile docker,awsbatch \
+ --awsbatch_queue my-batch-queue \
+ --outdir s3://my-bucket/results \
+ -work-dir s3://my-bucket/work
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'awsbatch'
+ queue = params.awsbatch_queue
+}
+
+params {
+ max_cpus = 96
+ max_memory = '768.GB'
+ max_time = '168.h'
+
+ // AWS Batch-specific (set at runtime, not hardcoded here)
+ awsbatch_queue = null // --awsbatch_queue my-queue (REQUIRED)
+ awsbatch_cli_path = '/home/ec2-user/miniconda/bin/aws'
+ aws_region = 'us-east-1'
+}
+
+aws {
+ region = params.aws_region ?: 'us-east-1'
+
+ batch {
+ cliPath = params.awsbatch_cli_path
+ }
+}
+
+// nf-core v3.x resource limits override for cloud
+process {
+ resourceLimits = [
+ cpus: 96,
+ memory: 768.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-atacseq/conf/base.config b/pipelines/nf-atacseq/conf/base.config
index b03da41..256971b 100644
--- a/pipelines/nf-atacseq/conf/base.config
+++ b/pipelines/nf-atacseq/conf/base.config
@@ -51,4 +51,11 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}
+
+ // nf-core resource limits — replaces params.max_cpus/max_memory/max_time
+ resourceLimits = [
+ cpus: 16,
+ memory: 128.GB,
+ time: 240.h
+ ]
}
diff --git a/pipelines/nf-atacseq/conf/lsf.config b/pipelines/nf-atacseq/conf/lsf.config
new file mode 100644
index 0000000..8f2e680
--- /dev/null
+++ b/pipelines/nf-atacseq/conf/lsf.config
@@ -0,0 +1,50 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ LSF executor configuration — nf-atacseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to an IBM Spectrum LSF cluster:
+
+ nextflow run main.nf -profile singularity,lsf [options]
+
+ Optional runtime parameters:
+ --lsf_queue LSF queue name (default: 'normal')
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example (Wellcome Sanger):
+ nextflow run main.nf \
+ -profile singularity,lsf \
+ --lsf_queue long \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'lsf'
+
+ // Optional: per-label queue routing
+ // withLabel:process_single { queue = params.lsf_queue ?: 'short' }
+ // withLabel:process_low { queue = params.lsf_queue ?: 'short' }
+ // withLabel:process_medium { queue = params.lsf_queue ?: 'normal' }
+ // withLabel:process_high { queue = params.lsf_queue ?: 'normal' }
+ // withLabel:process_long { queue = params.lsf_queue ?: 'long' }
+ // withLabel:process_high_memory { queue = params.lsf_queue ?: 'hugemem'}
+}
+
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h'
+
+ lsf_queue = 'normal' // --lsf_queue long
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-atacseq/conf/pbs.config b/pipelines/nf-atacseq/conf/pbs.config
new file mode 100644
index 0000000..f36b46d
--- /dev/null
+++ b/pipelines/nf-atacseq/conf/pbs.config
@@ -0,0 +1,53 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ PBS Pro executor configuration — nf-atacseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a PBS Pro cluster:
+
+ nextflow run main.nf -profile singularity,pbs [options]
+
+ Optional runtime parameters:
+ --pbs_account PBS account/project for job accounting (-A flag)
+ --pbs_queue Default queue (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example:
+ nextflow run main.nf \
+ -profile singularity,pbs \
+ --pbs_account mylab \
+ --pbs_queue workq \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'pbspro'
+ clusterOptions = {
+ def opts = []
+ if (params.pbs_account) opts << "-A ${params.pbs_account}"
+ if (params.pbs_queue) opts << "-q ${params.pbs_queue}"
+ opts.join(' ')
+ }
+}
+
+// Increase resource limits for HPC environments
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h' // 7-day wall clock
+
+ // PBS Pro-specific (set at runtime, not hardcoded here)
+ pbs_account = null // --pbs_account mylab
+ pbs_queue = null // --pbs_queue workq
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-atacseq/conf/sge.config b/pipelines/nf-atacseq/conf/sge.config
new file mode 100644
index 0000000..6f7b30e
--- /dev/null
+++ b/pipelines/nf-atacseq/conf/sge.config
@@ -0,0 +1,57 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SGE/UGE executor configuration — nf-atacseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a Sun Grid Engine (SGE) or Univa Grid Engine (UGE) cluster:
+
+ nextflow run main.nf -profile singularity,sge [options]
+
+ Optional runtime parameters:
+ --sge_queue SGE queue name (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Note: many SGE sites enforce per-slot memory reservation (-l h_vmem).
+ If yours does, set it via clusterOptions (e.g. h_vmem = memory / cpus) — check with your admin.
+
+ Note: The 'smp' parallel environment name may differ on your cluster.
+ Common alternatives: 'orte', 'mpi', 'parallel', 'threaded'.
+ Check with: qconf -spl
+
+ Example (UCLA Hoffman2):
+ nextflow run main.nf \
+ -profile singularity,sge \
+ --sge_queue highp \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'sge'
+ penv = 'smp' // Parallel environment for multi-slot jobs
+
+ // Optional: per-label queue selection
+ // withLabel:process_single { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_low { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_medium { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_high { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_high_memory { queue = params.sge_queue ?: 'highmem' }
+}
+
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h'
+
+ sge_queue = null // --sge_queue all.q
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-atacseq/conf/slurm.config b/pipelines/nf-atacseq/conf/slurm.config
new file mode 100644
index 0000000..80fdceb
--- /dev/null
+++ b/pipelines/nf-atacseq/conf/slurm.config
@@ -0,0 +1,57 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SLURM executor configuration — nf-atacseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a SLURM cluster:
+
+ nextflow run main.nf -profile singularity,slurm [options]
+
+ Optional runtime parameters:
+ --slurm_account SLURM account/project for job accounting (-A flag)
+ --slurm_queue Default partition/queue (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example (NIH Biowulf):
+ nextflow run main.nf \
+ -profile singularity,slurm \
+ --slurm_account mylab \
+ --slurm_queue norm \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'slurm'
+ clusterOptions = { params.slurm_account ? "--account=${params.slurm_account}" : '' }
+
+ // Optional: per-label partition routing.
+ // Uncomment and customize partition names for your cluster.
+ // withLabel:process_single { queue = params.slurm_queue ?: 'short' }
+ // withLabel:process_low { queue = params.slurm_queue ?: 'short' }
+ // withLabel:process_medium { queue = params.slurm_queue ?: 'norm' }
+ // withLabel:process_high { queue = params.slurm_queue ?: 'norm' }
+ // withLabel:process_long { queue = params.slurm_queue ?: 'long' }
+ // withLabel:process_high_memory { queue = params.slurm_queue ?: 'highmem' }
+}
+
+// Increase resource limits for HPC environments
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h' // 7-day wall clock
+
+ // SLURM-specific (set at runtime, not hardcoded here)
+ slurm_account = null // --slurm_account mylab
+ slurm_queue = null // --slurm_queue norm
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-atacseq/modules.json b/pipelines/nf-atacseq/modules.json
new file mode 100644
index 0000000..0d78430
--- /dev/null
+++ b/pipelines/nf-atacseq/modules.json
@@ -0,0 +1,5 @@
+{
+ "name": "wasp2/nf-atacseq",
+ "homePage": "",
+ "repos": {}
+}
diff --git a/pipelines/nf-atacseq/nextflow.config b/pipelines/nf-atacseq/nextflow.config
index 1239242..e844cbf 100644
--- a/pipelines/nf-atacseq/nextflow.config
+++ b/pipelines/nf-atacseq/nextflow.config
@@ -65,6 +65,10 @@ params {
max_memory = '128.GB'
max_time = '240.h'
+ // Institutional config support (nf-core compatible)
+ custom_config_base = 'https://raw.githubusercontent.com/nf-core/configs/master'
+ custom_config_version = 'master'
+
// Generic options
help = false
version = false
@@ -75,6 +79,13 @@ params {
includeConfig 'conf/base.config'
includeConfig 'conf/modules.config'
+// Load nf-core institutional configs
+try {
+ includeConfig "${params.custom_config_base}/nfcore_custom.config"
+} catch (Exception e) {
+ System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}")
+}
+
// Execution profiles
profiles {
debug {
@@ -97,6 +108,9 @@ profiles {
singularity {
singularity.enabled = true
singularity.autoMounts = true
+ singularity.cacheDir = System.getenv('NXF_SINGULARITY_CACHEDIR') ?: "${HOME}/.singularity/cache"
+ singularity.pullTimeout = '60 min'
+ singularity.envWhitelist = 'NTHREADS,OMP_NUM_THREADS'
conda.enabled = false
docker.enabled = false
}
@@ -112,6 +126,27 @@ profiles {
test_local {
includeConfig 'conf/test_local.config'
}
+
+ // HPC executor profiles — combine with container profile:
+ // nextflow run main.nf -profile singularity,slurm [options]
+ slurm {
+ includeConfig 'conf/slurm.config'
+ }
+ sge {
+ includeConfig 'conf/sge.config'
+ }
+ lsf {
+ includeConfig 'conf/lsf.config'
+ }
+ pbs {
+ includeConfig 'conf/pbs.config'
+ }
+
+ // Cloud executor profiles — combine with container profile:
+ // nextflow run main.nf -profile docker,awsbatch --awsbatch_queue <queue-name> -work-dir s3://<bucket>/work
+ awsbatch {
+ includeConfig 'conf/awsbatch.config'
+ }
}
// Execution reports
diff --git a/pipelines/nf-atacseq/nf-test.config b/pipelines/nf-atacseq/nf-test.config
index b1c1572..32f4307 100644
--- a/pipelines/nf-atacseq/nf-test.config
+++ b/pipelines/nf-atacseq/nf-test.config
@@ -1,10 +1,10 @@
config {
- testsDir "tests"
- workDir ".nf-test"
- configFile "nextflow.config"
+ // nf-core compliant nf-test configuration
+ testsDir "."
+ workDir System.getenv("NFT_WORKDIR") ?: ".nf-test"
+ configFile "tests/nextflow.config"
profile "test_stub,docker"
- // Global settings
stage {
copy "assets/**"
copy "conf/**"
@@ -12,5 +12,4 @@ config {
copy "subworkflows/**"
copy "workflows/**"
}
-
}
diff --git a/pipelines/nf-atacseq/tests/nextflow.config b/pipelines/nf-atacseq/tests/nextflow.config
new file mode 100644
index 0000000..979614b
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/nextflow.config
@@ -0,0 +1,9 @@
+/*
+ * Test configuration for nf-test
+ * Loaded via nf-test.config configFile directive
+ */
+params {
+ max_cpus = 2
+ max_memory = '6.GB'
+ max_time = '6.h'
+}
diff --git a/pipelines/nf-outrider/.gitattributes b/pipelines/nf-outrider/.gitattributes
new file mode 100644
index 0000000..ff938c9
--- /dev/null
+++ b/pipelines/nf-outrider/.gitattributes
@@ -0,0 +1,2 @@
+*.config linguist-language=Nextflow
+*.nf linguist-language=Nextflow
diff --git a/pipelines/nf-outrider/.github/ISSUE_TEMPLATE/bug_report.yml b/pipelines/nf-outrider/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..c7c7908
--- /dev/null
+++ b/pipelines/nf-outrider/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,73 @@
+name: Bug Report
+description: Report a bug in wasp2/nf-outrider
+labels: ["bug"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to report a bug in **wasp2/nf-outrider**.
+ Please fill out the sections below to help us diagnose the issue.
+
+ - type: textarea
+ id: description
+ attributes:
+ label: Description of the bug
+ description: A clear and concise description of what the bug is.
+ validations:
+ required: true
+
+ - type: textarea
+ id: command
+ attributes:
+ label: Command used and target workflow
+ description: |
+ Please provide the full command you used to run the pipeline.
+ Include all parameters and profiles.
+ placeholder: |
+ nextflow run main.nf -profile docker,test --outdir results
+ validations:
+ required: true
+
+ - type: textarea
+ id: output
+ attributes:
+ label: Relevant output / error message
+ description: |
+ Please copy and paste the relevant output or error message.
+ This will be automatically formatted into code.
+ render: console
+ validations:
+ required: true
+
+ - type: textarea
+ id: system
+ attributes:
+ label: System information
+ description: |
+ Please provide details about your computing environment.
+ value: |
+ - Nextflow version:
+ - Container engine (Docker/Singularity/Conda):
+ - Container version:
+ - OS:
+ - Hardware (local/HPC/cloud):
+ validations:
+ required: true
+
+ - type: textarea
+ id: nextflow-version
+ attributes:
+ label: Nextflow log
+ description: |
+ Paste the output of `nextflow log last` or the relevant `.nextflow.log` contents.
+ render: console
+ validations:
+ required: false
+
+ - type: textarea
+ id: additional
+ attributes:
+ label: Additional context
+ description: Add any other context about the problem here (screenshots, sample data, config files).
+ validations:
+ required: false
diff --git a/pipelines/nf-outrider/.github/ISSUE_TEMPLATE/feature_request.yml b/pipelines/nf-outrider/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..f55f020
--- /dev/null
+++ b/pipelines/nf-outrider/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,51 @@
+name: Feature Request
+description: Suggest a new feature or enhancement for wasp2/nf-outrider
+labels: ["enhancement"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for suggesting a feature for **wasp2/nf-outrider**.
+ Please describe your idea clearly so we can evaluate it.
+
+ - type: textarea
+ id: description
+ attributes:
+ label: Description
+ description: A clear and concise description of the feature you would like.
+ validations:
+ required: true
+
+ - type: textarea
+ id: problem
+ attributes:
+ label: Is your feature request related to a problem?
+ description: |
+ A clear and concise description of the problem.
+ e.g. "I'm always frustrated when [...]"
+ validations:
+ required: false
+
+ - type: textarea
+ id: solution
+ attributes:
+ label: Proposed solution
+ description: A clear and concise description of what you want to happen.
+ validations:
+ required: false
+
+ - type: textarea
+ id: alternatives
+ attributes:
+ label: Alternatives considered
+ description: A description of any alternative solutions or features you have considered.
+ validations:
+ required: false
+
+ - type: textarea
+ id: additional
+ attributes:
+ label: Additional context
+ description: Add any other context, references, or screenshots about the feature request here.
+ validations:
+ required: false
diff --git a/pipelines/nf-outrider/.github/PULL_REQUEST_TEMPLATE.md b/pipelines/nf-outrider/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..80ee771
--- /dev/null
+++ b/pipelines/nf-outrider/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,29 @@
+## PR checklist
+
+- [ ] This comment contains a description of changes with context.
+- [ ] Tests pass locally with `nf-test test` using the `test,docker` profile.
+- [ ] If you have added new modules/processes, they include `nf-test` tests.
+- [ ] If applicable, new parameters are documented in `nextflow_schema.json`.
+- [ ] Pipeline runs successfully with `test` and `test_stub` profiles.
+- [ ] `CHANGELOG.md` is updated with noteworthy changes.
+
+## Description
+
+
+
+Fixes # (issue)
+
+## Type of change
+
+- [ ] Bug fix (non-breaking change that fixes an issue)
+- [ ] New feature (non-breaking change that adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] Documentation update
+
+## How has this been tested?
+
+
+
+- [ ] `nf-test test --profile test,docker`
+- [ ] `nf-test test --profile test_stub,docker`
+- [ ] Manual pipeline run with real data
diff --git a/pipelines/nf-outrider/.github/workflows/ci.yml b/pipelines/nf-outrider/.github/workflows/ci.yml
new file mode 100644
index 0000000..cd352f6
--- /dev/null
+++ b/pipelines/nf-outrider/.github/workflows/ci.yml
@@ -0,0 +1,74 @@
+name: nf-outrider CI
+on:
+ push:
+ branches:
+ - dev
+ - main
+ pull_request:
+ branches:
+ - dev
+ - main
+ release:
+ types: [published]
+
+# Cancel in-progress runs for the same branch/PR
+concurrency:
+ group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+ cancel-in-progress: true
+
+env:
+ NXF_ANSI_LOG: false
+ NFT_VER: "0.9.2"
+ NFT_WORKDIR: ".nf-test"
+
+jobs:
+ test:
+ name: "nf-test | ${{ matrix.NXF_VER }} | ${{ matrix.profile }}"
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ NXF_VER:
+ - "23.04.0"
+ - "latest-everything"
+ profile:
+ - "test,docker"
+ - "test_stub,docker"
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "${{ matrix.NXF_VER }}"
+
+ - name: Setup nf-test
+ uses: nf-core/setup-nf-test@v1
+ with:
+ version: "${{ env.NFT_VER }}"
+
+ - name: Cache nf-test
+ uses: actions/cache@v4
+ with:
+ path: |
+ ${{ env.NFT_WORKDIR }}
+ ~/.nextflow/assets
+ key: "${{ runner.os }}-nftest-${{ hashFiles('nf-test.config') }}-${{ hashFiles('**/*.nf') }}"
+ restore-keys: |
+ ${{ runner.os }}-nftest-${{ hashFiles('nf-test.config') }}-
+ ${{ runner.os }}-nftest-
+
+ - name: Run nf-test
+ run: |
+ nf-test test \
+ --profile=${{ matrix.profile }} \
+ --tap=test-output.tap \
+ --verbose
+
+ - name: Upload test output
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: "nf-test-output-${{ matrix.NXF_VER }}-${{ strategy.job-index }}"
+ path: test-output.tap
diff --git a/pipelines/nf-outrider/.github/workflows/linting.yml b/pipelines/nf-outrider/.github/workflows/linting.yml
new file mode 100644
index 0000000..a4b0ada
--- /dev/null
+++ b/pipelines/nf-outrider/.github/workflows/linting.yml
@@ -0,0 +1,56 @@
+name: nf-core linting
+on:
+ push:
+ branches:
+ - dev
+ - main
+ pull_request:
+ branches:
+ - dev
+ - main
+
+# Cancel in-progress runs for the same branch/PR
+concurrency:
+ group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+ cancel-in-progress: true
+
+jobs:
+ nf-core-lint:
+ name: nf-core lint
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "latest-everything"
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.12"
+
+ - name: Install nf-core tools
+ run: pip install nf-core
+
+ - name: Run nf-core lint
+ run: nf-core lint --dir . --fail-warned
+ env:
+ GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+
+ nextflow-config:
+ name: Nextflow config check
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "latest-everything"
+
+ - name: Check Nextflow config
+ run: nextflow config -show-profiles
diff --git a/pipelines/nf-outrider/.gitignore b/pipelines/nf-outrider/.gitignore
new file mode 100644
index 0000000..a54086b
--- /dev/null
+++ b/pipelines/nf-outrider/.gitignore
@@ -0,0 +1,7 @@
+.nextflow*
+work/
+results/
+.nf-test/
+testing/
+testing*
+*.pyc
diff --git a/pipelines/nf-outrider/.nf-core.yml b/pipelines/nf-outrider/.nf-core.yml
index dc8c749..109d1da 100644
--- a/pipelines/nf-outrider/.nf-core.yml
+++ b/pipelines/nf-outrider/.nf-core.yml
@@ -2,6 +2,7 @@
# See: https://nf-co.re/docs/nf-core-tools/pipelines/lint
repository_type: pipeline
+nf_core_version: "3.0"
# nf-core template version this pipeline is based on
template:
diff --git a/pipelines/nf-outrider/.prettierignore b/pipelines/nf-outrider/.prettierignore
new file mode 100644
index 0000000..9cb26d6
--- /dev/null
+++ b/pipelines/nf-outrider/.prettierignore
@@ -0,0 +1,9 @@
+email_template.html
+*.config
+.nextflow*
+work/
+results/
+.nf-test/
+nextflow_schema.json
+*.nf
+assets/
diff --git a/pipelines/nf-outrider/.prettierrc.yml b/pipelines/nf-outrider/.prettierrc.yml
new file mode 100644
index 0000000..c81f9a7
--- /dev/null
+++ b/pipelines/nf-outrider/.prettierrc.yml
@@ -0,0 +1 @@
+printWidth: 120
diff --git a/pipelines/nf-outrider/CODE_OF_CONDUCT.md b/pipelines/nf-outrider/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..3d60097
--- /dev/null
+++ b/pipelines/nf-outrider/CODE_OF_CONDUCT.md
@@ -0,0 +1,36 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a welcoming experience for everyone.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment include:
+
+- Using welcoming and inclusive language
+- Being respectful of differing viewpoints and experiences
+- Gracefully accepting constructive criticism
+- Focusing on what is best for the community
+
+Examples of unacceptable behavior include:
+
+- Trolling, insulting or derogatory comments, and personal attacks
+- Publishing others' private information without explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Enforcement
+
+Instances of unacceptable behavior may be reported to the project team via
+[GitHub Issues](https://github.com/mcvickerlab/WASP2/issues).
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct/][v2.1].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct/
diff --git a/pipelines/nf-outrider/conf/awsbatch.config b/pipelines/nf-outrider/conf/awsbatch.config
new file mode 100644
index 0000000..61fe689
--- /dev/null
+++ b/pipelines/nf-outrider/conf/awsbatch.config
@@ -0,0 +1,64 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ AWS Batch executor configuration — nf-outrider
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to AWS Batch:
+
+ nextflow run main.nf -profile docker,awsbatch \
+ --awsbatch_queue <queue-name> \
+ -work-dir s3://<bucket>/work [options]
+
+ Required runtime parameters:
+ --awsbatch_queue AWS Batch job queue name or ARN (no default — must be provided)
+
+ Optional runtime parameters:
+ --awsbatch_cli_path Path to AWS CLI inside the container (default: /home/ec2-user/miniconda/bin/aws)
+ --aws_region AWS region (default: us-east-1)
+ --max_cpus Max CPUs per process (default: 96)
+ --max_memory Max memory per process (default: 768.GB)
+ --max_time Max time per process (default: 168.h)
+
+ IMPORTANT: Use an S3 work directory (-work-dir s3://bucket/work) to ensure
+ intermediate files are accessible across Batch compute environments.
+
+ Example:
+ nextflow run main.nf \
+ -profile docker,awsbatch \
+ --awsbatch_queue my-batch-queue \
+ --outdir s3://my-bucket/results \
+ -work-dir s3://my-bucket/work
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'awsbatch'
+ queue = params.awsbatch_queue
+}
+
+params {
+ max_cpus = 96
+ max_memory = '768.GB'
+ max_time = '168.h'
+
+ // AWS Batch-specific (set at runtime, not hardcoded here)
+ awsbatch_queue = null // --awsbatch_queue my-queue (REQUIRED)
+ awsbatch_cli_path = '/home/ec2-user/miniconda/bin/aws'
+ aws_region = 'us-east-1'
+}
+
+aws {
+ region = params.aws_region ?: 'us-east-1'
+
+ batch {
+ cliPath = params.awsbatch_cli_path
+ }
+}
+
+// nf-core v3.x resource limits override for cloud
+process {
+ resourceLimits = [
+ cpus: 96,
+ memory: 768.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-outrider/conf/base.config b/pipelines/nf-outrider/conf/base.config
index d53f44f..8b076a3 100644
--- a/pipelines/nf-outrider/conf/base.config
+++ b/pipelines/nf-outrider/conf/base.config
@@ -13,7 +13,7 @@ process {
time = { check_max( 4.h * task.attempt, 'time' ) }
// Error handling
- errorStrategy = { task.exitStatus in [143,137,104,134,139,140] ? 'retry' : 'finish' }
+ errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
maxRetries = 3
maxErrors = '-1'
@@ -51,4 +51,11 @@ process {
errorStrategy = 'retry'
maxRetries = 2
}
+
+ // nf-core resource limits — replaces params.max_cpus/max_memory/max_time
+ resourceLimits = [
+ cpus: 16,
+ memory: 200.GB,
+ time: 240.h
+ ]
}
diff --git a/pipelines/nf-outrider/conf/lsf.config b/pipelines/nf-outrider/conf/lsf.config
new file mode 100644
index 0000000..5cfffcb
--- /dev/null
+++ b/pipelines/nf-outrider/conf/lsf.config
@@ -0,0 +1,50 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ LSF executor configuration — nf-outrider
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to an IBM Spectrum LSF cluster:
+
+ nextflow run main.nf -profile singularity,lsf [options]
+
+ Optional runtime parameters:
+ --lsf_queue LSF queue name (default: 'normal')
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example (Wellcome Sanger):
+ nextflow run main.nf \
+ -profile singularity,lsf \
+ --lsf_queue long \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'lsf'
+
+ // Optional: per-label queue routing
+ // withLabel:process_single { queue = params.lsf_queue ?: 'short' }
+ // withLabel:process_low { queue = params.lsf_queue ?: 'short' }
+ // withLabel:process_medium { queue = params.lsf_queue ?: 'normal' }
+ // withLabel:process_high { queue = params.lsf_queue ?: 'normal' }
+ // withLabel:process_long { queue = params.lsf_queue ?: 'long' }
+ // withLabel:process_high_memory { queue = params.lsf_queue ?: 'hugemem'}
+}
+
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h'
+
+ lsf_queue = 'normal' // --lsf_queue long
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-outrider/conf/pbs.config b/pipelines/nf-outrider/conf/pbs.config
new file mode 100644
index 0000000..3258a1d
--- /dev/null
+++ b/pipelines/nf-outrider/conf/pbs.config
@@ -0,0 +1,53 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ PBS Pro executor configuration — nf-outrider
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a PBS Pro cluster:
+
+ nextflow run main.nf -profile singularity,pbs [options]
+
+ Optional runtime parameters:
+ --pbs_account PBS account/project for job accounting (-A flag)
+ --pbs_queue Default queue (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example:
+ nextflow run main.nf \
+ -profile singularity,pbs \
+ --pbs_account mylab \
+ --pbs_queue workq \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'pbspro'
+ clusterOptions = {
+ def opts = []
+ if (params.pbs_account) opts << "-A ${params.pbs_account}"
+ if (params.pbs_queue) opts << "-q ${params.pbs_queue}"
+ opts.join(' ')
+ }
+}
+
+// Increase resource limits for HPC environments
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h' // 7-day wall clock
+
+ // PBS Pro-specific (set at runtime, not hardcoded here)
+ pbs_account = null // --pbs_account mylab
+ pbs_queue = null // --pbs_queue workq
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-outrider/conf/sge.config b/pipelines/nf-outrider/conf/sge.config
new file mode 100644
index 0000000..4e19f8b
--- /dev/null
+++ b/pipelines/nf-outrider/conf/sge.config
@@ -0,0 +1,57 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SGE/UGE executor configuration — nf-outrider
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a Sun Grid Engine (SGE) or Univa Grid Engine (UGE) cluster:
+
+ nextflow run main.nf -profile singularity,sge [options]
+
+ Optional runtime parameters:
+ --sge_queue SGE queue name (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Note: SGE requires per-slot memory reservation (-l h_vmem).
+ Nextflow calculates this automatically as memory / cpus when executor = 'sge'.
+
+ Note: The 'smp' parallel environment name may differ on your cluster.
+ Common alternatives: 'orte', 'mpi', 'parallel', 'threaded'.
+ Check with: qconf -spl
+
+ Example (UCLA Hoffman2):
+ nextflow run main.nf \
+ -profile singularity,sge \
+ --sge_queue highp \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'sge'
+ penv = 'smp' // Parallel environment for multi-slot jobs
+
+ // Optional: per-label queue selection
+ // withLabel:process_single { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_low { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_medium { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_high { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_high_memory { queue = params.sge_queue ?: 'highmem' }
+}
+
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h'
+
+ sge_queue = null // --sge_queue all.q
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-outrider/conf/slurm.config b/pipelines/nf-outrider/conf/slurm.config
new file mode 100644
index 0000000..106688a
--- /dev/null
+++ b/pipelines/nf-outrider/conf/slurm.config
@@ -0,0 +1,57 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SLURM executor configuration — nf-outrider
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a SLURM cluster:
+
+ nextflow run main.nf -profile singularity,slurm [options]
+
+ Optional runtime parameters:
+ --slurm_account SLURM account/project for job accounting (-A flag)
+ --slurm_queue Default partition/queue (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example (NIH Biowulf):
+ nextflow run main.nf \
+ -profile singularity,slurm \
+ --slurm_account mylab \
+ --slurm_queue norm \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'slurm'
+ clusterOptions = { params.slurm_account ? "--account=${params.slurm_account}" : '' }
+
+ // Optional: per-label partition routing.
+ // Uncomment and customize partition names for your cluster.
+ // withLabel:process_single { queue = params.slurm_queue ?: 'short' }
+ // withLabel:process_low { queue = params.slurm_queue ?: 'short' }
+ // withLabel:process_medium { queue = params.slurm_queue ?: 'norm' }
+ // withLabel:process_high { queue = params.slurm_queue ?: 'norm' }
+ // withLabel:process_long { queue = params.slurm_queue ?: 'long' }
+ // withLabel:process_high_memory { queue = params.slurm_queue ?: 'highmem' }
+}
+
+// Increase resource limits for HPC environments
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h' // 7-day wall clock
+
+ // SLURM-specific (set at runtime, not hardcoded here)
+ slurm_account = null // --slurm_account mylab
+ slurm_queue = null // --slurm_queue norm
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-outrider/modules.json b/pipelines/nf-outrider/modules.json
new file mode 100644
index 0000000..78f330d
--- /dev/null
+++ b/pipelines/nf-outrider/modules.json
@@ -0,0 +1,5 @@
+{
+ "name": "wasp2/nf-outrider",
+ "homePage": "",
+ "repos": {}
+}
diff --git a/pipelines/nf-outrider/nextflow.config b/pipelines/nf-outrider/nextflow.config
index d099899..a70e9d3 100644
--- a/pipelines/nf-outrider/nextflow.config
+++ b/pipelines/nf-outrider/nextflow.config
@@ -70,6 +70,10 @@ params {
max_memory = '128.GB'
max_time = '240.h'
+ // Institutional config support (nf-core compatible)
+ custom_config_version = 'master'
+ custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
+
// Generic options
help = false
version = false
@@ -80,6 +84,13 @@ params {
includeConfig 'conf/base.config'
includeConfig 'conf/modules.config'
+// Load nf-core institutional configs
+try {
+ includeConfig "${params.custom_config_base}/nfcore_custom.config"
+} catch (Exception e) {
+ System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}")
+}
+
// Execution profiles
profiles {
debug {
@@ -102,6 +113,9 @@ profiles {
singularity {
singularity.enabled = true
singularity.autoMounts = true
+ singularity.cacheDir = System.getenv('NXF_SINGULARITY_CACHEDIR') ?: "${HOME}/.singularity/cache"
+ singularity.pullTimeout = '60 min'
+ singularity.envWhitelist = 'NTHREADS,OMP_NUM_THREADS'
conda.enabled = false
docker.enabled = false
}
@@ -117,6 +131,27 @@ profiles {
test_local {
includeConfig 'conf/test_local.config'
}
+
+ // HPC executor profiles — combine with container profile:
+ // nextflow run main.nf -profile singularity,slurm [options]
+ slurm {
+ includeConfig 'conf/slurm.config'
+ }
+ sge {
+ includeConfig 'conf/sge.config'
+ }
+ lsf {
+ includeConfig 'conf/lsf.config'
+ }
+ pbs {
+ includeConfig 'conf/pbs.config'
+ }
+
+ // Cloud executor profiles — combine with container profile:
+ // nextflow run main.nf -profile docker,awsbatch --awsbatch_queue my-queue -work-dir s3://bucket/work
+ awsbatch {
+ includeConfig 'conf/awsbatch.config'
+ }
}
// Execution reports
diff --git a/pipelines/nf-outrider/nf-test.config b/pipelines/nf-outrider/nf-test.config
index 640ba2d..7f3b1e6 100644
--- a/pipelines/nf-outrider/nf-test.config
+++ b/pipelines/nf-outrider/nf-test.config
@@ -1,11 +1,7 @@
-/*
- * nf-test configuration for nf-outrider pipeline
- * Issue: #108
- */
-
config {
- testsDir "tests"
- workDir ".nf-test"
- configFile "nextflow.config"
+ // nf-core compliant nf-test configuration
+ testsDir "."
+ workDir System.getenv("NFT_WORKDIR") ?: ".nf-test"
+ configFile "tests/nextflow.config"
profile "test_stub,docker"
}
diff --git a/pipelines/nf-outrider/tests/nextflow.config b/pipelines/nf-outrider/tests/nextflow.config
new file mode 100644
index 0000000..979614b
--- /dev/null
+++ b/pipelines/nf-outrider/tests/nextflow.config
@@ -0,0 +1,9 @@
+/*
+ * Test configuration for nf-test
+ * Loaded via nf-test.config configFile directive
+ */
+params {
+ max_cpus = 2
+ max_memory = '6.GB'
+ max_time = '6.h'
+}
diff --git a/pipelines/nf-rnaseq/.gitattributes b/pipelines/nf-rnaseq/.gitattributes
new file mode 100644
index 0000000..ff938c9
--- /dev/null
+++ b/pipelines/nf-rnaseq/.gitattributes
@@ -0,0 +1,2 @@
+*.config linguist-language=Nextflow
+*.nf linguist-language=Nextflow
diff --git a/pipelines/nf-rnaseq/.github/ISSUE_TEMPLATE/bug_report.yml b/pipelines/nf-rnaseq/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..e69051b
--- /dev/null
+++ b/pipelines/nf-rnaseq/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,73 @@
+name: Bug Report
+description: Report a bug in wasp2/nf-rnaseq
+labels: ["bug"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to report a bug in **wasp2/nf-rnaseq**.
+ Please fill out the sections below to help us diagnose the issue.
+
+ - type: textarea
+ id: description
+ attributes:
+ label: Description of the bug
+ description: A clear and concise description of what the bug is.
+ validations:
+ required: true
+
+ - type: textarea
+ id: command
+ attributes:
+ label: Command used and target workflow
+ description: |
+ Please provide the full command you used to run the pipeline.
+ Include all parameters and profiles.
+ placeholder: |
+ nextflow run main.nf -profile docker,test --outdir results
+ validations:
+ required: true
+
+ - type: textarea
+ id: output
+ attributes:
+ label: Relevant output / error message
+ description: |
+ Please copy and paste the relevant output or error message.
+ This will be automatically formatted into code.
+ render: console
+ validations:
+ required: true
+
+ - type: textarea
+ id: system
+ attributes:
+ label: System information
+ description: |
+ Please provide details about your computing environment.
+ value: |
+ - Nextflow version:
+ - Container engine (Docker/Singularity/Conda):
+ - Container version:
+ - OS:
+ - Hardware (local/HPC/cloud):
+ validations:
+ required: true
+
+ - type: textarea
+ id: nextflow-version
+ attributes:
+ label: Nextflow log
+ description: |
+ Paste the output of `nextflow log last` or the relevant `.nextflow.log` contents.
+ render: console
+ validations:
+ required: false
+
+ - type: textarea
+ id: additional
+ attributes:
+ label: Additional context
+ description: Add any other context about the problem here (screenshots, sample data, config files).
+ validations:
+ required: false
diff --git a/pipelines/nf-rnaseq/.github/ISSUE_TEMPLATE/feature_request.yml b/pipelines/nf-rnaseq/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..0a80a73
--- /dev/null
+++ b/pipelines/nf-rnaseq/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,51 @@
+name: Feature Request
+description: Suggest a new feature or enhancement for wasp2/nf-rnaseq
+labels: ["enhancement"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for suggesting a feature for **wasp2/nf-rnaseq**.
+ Please describe your idea clearly so we can evaluate it.
+
+ - type: textarea
+ id: description
+ attributes:
+ label: Description
+ description: A clear and concise description of the feature you would like.
+ validations:
+ required: true
+
+ - type: textarea
+ id: problem
+ attributes:
+ label: Is your feature request related to a problem?
+ description: |
+ A clear and concise description of the problem.
+ e.g. "I'm always frustrated when [...]"
+ validations:
+ required: false
+
+ - type: textarea
+ id: solution
+ attributes:
+ label: Proposed solution
+ description: A clear and concise description of what you want to happen.
+ validations:
+ required: false
+
+ - type: textarea
+ id: alternatives
+ attributes:
+ label: Alternatives considered
+ description: A description of any alternative solutions or features you have considered.
+ validations:
+ required: false
+
+ - type: textarea
+ id: additional
+ attributes:
+ label: Additional context
+ description: Add any other context, references, or screenshots about the feature request here.
+ validations:
+ required: false
diff --git a/pipelines/nf-rnaseq/.github/PULL_REQUEST_TEMPLATE.md b/pipelines/nf-rnaseq/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..80ee771
--- /dev/null
+++ b/pipelines/nf-rnaseq/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,29 @@
+## PR checklist
+
+- [ ] This comment contains a description of changes with context.
+- [ ] Tests pass locally with `nf-test test` using the `test,docker` profile.
+- [ ] If you have added new modules/processes, they include `nf-test` tests.
+- [ ] If applicable, new parameters are documented in `nextflow_schema.json`.
+- [ ] Pipeline runs successfully with `test` and `test_stub` profiles.
+- [ ] `CHANGELOG.md` is updated with noteworthy changes.
+
+## Description
+
+
+
+Fixes # (issue)
+
+## Type of change
+
+- [ ] Bug fix (non-breaking change that fixes an issue)
+- [ ] New feature (non-breaking change that adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] Documentation update
+
+## How has this been tested?
+
+
+
+- [ ] `nf-test test --profile test,docker`
+- [ ] `nf-test test --profile test_stub,docker`
+- [ ] Manual pipeline run with real data
diff --git a/pipelines/nf-rnaseq/.github/workflows/ci.yml b/pipelines/nf-rnaseq/.github/workflows/ci.yml
new file mode 100644
index 0000000..a697d3a
--- /dev/null
+++ b/pipelines/nf-rnaseq/.github/workflows/ci.yml
@@ -0,0 +1,74 @@
+name: nf-rnaseq CI
+on:
+ push:
+ branches:
+ - dev
+ - main
+ pull_request:
+ branches:
+ - dev
+ - main
+ release:
+ types: [published]
+
+# Cancel in-progress runs for the same branch/PR
+concurrency:
+ group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+ cancel-in-progress: true
+
+env:
+ NXF_ANSI_LOG: false
+ NFT_VER: "0.9.2"
+ NFT_WORKDIR: ".nf-test"
+
+jobs:
+ test:
+ name: "nf-test | ${{ matrix.NXF_VER }} | ${{ matrix.profile }}"
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ NXF_VER:
+ - "23.04.0"
+ - "latest-everything"
+ profile:
+ - "test,docker"
+ - "test_stub,docker"
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "${{ matrix.NXF_VER }}"
+
+ - name: Setup nf-test
+ uses: nf-core/setup-nf-test@v1
+ with:
+ version: "${{ env.NFT_VER }}"
+
+ - name: Cache nf-test
+ uses: actions/cache@v4
+ with:
+ path: |
+ ${{ env.NFT_WORKDIR }}
+ ~/.nextflow/assets
+ key: "${{ runner.os }}-nftest-${{ hashFiles('nf-test.config') }}-${{ hashFiles('**/*.nf') }}"
+ restore-keys: |
+ ${{ runner.os }}-nftest-${{ hashFiles('nf-test.config') }}-
+ ${{ runner.os }}-nftest-
+
+ - name: Run nf-test
+ run: |
+ nf-test test \
+ --profile=${{ matrix.profile }} \
+ --tap=test-output.tap \
+ --verbose
+
+ - name: Upload test output
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: "nf-test-output-${{ matrix.NXF_VER }}-${{ strategy.job-index }}"
+ path: test-output.tap
diff --git a/pipelines/nf-rnaseq/.github/workflows/linting.yml b/pipelines/nf-rnaseq/.github/workflows/linting.yml
new file mode 100644
index 0000000..a4b0ada
--- /dev/null
+++ b/pipelines/nf-rnaseq/.github/workflows/linting.yml
@@ -0,0 +1,56 @@
+name: nf-core linting
+on:
+ push:
+ branches:
+ - dev
+ - main
+ pull_request:
+ branches:
+ - dev
+ - main
+
+# Cancel in-progress runs for the same branch/PR
+concurrency:
+ group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+ cancel-in-progress: true
+
+jobs:
+ nf-core-lint:
+ name: nf-core lint
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "latest-everything"
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.12"
+
+ - name: Install nf-core tools
+ run: pip install nf-core
+
+ - name: Run nf-core lint
+ run: nf-core pipelines lint --dir . --fail-warned
+ env:
+ GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+
+ nextflow-config:
+ name: Nextflow config check
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "latest-everything"
+
+ - name: Check Nextflow config
+ run: nextflow config -show-profiles
diff --git a/pipelines/nf-rnaseq/.gitignore b/pipelines/nf-rnaseq/.gitignore
new file mode 100644
index 0000000..a54086b
--- /dev/null
+++ b/pipelines/nf-rnaseq/.gitignore
@@ -0,0 +1,7 @@
+.nextflow*
+work/
+results/
+.nf-test/
+testing/
+testing*
+*.pyc
diff --git a/pipelines/nf-rnaseq/.nf-core.yml b/pipelines/nf-rnaseq/.nf-core.yml
index 8f9cc6c..17b2a9d 100644
--- a/pipelines/nf-rnaseq/.nf-core.yml
+++ b/pipelines/nf-rnaseq/.nf-core.yml
@@ -2,6 +2,7 @@
# See: https://nf-co.re/docs/nf-core-tools/pipelines/lint
repository_type: pipeline
+nf_core_version: "3.0"
# nf-core template version this pipeline is based on
template:
diff --git a/pipelines/nf-rnaseq/.prettierignore b/pipelines/nf-rnaseq/.prettierignore
new file mode 100644
index 0000000..9cb26d6
--- /dev/null
+++ b/pipelines/nf-rnaseq/.prettierignore
@@ -0,0 +1,9 @@
+email_template.html
+*.config
+.nextflow*
+work/
+results/
+.nf-test/
+nextflow_schema.json
+*.nf
+assets/
diff --git a/pipelines/nf-rnaseq/.prettierrc.yml b/pipelines/nf-rnaseq/.prettierrc.yml
new file mode 100644
index 0000000..c81f9a7
--- /dev/null
+++ b/pipelines/nf-rnaseq/.prettierrc.yml
@@ -0,0 +1 @@
+printWidth: 120
diff --git a/pipelines/nf-rnaseq/CODE_OF_CONDUCT.md b/pipelines/nf-rnaseq/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..3d60097
--- /dev/null
+++ b/pipelines/nf-rnaseq/CODE_OF_CONDUCT.md
@@ -0,0 +1,36 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a welcoming experience for everyone.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment include:
+
+- Using welcoming and inclusive language
+- Being respectful of differing viewpoints and experiences
+- Gracefully accepting constructive criticism
+- Focusing on what is best for the community
+
+Examples of unacceptable behavior include:
+
+- Trolling, insulting or derogatory comments, and personal attacks
+- Publishing others' private information without explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Enforcement
+
+Instances of unacceptable behavior may be reported to the project team via
+[GitHub Issues](https://github.com/mcvickerlab/WASP2/issues).
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct/][v2.1].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct/
diff --git a/pipelines/nf-rnaseq/assets/multiqc_config.yml b/pipelines/nf-rnaseq/assets/multiqc_config.yml
new file mode 100644
index 0000000..24877d0
--- /dev/null
+++ b/pipelines/nf-rnaseq/assets/multiqc_config.yml
@@ -0,0 +1,80 @@
+# MultiQC configuration for nf-rnaseq
+
+report_comment: >
+ This report has been generated by the nf-rnaseq
+ pipeline. It summarizes QC metrics from RNA-seq allelic imbalance analysis with WASP2.
+
+report_section_order:
+ software_versions:
+ order: -1000
+ nf-rnaseq-methods-description:
+ order: -1001
+
+export_plots: true
+
+custom_logo: null
+custom_logo_url: null
+custom_logo_title: null
+
+# Module order - RNA-seq processing order
+module_order:
+ - fastqc
+ - fastp
+ - star
+ - salmon
+ - featurecounts
+ - samtools
+ - picard
+
+# Top modules to display
+top_modules:
+ - fastqc
+ - star
+ - salmon
+
+# Table columns
+table_columns_visible:
+ FastQC:
+ percent_duplicates: True
+ percent_gc: True
+ avg_sequence_length: True
+ total_sequences: True
+ STAR:
+ uniquely_mapped_percent: True
+ mismatch_rate: True
+ Salmon:
+ percent_mapped: True
+ num_mapped: True
+ Samtools:
+ mapped_passed: True
+ mapped_passed_pct: True
+
+# Plot defaults
+plots_force_flat: False
+plots_force_interactive: True
+
+# Sample name cleaning
+fn_clean_sample_names: true
+fn_clean_exts:
+ - '.fastp'
+ - '.sorted'
+ - '.markdup'
+ - '.wasp_filt'
+ - '_fastqc'
+ - '.bam'
+ - '.sam'
+
+# Extra config
+extra_fn_clean_exts:
+ - type: 'truncate'
+ pattern: '_R1'
+ - type: 'truncate'
+ pattern: '_R2'
+ - type: 'truncate'
+ pattern: '_1'
+ - type: 'truncate'
+ pattern: '_2'
+
+# General settings
+show_analysis_paths: false
+show_analysis_time: false
diff --git a/pipelines/nf-rnaseq/conf/awsbatch.config b/pipelines/nf-rnaseq/conf/awsbatch.config
new file mode 100644
index 0000000..129f5bf
--- /dev/null
+++ b/pipelines/nf-rnaseq/conf/awsbatch.config
@@ -0,0 +1,64 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ AWS Batch executor configuration — nf-rnaseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to AWS Batch:
+
+ nextflow run main.nf -profile docker,awsbatch \
+ --awsbatch_queue <queue-name> \
+ -work-dir s3://<bucket>/work [options]
+
+ Required runtime parameters:
+ --awsbatch_queue AWS Batch job queue name or ARN (no default — must be provided)
+
+ Optional runtime parameters:
+ --awsbatch_cli_path Path to AWS CLI inside the container (default: /home/ec2-user/miniconda/bin/aws)
+ --aws_region AWS region (default: us-east-1)
+ --max_cpus Max CPUs per process (default: 96)
+ --max_memory Max memory per process (default: 768.GB)
+ --max_time Max time per process (default: 168.h)
+
+ IMPORTANT: Use an S3 work directory (-work-dir s3://bucket/work) to ensure
+ intermediate files are accessible across Batch compute environments.
+
+ Example:
+ nextflow run main.nf \
+ -profile docker,awsbatch \
+ --awsbatch_queue my-batch-queue \
+ --outdir s3://my-bucket/results \
+ -work-dir s3://my-bucket/work
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'awsbatch'
+ queue = params.awsbatch_queue
+}
+
+params {
+ max_cpus = 96
+ max_memory = '768.GB'
+ max_time = '168.h'
+
+ // AWS Batch-specific (set at runtime, not hardcoded here)
+ awsbatch_queue = null // --awsbatch_queue my-queue (REQUIRED)
+ awsbatch_cli_path = '/home/ec2-user/miniconda/bin/aws'
+ aws_region = 'us-east-1'
+}
+
+aws {
+ region = params.aws_region ?: 'us-east-1'
+
+ batch {
+ cliPath = params.awsbatch_cli_path
+ }
+}
+
+// nf-core v3.x resource limits override for cloud
+process {
+ resourceLimits = [
+ cpus: 96,
+ memory: 768.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-rnaseq/conf/base.config b/pipelines/nf-rnaseq/conf/base.config
index d35d691..9fefe96 100644
--- a/pipelines/nf-rnaseq/conf/base.config
+++ b/pipelines/nf-rnaseq/conf/base.config
@@ -16,6 +16,12 @@ process {
maxErrors = '-1'
// Process labels for resource allocation
+ withLabel:process_single {
+ cpus = { check_max( 1, 'cpus' ) }
+ memory = { check_max( 4.GB * task.attempt, 'memory' ) }
+ time = { check_max( 4.h * task.attempt, 'time' ) }
+ }
+
withLabel:process_low {
cpus = { check_max(2 * task.attempt, 'cpus') }
memory = { check_max(8.GB * task.attempt, 'memory') }
@@ -96,4 +102,11 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
+
+ // nf-core resource limits — replaces params.max_cpus/max_memory/max_time
+ resourceLimits = [
+ cpus: 16,
+ memory: 128.GB,
+ time: 240.h
+ ]
}
diff --git a/pipelines/nf-rnaseq/conf/lsf.config b/pipelines/nf-rnaseq/conf/lsf.config
new file mode 100644
index 0000000..e73b193
--- /dev/null
+++ b/pipelines/nf-rnaseq/conf/lsf.config
@@ -0,0 +1,49 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ LSF executor configuration — nf-rnaseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to an IBM Spectrum LSF cluster:
+
+ nextflow run main.nf -profile singularity,lsf [options]
+
+ Optional runtime parameters:
+ --lsf_queue LSF queue name (default: 'normal')
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example (Wellcome Sanger):
+ nextflow run main.nf \
+ -profile singularity,lsf \
+ --lsf_queue long \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'lsf'
+
+ // Optional: per-label queue routing
+ // withLabel:process_low { queue = params.lsf_queue ?: 'short' }
+ // withLabel:process_medium { queue = params.lsf_queue ?: 'normal' }
+ // withLabel:process_high { queue = params.lsf_queue ?: 'normal' }
+ // withLabel:process_long { queue = params.lsf_queue ?: 'long' }
+ // withLabel:process_high_memory { queue = params.lsf_queue ?: 'hugemem'}
+}
+
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h'
+
+ lsf_queue = 'normal' // --lsf_queue long
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-rnaseq/conf/pbs.config b/pipelines/nf-rnaseq/conf/pbs.config
new file mode 100644
index 0000000..ec3b645
--- /dev/null
+++ b/pipelines/nf-rnaseq/conf/pbs.config
@@ -0,0 +1,53 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ PBS Pro executor configuration — nf-rnaseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a PBS Pro cluster:
+
+ nextflow run main.nf -profile singularity,pbs [options]
+
+ Optional runtime parameters:
+ --pbs_account PBS account/project for job accounting (-A flag)
+ --pbs_queue Default queue (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example:
+ nextflow run main.nf \
+ -profile singularity,pbs \
+ --pbs_account mylab \
+ --pbs_queue workq \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'pbspro'
+ clusterOptions = {
+ def opts = []
+ if (params.pbs_account) opts << "-A ${params.pbs_account}"
+ if (params.pbs_queue) opts << "-q ${params.pbs_queue}"
+ opts.join(' ')
+ }
+}
+
+// Increase resource limits for HPC environments
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h' // 7-day wall clock
+
+ // PBS Pro-specific (set at runtime, not hardcoded here)
+ pbs_account = null // --pbs_account mylab
+ pbs_queue = null // --pbs_queue workq
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-rnaseq/conf/sge.config b/pipelines/nf-rnaseq/conf/sge.config
new file mode 100644
index 0000000..fa058d9
--- /dev/null
+++ b/pipelines/nf-rnaseq/conf/sge.config
@@ -0,0 +1,57 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SGE/UGE executor configuration — nf-rnaseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a Sun Grid Engine (SGE) or Univa Grid Engine (UGE) cluster:
+
+ nextflow run main.nf -profile singularity,sge [options]
+
+ Optional runtime parameters:
+ --sge_queue SGE queue name (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Note: SGE requires per-slot memory reservation (-l h_vmem).
+ Nextflow calculates this automatically as memory / cpus when executor = 'sge'.
+
+ Note: The 'smp' parallel environment name may differ on your cluster.
+ Common alternatives: 'orte', 'mpi', 'parallel', 'threaded'.
+ Check with: qconf -spl
+
+ Example (UCLA Hoffman2):
+ nextflow run main.nf \
+ -profile singularity,sge \
+ --sge_queue highp \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'sge'
+ penv = 'smp' // Parallel environment for multi-slot jobs
+
+ // Optional: per-label queue selection
+ // withLabel:process_single { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_low { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_medium { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_high { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_high_memory { queue = params.sge_queue ?: 'highmem' }
+}
+
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h'
+
+ sge_queue = null // --sge_queue all.q
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-rnaseq/conf/slurm.config b/pipelines/nf-rnaseq/conf/slurm.config
new file mode 100644
index 0000000..abe857d
--- /dev/null
+++ b/pipelines/nf-rnaseq/conf/slurm.config
@@ -0,0 +1,57 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SLURM executor configuration — nf-rnaseq
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a SLURM cluster:
+
+ nextflow run main.nf -profile singularity,slurm [options]
+
+ Optional runtime parameters:
+ --slurm_account SLURM account/project for job accounting (-A flag)
+ --slurm_queue Default partition/queue (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example (NIH Biowulf):
+ nextflow run main.nf \
+ -profile singularity,slurm \
+ --slurm_account mylab \
+ --slurm_queue norm \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'slurm'
+ clusterOptions = { params.slurm_account ? "--account=${params.slurm_account}" : '' }
+
+ // Optional: per-label partition routing.
+ // Uncomment and customize partition names for your cluster.
+ // withLabel:process_single { queue = params.slurm_queue ?: 'short' }
+ // withLabel:process_low { queue = params.slurm_queue ?: 'short' }
+ // withLabel:process_medium { queue = params.slurm_queue ?: 'norm' }
+ // withLabel:process_high { queue = params.slurm_queue ?: 'norm' }
+ // withLabel:process_long { queue = params.slurm_queue ?: 'long' }
+ // withLabel:process_high_memory { queue = params.slurm_queue ?: 'highmem' }
+}
+
+// Increase resource limits for HPC environments
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h' // 7-day wall clock
+
+ // SLURM-specific (set at runtime, not hardcoded here)
+ slurm_account = null // --slurm_account mylab
+ slurm_queue = null // --slurm_queue norm
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-rnaseq/conf/test_full.config b/pipelines/nf-rnaseq/conf/test_full.config
new file mode 100644
index 0000000..1ebd3b5
--- /dev/null
+++ b/pipelines/nf-rnaseq/conf/test_full.config
@@ -0,0 +1,24 @@
+/*
+========================================================================================
+ Full-size Test configuration for WASP2 RNA-seq ASE Pipeline
+========================================================================================
+ Full-size test dataset to check pipeline function with realistic data volumes.
+ Not suitable for CI runners - intended for manual validation on HPC or cloud.
+----------------------------------------------------------------------------------------
+*/
+
+params {
+ config_profile_name = 'Full test profile'
+ config_profile_description = 'Full-size test dataset to check pipeline function'
+
+ // Higher resource limits for full-size data
+ max_cpus = 16
+ max_memory = '128.GB'
+ max_time = '48.h'
+
+ // TODO: replace with full-size test data paths when available
+ input = null
+ vcf = null
+ star_index = null
+ gtf = null
+}
diff --git a/pipelines/nf-rnaseq/conf/test_local.config b/pipelines/nf-rnaseq/conf/test_local.config
new file mode 100644
index 0000000..5eaae60
--- /dev/null
+++ b/pipelines/nf-rnaseq/conf/test_local.config
@@ -0,0 +1,27 @@
+/*
+========================================================================================
+ Local Test configuration for WASP2 RNA-seq ASE Pipeline
+========================================================================================
+ Reduced resource configuration for running tests on a local workstation.
+ Combines minimal test data with constrained resource limits.
+----------------------------------------------------------------------------------------
+*/
+
+params {
+ config_profile_name = 'Local test profile'
+ config_profile_description = 'Minimal test dataset for local execution'
+
+ // Reduced resources for local workstations
+ max_cpus = 2
+ max_memory = '4.GB'
+ max_time = '2.h'
+
+ // Test data paths - uses integration mini-genome data
+ input = "${projectDir}/tests/data/integration/samplesheet_integration.csv"
+ vcf = "${projectDir}/tests/data/integration/integration.vcf.gz"
+ star_index = "${projectDir}/tests/data/integration/star_index"
+ gtf = "${projectDir}/tests/data/integration/integration.gtf"
+
+ // Lower thresholds for small test dataset
+ min_count = 1
+}
diff --git a/pipelines/nf-rnaseq/conf/test_stub.config b/pipelines/nf-rnaseq/conf/test_stub.config
new file mode 100644
index 0000000..c7cac09
--- /dev/null
+++ b/pipelines/nf-rnaseq/conf/test_stub.config
@@ -0,0 +1,29 @@
+/*
+========================================================================================
+ Stub-run Test configuration for WASP2 RNA-seq ASE Pipeline
+========================================================================================
+ Enables stub-run mode for fast CI testing without executing real processes.
+ Used by nf-test via the test_stub profile.
+----------------------------------------------------------------------------------------
+*/
+
+stubRun = true
+
+params {
+ config_profile_name = 'Test stub profile'
+ config_profile_description = 'Stub-run test dataset to check pipeline wiring'
+
+ // Minimal resources for stub runs
+ max_cpus = 1
+ max_memory = '2.GB'
+ max_time = '1.h'
+
+ // Test data paths - uses integration mini-genome data
+ input = "${projectDir}/tests/data/integration/samplesheet_integration.csv"
+ vcf = "${projectDir}/tests/data/integration/integration.vcf.gz"
+ star_index = "${projectDir}/tests/data/integration/star_index"
+ gtf = "${projectDir}/tests/data/integration/integration.gtf"
+
+ // Lower thresholds for test dataset
+ min_count = 1
+}
diff --git a/pipelines/nf-rnaseq/modules.json b/pipelines/nf-rnaseq/modules.json
new file mode 100644
index 0000000..a832d71
--- /dev/null
+++ b/pipelines/nf-rnaseq/modules.json
@@ -0,0 +1,5 @@
+{
+ "name": "wasp2/nf-rnaseq",
+ "homePage": "https://github.com/mcvickerlab/WASP2",
+ "repos": {}
+}
diff --git a/pipelines/nf-rnaseq/nextflow.config b/pipelines/nf-rnaseq/nextflow.config
index 9f36916..f26865d 100644
--- a/pipelines/nf-rnaseq/nextflow.config
+++ b/pipelines/nf-rnaseq/nextflow.config
@@ -43,12 +43,26 @@ params {
max_cpus = 16
max_memory = '128.GB'
max_time = '240.h'
+
+ // Trace directory
+ tracedir = "${params.outdir}/pipeline_info"
+
+ // Institutional config support (nf-core compatible)
+ custom_config_base = 'https://raw.githubusercontent.com/nf-core/configs/master'
+ custom_config_version = 'master'
}
// Load config files
includeConfig 'conf/base.config'
includeConfig 'conf/modules.config'
+// Load nf-core institutional configs
+try {
+ includeConfig "${params.custom_config_base}/nfcore_custom.config"
+} catch (Exception e) {
+ System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}")
+}
+
profiles {
debug {
dumpHashes = true
@@ -73,6 +87,9 @@ profiles {
singularity {
singularity.enabled = true
singularity.autoMounts = true
+ singularity.cacheDir = System.getenv('NXF_SINGULARITY_CACHEDIR') ?: "${HOME}/.singularity/cache"
+ singularity.pullTimeout = '60 min'
+ singularity.envWhitelist = 'NTHREADS,OMP_NUM_THREADS'
conda.enabled = false
docker.enabled = false
}
@@ -82,12 +99,41 @@ profiles {
}
test_stub {
- includeConfig 'conf/test.config'
+ includeConfig 'conf/test_stub.config'
+ }
+
+ test_full {
+ includeConfig 'conf/test_full.config'
+ }
+
+ test_local {
+ includeConfig 'conf/test_local.config'
}
test_integration {
includeConfig 'conf/test_integration.config'
}
+
+ // HPC executor profiles — combine with container profile:
+ // nextflow run main.nf -profile singularity,slurm [options]
+ slurm {
+ includeConfig 'conf/slurm.config'
+ }
+ sge {
+ includeConfig 'conf/sge.config'
+ }
+ lsf {
+ includeConfig 'conf/lsf.config'
+ }
+ pbs {
+ includeConfig 'conf/pbs.config'
+ }
+
+ // Cloud executor profiles — combine with container profile:
+    // nextflow run main.nf -profile docker,awsbatch --awsbatch_queue <queue> -work-dir s3://bucket/work
+ awsbatch {
+ includeConfig 'conf/awsbatch.config'
+ }
}
// Container overrides
@@ -110,22 +156,29 @@ def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
timeline {
enabled = true
- file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"
+ file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
}
report {
enabled = true
- file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
+ file = "${params.tracedir}/execution_report_${trace_timestamp}.html"
}
trace {
enabled = true
- file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
+ file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
}
dag {
enabled = true
- file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html"
+ file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
+}
+
+// Export these variables to prevent local Python/Perl libs from conflicting
+env {
+ PYTHONNOUSERSITE = 1
+ R_PROFILE_USER = "/.Rprofile"
+ R_ENVIRON_USER = "/.Renviron"
}
// Pipeline manifest
@@ -135,7 +188,7 @@ manifest {
homePage = 'https://github.com/mcvickerlab/WASP2'
description = 'RNA-seq Allele-Specific Expression (ASE) pipeline with WASP2'
mainScript = 'main.nf'
- nextflowVersion = '!>=22.10.0'
+ nextflowVersion = '!>=23.04.0'
version = '1.0.0'
}
diff --git a/pipelines/nf-rnaseq/nf-test.config b/pipelines/nf-rnaseq/nf-test.config
index d6580ea..7f3b1e6 100644
--- a/pipelines/nf-rnaseq/nf-test.config
+++ b/pipelines/nf-rnaseq/nf-test.config
@@ -1,13 +1,7 @@
config {
- // Test configuration for WASP2 RNA-seq ASE Pipeline
- testsDir "tests"
-
- // Location of test data
- configFile "nextflow.config"
-
- // Profile for running tests
+ // nf-core compliant nf-test configuration
+ testsDir "."
+ workDir System.getenv("NFT_WORKDIR") ?: ".nf-test"
+ configFile "tests/nextflow.config"
profile "test_stub,docker"
-
- // Work directory for test outputs
- workDir ".nf-test"
}
diff --git a/pipelines/nf-rnaseq/tests/nextflow.config b/pipelines/nf-rnaseq/tests/nextflow.config
new file mode 100644
index 0000000..979614b
--- /dev/null
+++ b/pipelines/nf-rnaseq/tests/nextflow.config
@@ -0,0 +1,9 @@
+/*
+ * Test configuration for nf-test
+ * Loaded via nf-test.config configFile directive
+ */
+params {
+ max_cpus = 2
+ max_memory = '6.GB'
+ max_time = '6.h'
+}
diff --git a/pipelines/nf-scatac/.gitattributes b/pipelines/nf-scatac/.gitattributes
new file mode 100644
index 0000000..ff938c9
--- /dev/null
+++ b/pipelines/nf-scatac/.gitattributes
@@ -0,0 +1,2 @@
+*.config linguist-language=Nextflow
+*.nf linguist-language=Nextflow
diff --git a/pipelines/nf-scatac/.github/ISSUE_TEMPLATE/bug_report.yml b/pipelines/nf-scatac/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..76f2bed
--- /dev/null
+++ b/pipelines/nf-scatac/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,73 @@
+name: Bug Report
+description: Report a bug in wasp2/nf-scatac
+labels: ["bug"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for taking the time to report a bug in **wasp2/nf-scatac**.
+ Please fill out the sections below to help us diagnose the issue.
+
+ - type: textarea
+ id: description
+ attributes:
+ label: Description of the bug
+ description: A clear and concise description of what the bug is.
+ validations:
+ required: true
+
+ - type: textarea
+ id: command
+ attributes:
+ label: Command used and target workflow
+ description: |
+ Please provide the full command you used to run the pipeline.
+ Include all parameters and profiles.
+ placeholder: |
+ nextflow run main.nf -profile docker,test --outdir results
+ validations:
+ required: true
+
+ - type: textarea
+ id: output
+ attributes:
+ label: Relevant output / error message
+ description: |
+ Please copy and paste the relevant output or error message.
+ This will be automatically formatted into code.
+ render: console
+ validations:
+ required: true
+
+ - type: textarea
+ id: system
+ attributes:
+ label: System information
+ description: |
+ Please provide details about your computing environment.
+ value: |
+ - Nextflow version:
+ - Container engine (Docker/Singularity/Conda):
+ - Container version:
+ - OS:
+ - Hardware (local/HPC/cloud):
+ validations:
+ required: true
+
+ - type: textarea
+    id: nextflow-log
+ attributes:
+ label: Nextflow log
+ description: |
+ Paste the output of `nextflow log last` or the relevant `.nextflow.log` contents.
+ render: console
+ validations:
+ required: false
+
+ - type: textarea
+ id: additional
+ attributes:
+ label: Additional context
+ description: Add any other context about the problem here (screenshots, sample data, config files).
+ validations:
+ required: false
diff --git a/pipelines/nf-scatac/.github/ISSUE_TEMPLATE/feature_request.yml b/pipelines/nf-scatac/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..01deb15
--- /dev/null
+++ b/pipelines/nf-scatac/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,51 @@
+name: Feature Request
+description: Suggest a new feature or enhancement for wasp2/nf-scatac
+labels: ["enhancement"]
+body:
+ - type: markdown
+ attributes:
+ value: |
+ Thanks for suggesting a feature for **wasp2/nf-scatac**.
+ Please describe your idea clearly so we can evaluate it.
+
+ - type: textarea
+ id: description
+ attributes:
+ label: Description
+ description: A clear and concise description of the feature you would like.
+ validations:
+ required: true
+
+ - type: textarea
+ id: problem
+ attributes:
+ label: Is your feature request related to a problem?
+ description: |
+ A clear and concise description of the problem.
+ e.g. "I'm always frustrated when [...]"
+ validations:
+ required: false
+
+ - type: textarea
+ id: solution
+ attributes:
+ label: Proposed solution
+ description: A clear and concise description of what you want to happen.
+ validations:
+ required: false
+
+ - type: textarea
+ id: alternatives
+ attributes:
+ label: Alternatives considered
+ description: A description of any alternative solutions or features you have considered.
+ validations:
+ required: false
+
+ - type: textarea
+ id: additional
+ attributes:
+ label: Additional context
+ description: Add any other context, references, or screenshots about the feature request here.
+ validations:
+ required: false
diff --git a/pipelines/nf-scatac/.github/PULL_REQUEST_TEMPLATE.md b/pipelines/nf-scatac/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..80ee771
--- /dev/null
+++ b/pipelines/nf-scatac/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,29 @@
+## PR checklist
+
+- [ ] This comment contains a description of changes with context.
+- [ ] Tests pass locally with `nf-test test` using the `test,docker` profile.
+- [ ] If you have added new modules/processes, they include `nf-test` tests.
+- [ ] If applicable, new parameters are documented in `nextflow_schema.json`.
+- [ ] Pipeline runs successfully with `test` and `test_stub` profiles.
+- [ ] `CHANGELOG.md` is updated with noteworthy changes.
+
+## Description
+
+
+
+Fixes # (issue)
+
+## Type of change
+
+- [ ] Bug fix (non-breaking change that fixes an issue)
+- [ ] New feature (non-breaking change that adds functionality)
+- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
+- [ ] Documentation update
+
+## How has this been tested?
+
+
+
+- [ ] `nf-test test --profile test,docker`
+- [ ] `nf-test test --profile test_stub,docker`
+- [ ] Manual pipeline run with real data
diff --git a/pipelines/nf-scatac/.github/workflows/ci.yml b/pipelines/nf-scatac/.github/workflows/ci.yml
new file mode 100644
index 0000000..4a58487
--- /dev/null
+++ b/pipelines/nf-scatac/.github/workflows/ci.yml
@@ -0,0 +1,74 @@
+name: nf-scatac CI
+on:
+ push:
+ branches:
+ - dev
+ - main
+ pull_request:
+ branches:
+ - dev
+ - main
+ release:
+ types: [published]
+
+# Cancel in-progress runs for the same branch/PR
+concurrency:
+ group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+ cancel-in-progress: true
+
+env:
+ NXF_ANSI_LOG: false
+ NFT_VER: "0.9.2"
+ NFT_WORKDIR: ".nf-test"
+
+jobs:
+ test:
+ name: "nf-test | ${{ matrix.NXF_VER }} | ${{ matrix.profile }}"
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ NXF_VER:
+ - "23.04.0"
+ - "latest-everything"
+ profile:
+ - "test,docker"
+ - "test_stub,docker"
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "${{ matrix.NXF_VER }}"
+
+ - name: Setup nf-test
+ uses: nf-core/setup-nf-test@v1
+ with:
+ version: "${{ env.NFT_VER }}"
+
+ - name: Cache nf-test
+ uses: actions/cache@v4
+ with:
+ path: |
+ ${{ env.NFT_WORKDIR }}
+ ~/.nextflow/assets
+ key: "${{ runner.os }}-nftest-${{ hashFiles('nf-test.config') }}-${{ hashFiles('**/*.nf') }}"
+ restore-keys: |
+ ${{ runner.os }}-nftest-${{ hashFiles('nf-test.config') }}-
+ ${{ runner.os }}-nftest-
+
+ - name: Run nf-test
+ run: |
+ nf-test test \
+ --profile=${{ matrix.profile }} \
+ --tap=test-output.tap \
+ --verbose
+
+ - name: Upload test output
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: "nf-test-output-${{ matrix.NXF_VER }}-${{ strategy.job-index }}"
+ path: test-output.tap
diff --git a/pipelines/nf-scatac/.github/workflows/linting.yml b/pipelines/nf-scatac/.github/workflows/linting.yml
new file mode 100644
index 0000000..a4b0ada
--- /dev/null
+++ b/pipelines/nf-scatac/.github/workflows/linting.yml
@@ -0,0 +1,56 @@
+name: nf-core linting
+on:
+ push:
+ branches:
+ - dev
+ - main
+ pull_request:
+ branches:
+ - dev
+ - main
+
+# Cancel in-progress runs for the same branch/PR
+concurrency:
+ group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+ cancel-in-progress: true
+
+jobs:
+ nf-core-lint:
+ name: nf-core lint
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "latest-everything"
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.12"
+
+ - name: Install nf-core tools
+ run: pip install nf-core
+
+ - name: Run nf-core lint
+        run: nf-core pipelines lint --dir . --fail-warned
+ env:
+ GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+
+ nextflow-config:
+ name: Nextflow config check
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out pipeline code
+ uses: actions/checkout@v4
+
+ - name: Install Nextflow
+ uses: nf-core/setup-nextflow@v2
+ with:
+ version: "latest-everything"
+
+ - name: Check Nextflow config
+ run: nextflow config -show-profiles
diff --git a/pipelines/nf-scatac/.gitignore b/pipelines/nf-scatac/.gitignore
new file mode 100644
index 0000000..a54086b
--- /dev/null
+++ b/pipelines/nf-scatac/.gitignore
@@ -0,0 +1,7 @@
+.nextflow*
+work/
+results/
+.nf-test/
+testing/
+testing*
+*.pyc
diff --git a/pipelines/nf-scatac/.nf-core.yml b/pipelines/nf-scatac/.nf-core.yml
index 66c9850..195c6e5 100644
--- a/pipelines/nf-scatac/.nf-core.yml
+++ b/pipelines/nf-scatac/.nf-core.yml
@@ -2,6 +2,7 @@
# See: https://nf-co.re/docs/nf-core-tools/pipelines/lint
repository_type: pipeline
+nf_core_version: "3.0"
# nf-core template version this pipeline is based on
template:
diff --git a/pipelines/nf-scatac/.prettierignore b/pipelines/nf-scatac/.prettierignore
new file mode 100644
index 0000000..9cb26d6
--- /dev/null
+++ b/pipelines/nf-scatac/.prettierignore
@@ -0,0 +1,9 @@
+email_template.html
+*.config
+.nextflow*
+work/
+results/
+.nf-test/
+nextflow_schema.json
+*.nf
+assets/
diff --git a/pipelines/nf-scatac/.prettierrc.yml b/pipelines/nf-scatac/.prettierrc.yml
new file mode 100644
index 0000000..c81f9a7
--- /dev/null
+++ b/pipelines/nf-scatac/.prettierrc.yml
@@ -0,0 +1 @@
+printWidth: 120
diff --git a/pipelines/nf-scatac/CODE_OF_CONDUCT.md b/pipelines/nf-scatac/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..3d60097
--- /dev/null
+++ b/pipelines/nf-scatac/CODE_OF_CONDUCT.md
@@ -0,0 +1,36 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a welcoming experience for everyone.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment include:
+
+- Using welcoming and inclusive language
+- Being respectful of differing viewpoints and experiences
+- Gracefully accepting constructive criticism
+- Focusing on what is best for the community
+
+Examples of unacceptable behavior include:
+
+- Trolling, insulting or derogatory comments, and personal attacks
+- Publishing others' private information without explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Enforcement
+
+Instances of unacceptable behavior may be reported to the project team via
+[GitHub Issues](https://github.com/mcvickerlab/WASP2/issues).
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct/][v2.1].
+
+[homepage]: https://www.contributor-covenant.org
+[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct/
diff --git a/pipelines/nf-scatac/conf/awsbatch.config b/pipelines/nf-scatac/conf/awsbatch.config
new file mode 100644
index 0000000..64599f6
--- /dev/null
+++ b/pipelines/nf-scatac/conf/awsbatch.config
@@ -0,0 +1,64 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ AWS Batch executor configuration — nf-scatac
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to AWS Batch:
+
+ nextflow run main.nf -profile docker,awsbatch \
+        --awsbatch_queue <queue> \
+        -work-dir s3://<bucket>/work [options]
+
+ Required runtime parameters:
+ --awsbatch_queue AWS Batch job queue name or ARN (no default — must be provided)
+
+ Optional runtime parameters:
+ --awsbatch_cli_path Path to AWS CLI inside the container (default: /home/ec2-user/miniconda/bin/aws)
+ --aws_region AWS region (default: us-east-1)
+ --max_cpus Max CPUs per process (default: 96)
+ --max_memory Max memory per process (default: 768.GB)
+ --max_time Max time per process (default: 168.h)
+
+ IMPORTANT: Use an S3 work directory (-work-dir s3://bucket/work) to ensure
+ intermediate files are accessible across Batch compute environments.
+
+ Example:
+ nextflow run main.nf \
+ -profile docker,awsbatch \
+ --awsbatch_queue my-batch-queue \
+ --outdir s3://my-bucket/results \
+ -work-dir s3://my-bucket/work
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'awsbatch'
+ queue = params.awsbatch_queue
+}
+
+params {
+ max_cpus = 96
+ max_memory = '768.GB'
+ max_time = '168.h'
+
+ // AWS Batch-specific (set at runtime, not hardcoded here)
+ awsbatch_queue = null // --awsbatch_queue my-queue (REQUIRED)
+ awsbatch_cli_path = '/home/ec2-user/miniconda/bin/aws'
+ aws_region = 'us-east-1'
+}
+
+aws {
+ region = params.aws_region ?: 'us-east-1'
+
+ batch {
+ cliPath = params.awsbatch_cli_path
+ }
+}
+
+// nf-core v3.x resource limits override for cloud
+process {
+ resourceLimits = [
+ cpus: 96,
+ memory: 768.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-scatac/conf/base.config b/pipelines/nf-scatac/conf/base.config
index 83e0d60..64419e3 100644
--- a/pipelines/nf-scatac/conf/base.config
+++ b/pipelines/nf-scatac/conf/base.config
@@ -1,5 +1,7 @@
/*
- nf-scatac base config - Resource configurations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ nf-scatac base config — Resource configurations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
process {
@@ -9,21 +11,51 @@ process {
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
maxRetries = 1
+ maxErrors = '-1'
+ withLabel:process_single {
+ cpus = { check_max( 1, 'cpus' ) }
+ memory = { check_max( 4.GB * task.attempt, 'memory' ) }
+ time = { check_max( 2.h * task.attempt, 'time' ) }
+ }
withLabel:process_low {
cpus = { check_max( 2, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
+ time = { check_max( 2.h * task.attempt, 'time' ) }
}
withLabel:process_medium {
cpus = { check_max( 6, 'cpus' ) }
memory = { check_max( 48.GB * task.attempt, 'memory' ) }
+ time = { check_max( 8.h * task.attempt, 'time' ) }
}
withLabel:process_high {
cpus = { check_max( 12, 'cpus' ) }
memory = { check_max( 96.GB * task.attempt, 'memory' ) }
+ time = { check_max( 16.h * task.attempt, 'time' ) }
+ }
+ withLabel:process_long {
+ time = { check_max( 20.h * task.attempt, 'time' ) }
+ }
+ withLabel:process_high_memory {
+ memory = { check_max( 128.GB * task.attempt, 'memory' ) }
}
withLabel:process_wasp2 {
cpus = { check_max( 4, 'cpus' ) }
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
+ time = { check_max( 4.h * task.attempt, 'time' ) }
}
+ withLabel:error_ignore {
+ errorStrategy = 'ignore'
+ }
+ withLabel:error_retry {
+ errorStrategy = 'retry'
+ maxRetries = 2
+ }
+
+ // nf-core resource limits — replaces params.max_cpus/max_memory/max_time
+ resourceLimits = [
+ cpus: 16,
+ memory: 128.GB,
+ time: 240.h
+ ]
}
diff --git a/pipelines/nf-scatac/conf/lsf.config b/pipelines/nf-scatac/conf/lsf.config
new file mode 100644
index 0000000..3249a0d
--- /dev/null
+++ b/pipelines/nf-scatac/conf/lsf.config
@@ -0,0 +1,48 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ LSF executor configuration — nf-scatac
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to an IBM Spectrum LSF cluster:
+
+ nextflow run main.nf -profile singularity,lsf [options]
+
+ Optional runtime parameters:
+ --lsf_queue LSF queue name (default: 'normal')
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example (Wellcome Sanger):
+ nextflow run main.nf \
+ -profile singularity,lsf \
+ --lsf_queue long \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'lsf'
+
+ // Optional: per-label queue routing
+ // withLabel:process_low { queue = params.lsf_queue ?: 'short' }
+ // withLabel:process_medium { queue = params.lsf_queue ?: 'normal' }
+ // withLabel:process_high { queue = params.lsf_queue ?: 'normal' }
+ // withLabel:process_wasp2 { queue = params.lsf_queue ?: 'normal' }
+}
+
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h'
+
+ lsf_queue = 'normal' // --lsf_queue long
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-scatac/conf/pbs.config b/pipelines/nf-scatac/conf/pbs.config
new file mode 100644
index 0000000..8f1820a
--- /dev/null
+++ b/pipelines/nf-scatac/conf/pbs.config
@@ -0,0 +1,53 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ PBS Pro executor configuration — nf-scatac
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a PBS Pro cluster:
+
+ nextflow run main.nf -profile singularity,pbs [options]
+
+ Optional runtime parameters:
+ --pbs_account PBS account/project for job accounting (-A flag)
+ --pbs_queue Default queue (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example:
+ nextflow run main.nf \
+ -profile singularity,pbs \
+ --pbs_account mylab \
+ --pbs_queue workq \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'pbspro'
+ clusterOptions = {
+ def opts = []
+ if (params.pbs_account) opts << "-A ${params.pbs_account}"
+ if (params.pbs_queue) opts << "-q ${params.pbs_queue}"
+ opts.join(' ')
+ }
+}
+
+// Increase resource limits for HPC environments
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h' // 7-day wall clock
+
+ // PBS Pro-specific (set at runtime, not hardcoded here)
+ pbs_account = null // --pbs_account mylab
+ pbs_queue = null // --pbs_queue workq
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-scatac/conf/sge.config b/pipelines/nf-scatac/conf/sge.config
new file mode 100644
index 0000000..8ac7a51
--- /dev/null
+++ b/pipelines/nf-scatac/conf/sge.config
@@ -0,0 +1,56 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SGE/UGE executor configuration — nf-scatac
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a Sun Grid Engine (SGE) or Univa Grid Engine (UGE) cluster:
+
+ nextflow run main.nf -profile singularity,sge [options]
+
+ Optional runtime parameters:
+ --sge_queue SGE queue name (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Note: SGE requires per-slot memory reservation (-l h_vmem).
+ Nextflow calculates this automatically as memory / cpus when executor = 'sge'.
+
+ Note: The 'smp' parallel environment name may differ on your cluster.
+ Common alternatives: 'orte', 'mpi', 'parallel', 'threaded'.
+ Check with: qconf -spl
+
+ Example (UCLA Hoffman2):
+ nextflow run main.nf \
+ -profile singularity,sge \
+ --sge_queue highp \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'sge'
+ penv = 'smp' // Parallel environment for multi-slot jobs
+
+ // Optional: per-label queue selection
+ // withLabel:process_low { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_medium { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_high { queue = params.sge_queue ?: 'all.q' }
+ // withLabel:process_wasp2 { queue = params.sge_queue ?: 'all.q' }
+}
+
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h'
+
+ sge_queue = null // --sge_queue all.q
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-scatac/conf/slurm.config b/pipelines/nf-scatac/conf/slurm.config
new file mode 100644
index 0000000..9687e7f
--- /dev/null
+++ b/pipelines/nf-scatac/conf/slurm.config
@@ -0,0 +1,55 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SLURM executor configuration — nf-scatac
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Submit jobs to a SLURM cluster:
+
+ nextflow run main.nf -profile singularity,slurm [options]
+
+ Optional runtime parameters:
+ --slurm_account SLURM account/project for job accounting (-A flag)
+ --slurm_queue Default partition/queue (cluster default if unset)
+ --max_cpus Max CPUs per process (default: 64)
+ --max_memory Max memory per process (default: 256.GB)
+ --max_time Max time per process (default: 168.h)
+
+ Example (NIH Biowulf):
+ nextflow run main.nf \
+ -profile singularity,slurm \
+ --slurm_account mylab \
+ --slurm_queue norm \
+ --outdir results
+----------------------------------------------------------------------------------------
+*/
+
+process {
+ executor = 'slurm'
+ clusterOptions = { params.slurm_account ? "--account=${params.slurm_account}" : '' }
+
+ // Optional: per-label partition routing.
+ // Uncomment and customize partition names for your cluster.
+ // withLabel:process_low { queue = params.slurm_queue ?: 'short' }
+ // withLabel:process_medium { queue = params.slurm_queue ?: 'norm' }
+ // withLabel:process_high { queue = params.slurm_queue ?: 'norm' }
+ // withLabel:process_wasp2 { queue = params.slurm_queue ?: 'norm' }
+}
+
+// Increase resource limits for HPC environments
+params {
+ max_cpus = 64
+ max_memory = '256.GB'
+ max_time = '168.h' // 7-day wall clock
+
+ // SLURM-specific (set at runtime, not hardcoded here)
+ slurm_account = null // --slurm_account mylab
+ slurm_queue = null // --slurm_queue norm
+}
+
+// nf-core v3.x resource limits override for HPC
+process {
+ resourceLimits = [
+ cpus: 64,
+ memory: 256.GB,
+ time: 168.h
+ ]
+}
diff --git a/pipelines/nf-scatac/modules.json b/pipelines/nf-scatac/modules.json
new file mode 100644
index 0000000..54cdd84
--- /dev/null
+++ b/pipelines/nf-scatac/modules.json
@@ -0,0 +1,5 @@
+{
+ "name": "wasp2/nf-scatac",
+ "homePage": "",
+ "repos": {}
+}
diff --git a/pipelines/nf-scatac/nextflow.config b/pipelines/nf-scatac/nextflow.config
index 4e2f9ea..679d953 100644
--- a/pipelines/nf-scatac/nextflow.config
+++ b/pipelines/nf-scatac/nextflow.config
@@ -41,6 +41,10 @@ params {
max_memory = '128.GB'
max_time = '240.h'
+ // Institutional config support (nf-core compatible)
+ custom_config_base = 'https://raw.githubusercontent.com/nf-core/configs/master'
+ custom_config_version = 'master'
+
// Pipeline options
help = false
version = false
@@ -51,6 +55,13 @@ params {
includeConfig 'conf/base.config'
includeConfig 'conf/modules.config'
+// Load nf-core institutional configs
+try {
+ includeConfig "${params.custom_config_base}/nfcore_custom.config"
+} catch (Exception e) {
+ System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}")
+}
+
// Container version - override all WASP2/SCATAC processes to use 1.4.0
def wasp2_container = 'ghcr.io/mcvickerlab/wasp2:1.4.0'
@@ -61,16 +72,30 @@ process {
}
profiles {
+ debug {
+ dumpHashes = true
+ process.beforeScript = 'echo $HOSTNAME'
+ cleanup = false
+ }
docker {
docker.enabled = true
+ conda.enabled = false
+ singularity.enabled = false
docker.runOptions = '-u $(id -u):$(id -g)'
}
singularity {
singularity.enabled = true
singularity.autoMounts = true
+        singularity.cacheDir = System.getenv('NXF_SINGULARITY_CACHEDIR') ?: "${System.getenv('HOME')}/.singularity/cache"
+ singularity.pullTimeout = '60 min'
+ singularity.envWhitelist = 'NTHREADS,OMP_NUM_THREADS'
+ conda.enabled = false
+ docker.enabled = false
}
conda {
conda.enabled = true
+ docker.enabled = false
+ singularity.enabled = false
process.conda = "${projectDir}/../../environment.yml"
}
test {
@@ -88,12 +113,37 @@ profiles {
test_local {
includeConfig 'conf/test_local.config'
}
+
+ // HPC executor profiles — combine with container profile:
+ // nextflow run main.nf -profile singularity,slurm [options]
+ slurm {
+ includeConfig 'conf/slurm.config'
+ }
+ sge {
+ includeConfig 'conf/sge.config'
+ }
+ lsf {
+ includeConfig 'conf/lsf.config'
+ }
+ pbs {
+ includeConfig 'conf/pbs.config'
+ }
+
+ // Cloud executor profiles — combine with container profile:
+    //   nextflow run main.nf -profile docker,awsbatch --awsbatch_queue <queue-name> -work-dir s3://bucket/work
+ awsbatch {
+ includeConfig 'conf/awsbatch.config'
+ }
}
def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
timeline { enabled = true; file = "${params.tracedir}/timeline_${trace_timestamp}.html" }
report { enabled = true; file = "${params.tracedir}/report_${trace_timestamp}.html" }
trace { enabled = true; file = "${params.tracedir}/trace_${trace_timestamp}.txt" }
+dag {
+ enabled = true
+ file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
+}
process.shell = ['/bin/bash', '-euo', 'pipefail']
diff --git a/pipelines/nf-scatac/nf-test.config b/pipelines/nf-scatac/nf-test.config
index 6453252..7f3b1e6 100644
--- a/pipelines/nf-scatac/nf-test.config
+++ b/pipelines/nf-scatac/nf-test.config
@@ -1,11 +1,7 @@
-/*
- * nf-test configuration for nf-scatac pipeline
- * Issue: #48
- */
-
config {
- testsDir "tests"
- workDir ".nf-test"
- configFile "nextflow.config"
+ // nf-core compliant nf-test configuration
+ testsDir "."
+ workDir System.getenv("NFT_WORKDIR") ?: ".nf-test"
+ configFile "tests/nextflow.config"
profile "test_stub,docker"
}
diff --git a/pipelines/nf-scatac/tests/nextflow.config b/pipelines/nf-scatac/tests/nextflow.config
new file mode 100644
index 0000000..979614b
--- /dev/null
+++ b/pipelines/nf-scatac/tests/nextflow.config
@@ -0,0 +1,9 @@
+/*
+ * Test configuration for nf-test
+ * Loaded via nf-test.config configFile directive
+ */
+params {
+ max_cpus = 2
+ max_memory = '6.GB'
+ max_time = '6.h'
+}
diff --git a/src/wasp2/__init__.py b/src/wasp2/__init__.py
index 57e3443..3b4ea3b 100644
--- a/src/wasp2/__init__.py
+++ b/src/wasp2/__init__.py
@@ -4,4 +4,4 @@
A Python package for allele-specific analysis of sequencing data.
"""
-__version__ = "1.3.3"
+__version__ = "1.4.0"
From b1aac9392ae01e173c32abbe04086961d6830931 Mon Sep 17 00:00:00 2001
From: Jeff Jaureguy <67065808+Jaureguy760@users.noreply.github.com>
Date: Sun, 15 Mar 2026 00:38:36 -0700
Subject: [PATCH 2/2] fix: revert GTF from count-variants-sc, harden
Dockerfile, fix docs
- Revert GTF/GFF3 support from count-variants-sc (sc commands are
scATAC-only; gene annotation is a downstream ArchR/Signac step)
- Bulk count-variants retains full GTF support
- Dockerfile: tini PID 1, g++ purge assertion, wasp2-ipscore check
- Smoke test: add wasp2-ipscore, fix sample name case
- Docs: rewrite counting/mapping/analysis/installation pages
- Remove stale --min-count footnote from analysis.rst
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
.github/workflows/ci.yml | 4 +-
.gitignore | 29 +-
Dockerfile | 10 +-
Makefile | 6 +-
README.md | 110 ++--
docs/CONTAINER_USAGE.md | 267 ++--------
docs/source/index.rst | 1 +
docs/source/installation.rst | 85 ++-
docs/source/methods/mapping_filter.rst | 7 +-
docs/source/tutorials/quickstart_mapping.rst | 12 +-
docs/source/tutorials/scrna_seq.rst | 4 +-
docs/source/user_guide/analysis.rst | 526 ++-----------------
docs/source/user_guide/counting.rst | 222 +++-----
docs/source/user_guide/ipscore.rst | 53 ++
docs/source/user_guide/mapping.rst | 257 ++++-----
pixi.toml | 1 +
pyproject.toml | 2 +-
rust/Cargo.lock | 1 +
rust/Cargo.toml | 3 +
rust/build.rs | 4 +
scripts/container_smoke_test.sh | 11 +-
src/counting/__main__.py | 9 +-
src/counting/count_alleles_sc.py | 59 ++-
src/counting/parse_gene_data.py | 43 +-
src/counting/run_counting_sc.py | 50 +-
src/mapping/intersect_variant_data.py | 11 +-
src/mapping/remap_utils.py | 12 +-
27 files changed, 582 insertions(+), 1217 deletions(-)
create mode 100644 docs/source/user_guide/ipscore.rst
create mode 100644 rust/build.rs
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3518670..2e37711 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -146,8 +146,8 @@ jobs:
- name: Run cargo test
working-directory: rust
- run: cargo test
+ run: PYO3_PYTHON=$(command -v python3) cargo test
- name: Run clippy
working-directory: rust
- run: cargo clippy -- -W warnings
+ run: PYO3_PYTHON=$(command -v python3) cargo clippy -- -W warnings
diff --git a/.gitignore b/.gitignore
index 058d27f..9418dda 100644
--- a/.gitignore
+++ b/.gitignore
@@ -154,6 +154,11 @@ benchmark_figures/
# Sanity test data (downloaded from GitHub releases)
tests/sanity/data/
+# Real data test files (downloaded from 1000 Genomes, ~2-3 GB)
+tests/real_data/data/
+tests/real_data/samplesheets/
+tests/real_data/configs/
+
# Nextflow runtime
.nextflow/
.nextflow.log*
@@ -175,7 +180,29 @@ test-output/
results_stub/
pipelines/*/test-output/
pipelines/*/results_stub/
+pipelines/*/results_*/
+pipelines/*/artifacts/
+
+# Artifacts directory
+artifacts/
+
+# Benchmark infrastructure (large data/envs/results)
+test_benchmarks/
+
+# Claude Code local state
+.claude/
+
+# Nextflow pipeline-level logs
+pipelines/*/.nextflow.log*
+pipelines/*/.nf-test.log
+
+# Nextflow reports and visualizations
+trace.txt
+timeline.html
+report.html
+dag.svg
+dag.dot
# Claude Code memory files (per-directory)
**/CLAUDE.md
-!./CLAUDE.md
+!/CLAUDE.md
diff --git a/Dockerfile b/Dockerfile
index 0ade5c9..d3a4821 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -82,6 +82,8 @@ LABEL maintainer="Jeff Jaureguy "
# Install runtime deps + temporary build deps for pybedtools C++ extension
# Combined into one RUN to minimize layers; build tools purged at the end
RUN apt-get update && apt-get install -y --no-install-recommends \
+ # PID 1 init for proper signal handling (Nextflow/HPC)
+ tini \
# Bioinformatics tools
samtools \
bcftools \
@@ -106,14 +108,16 @@ RUN --mount=type=cache,target=/root/.cache/pip \
pip install /tmp/*.whl \
&& rm -rf /tmp/*.whl \
&& apt-get purge -y --auto-remove g++ zlib1g-dev \
- && rm -rf /var/lib/apt/lists/*
+ && rm -rf /var/lib/apt/lists/* \
+ && ! command -v g++
WORKDIR /app
# Verify non-Python tools are available (Python tools skipped during build
# because Polars uses AVX2 instructions that fail under QEMU emulation
# on ARM64 CI runners building linux/amd64 images)
-RUN samtools --version && bcftools --version && bedtools --version
+RUN samtools --version && bcftools --version && bedtools --version \
+ && wasp2-ipscore --help > /dev/null 2>&1
# Create non-root user for security
RUN groupadd -g 1000 wasp2 && \
@@ -147,5 +151,7 @@ WORKDIR /data
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD wasp2-count --version || exit 1
+ENTRYPOINT ["tini", "--"]
+
# Default command
CMD ["wasp2-count", "--help"]
diff --git a/Makefile b/Makefile
index d99addc..8e67b2a 100644
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,7 @@
.PHONY: all build install test test-quick test-sanity lint format clean help
.PHONY: download-sanity-data sanity-data-local rust-build rust-test
+.PHONY: test-mapping-parity
# Configuration
PYTHON ?= python
@@ -48,7 +49,7 @@ rust-dev: ## Build Rust extension in debug mode (faster compile)
$(MATURIN) develop -m $(RUST_DIR)/Cargo.toml
rust-test: ## Run Rust unit tests
- cd $(RUST_DIR) && $(CARGO) test
+ cd $(RUST_DIR) && PYO3_PYTHON=$$($(PYTHON) -c "import sys; print(sys.executable)") $(CARGO) test
rust-bench: ## Run Rust benchmarks
cd $(RUST_DIR) && $(CARGO) bench
@@ -68,6 +69,9 @@ test-quick: ## Run quick validation tests only
test-rust: ## Run Rust-specific tests
$(PYTEST) $(TESTS_DIR) -v --tb=short -m "rust"
+test-mapping-parity: ## Run mapping parity tests against legacy and unified paths
+ $(PYTEST) $(TESTS_DIR)/regression/test_mapping_stage_parity.py -v --tb=short
+
test-integration: ## Run integration tests
$(PYTEST) $(TESTS_DIR) -v --tb=short -m "integration"
diff --git a/README.md b/README.md
index 29b7667..d89e29a 100644
--- a/README.md
+++ b/README.md
@@ -15,114 +15,100 @@
-
-
-
-
-
-
- Documentation •
- McVicker Lab •
- Original WASP
----
-
## Installation
-### Recommended: Bioconda
+### Bioconda
```bash
mamba install -c conda-forge -c bioconda wasp2
```
-Installs WASP2 and all dependencies (samtools, bcftools, bedtools, htslib) automatically. Available for Linux (x86_64, aarch64) and macOS (Intel, Apple Silicon). Requires [miniforge](https://github.com/conda-forge/miniforge).
-
-### Via PyPI
+### PyPI
```bash
pip install wasp2
```
-Pre-built wheels for Linux (x86_64, aarch64) and macOS (Intel, Apple Silicon) with Python 3.10-3.13. The Rust extension and htslib are bundled in the wheel. Requires samtools, bcftools, and bedtools installed separately.
+The PyPI package does not install external tools such as `samtools`,
+`bcftools`, or `bedtools`; install those separately.
-### For development
-
-```bash
-git clone https://github.com/mcvickerlab/WASP2.git
-cd WASP2
-pixi install # resolves all dependencies including Rust toolchain
-pixi run verify # build + test
-```
-
-### Via Docker
+### Docker
```bash
docker pull ghcr.io/mcvickerlab/wasp2:1.4.0
-docker run --rm -v $PWD:/data ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-count --help
+docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-count --help
```
-Multi-platform image (linux/amd64 + linux/arm64) with all dependencies included.
-
-### Via Singularity/Apptainer (HPC)
+### Singularity/Apptainer
```bash
singularity pull wasp2.sif docker://ghcr.io/mcvickerlab/wasp2:1.4.0
singularity exec wasp2.sif wasp2-count --help
```
-### Reproducible Environment (conda-lock)
+## CLI Tools
-For fully pinned, reproducible installs (HPC clusters, CI, shared lab environments):
+WASP2 installs four command-line entry points:
-```bash
-# Recommended: mamba (fastest)
-mamba create -n WASP2 --file conda-lock.yml
+- `wasp2-map`
+- `wasp2-count`
+- `wasp2-analyze`
+- `wasp2-ipscore`
-# Or with conda
-conda-lock install -n WASP2 conda-lock.yml
-```
+## Quick Start
-`conda-lock.yml` pins every package to exact versions with checksums for `linux-64` and `osx-64`. To regenerate after updating `environment.lock.yml`:
+### 1. Correct mapping bias
```bash
-conda-lock lock -f environment.lock.yml --lockfile conda-lock.yml
-```
-
-See the [documentation](https://mcvickerlab.github.io/WASP2/) for detailed install options and development setup.
+wasp2-map make-reads input.bam variants.vcf.gz -s sample1 -o remap_dir
-## Quick Start
+# Realign remap_dir/*_swapped_alleles_r1.fq and r2.fq with the same aligner
+# and settings used for the original BAM, then:
-WASP2 has three steps that run in order:
+wasp2-map filter-remapped remapped.bam \
+ -j remap_dir/input_wasp_data_files.json \
+ -o filtered.bam
+```
-**Step 1: Remap reads** to correct mapping bias
+### 2. Count alleles
```bash
-wasp2-map make-reads input.bam variants.vcf.gz -s sample1 -o remap_dir/
-# Realign the swapped-allele reads with your aligner, then:
-wasp2-map filter-remapped remapped.bam -j remap_dir/sample1_wasp_data_files.json -o filtered.bam
+wasp2-count count-variants filtered.bam variants.vcf.gz -s sample1 -o counts.tsv
```
-**Step 2: Count alleles** at heterozygous SNPs
+### 3. Test for imbalance
```bash
-wasp2-count count-variants filtered.bam variants.vcf.gz -s sample1
+wasp2-analyze find-imbalance counts.tsv -o ai_results.tsv
```
-**Step 3: Test for allelic imbalance**
+## Single-Cell Example
```bash
-wasp2-analyze find-imbalance counts.tsv -o results.tsv
+wasp2-count count-variants-sc \
+ cellranger.bam \
+ variants.vcf.gz \
+ barcodes.tsv \
+ --samples sample1 \
+ --feature genes.gtf \
+ --out_file allele_counts.h5ad
+
+wasp2-analyze find-imbalance-sc \
+ allele_counts.h5ad \
+ barcode_groups.tsv \
+ --sample sample1 \
+ -o ai_results.tsv
```
-See the [documentation](https://mcvickerlab.github.io/WASP2/) for detailed usage, single-cell workflows, and supported variant formats (VCF, BCF, PGEN).
-
-## Authors
-
-- **Aaron Ho** — Creator of WASP2
-- **Jeff Jaureguy** — Developer and maintainer
-- **[McVicker Lab](https://mcvicker.salk.edu/)**, Salk Institute
+## iPSCORE Utilities
-## Citation
+```bash
+wasp2-ipscore inventory --output inventory.tsv
+wasp2-ipscore manifest --output manifest.csv
+wasp2-ipscore validate
+```
-If you use WASP2 in your research, please cite our paper (coming soon).
+See the [documentation](https://mcvickerlab.github.io/WASP2/) for complete
+usage, tutorials, and API details.
diff --git a/docs/CONTAINER_USAGE.md b/docs/CONTAINER_USAGE.md
index 8dd77e7..6bda535 100644
--- a/docs/CONTAINER_USAGE.md
+++ b/docs/CONTAINER_USAGE.md
@@ -1,262 +1,81 @@
-# WASP2 Container Usage Guide
+# WASP2 Container Usage
-This guide covers how to use WASP2 containers for local development, HPC clusters, and Nextflow pipelines.
+## Verified Docker Flow
-## Container Registries
-
-WASP2 images are available from:
-
-| Registry | Image | Pull Command |
-|----------|-------|--------------|
-| **DockerHub** | `mcvickerlab/wasp2` | `docker pull mcvickerlab/wasp2:latest` |
-| **GitHub Container Registry** | `ghcr.io/mcvickerlab/wasp2` | `docker pull ghcr.io/mcvickerlab/wasp2:latest` |
-
-### Available Tags
-
-- `:latest` - Most recent release
-- `:1.3.0` - Specific version
-- `:1.3` - Minor version (tracks patches)
-- `:main` - Development builds from main branch
-
-## Docker Usage
-
-### Pull and Run
-
-```bash
-# Pull the image
-docker pull mcvickerlab/wasp2:latest
-
-# Run WASP2 commands
-docker run --rm mcvickerlab/wasp2 wasp2-count --help
-docker run --rm mcvickerlab/wasp2 wasp2-map --help
-docker run --rm mcvickerlab/wasp2 wasp2-analyze --help
-
-# Process files (mount local directory)
-docker run --rm -v $(pwd):/data mcvickerlab/wasp2 \
- wasp2-count /data/sample.bam /data/variants.vcf.gz -o /data/counts.tsv
-```
-
-### Interactive Shell
+The Docker image validated for this update is:
```bash
-docker run -it --rm -v $(pwd):/data mcvickerlab/wasp2 /bin/bash
+ghcr.io/mcvickerlab/wasp2:1.4.0
```
-## Singularity/Apptainer Usage (HPC)
-
-### Pull from Docker Registry
+Pull and inspect the available CLI tools:
```bash
-# Pull and convert to SIF
-singularity pull wasp2.sif docker://mcvickerlab/wasp2:latest
+docker pull ghcr.io/mcvickerlab/wasp2:1.4.0
-# Or from GHCR
-singularity pull wasp2.sif docker://ghcr.io/mcvickerlab/wasp2:latest
+docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-count --help
+docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-map --help
+docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-analyze --help
+docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-ipscore --help
```
-### Build from Definition File
+Mount local data when running workflows:
```bash
-# Clone the repository
-git clone https://github.com/mcvickerlab/WASP2.git
-cd WASP2
-
-# Build the container
-singularity build wasp2.sif Singularity.def
+docker run --rm -v "$PWD":/data ghcr.io/mcvickerlab/wasp2:1.4.0 \
+ wasp2-count count-variants /data/sample.bam /data/variants.vcf.gz -o /data/counts.tsv
```
-### Run Commands
+## Intended Singularity / Apptainer Flow
-```bash
-# Run WASP2 commands
-singularity exec wasp2.sif wasp2-count --help
-
-# Process files (current directory is auto-bound)
-singularity exec wasp2.sif wasp2-count sample.bam variants.vcf.gz -o counts.tsv
-
-# With explicit bindings
-singularity exec --bind /scratch:/scratch wasp2.sif \
- wasp2-map make-reads /scratch/input.bam /scratch/variants.vcf
-```
-
-### SLURM Job Script Example
+For HPC environments using SIF images:
```bash
-#!/bin/bash
-#SBATCH --job-name=wasp2
-#SBATCH --cpus-per-task=8
-#SBATCH --mem=32G
-#SBATCH --time=4:00:00
-
-module load singularity
-
-CONTAINER=/path/to/wasp2.sif
-
-# Count variants (wasp2-count does not have --threads option)
-singularity exec ${CONTAINER} wasp2-count \
- input.bam \
- variants.vcf.gz \
- -o counts.tsv
-
-# WASP mapping filter (supports --threads)
-singularity exec ${CONTAINER} wasp2-map make-reads \
- input.bam \
- variants.vcf.gz \
- --threads ${SLURM_CPUS_PER_TASK} \
- --out_dir ./wasp_output
-```
-
-## Nextflow Integration
-
-### Configuration
-
-Add to your `nextflow.config`:
-
-```groovy
-profiles {
- docker {
- docker.enabled = true
- docker.runOptions = '-u $(id -u):$(id -g)'
- }
- singularity {
- singularity.enabled = true
- singularity.autoMounts = true
- singularity.cacheDir = "${HOME}/.singularity/cache"
- }
-}
-
-process {
- withLabel: 'wasp2' {
- container = 'mcvickerlab/wasp2:latest'
- }
-}
-```
-
-### Running Pipelines
-
-```bash
-# With Docker
-nextflow run main.nf -profile docker
-
-# With Singularity
-nextflow run main.nf -profile singularity
-```
-
-## Building Locally
-
-### Docker Build
-
-```bash
-# Clone repository
-git clone https://github.com/mcvickerlab/WASP2.git
-cd WASP2
-
-# Build image
-docker build -t wasp2:local .
-
-# Test the build
-docker run --rm wasp2:local wasp2-count --version
-docker run --rm wasp2:local python -c "import wasp2_rust; print('OK')"
+singularity pull wasp2.sif docker://ghcr.io/mcvickerlab/wasp2:1.4.0
+singularity exec wasp2.sif wasp2-count --help
```
-### Manual Build (for maintainers)
-
-Note: Currently only `linux/amd64` is supported.
+or:
```bash
-# Set up buildx
-docker buildx create --name wasp2builder --use
-
-# Build with version argument
-docker buildx build \
- --platform linux/amd64 \
- --build-arg VERSION=1.3.0 \
- -t mcvickerlab/wasp2:1.3.0 \
- -t mcvickerlab/wasp2:latest \
- --push .
+apptainer pull wasp2.sif docker://ghcr.io/mcvickerlab/wasp2:1.4.0
+apptainer exec wasp2.sif wasp2-count --help
```
-## Container Contents
-
-The WASP2 container includes:
+These commands follow the standard container path for Docker-hosted images on
+HPC systems, but they were not exercised during this release because no
+Singularity/Apptainer runtime was available; verify them on your cluster.
-### Python Environment
-- Python 3.10+ (container uses 3.11)
-- wasp2 package with Rust extension
-- Core: pysam, pandas (<2.0), numpy, scipy, polars
-- CLI: typer, rich
-- Single-cell: anndata, scanpy (optional)
-
-### Rust Components
-- Pre-built `wasp2_rust` Python extension
-- Compiled with release optimizations
-
-### CLI Tools
-
-Each tool has subcommands for different analysis modes:
-
-- **`wasp2-count`** - Allele counting
- - `count-variants` - Bulk allele counting at heterozygous sites (default)
- - `count-variants-sc` - Single-cell allele counting
-
-- **`wasp2-map`** - WASP mapping filter
- - `make-reads` - Generate reads with swapped alleles for remapping
- - `filter-remapped` - Filter remapped reads using WASP algorithm
-
-- **`wasp2-analyze`** - Statistical analysis
- - `find-imbalance` - Calculate allelic imbalance
- - `find-imbalance-sc` - Single-cell allelic imbalance analysis
- - `compare-imbalance` - Compare imbalance between cell types/groups
-
-### Bioinformatics Tools
-- samtools
-- bcftools
-- bedtools
-- tabix
-
-## Troubleshooting
-
-### Permission Issues (Docker)
+## Mapping Example
```bash
-# Run as current user
-docker run --rm -u $(id -u):$(id -g) -v $(pwd):/data mcvickerlab/wasp2 ...
+docker run --rm -v "$PWD":/data ghcr.io/mcvickerlab/wasp2:1.4.0 \
+ wasp2-map make-reads /data/sample.bam /data/variants.vcf.gz \
+ --samples sample1 \
+ --out_dir /data/remap_dir
```
-### Cache Issues (Singularity)
+After realigning the swapped FASTQ reads with your aligner of choice:
```bash
-# Clear Singularity cache
-singularity cache clean
-
-# Use different cache directory
-export SINGULARITY_CACHEDIR=/scratch/singularity_cache
+docker run --rm -v "$PWD":/data ghcr.io/mcvickerlab/wasp2:1.4.0 \
+ wasp2-map filter-remapped /data/remapped.bam \
+ --wasp_data_json /data/remap_dir/sample_wasp_data_files.json \
+ --out_bam /data/filtered.bam
```
-### Verify Installation
+## Counting Example
```bash
-# Docker
-docker run --rm mcvickerlab/wasp2 wasp2-count --version
-docker run --rm mcvickerlab/wasp2 python -c "import wasp2_rust; print('Rust extension OK')"
-
-# Singularity
-singularity exec wasp2.sif wasp2-count --version
-singularity exec wasp2.sif python -c "import wasp2_rust; print('Rust extension OK')"
+docker run --rm -v "$PWD":/data ghcr.io/mcvickerlab/wasp2:1.4.0 \
+ wasp2-count count-variants /data/filtered.bam /data/variants.vcf.gz \
+ --samples sample1 \
+ --region /data/genes.gtf \
+ --out_file /data/counts.tsv
```
-## GitHub Actions Secrets Setup
-
-To enable automated container builds, repository maintainers must configure:
-
-1. **DockerHub Secrets** (Settings → Secrets and variables → Actions):
- - `DOCKERHUB_USERNAME`: Your DockerHub username
- - `DOCKERHUB_TOKEN`: DockerHub access token (Account Settings → Security → Access Tokens)
-
-2. **GitHub Container Registry**: Uses `GITHUB_TOKEN` automatically (no setup needed)
-
-## Related Documentation
+## Notes
-- [Nextflow Pipelines](../pipelines/nf-atacseq/README.md)
-- [WASP2 Ecosystem](WASP2_ECOSYSTEM.md)
-- [GitHub Repository](https://github.com/mcvickerlab/WASP2)
+- The image contains the WASP2 package plus `samtools`, `bcftools`, and `bedtools`.
+- The documented public mapping workflow is `make-reads -> realign -> filter-remapped`.
+- `wasp2-ipscore` is present in the container alongside the main analysis tools.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 1a5fc6a..ab712bd 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -60,6 +60,7 @@ Documentation
user_guide/mapping
user_guide/analysis
user_guide/single_cell
+ user_guide/ipscore
.. toctree::
:maxdepth: 2
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index f039a97..51e9c15 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -1,25 +1,16 @@
Installation
============
-Via Bioconda (Recommended)
---------------------------
-
-`Bioconda `_ installs WASP2 and **all** dependencies
-(samtools, bcftools, bedtools, htslib) in one command. Requires
-`miniforge `_.
+Via Bioconda
+------------
.. code-block:: bash
mamba install -c conda-forge -c bioconda wasp2
-Or with conda:
-
-.. code-block:: bash
-
- conda install -c conda-forge -c bioconda wasp2
-
-Available for Linux (x86_64, aarch64) and macOS (Intel, Apple Silicon) with
-Python 3.10-3.13.
+Bioconda installs WASP2 together with the external command-line dependencies
+required by the workflows, including ``samtools``, ``bcftools``, and
+``bedtools``.
Via PyPI
--------
@@ -28,73 +19,58 @@ Via PyPI
pip install wasp2
-Pre-built wheels include the Rust extension and bundled htslib for Linux
-(x86_64, aarch64) and macOS (Intel, Apple Silicon) with Python 3.10-3.13.
-
-.. note::
-
- The PyPI package does not include samtools, bcftools, or bedtools.
- Install them separately before running WASP2:
-
- * On Ubuntu/Debian: ``sudo apt-get install bcftools bedtools samtools``
- * On macOS: ``brew install bcftools bedtools samtools``
- * Via conda: ``mamba install -c bioconda samtools bcftools bedtools``
+The PyPI wheel includes the WASP2 Python package and Rust extension, but it
+does not install external tools such as ``samtools``, ``bcftools``, or
+``bedtools``. Install those separately before running mapping or counting.
Via Docker
----------
-WASP2 is available as a multi-platform Docker image (linux/amd64 + linux/arm64)
-with all dependencies pre-installed:
+The Docker image bundles WASP2 together with its external tool dependencies
+(``samtools``, ``bcftools``, ``bedtools``) and is the most reproducible option.
.. code-block:: bash
docker pull ghcr.io/mcvickerlab/wasp2:1.4.0
- # Run a command
- docker run --rm -v /path/to/data:/data ghcr.io/mcvickerlab/wasp2:1.4.0 \
- wasp2-count count-variants /data/sample.bam /data/variants.vcf
-
- # Interactive shell
- docker run -it --rm -v /path/to/data:/data ghcr.io/mcvickerlab/wasp2:1.4.0 bash
-
-The image includes samtools, bcftools, bedtools, and the Rust-accelerated backend.
+ docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-count --help
+ docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-map --help
+ docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-analyze --help
+ docker run --rm ghcr.io/mcvickerlab/wasp2:1.4.0 wasp2-ipscore --help
Via Singularity/Apptainer
-------------------------
-For HPC environments that don't support Docker:
+For HPC environments that require SIF images:
.. code-block:: bash
singularity pull wasp2.sif docker://ghcr.io/mcvickerlab/wasp2:1.4.0
singularity exec wasp2.sif wasp2-count --help
-Development Installation
-------------------------
-
-For contributing or building from source:
+or with Apptainer:
.. code-block:: bash
- git clone https://github.com/mcvickerlab/WASP2.git
- cd WASP2
- pixi install # resolves all deps including Rust toolchain
- pixi run verify # build + test suite in one step
+ apptainer pull wasp2.sif docker://ghcr.io/mcvickerlab/wasp2:1.4.0
+ apptainer exec wasp2.sif wasp2-count --help
-`pixi `_ resolves Python, Rust toolchain, samtools, bcftools,
-bedtools, and htslib automatically. No system packages required.
+.. note::
-Compiling pgenlib
-~~~~~~~~~~~~~~~~~
+   The Docker workflow above was validated end-to-end. The ``singularity`` /
+   ``apptainer`` examples follow the standard pull/exec workflow for
+   Docker-hosted images but were not exercised during this release; please
+   report any issues you encounter on your cluster.
-WASP2 optionally uses `pgenlib `_
-for PLINK2 file I/O. This requires a C compiler:
+Development Installation
+------------------------
-* On Ubuntu/Debian: ``sudo apt-get install build-essential python3-dev``
-* On macOS: ``xcode-select --install``
-* On RHEL/Fedora: ``sudo dnf install gcc gcc-c++ python3-devel``
+.. code-block:: bash
-pgenlib is installed automatically via pip when you install WASP2.
+ git clone https://github.com/mcvickerlab/WASP2.git
+ cd WASP2
+ pixi install
+ pixi run verify
Verification
------------
@@ -104,3 +80,4 @@ Verification
wasp2-count --help
wasp2-map --help
wasp2-analyze --help
+ wasp2-ipscore --help
diff --git a/docs/source/methods/mapping_filter.rst b/docs/source/methods/mapping_filter.rst
index c3257f2..37b24d7 100644
--- a/docs/source/methods/mapping_filter.rst
+++ b/docs/source/methods/mapping_filter.rst
@@ -132,7 +132,10 @@ The WASP filter compares original and remapped positions:
.. code-block:: bash
- wasp2-map filter-remapped original_to_remap.bam remapped.bam output.bam
+ wasp2-map filter-remapped \
+ remapped.bam \
+ --wasp_data_json sample_wasp_data_files.json \
+ --out_bam output.bam
A read passes if:
@@ -272,7 +275,7 @@ The typical WASP2 workflow:
# Step 2: Create swapped reads
wasp2-map make-reads sample.bam variants.vcf \
- --samples SAMPLE1 --out-dir wasp_temp/
+ --samples SAMPLE1 --out_dir wasp_temp
# Step 3: Remap swapped reads (SAME ALIGNER!)
bwa mem -M genome.fa wasp_temp/swapped_r1.fq wasp_temp/swapped_r2.fq | \
diff --git a/docs/source/tutorials/quickstart_mapping.rst b/docs/source/tutorials/quickstart_mapping.rst
index 57baffb..41317a0 100644
--- a/docs/source/tutorials/quickstart_mapping.rst
+++ b/docs/source/tutorials/quickstart_mapping.rst
@@ -68,7 +68,7 @@ Identify reads overlapping heterozygous SNPs and generate allele-swapped version
wasp2-map make-reads sample.bam variants.vcf.gz \
--samples SAMPLE1 \
- --out-dir wasp_output/
+ --out_dir wasp_output
This produces (where ``sample`` is your BAM file prefix):
@@ -103,9 +103,9 @@ The WASP filter compares original and remapped positions:
.. code-block:: bash
wasp2-map filter-remapped \
- wasp_output/sample_to_remap.bam \
wasp_output/sample_remapped.bam \
- wasp_output/sample_wasp_filtered.bam
+ --wasp_data_json wasp_output/sample_wasp_data_files.json \
+ --out_bam wasp_output/sample_wasp_filtered.bam
Understanding Filter Statistics
-------------------------------
@@ -172,7 +172,7 @@ Complete Workflow Script
echo "Step 1: Creating swapped reads..."
wasp2-map make-reads $BAM $VCF \
--samples $SAMPLE \
- --out-dir $OUTDIR/
+ --out_dir $OUTDIR
# Step 2: Remap with same aligner
echo "Step 2: Remapping swapped reads..."
@@ -185,9 +185,9 @@ Complete Workflow Script
# Step 3: Filter biased reads
echo "Step 3: Filtering biased reads..."
wasp2-map filter-remapped \
- $OUTDIR/${PREFIX}_to_remap.bam \
$OUTDIR/${PREFIX}_remapped.bam \
- $OUTDIR/${PREFIX}_wasp_filtered.bam
+ --wasp_data_json $OUTDIR/${PREFIX}_wasp_data_files.json \
+ --out_bam $OUTDIR/${PREFIX}_wasp_filtered.bam
# Step 4: Merge with non-overlapping reads
echo "Step 4: Merging final BAM..."
diff --git a/docs/source/tutorials/scrna_seq.rst b/docs/source/tutorials/scrna_seq.rst
index dcdb325..1ee8d54 100644
--- a/docs/source/tutorials/scrna_seq.rst
+++ b/docs/source/tutorials/scrna_seq.rst
@@ -162,7 +162,7 @@ Run the single-cell allele counting:
cellranger_output/outs/possorted_genome_bam.bam \
phased_variants.vcf.gz \
barcodes_celltype.tsv \
- --region genes.gtf \
+ --feature genes.gtf \
--samples SAMPLE_ID \
--out_file allele_counts.h5ad
@@ -293,7 +293,7 @@ Low Read Counts
Single-cell data is sparse. Consider:
* Using pseudo-bulk aggregation by cell type
-* Lowering ``--min-count`` threshold
+* Lowering ``--min`` / ``--min_count`` threshold
* Focusing on highly expressed genes
Memory Issues
diff --git a/docs/source/user_guide/analysis.rst b/docs/source/user_guide/analysis.rst
index f486d11..23b015e 100644
--- a/docs/source/user_guide/analysis.rst
+++ b/docs/source/user_guide/analysis.rst
@@ -4,520 +4,92 @@ Analysis Module
Overview
--------
-The analysis module detects statistically significant allelic imbalance using beta-binomial models.
+``wasp2-analyze`` runs allelic imbalance analysis on bulk or single-cell count
+outputs.
-Purpose
--------
-
-* Detect allelic imbalance at genomic regions
-* Control for biological and technical variation
-* Support single-cell and bulk RNA-seq
-* Compare imbalance between groups/conditions
-
-Statistical Models
-------------------
-
-Beta-Binomial Model
-~~~~~~~~~~~~~~~~~~~
-
-WASP2 uses beta-binomial distribution to model:
-* Overdispersion (variation beyond binomial)
-* Biological variability between regions
-* Technical noise in sequencing
-
-The model:
-* Null hypothesis: Equal expression from both alleles (p=0.5)
-* Alternative: Allelic imbalance (p ≠ 0.5)
-* FDR correction for multiple testing
+Commands:
-Dispersion Parameter
-~~~~~~~~~~~~~~~~~~~~
-
-Two models:
-1. **Single**: One dispersion parameter for all regions
-2. **Linear**: Dispersion varies with read depth
-
-CLI Usage
----------
-
-Basic Analysis
-~~~~~~~~~~~~~~
-
-.. code-block:: bash
+* ``find-imbalance``: bulk allelic imbalance from TSV counts
+* ``find-imbalance-sc``: per-group single-cell imbalance from ``.h5ad`` counts
+* ``compare-imbalance``: differential imbalance between single-cell groups
- wasp2-analyze find-imbalance counts.tsv
-
-Options
-~~~~~~~
+Bulk Analysis
+-------------
.. code-block:: bash
wasp2-analyze find-imbalance \
counts.tsv \
- --min-count 10 \
+ --min 10 \
--pseudocount 1 \
- --model single \
- --output results.tsv
-
-Parameters
-----------
-
-``--min-count``
-~~~~~~~~~~~~~~~
-
-Minimum total read count per region (default: 10):
-
-.. code-block:: bash
-
- --min-count 20 # More stringent
-
-``--pseudocount``
-~~~~~~~~~~~~~~~~~
-
-Pseudocount added to avoid zero counts (default: 1):
-
-.. code-block:: bash
-
- --pseudocount 0 # No pseudocount
-
-``--model``
-~~~~~~~~~~~
-
-Dispersion model (default: single):
-
-.. code-block:: bash
-
- --model linear # Depth-dependent dispersion
-
-``--phased``
-~~~~~~~~~~~~
-
-Use phased genotype information:
-
-.. code-block:: bash
-
- --phased # Requires phased VCF
-
-Output Format
--------------
-
-Tab-separated file with columns:
-
-Statistical Columns
-~~~~~~~~~~~~~~~~~~~
-
-* ``region``: Genomic region identifier
-* ``ref_count``: Total reference allele counts
-* ``alt_count``: Total alternate allele counts
-* ``p_value``: Likelihood ratio test p-value
-* ``fdr_pval``: FDR-corrected p-value
-* ``effect_size``: Log2 fold-change (ref/alt)
+ --output ai_results.tsv
-Model Parameters
-~~~~~~~~~~~~~~~~
+Useful options:
-* ``dispersion``: Beta-binomial dispersion parameter
-* ``log_likelihood_null``: Null model log-likelihood
-* ``log_likelihood_alt``: Alternative model log-likelihood
-
-Interpreting Results
---------------------
-
-Significant Imbalance
-~~~~~~~~~~~~~~~~~~~~~
-
-FDR < 0.05 indicates significant imbalance:
-
-* **Biological**: cis-regulatory variation, ASE
-* **Technical**: mapping bias (check WASP), PCR artifacts
-
-Effect Size
-~~~~~~~~~~~
-
-* log2FC > 1: Strong imbalance (2-fold difference)
-* log2FC > 2: Very strong imbalance (4-fold difference)
+* ``--min`` / ``--min_count``: minimum total count threshold
+* ``--pseudocount``: pseudocount added before modeling
+* ``--model``: dispersion model (currently ``single`` or ``linear``)
+* ``--output`` / ``--out_file`` / ``-o``: output TSV path
+* ``--region_col``: explicit region column name if auto-detection is not desired
+* ``--groupby``: group on an alternate annotation column, such as a parent gene column
Single-Cell Analysis
--------------------
-For single-cell data, WASP2 detects allelic imbalance within specific cell populations
-using aggregated counts across cells of the same type.
-
.. code-block:: bash
wasp2-analyze find-imbalance-sc \
- adata.h5ad \
- barcode_map.tsv \
- --sample donor1 \
- --min-count 10
-
-Output: Per-celltype TSV files (``ai_results_[CELLTYPE].tsv``).
-
-Single-Cell Comparative Imbalance
----------------------------------
-
-Overview
-~~~~~~~~
-
-The comparative imbalance analysis detects **differential allelic imbalance** between
-cell types, conditions, or biological groups. This is useful for identifying:
-
-* Cell-type-specific regulatory variation
-* Sex differences in chromatin accessibility
-* Condition-dependent allelic effects (e.g., treatment vs control)
-* Developmental stage-specific imbalance
-
-Statistical Model
-~~~~~~~~~~~~~~~~~
-
-The analysis uses a **likelihood ratio test (LRT)** comparing two hypotheses:
-
-* **Null (H0)**: Both groups share the same allelic imbalance (μ_combined)
-* **Alternative (H1)**: Groups have different imbalance (μ₁ ≠ μ₂)
-
-The test statistic follows a chi-squared distribution with 1 degree of freedom:
-
-.. code-block:: text
-
- LRT = -2 × (log L_null - log L_alt)
- p-value = P(χ²(df=1) > LRT)
-
-Input Format: Count Matrix (.h5ad)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The count matrix must be an AnnData object with the following structure:
-
-.. code-block:: text
-
- AnnData object (n_obs × n_vars)
- ├── .obs # SNP metadata (rows)
- │ ├── index # SNP identifiers (0, 1, 2, ...)
- │ └── [sample_name] # Genotypes: '0|1', '1|0', '0/1', '1/0'
- │
- ├── .var # Cell metadata (columns)
- │ └── group # Cell type/group assignment
- │
- ├── .layers
- │ ├── "ref" # Reference allele counts (sparse matrix)
- │ └── "alt" # Alternate allele counts (sparse matrix)
- │
- └── .uns
- ├── feature # DataFrame: SNP → region mapping
- └── samples # List of sample names
-
-**Example count matrix creation:**
-
-.. code-block:: bash
-
- # Generate counts from BAM + variants + barcodes
- wasp2-count count-variants-sc \
- aligned.bam \
- variants.vcf.gz \
- barcodes.txt \
- --samples NA12878 \
- --feature peaks.bed \
- --out_file allele_counts.h5ad
-
-Input Format: Barcode Map (TSV)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-A two-column TSV file (no header) mapping cell barcodes to groups:
-
-.. code-block:: text
-
- AAACGAACAGTCAGTT-1 excitatory_neurons
- AAACGAAGTCGCTCTA-1 inhibitory_neurons
- AAACGAAGTGAACCTA-1 excitatory_neurons
- AAAGGATCATCGATGT-1 astrocytes
- AAAGGATGTGCAACGA-1 microglia
-
-**Requirements:**
-
-* Tab-separated (``\t``)
-* No header row
-* Barcodes must match those in the count matrix
-* Groups can be cell types, conditions, sex, or any categorical variable
-
-Basic Usage
-~~~~~~~~~~~
-
-Compare imbalance between all groups:
-
-.. code-block:: bash
-
- wasp2-analyze compare-imbalance \
- allele_counts.h5ad \
- barcode_map.tsv
-
-Compare specific groups only:
-
-.. code-block:: bash
-
- wasp2-analyze compare-imbalance \
allele_counts.h5ad \
- barcode_map.tsv \
- --groups "excitatory_neurons,inhibitory_neurons"
-
-Output Format
-~~~~~~~~~~~~~
-
-Results are written to ``ai_results_[GROUP1]_[GROUP2].tsv``:
-
-.. list-table::
- :header-rows: 1
- :widths: 20 80
-
- * - Column
- - Description
- * - ``region``
- - Genomic region identifier (peak or gene)
- * - ``num_snps``
- - Number of shared heterozygous SNPs in region
- * - ``combined_mu``
- - Reference allele frequency under null hypothesis (shared)
- * - ``mu1``
- - Reference allele frequency in group 1
- * - ``mu2``
- - Reference allele frequency in group 2
- * - ``null_ll``
- - Log-likelihood under null (shared μ)
- * - ``alt_ll``
- - Log-likelihood under alternative (separate μ values)
- * - ``pval``
- - Likelihood ratio test p-value
- * - ``fdr_pval``
- - FDR-corrected p-value (Benjamini-Hochberg)
-
-**Interpreting results:**
-
-* ``fdr_pval < 0.05``: Significant differential imbalance
-* ``|mu1 - mu2| > 0.1``: Meaningful effect size (~20% difference)
-* ``mu < 0.5``: Alternate allele favored; ``mu > 0.5``: Reference allele favored
-
-Parameters
-~~~~~~~~~~
-
-``--groups``
- Comma-separated list of groups to compare. If omitted, compares all pairwise
- combinations found in the barcode map.
-
-``--min``
- Minimum total allele count per region per group (default: 10). Higher values
- increase statistical power but reduce the number of testable regions.
-
-``--pseudocount``
- Pseudocount added to avoid zero counts (default: 1). Affects dispersion estimation.
-
-``--sample``
- Sample name for heterozygous SNP filtering. Required if multiple samples are
- present in the count matrix.
-
-``--phased``
- Use phased genotype information from VCF. Requires genotypes in ``0|1`` or ``1|0``
- format. Improves power when haplotype phase is known.
-
-``-z/--z_cutoff``
- Remove SNPs with counts exceeding this z-score threshold. Useful for removing
- outliers caused by mapping artifacts or copy number variation.
-
-Example: Sex Differences Analysis
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Identify chromatin accessibility regions with sex-biased allelic imbalance:
+ barcode_groups.tsv \
+ --sample SAMPLE1 \
+ --min 10 \
+ --out_file ai_results.tsv
-**Step 1: Prepare barcode map with sex labels**
+``barcode_groups.tsv`` is a two-column TSV:
.. code-block:: text
- # barcode_sex_map.tsv
- AAACGAACAGTCAGTT-1 male
- AAACGAAGTCGCTCTA-1 female
- AAACGAAGTGAACCTA-1 male
- AAAGGATCATCGATGT-1 female
-
-**Step 2: Run comparative analysis**
-
-.. code-block:: bash
-
- wasp2-analyze compare-imbalance \
- allele_counts.h5ad \
- barcode_sex_map.tsv \
- --groups "male,female" \
- --min 20 \
- --out_file ai_results_sex_comparison.tsv
-
-**Step 3: Filter significant results**
-
-.. code-block:: bash
-
- # Extract regions with significant sex differences
- awk -F'\t' 'NR==1 || $9 < 0.05' ai_results_male_female.tsv > significant_sex_diff.tsv
-
- # Find regions with large effect size
- awk -F'\t' 'NR==1 || ($4 - $5 > 0.15 || $5 - $4 > 0.15)' significant_sex_diff.tsv
-
-Example: snATAC-seq Cell Type Analysis
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Complete workflow for analyzing cell-type-specific chromatin accessibility imbalance:
-
-**Step 1: Count alleles from snATAC-seq BAM**
-
-.. code-block:: bash
-
- # Extract valid barcodes from Cell Ranger output
- zcat filtered_barcodes.tsv.gz > barcodes.txt
-
- # Count alleles at heterozygous SNPs overlapping peaks
- wasp2-count count-variants-sc \
- possorted_bam.bam \
- phased_variants.vcf.gz \
- barcodes.txt \
- --samples sample1 \
- --feature atac_peaks.bed \
- --out_file snATAC_counts.h5ad
-
-**Step 2: Create barcode-to-celltype mapping**
-
-Export cell type annotations from your clustering analysis (e.g., Seurat, ArchR):
-
-.. code-block:: r
-
- # R/Seurat example
- write.table(
- data.frame(barcode = Cells(seurat_obj),
- celltype = Idents(seurat_obj)),
- "barcode_celltype_map.tsv",
- sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE
- )
-
-**Step 3: Run single-cell imbalance analysis**
-
-.. code-block:: bash
+ BARCODE	GROUP
- # Per-celltype analysis
- wasp2-analyze find-imbalance-sc \
- snATAC_counts.h5ad \
- barcode_celltype_map.tsv \
- --sample sample1 \
- --phased \
- --min 10 \
- -z 3.0
+The command writes one output file per group using the requested output prefix.
-**Step 4: Compare imbalance between cell types**
+Comparative Single-Cell Analysis
+--------------------------------
.. code-block:: bash
- # Compare specific cell types
wasp2-analyze compare-imbalance \
- snATAC_counts.h5ad \
- barcode_celltype_map.tsv \
- --sample sample1 \
- --groups "excitatory,inhibitory,astrocyte" \
- --phased \
- --min 15
-
- # This produces:
- # - ai_results_excitatory_inhibitory.tsv
- # - ai_results_excitatory_astrocyte.tsv
- # - ai_results_inhibitory_astrocyte.tsv
-
-**Step 5: Identify cell-type-specific regulatory regions**
-
-.. code-block:: bash
-
- # Find peaks with differential imbalance between excitatory and inhibitory neurons
- awk -F'\t' '$9 < 0.01 && ($4 > 0.6 || $4 < 0.4)' \
- ai_results_excitatory_inhibitory.tsv > neuron_subtype_specific_peaks.tsv
-
-Best Practices for Single-Cell Analysis
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-**Data Quality:**
-
-* Use WASP-filtered BAM files to remove mapping bias
-* Require ≥10 total counts per region per group (``--min 10``)
-* Apply z-score filtering to remove outliers (``-z 3.0``)
-
-**Statistical Power:**
-
-* Merge similar cell types if individual populations have low coverage
-* Use phased genotypes when available (``--phased``)
-* Focus on regions with multiple SNPs for better estimates
-
-**Interpretation:**
-
-* Consider biological replication across samples
-* Validate top hits with orthogonal methods (allele-specific CRISPR, etc.)
-* Integrate with eQTL data to identify causal variants
-
-Example Workflow
-----------------
-
-.. code-block:: bash
-
- # 1. Count alleles
- wasp2-count count-variants \
- wasp_filtered.bam \
- variants.vcf \
- --region genes.gtf \
- --samples NA12878 \
- --output counts.tsv
-
- # 2. Analyze imbalance
- wasp2-analyze find-imbalance \
- counts.tsv \
- --min-count 20 \
- --model single \
- --output imbalance.tsv
-
- # 3. Filter significant results
- awk '$5 < 0.05' imbalance.tsv > significant.tsv
-
-Best Practices
---------------
-
-Read Depth
-~~~~~~~~~~
-
-* Minimum 10 reads per region (use ``--min-count``)
-* Higher depth = more power
-* Consider downsampling very deep regions
-
-Quality Control
-~~~~~~~~~~~~~~~
+ allele_counts.h5ad \
+ barcode_groups.tsv \
+ --sample SAMPLE1 \
+ --groups B_cell T_cell \
+ --out_file compare_ai.tsv
-* Use WASP-filtered reads
-* Remove low-complexity regions
-* Filter low-quality SNPs
+This compares allelic imbalance between the requested groups and writes one TSV
+per comparison.
-Multiple Testing
-~~~~~~~~~~~~~~~~
+Notes
+-----
-* FDR correction is automatic
-* Consider Bonferroni for very important regions
-* Validate top hits experimentally
+* If your count file contains genotype columns for multiple samples, you must
+ provide ``--sample`` for single-cell analysis.
+* For bulk analysis, region columns are auto-detected when present in the count
+ TSV. Use ``--region_col`` only when you need to override that behavior.
-Common Issues
--------------
-
-No Significant Results
-~~~~~~~~~~~~~~~~~~~~~~
+Outputs
+-------
-* Increase sample size
-* Check read depth (use deeper sequencing)
-* Verify heterozygous SNPs present
+Typical bulk outputs include:
-Many Significant Results
-~~~~~~~~~~~~~~~~~~~~~~~~
+* region or feature identifier
+* aggregated ``ref_count`` and ``alt_count``
+* p-values and FDR-adjusted p-values
-* Check for batch effects
-* Verify WASP filtering was applied
-* Consider stricter FDR threshold
+Typical single-cell outputs include the same statistics stratified by barcode
+group.
Next Steps
----------
-* Validate results with qPCR or DNA-seq
-* Integrate with eQTL data
-* Perform pathway enrichment analysis
+* :doc:`counting` to generate bulk or single-cell counts
+* :doc:`/tutorials/comparative_imbalance` for group-comparison workflows
diff --git a/docs/source/user_guide/counting.rst b/docs/source/user_guide/counting.rst
index 54db55f..b6a0ba6 100644
--- a/docs/source/user_guide/counting.rst
+++ b/docs/source/user_guide/counting.rst
@@ -4,195 +4,135 @@ Counting Module
Overview
--------
-The counting module quantifies allele-specific read counts at heterozygous SNP positions. It's the first step in allelic imbalance analysis.
+``wasp2-count`` counts reads supporting reference and alternate alleles at
+variant positions in BAM files.
-Purpose
-~~~~~~~
+It provides two commands:
-* Count reads supporting reference vs alternate alleles
-* Filter by sample genotype (heterozygous sites)
-* Annotate with genomic regions (genes, peaks)
-* Support single-cell RNA-seq
+* ``count-variants`` for bulk data
+* ``count-variants-sc`` for single-cell data with ``CB``-tagged barcodes
-When to Use
-~~~~~~~~~~~
-
-Use counting when you have:
-* Aligned reads (BAM file)
-* Variant calls (VCF file)
-* Want to quantify allele-specific expression
-
-CLI Usage
----------
+Bulk Counting
+-------------
-Basic Command
-~~~~~~~~~~~~~
+Basic usage:
.. code-block:: bash
- wasp2-count count-variants BAM_FILE VCF_FILE
+ wasp2-count count-variants sample.bam variants.vcf.gz --out_file counts.tsv
-Full Options
-~~~~~~~~~~~~
+With sample filtering and region annotation:
.. code-block:: bash
wasp2-count count-variants \
- input.bam \
- variants.vcf \
- --samples sample1,sample2 \
+ sample.bam \
+ variants.vcf.gz \
+ --samples SAMPLE1 \
--region genes.gtf \
--out_file counts.tsv
-Input Requirements
-------------------
-
-BAM File
-~~~~~~~~
+Supported region files:
-* Aligned reads (single-end or paired-end)
-* Indexed (.bai file in same directory)
-* Sorted by coordinate
+* BED
+* MACS2 ``narrowPeak`` / ``broadPeak``
+* GTF
+* GFF3
-VCF File
-~~~~~~~~
+For GTF/GFF3 inputs, WASP2 derives interval annotations from feature rows and
+defaults to ``gene`` features when present.
-* Variant calls with genotype information
-* Heterozygous SNPs (GT=0|1 or 1|0)
-* Can include sample-specific genotypes
+Useful options:
-Optional: Region File
-~~~~~~~~~~~~~~~~~~~~~
+* ``--samples`` / ``-s``: select het sites for one or more samples
+* ``--region`` / ``-r``: restrict/annotate variants by overlapping regions
+* ``--gene_feature``: choose the GTF/GFF3 feature type
+* ``--gene_attribute``: choose the GTF/GFF3 attribute used as the feature ID
+* ``--gene_parent``: choose the parent/grouping attribute for gene annotations
+* ``--use_region_names``: prefer region names instead of coordinate strings
+* ``--include-indels``: count indels in addition to SNPs
-Annotate SNPs overlapping genes/peaks:
+Output columns always include:
-* GTF/GFF3 format (genes)
-* BED format (peaks, regions)
-* narrowPeak format (ATAC-seq, ChIP-seq)
+* ``chrom``
+* ``pos`` or ``pos0`` / ``pos`` depending on input path
+* ``ref``
+* ``alt``
+* ``ref_count``
+* ``alt_count``
+* ``other_count``
-Parameters
-----------
+When sample filtering is active, genotype columns are included. When region
+annotation is active, region or gene columns are included as well.
-``--samples`` / ``-s``
-~~~~~~~~~~~~~~~~~~~~~~
+Single-Cell ATAC Counting
+-------------------------
-Filter SNPs heterozygous in specified samples:
+Single-cell counting is designed for **scATAC-seq** data. It requires a BAM
+with ``CB`` tags and a positional barcode file containing one barcode per line.
.. code-block:: bash
- --samples sample1,sample2,sample3
- # or
- --samples samples.txt # one per line
-
-``--region`` / ``-r``
-~~~~~~~~~~~~~~~~~~~~~
-
-Annotate SNPs with overlapping regions:
-
-.. code-block:: bash
-
- --region genes.gtf # Gene annotations
- --region peaks.bed # ATAC-seq peaks
- --region regions.gff3 # Custom regions
-
-``--out_file`` / ``-o``
-~~~~~~~~~~~~~~~~~~~~~~~
-
-Output file path (default: counts.tsv):
-
-.. code-block:: bash
-
- --out_file my_counts.tsv
-
-Output Format
--------------
-
-Tab-separated file with columns:
+ wasp2-count count-variants-sc \
+ sc_atac.bam \
+ variants.vcf.gz \
+ barcodes.tsv \
+ --samples sample1 \
+ --feature peaks.bed \
+ --out_file allele_counts.h5ad
-Basic Columns
-~~~~~~~~~~~~~
+Important points:
-* ``chr``: Chromosome
-* ``pos``: SNP position (1-based)
-* ``ref``: Reference allele
-* ``alt``: Alternate allele
-* ``ref_count``: Reads supporting reference
-* ``alt_count``: Reads supporting alternate
-* ``other_count``: Reads supporting other alleles
+* ``barcodes.tsv`` is a positional argument, not ``--barcode_map``
+* ``--feature`` and ``--region`` are aliases on the single-cell command
+* Accepts BED and MACS2 peak files (GTF/GFF3 are supported only by the bulk ``count-variants`` command)
-Optional Columns (with --region)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The output is an AnnData ``.h5ad`` file with:
-* ``gene_id``: Overlapping gene
-* ``gene_name``: Gene symbol
-* ``feature``: Feature type (exon, intron, etc.)
+* sparse count layers for ``ref``, ``alt``, and ``other``
+* variant metadata in ``adata.obs``
+* barcode names in ``adata.var_names``
+* feature-to-variant mapping in ``adata.uns["feature"]`` when annotations are used
-Example Workflow
-----------------
+Examples
+--------
-1. Basic Counting
-~~~~~~~~~~~~~~~~~
+Count variants without regional annotation:
.. code-block:: bash
- wasp2-count count-variants sample.bam variants.vcf
+ wasp2-count count-variants \
+ filtered.bam \
+ variants.vcf.gz \
+ --samples SAMPLE1 \
+ --out_file counts.tsv
-2. Filter by Sample
-~~~~~~~~~~~~~~~~~~~
+Count variants inside peaks:
.. code-block:: bash
wasp2-count count-variants \
- sample.bam \
- variants.vcf \
- --samples NA12878
+ filtered.bam \
+ variants.vcf.gz \
+ --samples SAMPLE1 \
+ --region peaks.bed \
+ --out_file counts_peaks.tsv
-3. Annotate with Genes
-~~~~~~~~~~~~~~~~~~~~~~
+Count variants inside genes:
.. code-block:: bash
wasp2-count count-variants \
- sample.bam \
- variants.vcf \
- --samples NA12878 \
+ filtered.bam \
+ variants.vcf.gz \
+ --samples SAMPLE1 \
--region genes.gtf \
- --out_file counts_annotated.tsv
-
-Single-Cell Counting
---------------------
-
-For single-cell RNA-seq:
-
-.. code-block:: bash
-
- wasp2-count count-variants-sc \
- sc_rnaseq.bam \
- variants.vcf \
- --barcode_map barcodes.tsv
-
-Output includes cell-type-specific counts.
-
-Common Issues
--------------
-
-Low Count Numbers
-~~~~~~~~~~~~~~~~~
-
-* Check BAM file coverage (``samtools depth``)
-* Verify VCF contains heterozygous SNPs
-* Ensure BAM and VCF use same reference genome
-
-No Output SNPs
-~~~~~~~~~~~~~~
-
-* Check if --samples filter is too restrictive
-* Verify VCF has genotype information (GT field)
-* Ensure BAM file is indexed
+ --gene_feature gene \
+ --gene_attribute gene_id \
+ --out_file counts_genes.tsv
Next Steps
----------
-After counting:
-* :doc:`analysis` - Detect allelic imbalance
-* :doc:`mapping` - Correct reference bias with WASP
+* :doc:`analysis` for statistical testing of allelic imbalance
+* :doc:`/user_guide/single_cell` for barcode grouping and single-cell workflows
diff --git a/docs/source/user_guide/ipscore.rst b/docs/source/user_guide/ipscore.rst
new file mode 100644
index 0000000..993fcbd
--- /dev/null
+++ b/docs/source/user_guide/ipscore.rst
@@ -0,0 +1,53 @@
+iPSCORE Utilities
+=================
+
+Overview
+--------
+
+``wasp2-ipscore`` provides helper commands for working with the iPSCORE
+multi-tissue allelic imbalance resource bundled with WASP2.
+
+Commands
+--------
+
+``inventory``
+~~~~~~~~~~~~~
+
+Validate the expected iPSCORE data inventory and optionally write a TSV report.
+
+.. code-block:: bash
+
+ wasp2-ipscore inventory --output inventory.tsv
+
+``manifest``
+~~~~~~~~~~~~
+
+Create a unified sample manifest across tissues and assays.
+
+.. code-block:: bash
+
+ wasp2-ipscore manifest --output manifest.csv --format csv
+
+``qtls``
+~~~~~~~~
+
+Load and summarize QTL resources, optionally filtering by tissue.
+
+.. code-block:: bash
+
+ wasp2-ipscore qtls --tissue iPSC --output qtls.tsv
+
+``validate``
+~~~~~~~~~~~~
+
+Run the combined inventory, manifest, and QTL validation workflow.
+
+.. code-block:: bash
+
+ wasp2-ipscore validate
+
+Notes
+-----
+
+These commands are resource-management utilities. They do not replace the main
+``wasp2-map``, ``wasp2-count``, or ``wasp2-analyze`` analysis workflows.
diff --git a/docs/source/user_guide/mapping.rst b/docs/source/user_guide/mapping.rst
index d38be18..48c8ed8 100644
--- a/docs/source/user_guide/mapping.rst
+++ b/docs/source/user_guide/mapping.rst
@@ -1,221 +1,148 @@
-Mapping Module (WASP)
-=====================
+Mapping Module
+==============
Overview
--------
-The WASP (Weighted Allele-Specific Mapping) algorithm corrects reference bias by remapping reads with all possible alleles.
+``wasp2-map`` implements the WASP remap-and-filter workflow for removing
+reference mapping bias before allele counting.
-What is Reference Bias?
-~~~~~~~~~~~~~~~~~~~~~~~~
+The public CLI has two commands:
-Reference bias occurs when reads containing alternate alleles align worse than reads with reference alleles, leading to false allelic imbalance signals.
+1. ``make-reads``: find reads overlapping sample variants and generate
+ allele-swapped FASTQ files for remapping
+2. ``filter-remapped``: keep only remapped reads that return to the same locus
-WASP Solution
-~~~~~~~~~~~~~
+There is no separate ``find-intersecting-snps`` command in WASP2. That overlap
+step is part of ``make-reads``.
-1. Identify reads overlapping heterozygous SNPs
-2. Generate alternative reads (swap alleles)
-3. Remap both original and swapped reads
-4. Keep only reads that map to the same location
-
-Purpose
--------
-
-* Correct reference bias in RNA-seq, ATAC-seq
-* Improve accuracy of allelic imbalance detection
-* Required before allele counting
-
-When to Use
-~~~~~~~~~~~
-
-Use WASP when:
-* Reads will be used for allelic analysis
-* Reference genome differs from sample genotype
-* High-confidence bias correction needed
-
-Workflow
---------
-
-Complete WASP workflow has 3 steps:
-
-Step 1: Find Intersecting SNPs
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Identify reads overlapping heterozygous SNPs:
-
-.. code-block:: bash
-
- wasp2-map find-intersecting-snps \
- input.bam \
- variants.vcf \
- --output intersecting.bam
-
-Output: BAM file with reads overlapping SNPs.
-
-Step 2: Generate Remapping Reads
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Typical Workflow
+----------------
-Create reads with swapped alleles:
+Step 1: Generate swapped reads
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code-block:: bash
wasp2-map make-reads \
- intersecting.bam \
- variants.vcf \
- --samples sample1 \
- --output remap_reads.fastq
+ sample.bam \
+ variants.vcf.gz \
+ --samples SAMPLE1 \
+ --out_dir wasp_output
-Output: FASTQ file(s) with alternative allele sequences.
+This writes:
-Step 3: Remap and Filter
-~~~~~~~~~~~~~~~~~~~~~~~~~
+* ``sample_to_remap.bam``: original reads that must be remapped
+* ``sample_keep.bam``: reads that never overlapped eligible variants
+* ``sample_swapped_alleles_r1.fq`` and ``sample_swapped_alleles_r2.fq``:
+ swapped FASTQ reads to realign
+* ``sample_wasp_data_files.json``: metadata for ``filter-remapped``
-User remaps with their aligner (BWA, STAR, etc.):
+Step 2: Realign swapped reads
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Use the same aligner and alignment settings used for the original BAM.
.. code-block:: bash
- # Example with BWA
- bwa mem -t 8 reference.fa remap_reads.fastq | \
- samtools sort -o remapped.bam -
+ bwa mem -M -t 8 genome.fa \
+ wasp_output/sample_swapped_alleles_r1.fq \
+ wasp_output/sample_swapped_alleles_r2.fq | \
+ samtools sort -o wasp_output/sample_remapped.bam -
+
+ samtools index wasp_output/sample_remapped.bam
-Then filter to consistent mappings:
+Step 3: Filter remapped reads
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code-block:: bash
- wasp2-map filt-remapped-reads \
- intersecting.bam \
- remapped.bam \
- --output filtered.bam
+ wasp2-map filter-remapped \
+ wasp_output/sample_remapped.bam \
+ --wasp_data_json wasp_output/sample_wasp_data_files.json \
+ --out_bam wasp_output/sample_wasp_filtered.bam
-Output: BAM file with bias-corrected reads.
+You can also provide ``to_remap_bam`` and ``keep_bam`` positionally instead of
+``--wasp_data_json``.
-CLI Reference
--------------
+Command Reference
+-----------------
-find-intersecting-snps
-~~~~~~~~~~~~~~~~~~~~~~
+``make-reads``
+~~~~~~~~~~~~~~
.. code-block:: bash
- wasp2-map find-intersecting-snps [OPTIONS] BAM VCF
-
-Options:
-* ``--samples``: Filter by sample genotype
-* ``--output``: Output BAM file
+ wasp2-map make-reads [OPTIONS] BAM VARIANTS
-make-reads
-~~~~~~~~~~
+Important options:
-.. code-block:: bash
+* ``--samples`` / ``-s``: sample name(s) used to select het variants
+* ``--out_dir`` / ``-o``: output directory
+* ``--out_json`` / ``-j``: explicit metadata JSON path
+* ``--indels``: include indels as well as SNPs
+* ``--threads``: BAM I/O threads
- wasp2-map make-reads [OPTIONS] BAM VCF
+Notes:
-Options:
-* ``--samples``: Sample name(s)
-* ``--output``: Output FASTQ prefix
-* ``--paired``: Paired-end mode
+* paired-end input is required
+* phased genotypes are strongly recommended
+* supported variant formats are VCF, VCF.GZ, BCF, and PGEN
-filt-remapped-reads
+``filter-remapped``
~~~~~~~~~~~~~~~~~~~
.. code-block:: bash
- wasp2-map filt-remapped-reads [OPTIONS] ORIGINAL REMAPPED
-
-Options:
-* ``--output``: Filtered BAM file
-* ``--keep_read_file``: Save kept read IDs
+ wasp2-map filter-remapped [OPTIONS] REMAPPED_BAM [TO_REMAP_BAM] [KEEP_BAM]
-Input Requirements
-------------------
+Important options:
-* **Original BAM**: Aligned reads from initial mapping
-* **VCF File**: Phased heterozygous SNPs (recommended)
-* **Reference Genome**: Same as used for original alignment
+* ``--wasp_data_json`` / ``-j``: load ``to_remap_bam`` and ``keep_bam`` from
+ ``make-reads`` metadata
+* ``--out_bam`` / ``-o``: output BAM path
+* ``--remap_keep_bam``: optional BAM of remapped reads that passed filtering
+* ``--remap_keep_file``: optional text file of kept read names
+* ``--same-locus-slop``: positional tolerance for same-locus matching
+* ``--threads``: BAM I/O threads
-Output Interpretation
----------------------
+Interpreting Outputs
+--------------------
-WASP Filter Rate
-~~~~~~~~~~~~~~~~
+Common outcomes after ``filter-remapped``:
-Typical filter rates:
-* **Good**: 95-99% reads kept
-* **Acceptable**: 90-95% reads kept
-* **Concerning**: <90% reads kept (check data quality)
+* reads kept because they remap to the same locus
+* reads dropped because they remap elsewhere
+* reads dropped because they fail to remap cleanly
-Low filter rate may indicate:
-* Poor mapping quality
-* High SNP density
-* Problematic reference genome
-
-Complete Example
-----------------
+The final WASP-corrected BAM is the output of ``filter-remapped`` merged with
+the ``*_keep.bam`` reads that never required remapping.
-Full WASP workflow:
+Example
+-------
.. code-block:: bash
- # Step 1: Find SNP-overlapping reads
- wasp2-map find-intersecting-snps \
- original.bam \
- phased_variants.vcf \
- --samples NA12878 \
- --output intersecting.bam
-
- # Step 2: Generate remapping reads
wasp2-map make-reads \
- intersecting.bam \
- phased_variants.vcf \
- --samples NA12878 \
- --paired \
- --output remap
-
- # Step 3: Remap (user's aligner)
- bwa mem -t reference.fa \
- remap_R1.fastq remap_R2.fastq | \
- samtools sort -o remapped.bam -
- samtools index remapped.bam
-
- # Step 4: Filter
- wasp2-map filt-remapped-reads \
- intersecting.bam \
- remapped.bam \
- --output filtered_wasp.bam
-
- # Step 5: Count alleles (use filtered BAM)
- wasp2-count count-variants \
- filtered_wasp.bam \
- phased_variants.vcf \
- --samples NA12878
-
-Performance Tips
-----------------
-
-* Use multi-threading for remapping step
-* Filter VCF to high-quality SNPs only
-* Use phased genotypes when available
+ sample.bam \
+ variants.vcf.gz \
+ --samples SAMPLE1 \
+ --out_dir wasp_output
-Common Issues
--------------
+ bwa mem -M -t 8 genome.fa \
+ wasp_output/sample_swapped_alleles_r1.fq \
+ wasp_output/sample_swapped_alleles_r2.fq | \
+ samtools sort -o wasp_output/sample_remapped.bam -
-Many Reads Filtered
-~~~~~~~~~~~~~~~~~~~~
-
-* Check remapping quality (MAPQ scores)
-* Verify same reference genome used
-* Consider relaxing mapping parameters
-
-Slow Remapping
-~~~~~~~~~~~~~~
+ samtools index wasp_output/sample_remapped.bam
-* Use multi-threading (``-t`` flag)
-* Process chromosomes in parallel
-* Consider downsampling for testing
+ wasp2-map filter-remapped \
+ wasp_output/sample_remapped.bam \
+ --wasp_data_json wasp_output/sample_wasp_data_files.json \
+ --out_bam wasp_output/sample_wasp_filtered.bam
Next Steps
----------
-* :doc:`counting` - Count alleles from WASP-filtered BAM
-* :doc:`analysis` - Analyze allelic imbalance
+* :doc:`counting` to count alleles from the WASP-filtered BAM
+* :doc:`analysis` to test for allelic imbalance
diff --git a/pixi.toml b/pixi.toml
index baa7dd2..fcc8b3a 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -66,6 +66,7 @@ verify = "wasp2-count --help && wasp2-map --help && wasp2-analyze --help && pyth
build = "maturin develop --release -m rust/Cargo.toml"
# Run tests
test = "pytest tests/ -v --tb=short --ignore=tests/benchmarks -m 'not benchmark'"
+test-mapping-parity = "pytest tests/regression/test_mapping_stage_parity.py -v --tb=short"
# Run benchmarks
bench = "pytest tests/benchmarks/ -v --tb=short"
# Lint
diff --git a/pyproject.toml b/pyproject.toml
index 90f76c2..ea1740c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -126,7 +126,7 @@ python-source = "src"
module-name = "wasp2_rust"
python-packages = ["counting", "mapping", "analysis", "wasp2", "ipscore"]
bindings = "pyo3"
-strip = true
+strip = false
include = ["LICENSE", "README.md"]
[tool.pytest.ini_options]
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index d778994..fa9d689 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -1999,6 +1999,7 @@ dependencies = [
"noodles-core",
"noodles-vcf",
"pyo3",
+ "pyo3-build-config",
"rayon",
"rust-htslib",
"rustc-hash 2.1.1",
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index a375ef1..af1c3f4 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -33,6 +33,9 @@ flate2 = "1.1" # For gzip decompression
criterion = { version = "0.8", features = ["html_reports"] }
tempfile = "3.25"
+[build-dependencies]
+pyo3-build-config = "0.28.2"
+
# Benchmarks removed for clean release (benchmark files in paper branch only)
# [[bench]]
# name = "mapping_filter_bench"
diff --git a/rust/build.rs b/rust/build.rs
new file mode 100644
index 0000000..258e36c
--- /dev/null
+++ b/rust/build.rs
@@ -0,0 +1,4 @@
+fn main() {
+ pyo3_build_config::add_extension_module_link_args();
+ pyo3_build_config::use_pyo3_cfgs();
+}
diff --git a/scripts/container_smoke_test.sh b/scripts/container_smoke_test.sh
index 690ef28..575b7d5 100755
--- a/scripts/container_smoke_test.sh
+++ b/scripts/container_smoke_test.sh
@@ -64,6 +64,15 @@ for cmd in wasp2-count wasp2-map wasp2-analyze; do
FAIL=$((FAIL + 1))
fi
done
+
+# wasp2-ipscore doesn't have --version, check --help instead
+if wasp2-ipscore --help > /dev/null 2>&1; then
+ echo " PASS: wasp2-ipscore --help"
+ PASS=$((PASS + 1))
+else
+ echo " FAIL: wasp2-ipscore --help"
+ FAIL=$((FAIL + 1))
+fi
echo ""
# ─────────────────────────────────────────────────────────────────────────────
@@ -96,7 +105,7 @@ if [[ -f "$DATA_DIR/sample1.bam" && -f "$DATA_DIR/variants.vcf.gz" ]]; then
if wasp2-count count-variants \
"$DATA_DIR/sample1.bam" \
"$DATA_DIR/variants.vcf.gz" \
- --samples SAMPLE1 \
+ --samples sample1 \
--out "$TMP_DIR/counts.tsv" \
2>/dev/null; then
diff --git a/src/counting/__main__.py b/src/counting/__main__.py
index 2fb6945..15deb17 100644
--- a/src/counting/__main__.py
+++ b/src/counting/__main__.py
@@ -80,7 +80,7 @@ def count_variants(
"-r",
help=(
"Only use variants overlapping regions in file. "
- "Accepts BED or MACS2 formatted .(narrow/broad)Peak files. "
+ "Accepts BED, MACS2 .(narrow/broad)Peak, GTF, or GFF3 files. "
),
),
] = None,
@@ -126,7 +126,7 @@ def count_variants(
"--feat",
help=(
"Feature type in gtf/gff3 for counting intersecting SNPs. "
- "Defaults to 'exon' for snp counting"
+ "Defaults to 'gene' when present."
),
),
] = None,
@@ -237,10 +237,9 @@ def count_variants_sc(
"--regions",
"-r",
help=(
- "Features used in single-cell experiment. "
+            "Features used in a single-cell ATAC experiment. "
"Only use variants overlapping features in file. "
- "Accepts BED or MACS2 formatted .(narrow/broad)Peak files. "
- "TODO: Implement genes gtf/gff format"
+ "Accepts BED or MACS2 .(narrow/broad)Peak files."
),
),
] = None,
diff --git a/src/counting/count_alleles_sc.py b/src/counting/count_alleles_sc.py
index 1244383..104f8ef 100644
--- a/src/counting/count_alleles_sc.py
+++ b/src/counting/count_alleles_sc.py
@@ -20,6 +20,19 @@
logger = logging.getLogger(__name__)
+def _sparse_from_counts(
+ counts: defaultdict[tuple[int, int], int],
+ shape: tuple[int, int],
+) -> csr_matrix:
+ if not counts:
+ return csr_matrix(shape, dtype=np.uint16)
+ return csr_matrix(
+ (list(counts.values()), list(zip(*counts.keys()))),
+ shape=shape,
+ dtype=np.uint16,
+ )
+
+
class CountStatsSC:
"""Container for mutable single-cell counting statistics.
@@ -101,7 +114,12 @@ def make_count_matrix(
# Maybe do this automatically and parse feature col instead?
snp_df_cols = ["chrom", "pos", "ref", "alt"]
if include_samples is not None:
- snp_df_cols.extend(include_samples)
+ sample_cols = list(include_samples)
+ missing_sample_cols = [col for col in sample_cols if col not in df.columns]
+ if missing_sample_cols and len(sample_cols) == 1 and "GT" in df.columns:
+ sample_name = sample_cols[0]
+ df = df.with_columns(pl.col("GT").alias(sample_name))
+ snp_df_cols.extend(sample_cols)
# Might be more memory efficient to use pandas index instead...
snp_df = df.select(snp_df_cols).unique(maintain_order=True).with_row_index()
@@ -135,23 +153,10 @@ def make_count_matrix(
# Create sparse matrices
# sparse array is recommended...but doesnt work with adata
- sparse_ref = csr_matrix(
- (list(sc_counts.ref_count.values()), list(zip(*sc_counts.ref_count.keys()))),
- shape=(snp_df.shape[0], len(bc_dict)),
- dtype=np.uint16,
- )
-
- sparse_alt = csr_matrix(
- (list(sc_counts.alt_count.values()), list(zip(*sc_counts.alt_count.keys()))),
- shape=(snp_df.shape[0], len(bc_dict)),
- dtype=np.uint16,
- )
-
- sparse_other = csr_matrix(
- (list(sc_counts.other_count.values()), list(zip(*sc_counts.other_count.keys()))),
- shape=(snp_df.shape[0], len(bc_dict)),
- dtype=np.uint16,
- )
+ matrix_shape = (snp_df.shape[0], len(bc_dict))
+ sparse_ref = _sparse_from_counts(sc_counts.ref_count, matrix_shape)
+ sparse_alt = _sparse_from_counts(sc_counts.alt_count, matrix_shape)
+ sparse_other = _sparse_from_counts(sc_counts.other_count, matrix_shape)
# Create anndata With total as X
adata = ad.AnnData(
@@ -171,17 +176,19 @@ def make_count_matrix(
if include_samples is not None:
adata.uns["samples"] = include_samples
- # TODO: Allow for other features besides 'region' using include_features
- # Could be case of no features, or feature is gene
- if "region" in df.columns:
- # Get unique snps and associated regions
+ feature_cols = include_features or []
+ if not feature_cols and "region" in df.columns:
+ feature_cols = ["region"]
- # Create dict during analysis step instead
- adata.uns["feature"] = (
+ if feature_cols:
+ feature_df = (
df.join(snp_df, on=["chrom", "pos", "ref", "alt"], how="left")
- .select(["region", "index"])
- .to_pandas()
+ .select([*feature_cols, "index"])
+ .unique(maintain_order=True)
)
+ if "region" not in feature_df.columns:
+ feature_df = feature_df.with_columns(pl.col(feature_cols[0]).alias("region"))
+ adata.uns["feature"] = feature_df.to_pandas()
# region_snp_dict = dict(
# df.join(snp_df, on=["chrom", "pos", "ref", "alt"], how="left"
diff --git a/src/counting/parse_gene_data.py b/src/counting/parse_gene_data.py
index 6653ed1..682192c 100644
--- a/src/counting/parse_gene_data.py
+++ b/src/counting/parse_gene_data.py
@@ -106,12 +106,12 @@ def parse_gene_file(
if feature is None:
feature_list = df.select(pl.col("feature").unique()).to_series()
- if "exon" in feature_list:
- feature = "exon"
+ if "gene" in feature_list:
+ feature = "gene"
elif "transcript" in feature_list:
feature = "transcript"
- elif "gene" in feature_list:
- feature = "gene"
+ elif "exon" in feature_list:
+ feature = "exon"
else:
logger.warning("exon, gene or transcript not found in feature list: %s", feature_list)
@@ -244,6 +244,20 @@ def parse_intersect_genes(
if parent_attribute is None:
parent_attribute = "Parent"
+ # Guard against empty intersection file (0 variants in region)
+ intersect_path = Path(intersect_file)
+ if not intersect_path.exists() or intersect_path.stat().st_size == 0:
+ return pl.DataFrame(
+ schema={
+ "chrom": pl.Categorical,
+ "pos": pl.UInt32,
+ "ref": pl.Categorical,
+ "alt": pl.Categorical,
+ attribute: pl.Utf8,
+ parent_attribute: pl.Utf8,
+ }
+ )
+
# AFTER performing gtf_to_bed and intersecting!
df = pl.scan_csv(intersect_file, separator="\t", has_header=False, infer_schema_length=0)
@@ -287,8 +301,23 @@ def parse_intersect_genes_new(
if parent_attribute is None:
parent_attribute = "Parent"
+ # Guard against empty intersection file (0 variants in region)
+ intersect_path = Path(intersect_file)
+ if not intersect_path.exists() or intersect_path.stat().st_size == 0:
+ return pl.DataFrame(
+ schema={
+ "chrom": pl.Categorical,
+ "pos": pl.UInt32,
+ "ref": pl.Categorical,
+ "alt": pl.Categorical,
+ attribute: pl.Utf8,
+ parent_attribute: pl.Utf8,
+ }
+ )
+
# AFTER performing gtf_to_bed and intersecting!
df = pl.scan_csv(intersect_file, separator="\t", has_header=False, infer_schema_length=0)
+ schema_names = list(df.collect_schema().names())
vcf_schema = [
pl.col("chrom").cast(pl.Categorical),
@@ -298,12 +327,12 @@ def parse_intersect_genes_new(
]
# Expect at min 10 cols, 11 if GT included
- if len(df.columns) > 10:
- subset_cols = [df.columns[i] for i in [0, 2, 3, 4, 5, -2, -1]]
+ if len(schema_names) > 10:
+ subset_cols = [schema_names[i] for i in [0, 2, 3, 4, 5, -2, -1]]
new_cols = ["chrom", "pos", "ref", "alt", "GT", attribute, parent_attribute]
vcf_schema.append(pl.col("GT").cast(pl.Categorical))
else:
- subset_cols = [df.columns[i] for i in [0, 2, 3, 4, -2, -1]]
+ subset_cols = [schema_names[i] for i in [0, 2, 3, 4, -2, -1]]
new_cols = ["chrom", "pos", "ref", "alt", attribute, parent_attribute]
# Parse dataframe columns
diff --git a/src/counting/run_counting_sc.py b/src/counting/run_counting_sc.py
index 08a0240..c9c4c20 100644
--- a/src/counting/run_counting_sc.py
+++ b/src/counting/run_counting_sc.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+import logging
import re
from pathlib import Path
@@ -98,39 +99,13 @@ def __init__(
self.intersect_file = str(
Path(self.temp_loc) / f"{variant_prefix}_intersect_regions.bed"
)
- self.is_gene_file = False
- elif re.search(r"\.g[tf]f(?:\.gz)?$", f_ext, re.I):
- self.feature_type = "genes"
- self.intersect_file = str(
- Path(self.temp_loc) / f"{variant_prefix}_intersect_genes.bed"
- )
- self.is_gene_file = True
- gtf_prefix = re.split(r".g[tf]f", Path(self.feature_file).name)[0]
- self.gtf_bed = str(Path(self.temp_loc) / f"{gtf_prefix}.bed")
- self.use_feature_names = True # Use feature attributes as region names
- elif re.search(r"\.gff3(?:\.gz)?$", f_ext, re.I):
- self.feature_type = "genes"
- self.intersect_file = str(
- Path(self.temp_loc) / f"{variant_prefix}_intersect_genes.bed"
- )
- self.is_gene_file = True
- gtf_prefix = re.split(r".gff3", Path(self.feature_file).name)[0]
- self.gtf_bed = str(Path(self.temp_loc) / f"{gtf_prefix}.bed")
- self.use_feature_names = True # Use feature attributes as feature names
else:
raise ValueError(
- f"Invalid feature file type. Expected .bed, .gtf, or .gff3, got: {self.feature_file}"
+ f"Invalid feature file type. Expected BED or MACS2 peak file, got: {self.feature_file}"
)
else:
self.intersect_file = self.vcf_bed
- self.is_gene_file = False
-
- # TODO UPDATE THIS WHEN I ADD AUTOPARSERS
- if self.is_gene_file:
- # Possible edge case of vcf and gtf prefix conflict
- if self.vcf_bed == self.gtf_bed:
- self.gtf_bed = str(Path(self.temp_loc) / "genes.bed")
@tempdir_decorator
@@ -144,7 +119,7 @@ def run_count_variants_sc(
out_file: str | None = None,
temp_loc: str | None = None,
) -> None:
- """Run single-cell variant counting pipeline.
+ """Run single-cell ATAC variant counting pipeline.
Parameters
----------
@@ -155,7 +130,7 @@ def run_count_variants_sc(
barcode_file : str
Path to cell barcode file (one barcode per line).
feature_file : str | None, optional
- Path to feature/region file (BED, GTF, or GFF3).
+ Path to feature/region file (BED or MACS2 peak file).
samples : str | list[str] | None, optional
Sample ID(s) to process.
use_region_names : bool, optional
@@ -192,13 +167,13 @@ def run_count_variants_sc(
)
assert count_files.feature_file is not None
+
intersect_vcf_region(
vcf_file=count_files.vcf_bed,
region_file=count_files.feature_file,
out_file=count_files.intersect_file,
)
- # TODO: handle use_region_names better
df = parse_intersect_region_new(
intersect_file=count_files.intersect_file,
samples=count_files.samples,
@@ -206,13 +181,26 @@ def run_count_variants_sc(
region_col=None,
)
+ # Guard: if no variants survived intersection, warn and write empty output
+ if df.is_empty():
+ logging.getLogger(__name__).warning(
+ "No variants found after intersection — writing empty output file."
+ )
+ import anndata as ad
+
+ ad.AnnData().write_h5ad(count_files.out_file)
+ return
+
# TODO: handle case where barcode file contains multiple columns
with open(count_files.barcode_file) as file:
bc_dict = {line.rstrip(): i for i, line in enumerate(file)}
# Generate Output
adata = make_count_matrix(
- bam_file=count_files.bam_file, df=df, bc_dict=bc_dict, include_samples=count_files.samples
+ bam_file=count_files.bam_file,
+ df=df,
+ bc_dict=bc_dict,
+ include_samples=count_files.samples,
)
# Write outputs
diff --git a/src/mapping/intersect_variant_data.py b/src/mapping/intersect_variant_data.py
index 6d52aba..307ac64 100644
--- a/src/mapping/intersect_variant_data.py
+++ b/src/mapping/intersect_variant_data.py
@@ -10,9 +10,8 @@
import os
import subprocess
from pathlib import Path
+from typing import TYPE_CHECKING
-import numpy as np
-import polars as pl
import pysam
# Multi-format variant support
@@ -25,6 +24,9 @@
logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+ import polars as pl
+
def vcf_to_bed(
vcf_file: str | Path,
@@ -148,7 +150,7 @@ def make_intersect_df(
intersect_file: str,
samples: list[str],
is_paired: bool = True,
-) -> pl.DataFrame:
+) -> "pl.DataFrame":
"""Parse intersection file into a typed polars DataFrame.
Parameters
@@ -165,6 +167,9 @@ def make_intersect_df(
pl.DataFrame
Parsed intersection data with alleles split by sample.
"""
+ import numpy as np
+ import polars as pl
+
# Create Dataframe
df = pl.scan_csv(intersect_file, separator="\t", has_header=False, infer_schema_length=0)
diff --git a/src/mapping/remap_utils.py b/src/mapping/remap_utils.py
index ebf8534..416ebde 100644
--- a/src/mapping/remap_utils.py
+++ b/src/mapping/remap_utils.py
@@ -1,13 +1,15 @@
import logging
from collections.abc import Generator
-from typing import Any
+from typing import TYPE_CHECKING, Any
import numpy as np
-import polars as pl
from pysam import AlignedSegment, AlignmentFile
logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+ import polars as pl
+
# Generator for iterating through bam
def paired_read_gen(
@@ -127,13 +129,13 @@ def _build_ref2read_maps(read: AlignedSegment) -> tuple[dict[int, int], dict[int
def get_read_het_data(
- read_df: pl.DataFrame,
+ read_df: "pl.DataFrame",
read: AlignedSegment,
col_list: list[str],
max_seqs: int | None = None,
include_indels: bool = False,
insert_qual: int = 30,
-) -> tuple[list[str], list[Any], list[pl.Series]] | None:
+) -> tuple[list[str], list[Any], list["pl.Series"]] | None:
"""Extract heterozygous variant data from read with indel support.
Args:
@@ -150,6 +152,8 @@ def get_read_het_data(
split_qual: List of quality score segments
allele_series: List of polars Series with allele data
"""
+ import polars as pl
+
pos_list = read_df.select(["start", "stop"]).rows()
assert read.query_sequence is not None, "Read has no query sequence"