diff --git a/pipelines/nf-atacseq/.github/.dockstore.yml b/pipelines/nf-atacseq/.github/.dockstore.yml
new file mode 100644
index 0000000..8c3f31e
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/.dockstore.yml
@@ -0,0 +1,7 @@
+# Dockstore registration for the nf-atacseq Nextflow workflow.
+version: 1.2
+workflows:
+  - subclass: nfl
+    # For Nextflow (nfl) workflows Dockstore's primary descriptor is the
+    # pipeline's nextflow.config rather than main.nf.
+    primaryDescriptorPath: /pipelines/nf-atacseq/nextflow.config
+    publish: true
diff --git a/pipelines/nf-atacseq/.github/CONTRIBUTING.md b/pipelines/nf-atacseq/.github/CONTRIBUTING.md
new file mode 100644
index 0000000..a5995f2
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/CONTRIBUTING.md
@@ -0,0 +1,29 @@
+# Contributing to nf-atacseq
+
+## Getting help
+
+For questions, bugs, or feature requests, please open an issue on [GitHub](https://github.com/mcvickerlab/WASP2/issues).
+
+## Development workflow
+
+1. Fork the repository
+2. Create a feature branch from `dev`
+3. Make your changes
+4. Run `nf-core pipelines lint` to verify compliance
+5. Submit a pull request to `dev`
+
+## Code style
+
+- Follow nf-core module conventions for new modules
+- Use `tuple val(meta), path(...)` for all process inputs/outputs
+- Include `stub:` blocks in all processes
+- Add `versions.yml` output to all processes
+- Write `meta.yml` documentation for new modules
+
+## Testing
+
+Run the test profile before submitting changes:
+
+```bash
+nextflow run main.nf -profile test,docker --outdir test_results
+```
diff --git a/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/config.yml b/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..77ae53a
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: true
+contact_links:
+ - name: WASP2 Documentation
+ url: https://wasp2.readthedocs.io
+ about: Check the documentation for usage help
diff --git a/pipelines/nf-atacseq/.github/actions/get-shards/action.yml b/pipelines/nf-atacseq/.github/actions/get-shards/action.yml
new file mode 100644
index 0000000..08ec9a5
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/actions/get-shards/action.yml
@@ -0,0 +1,25 @@
+# Composite action: finds every `*.nf.test` file under `test-path` and
+# publishes the paths as a compact JSON array in the `shards` output.
+name: Get test shards
+description: Get nf-test shards for parallel execution
+inputs:
+  test-path:
+    description: Path to test files
+    required: false
+    default: tests
+outputs:
+  shards:
+    description: JSON array of test shards
+    value: ${{ steps.get-shards.outputs.shards }}
+runs:
+  using: composite
+  steps:
+    - id: get-shards
+      shell: bash
+      env:
+        # Pass the input through the environment instead of expanding
+        # `${{ }}` inside the script, so its value is never parsed as shell.
+        TEST_PATH: ${{ inputs.test-path }}
+      run: |
+        shards=$(find "$TEST_PATH" -name "*.nf.test" | sort | jq -R -s -c 'split("\n") | map(select(. != ""))')
+        echo "shards=$shards" >> "$GITHUB_OUTPUT"
diff --git a/pipelines/nf-atacseq/.github/actions/nf-test/action.yml b/pipelines/nf-atacseq/.github/actions/nf-test/action.yml
new file mode 100644
index 0000000..8187d63
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/actions/nf-test/action.yml
@@ -0,0 +1,21 @@
+# Composite action: runs `nf-test test` on one test file with the given profile.
+name: Run nf-test
+description: Run nf-test for a specific test file
+inputs:
+  test-file:
+    description: Path to the nf-test file
+    required: true
+  profile:
+    description: Nextflow profile to use
+    required: false
+    default: test,docker
+runs:
+  using: composite
+  steps:
+    - shell: bash
+      env:
+        # Inputs are handed over as environment variables rather than expanded
+        # with `${{ }}` inside the script, so they cannot be parsed as shell.
+        TEST_FILE: ${{ inputs.test-file }}
+        PROFILE: ${{ inputs.profile }}
+      run: nf-test test "$TEST_FILE" --profile "$PROFILE"
diff --git a/pipelines/nf-atacseq/.github/workflows/branch.yml b/pipelines/nf-atacseq/.github/workflows/branch.yml
new file mode 100644
index 0000000..27d29aa
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/workflows/branch.yml
@@ -0,0 +1,21 @@
+name: nf-core branch protection
+# Runs on PRs that target master/main and reports whether the PR comes from
+# the `dev` branch of mcvickerlab/WASP2. The check is advisory: it prints a
+# notice and never fails the job.
+on:
+  pull_request_target:
+    branches: [master, main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check PRs
+        if: github.repository == 'mcvickerlab/WASP2'
+        env:
+          # Event data is passed through the environment instead of being
+          # expanded with `${{ }}` inside the script, so it is never parsed
+          # as shell code.
+          HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
+        run: |
+          { [[ "$HEAD_REPO" == "mcvickerlab/WASP2" ]] && [[ "$GITHUB_HEAD_REF" == "dev" ]]; } || echo "This PR is not from dev. It will be reviewed manually."
diff --git a/pipelines/nf-atacseq/.github/workflows/linting_comment.yml b/pipelines/nf-atacseq/.github/workflows/linting_comment.yml
new file mode 100644
index 0000000..86e12a3
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/workflows/linting_comment.yml
@@ -0,0 +1,13 @@
+name: nf-core linting comment
+on:
+ workflow_run:
+ workflows: ["nf-core linting"]
+ types: [completed]
+
+jobs:
+ linting-comment:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Post linting comment
+ run: echo "Linting workflow completed"
diff --git a/pipelines/nf-atacseq/.github/workflows/nf-test.yml b/pipelines/nf-atacseq/.github/workflows/nf-test.yml
new file mode 100644
index 0000000..fa7474d
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/workflows/nf-test.yml
@@ -0,0 +1,44 @@
+name: Run nf-test
+on:
+  pull_request:
+    branches: [dev, master]
+  release:
+    types: [published]
+  merge_group:
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  NXF_ANSI_LOG: false
+  # nf-test release installed by the "Install nf-test" step below.
+  NFT_VER: "0.9.2"
+
+jobs:
+  nf-test:
+    name: "nf-test | NXF ${{ matrix.NXF_VER }}"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # NOTE(review): the vendored bwa/fastp modules declare `eval(...)` and
+        # `topic:` outputs; confirm "23.04.0" is still a supported minimum.
+        NXF_VER:
+          - "23.04.0"
+          - "latest-everything"
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: nf-core/setup-nextflow@v2
+        with:
+          version: "${{ matrix.NXF_VER }}"
+
+      - name: Install nf-test
+        run: |
+          # Pin the installer to NFT_VER so CI does not silently pick up a new release.
+          wget -qO- https://code.askimed.com/install/nf-test | bash -s "${NFT_VER}"
+          sudo mv nf-test /usr/local/bin/
+
+      - name: Run nf-test
+        run: nf-test test --profile test,docker
diff --git a/pipelines/nf-atacseq/README.md b/pipelines/nf-atacseq/README.md
index a0f718a..64d62d0 100644
--- a/pipelines/nf-atacseq/README.md
+++ b/pipelines/nf-atacseq/README.md
@@ -1,6 +1,6 @@
# nf-atacseq
-[](https://github.com/your-org/WASP2/actions/workflows/nf-atacseq-tests.yml)
+[![CI](https://github.com/mcvickerlab/WASP2/actions/workflows/ci.yml/badge.svg)](https://github.com/mcvickerlab/WASP2/actions/workflows/ci.yml)
ATAC-seq Allelic Imbalance (AI) Pipeline with WASP2 mapping bias correction.
@@ -60,6 +60,12 @@ nextflow run pipelines/nf-atacseq -profile test,docker
nextflow run pipelines/nf-atacseq -profile test,docker -stub-run # Workflow validation only
```
+### Local Test (chr21 data)
+
+```bash
+nextflow run pipelines/nf-atacseq -profile test_local,docker
+```
+
## Samplesheet Format
```csv
@@ -112,6 +118,19 @@ results/
See [docs/output.md](docs/output.md) for detailed output descriptions.
+## Validation with chr21 1000 Genomes Data
+
+Run a quick validation using chr21 data from the 1000 Genomes Project:
+
+```bash
+# Uses pre-configured chr21 test data (NA12878, HG00096)
+nextflow run pipelines/nf-atacseq -profile test_local,docker
+
+# Expect: ~2-5 min runtime, allele counts at chr21 het SNPs
+```
+
+This profile uses downsampled chr21 FASTQ reads and a chr21-only VCF, providing a fast end-to-end validation without downloading full genomes.
+
## Testing
### Run nf-test Suite
@@ -148,6 +167,7 @@ nextflow run . -profile test -stub-run
| `singularity` | Run with Singularity containers |
| `conda` | Run with Conda environments |
| `test` | Minimal test configuration |
+| `test_local` | Local test with chr21 1000 Genomes data |
| `test_full` | Full test with real data |
## Pipeline DAG
@@ -168,7 +188,7 @@ FASTQ → FastQC → Fastp → BWA/Bowtie2 → Samtools → Picard → MACS2 →
If you use nf-atacseq, please cite:
-- **WASP2**: [GitHub Repository](https://github.com/your-org/WASP2)
+- **WASP2**: [GitHub Repository](https://github.com/mcvickerlab/WASP2)
- **Nextflow**: Di Tommaso, P., et al. (2017). Nextflow enables reproducible computational workflows. *Nature Biotechnology*.
## License
@@ -177,5 +197,5 @@ MIT License - see [LICENSE](../../LICENSE) for details.
## Support
-- [Issues](https://github.com/your-org/WASP2/issues)
+- [Issues](https://github.com/mcvickerlab/WASP2/issues)
- [Documentation](docs/)
diff --git a/pipelines/nf-atacseq/assets/email_template.html b/pipelines/nf-atacseq/assets/email_template.html
new file mode 100644
index 0000000..161c1e4
--- /dev/null
+++ b/pipelines/nf-atacseq/assets/email_template.html
@@ -0,0 +1,48 @@
+
+
+ &1 | sed -n "s/^Version: //p"'), topic: versions, emit: versions_bwa
when:
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${fasta.baseName}"
+ def args = task.ext.args ?: ''
"""
mkdir bwa
- bwa index $args -p bwa/${fasta.baseName} $fasta
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- bwa: \$(bwa 2>&1 | grep -o 'Version: [0-9.]*' | sed 's/Version: //')
- END_VERSIONS
+ bwa \\
+ index \\
+ $args \\
+ -p bwa/${prefix} \\
+ $fasta
"""
stub:
+ def prefix = task.ext.prefix ?: "${fasta.baseName}"
"""
mkdir bwa
- touch bwa/${fasta.baseName}.amb
- touch bwa/${fasta.baseName}.ann
- touch bwa/${fasta.baseName}.bwt
- touch bwa/${fasta.baseName}.pac
- touch bwa/${fasta.baseName}.sa
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- bwa: 0.7.18
- END_VERSIONS
+ touch bwa/${prefix}.amb
+ touch bwa/${prefix}.ann
+ touch bwa/${prefix}.bwt
+ touch bwa/${prefix}.pac
+ touch bwa/${prefix}.sa
"""
}
diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/index/meta.yml b/pipelines/nf-atacseq/modules/nf-core/bwa/index/meta.yml
new file mode 100644
index 0000000..f5bf7f5
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/bwa/index/meta.yml
@@ -0,0 +1,71 @@
+name: bwa_index
+description: Create BWA index for reference genome
+keywords:
+ - index
+ - fasta
+ - genome
+ - reference
+tools:
+ - bwa:
+ description: |
+ BWA is a software package for mapping DNA sequences against
+ a large reference genome, such as the human genome.
+ homepage: http://bio-bwa.sourceforge.net/
+ documentation: https://bio-bwa.sourceforge.net/bwa.shtml
+ arxiv: arXiv:1303.3997
+ licence: ["GPL-3.0-or-later"]
+ identifier: "biotools:bwa"
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information.
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: Input genome fasta file
+ ontologies:
+ - edam: "http://edamontology.org/data_2044" # Sequence
+ - edam: "http://edamontology.org/format_1929" # FASTA
+output:
+ index:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information.
+ e.g. [ id:'test', single_end:false ]
+ - bwa:
+ type: map
+ description: |
+ Groovy Map containing reference information.
+ e.g. [ id:'test', single_end:false ]
+ pattern: "*.{amb,ann,bwt,pac,sa}"
+ ontologies:
+ - edam: "http://edamontology.org/data_3210" # Genome index
+ versions_bwa:
+ - - ${task.process}:
+ type: string
+ description: The process the versions were collected from
+ - bwa:
+ type: string
+ description: The tool name
+ - 'bwa 2>&1 | sed -n "s/^Version: //p"':
+ type: string
+ description: The command used to generate the version of the tool
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: The process the versions were collected from
+ - bwa:
+ type: string
+ description: The tool name
+ - 'bwa 2>&1 | sed -n "s/^Version: //p"':
+ type: string
+ description: The command used to generate the version of the tool
+authors:
+ - "@drpatelh"
+ - "@maxulysse"
+maintainers:
+ - "@maxulysse"
+ - "@gallvp"
diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test
new file mode 100644
index 0000000..f0fba82
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test
@@ -0,0 +1,57 @@
+nextflow_process {
+
+ name "Test Process BWA_INDEX"
+ tag "modules_nfcore"
+ tag "modules"
+ tag "bwa"
+ tag "bwa/index"
+ script "../main.nf"
+ process "BWA_INDEX"
+
+ test("BWA index") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("BWA index - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test.snap
new file mode 100644
index 0000000..21a6f73
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test.snap
@@ -0,0 +1,108 @@
+{
+ "BWA index - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "genome.amb:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "genome.ann:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "genome.bwt:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "genome.pac:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "genome.sa:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+ [
+ "BWA_INDEX",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "index": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "genome.amb:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "genome.ann:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "genome.bwt:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "genome.pac:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "genome.sa:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "versions_bwa": [
+ [
+ "BWA_INDEX",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T16:58:59.966558606"
+ },
+ "BWA index": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e",
+ "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567",
+ "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da",
+ "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66",
+ "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1"
+ ]
+ ]
+ ],
+ "1": [
+ [
+ "BWA_INDEX",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "index": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e",
+ "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567",
+ "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da",
+ "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66",
+ "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1"
+ ]
+ ]
+ ],
+ "versions_bwa": [
+ [
+ "BWA_INDEX",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T16:58:53.330725134"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/environment.yml b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/environment.yml
new file mode 100644
index 0000000..54e6794
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/environment.yml
@@ -0,0 +1,13 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+
+dependencies:
+ # renovate: datasource=conda depName=bioconda/bwa
+ - bioconda::bwa=0.7.19
+ # renovate: datasource=conda depName=bioconda/htslib
+ - bioconda::htslib=1.22.1
+ # renovate: datasource=conda depName=bioconda/samtools
+ - bioconda::samtools=1.22.1
diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/main.nf b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/main.nf
index c94299b..e373267 100644
--- a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/main.nf
@@ -2,59 +2,62 @@ process BWA_MEM {
tag "$meta.id"
label 'process_high'
- conda "bioconda::bwa=0.7.18 bioconda::samtools=1.19"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3571' :
- 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3571' }"
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d7/d7e24dc1e4d93ca4d3a76a78d4c834a7be3985b0e1e56fddd61662e047863a8a/data' :
+ 'community.wave.seqera.io/library/bwa_htslib_samtools:83b50ff84ead50d0' }"
input:
- tuple val(meta), path(reads)
- path index
- path fasta
+ tuple val(meta) , path(reads)
+ tuple val(meta2), path(index)
+ tuple val(meta3), path(fasta)
val sort_bam
output:
- tuple val(meta), path("*.bam"), emit: bam
- path "versions.yml", emit: versions
+ tuple val(meta), path("*.bam") , emit: bam, optional: true
+ tuple val(meta), path("*.cram") , emit: cram, optional: true
+ tuple val(meta), path("*.csi") , emit: csi, optional: true
+ tuple val(meta), path("*.crai") , emit: crai, optional: true
+ tuple val("${task.process}"), val('bwa'), eval('bwa 2>&1 | sed -n "s/^Version: //p"'), topic: versions, emit: versions_bwa
+ tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools
when:
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
+ def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
- def read_group = "@RG\\tID:${meta.id}\\tSM:${meta.id}\\tPL:ILLUMINA"
-
- def samtools_command = sort_bam ? "samtools sort -@ ${task.cpus} -o ${prefix}.bam -" : "samtools view -@ ${task.cpus} $args2 -o ${prefix}.bam -"
-
+ def samtools_command = sort_bam ? 'sort' : 'view'
+ def extension = args2.contains("--output-fmt sam") ? "sam" :
+ args2.contains("--output-fmt cram") ? "cram":
+ sort_bam && args2.contains("-O cram")? "cram":
+ !sort_bam && args2.contains("-C") ? "cram":
+ "bam"
+ def reference = fasta && extension=="cram" ? "--reference ${fasta}" : ""
+ if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output"
"""
INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
bwa mem \\
$args \\
- -R "$read_group" \\
-t $task.cpus \\
\$INDEX \\
$reads \\
- | $samtools_command
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- bwa: \$(bwa 2>&1 | grep -o 'Version: [0-9.]*' | sed 's/Version: //')
- samtools: \$(samtools --version | head -n1 | sed 's/samtools //')
- END_VERSIONS
+ | samtools $samtools_command $args2 ${reference} --threads $task.cpus -o ${prefix}.${extension} -
"""
stub:
+ def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args2.contains("--output-fmt sam") ? "sam" :
+ args2.contains("--output-fmt cram") ? "cram":
+ sort_bam && args2.contains("-O cram")? "cram":
+ !sort_bam && args2.contains("-C") ? "cram":
+ "bam"
"""
- touch ${prefix}.bam
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- bwa: 0.7.18
- samtools: 1.19
- END_VERSIONS
+ touch ${prefix}.${extension}
+ touch ${prefix}.csi
+ touch ${prefix}.crai
"""
}
diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/meta.yml b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/meta.yml
new file mode 100644
index 0000000..450a3fe
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/meta.yml
@@ -0,0 +1,149 @@
+name: bwa_mem
+description: Performs fastq alignment to a fasta reference using BWA
+keywords:
+ - mem
+ - bwa
+ - alignment
+ - map
+ - fastq
+ - bam
+ - sam
+tools:
+ - bwa:
+ description: |
+ BWA is a software package for mapping DNA sequences against
+ a large reference genome, such as the human genome.
+ homepage: http://bio-bwa.sourceforge.net/
+ documentation: https://bio-bwa.sourceforge.net/bwa.shtml
+ arxiv: arXiv:1303.3997
+ licence:
+ - "GPL-3.0-or-later"
+ identifier: "biotools:bwa"
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ ontologies:
+ - edam: "http://edamontology.org/data_2044"
+ - edam: "http://edamontology.org/format_1930"
+ - - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information.
+ e.g. [ id:'test', single_end:false ]
+ - index:
+ type: file
+ description: BWA genome index files
+ pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}"
+ ontologies:
+ - edam: "http://edamontology.org/data_3210"
+ - - meta3:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: Reference genome in FASTA format
+ pattern: "*.{fasta,fa}"
+ ontologies:
+ - edam: "http://edamontology.org/data_2044"
+ - edam: "http://edamontology.org/format_1929"
+ - sort_bam:
+ type: boolean
+ description: use samtools sort (true) or samtools view (false)
+ pattern: "true or false"
+output:
+ bam:
+ - - meta:
+ type: map
+ description: Groovy Map containing sample information
+ - "*.bam":
+ type: file
+ description: Output BAM file containing read alignments
+ pattern: "*.{bam}"
+ ontologies:
+ - edam: "http://edamontology.org/format_2572"
+ cram:
+ - - meta:
+ type: map
+ description: Groovy Map containing sample information
+ - "*.cram":
+ type: file
+ description: Output CRAM file containing read alignments
+ pattern: "*.{cram}"
+ ontologies:
+ - edam: "http://edamontology.org/format_3462"
+ csi:
+ - - meta:
+ type: map
+ description: Groovy Map containing sample information
+ - "*.csi":
+ type: file
+ description: Optional index file for BAM file
+ pattern: "*.{csi}"
+ ontologies: []
+ crai:
+ - - meta:
+ type: map
+ description: Groovy Map containing sample information
+ - "*.crai":
+ type: file
+ description: Optional index file for CRAM file
+ pattern: "*.{crai}"
+ ontologies: []
+ versions_bwa:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - bwa:
+ type: string
+ description: The name of the tool
+ - 'bwa 2>&1 | sed -n "s/^Version: //p"':
+ type: eval
+ description: The expression to obtain the version of the tool
+ versions_samtools:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - samtools:
+ type: string
+ description: The name of the tool
+ - samtools version | sed '1!d;s/.* //':
+ type: eval
+ description: The expression to obtain the version of the tool
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - bwa:
+ type: string
+ description: The name of the tool
+ - 'bwa 2>&1 | sed -n "s/^Version: //p"':
+ type: eval
+ description: The expression to obtain the version of the tool
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - samtools:
+ type: string
+ description: The name of the tool
+ - samtools version | sed '1!d;s/.* //':
+ type: eval
+ description: The expression to obtain the version of the tool
+authors:
+ - "@drpatelh"
+ - "@jeremy1805"
+ - "@matthdsm"
+maintainers:
+ - "@drpatelh"
+ - "@jeremy1805"
+ - "@matthdsm"
diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test
new file mode 100644
index 0000000..6486ab0
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test
@@ -0,0 +1,255 @@
+nextflow_process {
+
+ name "Test Process BWA_MEM"
+ tag "modules_nfcore"
+ tag "modules"
+ tag "bwa"
+ tag "bwa/mem"
+ tag "bwa/index"
+ script "../main.nf"
+ process "BWA_MEM"
+
+ setup {
+ run("BWA_INDEX") {
+ script "../../index/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+
+ test("Single-End") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.findAll { key, val -> key.startsWith("versions") },
+ bam(process.out.bam[0][1]).getReadsMD5()
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("Single-End Sort") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = true
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.findAll { key, val -> key.startsWith("versions") },
+ bam(process.out.bam[0][1]).getReadsMD5()
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("Paired-End") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.findAll { key, val -> key.startsWith("versions") },
+ bam(process.out.bam[0][1]).getReadsMD5()
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("Paired-End Sort") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = true
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.findAll { key, val -> key.startsWith("versions") },
+ bam(process.out.bam[0][1]).getReadsMD5()
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("Paired-End - no fasta") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[:],[]]
+ input[3] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.findAll { key, val -> key.startsWith("versions") },
+ bam(process.out.bam[0][1]).getReadsMD5()
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("Single-end - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("Paired-end - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test.snap
new file mode 100644
index 0000000..8aca4b2
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test.snap
@@ -0,0 +1,375 @@
+{
+ "Single-End": {
+ "content": [
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_bwa": [
+ [
+ "BWA_MEM",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "BWA_MEM",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ },
+ "798439cbd7fd81cbcc5078022dc5479d"
+ ],
+ "timestamp": "2026-02-18T12:42:52.901827",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "Single-End Sort": {
+ "content": [
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_bwa": [
+ [
+ "BWA_MEM",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "BWA_MEM",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ },
+ "94fcf617f5b994584c4e8d4044e16b4f"
+ ],
+ "timestamp": "2026-02-18T12:43:01.149915",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "Paired-End": {
+ "content": [
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_bwa": [
+ [
+ "BWA_MEM",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "BWA_MEM",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ },
+ "57aeef88ed701a8ebc8e2f0a381b2a6"
+ ],
+ "timestamp": "2026-02-18T12:43:09.528042",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "Paired-End Sort": {
+ "content": [
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_bwa": [
+ [
+ "BWA_MEM",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "BWA_MEM",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ },
+ "af8628d9df18b2d3d4f6fd47ef2bb872"
+ ],
+ "timestamp": "2026-02-18T12:43:17.876121",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "Single-end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ [
+ "BWA_MEM",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "5": [
+ [
+ "BWA_MEM",
+ "samtools",
+ "1.22.1"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "crai": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "cram": [
+
+ ],
+ "csi": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_bwa": [
+ [
+ "BWA_MEM",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "BWA_MEM",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-18T12:43:33.853248",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "Paired-End - no fasta": {
+ "content": [
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_bwa": [
+ [
+ "BWA_MEM",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "BWA_MEM",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ },
+ "57aeef88ed701a8ebc8e2f0a381b2a6"
+ ],
+ "timestamp": "2026-02-18T12:43:26.121474",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "Paired-end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ [
+ "BWA_MEM",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "5": [
+ [
+ "BWA_MEM",
+ "samtools",
+ "1.22.1"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "crai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "cram": [
+
+ ],
+ "csi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_bwa": [
+ [
+ "BWA_MEM",
+ "bwa",
+ "0.7.19-r1273"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "BWA_MEM",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-18T12:43:42.119907",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/environment.yml b/pipelines/nf-atacseq/modules/nf-core/fastp/environment.yml
new file mode 100644
index 0000000..0c36eed
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastp/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ # renovate: datasource=conda depName=bioconda/fastp
+ - bioconda::fastp=1.0.1
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/main.nf b/pipelines/nf-atacseq/modules/nf-core/fastp/main.nf
index c9b1380..e13509c 100644
--- a/pipelines/nf-atacseq/modules/nf-core/fastp/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/fastp/main.nf
@@ -2,23 +2,25 @@ process FASTP {
tag "$meta.id"
label 'process_medium'
- conda "bioconda::fastp=0.23.4"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
- 'biocontainers/fastp:0.23.4--h5f740d0_0' }"
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/527b18847a97451091dba07a886b24f17f742a861f9f6c9a6bfb79d4f1f3bf9d/data' :
+ 'community.wave.seqera.io/library/fastp:1.0.1--c8b87fe62dcc103c' }"
input:
- tuple val(meta), path(reads)
- path adapter_fasta
+ tuple val(meta), path(reads), path(adapter_fasta)
+ val discard_trimmed_pass
val save_trimmed_fail
val save_merged
output:
- tuple val(meta), path('*.fastp.fastq.gz'), emit: reads
- tuple val(meta), path('*.json'), emit: json
- tuple val(meta), path('*.html'), emit: html
- tuple val(meta), path('*.log'), emit: log
- path "versions.yml", emit: versions
+ tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
+ tuple val(meta), path('*.json') , emit: json
+ tuple val(meta), path('*.html') , emit: html
+ tuple val(meta), path('*.log') , emit: log
+ tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
+ tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
+ tuple val("${task.process}"), val('fastp'), eval('fastp --version 2>&1 | sed -e "s/fastp //g"'), emit: versions_fastp, topic: versions
when:
task.ext.when == null || task.ext.when
@@ -27,58 +29,76 @@ process FASTP {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
-
- if (meta.single_end) {
+ def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_R1.fail.fastq.gz --unpaired2 ${prefix}_R2.fail.fastq.gz" : ''
+ def out_fq1 = discard_trimmed_pass ? '' : ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_R1.fastp.fastq.gz" ) // empty (no --out1) when passing reads are discarded; avoids interpolating the Boolean as a literal 'true' argument
+ def out_fq2 = discard_trimmed_pass ? '' : "--out2 ${prefix}_R2.fastp.fastq.gz" // empty (no --out2) when passing reads are discarded
+ // Added soft-links to original fastqs for consistent naming in MultiQC
+ // Use single ended for interleaved. Add --interleaved_in in config.
+ if ( task.ext.args?.contains('--interleaved_in') ) {
"""
+ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
+
fastp \\
- --in1 ${reads[0]} \\
- --out1 ${prefix}.fastp.fastq.gz \\
+ --stdout \\
+ --in1 ${prefix}.fastq.gz \\
--thread $task.cpus \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$adapter_list \\
+ $fail_fastq \\
$args \\
- 2> >(tee ${prefix}.fastp.log >&2)
+ 2>| >(tee ${prefix}.fastp.log >&2) \\
+ | gzip -c > ${prefix}.fastp.fastq.gz
+ """
+ } else if (meta.single_end) {
+ """
+ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed 's/fastp //')
- END_VERSIONS
+ fastp \\
+ --in1 ${prefix}.fastq.gz \\
+ $out_fq1 \\
+ --thread $task.cpus \\
+ --json ${prefix}.fastp.json \\
+ --html ${prefix}.fastp.html \\
+ $adapter_list \\
+ $fail_fastq \\
+ $args \\
+ 2>| >(tee ${prefix}.fastp.log >&2)
"""
} else {
+ def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
"""
+ [ ! -f ${prefix}_R1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_R1.fastq.gz
+ [ ! -f ${prefix}_R2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_R2.fastq.gz
fastp \\
- --in1 ${reads[0]} \\
- --in2 ${reads[1]} \\
- --out1 ${prefix}_1.fastp.fastq.gz \\
- --out2 ${prefix}_2.fastp.fastq.gz \\
- --thread $task.cpus \\
+ --in1 ${prefix}_R1.fastq.gz \\
+ --in2 ${prefix}_R2.fastq.gz \\
+ $out_fq1 \\
+ $out_fq2 \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
- --detect_adapter_for_pe \\
$adapter_list \\
+ $fail_fastq \\
+ $merge_fastq \\
+ --thread $task.cpus \\
+ --detect_adapter_for_pe \\
$args \\
- 2> >(tee ${prefix}.fastp.log >&2)
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed 's/fastp //')
- END_VERSIONS
+ 2>| >(tee ${prefix}.fastp.log >&2)
"""
}
stub:
- def prefix = task.ext.prefix ?: "${meta.id}"
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end
+ def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_R1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_R2.fastp.fastq.gz"
+ def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : ""
+ def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_R1.fail.fastq.gz ; echo '' | gzip > ${prefix}_R2.fail.fastq.gz"
"""
- touch ${prefix}_1.fastp.fastq.gz
- touch ${prefix}_2.fastp.fastq.gz
- echo '{}' > ${prefix}.fastp.json
- touch ${prefix}.fastp.html
- touch ${prefix}.fastp.log
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: 0.23.4
- END_VERSIONS
+ $touch_reads
+ $touch_fail_fastq
+ $touch_merged
+ touch "${prefix}.fastp.json"
+ touch "${prefix}.fastp.html"
+ touch "${prefix}.fastp.log"
"""
}
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/meta.yml b/pipelines/nf-atacseq/modules/nf-core/fastp/meta.yml
new file mode 100644
index 0000000..a67be39
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastp/meta.yml
@@ -0,0 +1,144 @@
+name: fastp
+description: Perform adapter/quality trimming on sequencing reads
+keywords:
+ - trimming
+ - quality control
+ - fastq
+tools:
+ - fastp:
+ description: |
+ A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
+ documentation: https://github.com/OpenGene/fastp
+ doi: 10.1093/bioinformatics/bty560
+ licence: ["MIT"]
+ identifier: biotools:fastp
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively. If you wish to run interleaved paired-end data, supply as single-end data
+ but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
+ ontologies: []
+ - adapter_fasta:
+ type: file
+ description: File in FASTA format containing possible adapters to remove.
+ pattern: "*.{fasta,fna,fas,fa}"
+ ontologies: []
+ - discard_trimmed_pass:
+ type: boolean
+ description: |
+ Specify true to not write any reads that pass trimming thresholds.
+ This can be used to use fastp for the output report only.
+ - save_trimmed_fail:
+ type: boolean
+ description: Specify true to save files that failed to pass trimming thresholds
+ ending in `*.fail.fastq.gz`
+ - save_merged:
+ type: boolean
+ description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz`
+output:
+ reads:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.fastp.fastq.gz":
+ type: file
+ description: The trimmed/modified/unmerged fastq reads
+ pattern: "*fastp.fastq.gz"
+ ontologies:
+ - edam: http://edamontology.org/format_1930 # FASTQ
+ - edam: http://edamontology.org/format_3989 # GZIP format
+ json:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.json":
+ type: file
+ description: Results in JSON format
+ pattern: "*.json"
+ ontologies:
+ - edam: http://edamontology.org/format_3464 # JSON
+ html:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.html":
+ type: file
+ description: Results in HTML format
+ pattern: "*.html"
+ ontologies: []
+ log:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.log":
+ type: file
+ description: fastp log file
+ pattern: "*.log"
+ ontologies: []
+ reads_fail:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.fail.fastq.gz":
+ type: file
+ description: Reads that failed the preprocessing
+ pattern: "*fail.fastq.gz"
+ ontologies:
+ - edam: http://edamontology.org/format_1930 # FASTQ
+ - edam: http://edamontology.org/format_3989 # GZIP format
+ reads_merged:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.merged.fastq.gz":
+ type: file
+ description: Reads that were successfully merged
+ pattern: "*.merged.fastq.gz"
+ ontologies: []
+ versions_fastp:
+ - - "${task.process}":
+ type: string
+ description: The name of the process
+ - fastp:
+ type: string
+ description: The name of the tool
+ - 'fastp --version 2>&1 | sed -e "s/fastp //g"':
+ type: eval
+ description: The expression to obtain the version of the tool
+topics:
+ versions:
+ - - "${task.process}":
+ type: string
+ description: The name of the process
+ - fastp:
+ type: string
+ description: The name of the tool
+ - 'fastp --version 2>&1 | sed -e "s/fastp //g"':
+ type: eval
+ description: The expression to obtain the version of the tool
+authors:
+ - "@drpatelh"
+ - "@kevinmenden"
+ - "@eit-maxlcummins"
+maintainers:
+ - "@drpatelh"
+ - "@kevinmenden"
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test
new file mode 100644
index 0000000..b790157
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test
@@ -0,0 +1,661 @@
+nextflow_process {
+
+ name "Test Process FASTP"
+ script "../main.nf"
+ process "FASTP"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fastp"
+
+ test("test_fastp_single_end") {
+
+ when {
+
+ process {
+ """
+ adapter_fasta = [] // empty list for no adapter file!
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") },
+ { assert snapshot(
+ process.out.reads,
+ process.out.reads_fail,
+ process.out.reads_merged,
+ process.out.findAll { key, val -> key.startsWith('versions') }).match()
+ }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end") {
+
+ when {
+
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") },
+ { assert snapshot(
+ process.out.reads,
+ process.out.reads_fail,
+ process.out.reads_merged,
+ process.out.findAll { key, val -> key.startsWith('versions') }).match() }
+ )
+ }
+ }
+
+ test("fastp test_fastp_interleaved") {
+
+ config './nextflow.interleaved.config'
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.html.get(0).get(1)).getText().contains("paired end (151 cycles + 151 cycles)") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") },
+ { assert process.out.reads_fail == [] },
+ { assert process.out.reads_merged == [] },
+ { assert snapshot(
+ process.out.reads,
+ process.out.findAll { key, val -> key.startsWith('versions') }).match() }
+ )
+ }
+ }
+
+ test("test_fastp_single_end_trim_fail") {
+
+ when {
+
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") },
+ { assert snapshot(
+ process.out.reads,
+ process.out.reads_fail,
+ process.out.reads_merged,
+ process.out.findAll { key, val -> key.startsWith('versions') }).match() }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_trim_fail") {
+
+ config './nextflow.save_failed.config'
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") },
+ { assert snapshot(
+ process.out.reads,
+ process.out.reads_fail,
+ process.out.reads_merged,
+ process.out.findAll { key, val -> key.startsWith('versions') }).match() }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged") {
+
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("total reads: 75") },
+ { assert snapshot(
+ process.out.reads,
+ process.out.reads_fail,
+ process.out.reads_merged,
+ process.out.findAll { key, val -> key.startsWith('versions') }).match() }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged_adapterlist") {
+
+ when {
+ process {
+ """
+ adapter_fasta = file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true)
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.html.get(0).get(1)).getText().contains("") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("total bases: 13683") },
+ { assert snapshot(
+ process.out.reads,
+ process.out.reads_fail,
+ process.out.reads_merged,
+ process.out.findAll { key, val -> key.startsWith('versions') }).match() }
+ )
+ }
+ }
+
+ test("test_fastp_single_end_qc_only") {
+
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = true
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") },
+ { assert snapshot(
+ process.out.reads,
+ process.out.reads,
+ process.out.reads_fail,
+ process.out.reads_fail,
+ process.out.reads_merged,
+ process.out.reads_merged,
+ process.out.findAll { key, val -> key.startsWith('versions') }).match() }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_qc_only") {
+
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = true
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") },
+ { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") },
+ { assert snapshot(
+ process.out.reads,
+ process.out.reads,
+ process.out.reads_fail,
+ process.out.reads_fail,
+ process.out.reads_merged,
+ process.out.reads_merged,
+ process.out.findAll { key, val -> key.startsWith('versions') }).match() }
+ )
+ }
+ }
+
+ test("test_fastp_single_end - stub") {
+
+ options "-stub"
+
+ when {
+
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end - stub") {
+
+ options "-stub"
+
+ when {
+
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("fastp - stub test_fastp_interleaved") {
+
+ options "-stub"
+
+ config './nextflow.interleaved.config'
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_fastp_single_end_trim_fail - stub") {
+
+ options "-stub"
+
+ when {
+
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_trim_fail - stub") {
+
+ options "-stub"
+
+ config './nextflow.save_failed.config'
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = true
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_merged_adapterlist - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ adapter_fasta = file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true)
+ discard_trimmed_pass = false
+ save_trimmed_fail = false
+ save_merged = true
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_fastp_single_end_qc_only - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = true
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_fastp_paired_end_qc_only - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ adapter_fasta = []
+ discard_trimmed_pass = true
+ save_trimmed_fail = false
+ save_merged = false
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ],
+ adapter_fasta
+ ])
+ input[1] = discard_trimmed_pass
+ input[2] = save_trimmed_fail
+ input[3] = save_merged
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test.snap
new file mode 100644
index 0000000..5677235
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -0,0 +1,1376 @@
+{
+ "test_fastp_single_end_qc_only - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "json": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+
+ ],
+ "reads_fail": [
+
+ ],
+ "reads_merged": [
+
+ ],
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-22T13:00:52.14535813"
+ },
+ "test_fastp_paired_end": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
+ "test_R2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+ ]
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T09:46:26.421773402"
+ },
+ "test_fastp_paired_end_merged_adapterlist": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672",
+ "test_R2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba"
+ ]
+ ]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5"
+ ]
+ ],
+ {
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T09:46:59.832295907"
+ },
+ "test_fastp_single_end_qc_only": {
+ "content": [
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T09:47:06.486959565"
+ },
+ "test_fastp_paired_end_trim_fail": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7",
+ "test_R2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a"
+ ]
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366",
+ "test_R1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6",
+ "test_R2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995"
+ ]
+ ]
+ ],
+ [
+
+ ],
+ {
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T09:46:46.736511024"
+ },
+ "fastp - stub test_fastp_interleaved": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "json": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "reads_fail": [
+
+ ],
+ "reads_merged": [
+
+ ],
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-22T13:00:16.097071654"
+ },
+ "test_fastp_single_end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "json": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "reads_fail": [
+
+ ],
+ "reads_merged": [
+
+ ],
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-22T13:00:03.317192706"
+ },
+ "test_fastp_paired_end_merged_adapterlist - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "6": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "json": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "reads_fail": [
+
+ ],
+ "reads_merged": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-22T13:00:44.851708205"
+ },
+ "test_fastp_paired_end_merged - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "6": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "json": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "reads_fail": [
+
+ ],
+ "reads_merged": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-22T13:00:37.581047713"
+ },
+ "test_fastp_paired_end_merged": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672",
+ "test_R2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba"
+ ]
+ ]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5"
+ ]
+ ],
+ {
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T09:46:53.190202914"
+ },
+ "test_fastp_paired_end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "json": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "reads_fail": [
+
+ ],
+ "reads_merged": [
+
+ ],
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-22T13:00:09.585957282"
+ },
+ "test_fastp_single_end": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T09:46:19.624824985"
+ },
+ "test_fastp_single_end_trim_fail - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "json": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "reads_fail": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "reads_merged": [
+
+ ],
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-22T13:00:22.800659826"
+ },
+ "test_fastp_paired_end_trim_fail - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "json": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "reads_fail": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_R2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "reads_merged": [
+
+ ],
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-22T13:00:30.271734068"
+ },
+ "fastp test_fastp_interleaved": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,217d62dc13a23e92513a1bd8e1bcea39"
+ ]
+ ],
+ {
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T09:46:33.4628687"
+ },
+ "test_fastp_single_end_trim_fail": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fail.fastq.gz:md5,3e4aaadb66a5b8fc9b881bf39c227abd"
+ ]
+ ],
+ [
+
+ ],
+ {
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T09:46:39.895973372"
+ },
+ "test_fastp_paired_end_qc_only": {
+ "content": [
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-23T09:47:13.015833707"
+ },
+ "test_fastp_paired_end_qc_only - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "json": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "reads": [
+
+ ],
+ "reads_fail": [
+
+ ],
+ "reads_merged": [
+
+ ],
+ "versions_fastp": [
+ [
+ "FASTP",
+ "fastp",
+ "1.0.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-22T13:00:59.670106791"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.interleaved.config b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.interleaved.config
new file mode 100644
index 0000000..4be8dbd
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.interleaved.config
@@ -0,0 +1,5 @@
+process {
+ withName: FASTP {
+ ext.args = "--interleaved_in -e 30"
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.save_failed.config b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.save_failed.config
new file mode 100644
index 0000000..53b61b0
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.save_failed.config
@@ -0,0 +1,5 @@
+process {
+ withName: FASTP {
+ ext.args = "-e 30"
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/environment.yml b/pipelines/nf-atacseq/modules/nf-core/fastqc/environment.yml
new file mode 100644
index 0000000..f9f54ee
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fastqc=0.12.1
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/main.nf b/pipelines/nf-atacseq/modules/nf-core/fastqc/main.nf
index 40d10a5..f562952 100644
--- a/pipelines/nf-atacseq/modules/nf-core/fastqc/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/main.nf
@@ -1,8 +1,8 @@
process FASTQC {
- tag "$meta.id"
- label 'process_medium'
+ tag "${meta.id}"
+ label 'process_low'
- conda "bioconda::fastqc=0.12.1"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' :
'biocontainers/fastqc:0.12.1--hdfd78af_0' }"
@@ -11,34 +11,44 @@ process FASTQC {
tuple val(meta), path(reads)
output:
- tuple val(meta), path("*.html"), emit: html
- tuple val(meta), path("*.zip"), emit: zip
- path "versions.yml", emit: versions
+ tuple val(meta) , path("*.html") , emit: html
+ tuple val(meta) , path("*.zip") , emit: zip
+ tuple val("${task.process}"), val('fastqc'), eval('fastqc --version | sed "/FastQC v/!d; s/.*v//"'), emit: versions_fastqc, topic: versions
when:
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- """
- fastqc $args --threads $task.cpus $reads
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ // Make list of old name and new name pairs to use for renaming in the bash while loop
+ def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
+ def rename_to = old_new_pairs*.join(' ').join(' ')
+ def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ')
+
+ // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory)
+ // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222
+ // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label
+ def memory_in_mb = task.memory ? task.memory.toUnit('MB') / task.cpus : null
+ // FastQC memory value allowed range (100 - 10000)
+ def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' )
- END_VERSIONS
+ """
+ printf "%s %s\\n" ${rename_to} | while read old_name new_name; do
+ [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+ done
+
+ fastqc \\
+ ${args} \\
+ --threads ${task.cpus} \\
+ --memory ${fastqc_memory} \\
+ ${renamed_files}
"""
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
- touch ${prefix}_fastqc.html
- touch ${prefix}_fastqc.zip
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastqc: 0.12.1
- END_VERSIONS
+ touch ${prefix}.html
+ touch ${prefix}.zip
"""
}
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/meta.yml b/pipelines/nf-atacseq/modules/nf-core/fastqc/meta.yml
new file mode 100644
index 0000000..49164c8
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/meta.yml
@@ -0,0 +1,111 @@
+name: fastqc
+description: Run FastQC on sequenced reads
+keywords:
+ - quality control
+ - qc
+ - adapters
+ - fastq
+tools:
+ - fastqc:
+ description: |
+ FastQC gives general quality metrics about your reads.
+ It provides information about the quality score distribution
+ across your reads, the per base sequence content (%A/C/G/T).
+
+ You get information about adapter contamination and other
+ overrepresented sequences.
+ homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
+ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
+ licence: ["GPL-2.0-only"]
+ identifier: biotools:fastqc
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ ontologies: []
+output:
+ html:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.html":
+ type: file
+ description: FastQC report
+ pattern: "*_{fastqc.html}"
+ ontologies: []
+ zip:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.zip":
+ type: file
+ description: FastQC report archive
+ pattern: "*_{fastqc.zip}"
+ ontologies: []
+ versions_fastqc:
+ - - ${task.process}:
+ type: string
+ description: The process the versions were collected from
+ - fastqc:
+ type: string
+ description: The tool name
+ - fastqc --version | sed "/FastQC v/!d; s/.*v//":
+ type: eval
+ description: The expression to obtain the version of the tool
+
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: The process the versions were collected from
+ - fastqc:
+ type: string
+ description: The tool name
+ - fastqc --version | sed "/FastQC v/!d; s/.*v//":
+ type: eval
+ description: The expression to obtain the version of the tool
+authors:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
+maintainers:
+ - "@drpatelh"
+ - "@grst"
+ - "@ewels"
+ - "@FelixKrueger"
+containers:
+ conda:
+ linux_amd64:
+ lock_file: https://wave.seqera.io/v1alpha1/builds/bd-af7a5314d5015c29_1/condalock
+ linux_arm64:
+ lock_file: https://wave.seqera.io/v1alpha1/builds/bd-df99cb252670875a_2/condalock
+ docker:
+ linux_amd64:
+ build_id: bd-af7a5314d5015c29_1
+ name: community.wave.seqera.io/library/fastqc:0.12.1--af7a5314d5015c29
+ scanId: sc-a618548acbee5a8a_30
+ linux_arm64:
+ build_id: bd-df99cb252670875a_2
+ name: community.wave.seqera.io/library/fastqc:0.12.1--df99cb252670875a
+ scanId: sc-b5913ed5d42b22d2_18
+ singularity:
+ linux_amd64:
+ build_id: bd-104d26ddd9519960_1
+ name: oras://community.wave.seqera.io/library/fastqc:0.12.1--104d26ddd9519960
+ https: https://community.wave.seqera.io/v2/library/fastqc/blobs/sha256:e0c976cb2eca5fee72618a581537a4f8ea42fcae24c9b201e2e0f764fd28648a
+ linux_arm64:
+ build_id: bd-d56b505a93aef38a_1
+ name: oras://community.wave.seqera.io/library/fastqc:0.12.1--d56b505a93aef38a
+ https: https://community.wave.seqera.io/v2/library/fastqc/blobs/sha256:fd39534bf298698cbe3ee4d4a6f1e73330ec4bca44c38dd9a4d06cb5ea838017
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test
new file mode 100644
index 0000000..66c44da
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test
@@ -0,0 +1,309 @@
+nextflow_process {
+
+ name "Test Process FASTQC"
+ script "../main.nf"
+ process "FASTQC"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fastqc"
+
+ test("sarscov2 single-end [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id: 'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it.
+ // looks like this:
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 paired-end [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+ { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+ { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+ { assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 interleaved [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 paired-end [bam]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 multiple [fastq]") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" },
+ { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" },
+ { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" },
+ { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" },
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" },
+ { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" },
+ { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" },
+ { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" },
+ { assert path(process.out.html[0][1][0]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert path(process.out.html[0][1][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert path(process.out.html[0][1][2]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert path(process.out.html[0][1][3]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 custom_prefix") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'mysample', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" },
+ { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
+ { assert path(process.out.html[0][1]).text.contains("<tr><td>File type</td><td>Conventional base calls</td></tr>") },
+ { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastq] - stub") {
+
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id: 'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sarscov2 paired-end [fastq] - stub") {
+
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sarscov2 interleaved [fastq] - stub") {
+
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sarscov2 paired-end [bam] - stub") {
+
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sarscov2 multiple [fastq] - stub") {
+
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sarscov2 custom_prefix - stub") {
+
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'mysample', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test.snap
new file mode 100644
index 0000000..c8ee120
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -0,0 +1,476 @@
+{
+ "sarscov2 custom_prefix": {
+ "content": [
+ {
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:39:14.518503"
+ },
+ "sarscov2 single-end [fastq] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "zip": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:39:19.309008"
+ },
+ "sarscov2 custom_prefix - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "mysample",
+ "single_end": true
+ },
+ "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "mysample",
+ "single_end": true
+ },
+ "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "mysample",
+ "single_end": true
+ },
+ "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "zip": [
+ [
+ {
+ "id": "mysample",
+ "single_end": true
+ },
+ "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:39:44.94888"
+ },
+ "sarscov2 interleaved [fastq]": {
+ "content": [
+ {
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:38:45.168496"
+ },
+ "sarscov2 paired-end [bam]": {
+ "content": [
+ {
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:38:53.268919"
+ },
+ "sarscov2 multiple [fastq]": {
+ "content": [
+ {
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:39:05.050305"
+ },
+ "sarscov2 paired-end [fastq]": {
+ "content": [
+ {
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:38:37.2373"
+ },
+ "sarscov2 paired-end [fastq] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "zip": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:39:24.450398"
+ },
+ "sarscov2 multiple [fastq] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "zip": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:39:39.758762"
+ },
+ "sarscov2 single-end [fastq]": {
+ "content": [
+ {
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:38:29.555068"
+ },
+ "sarscov2 interleaved [fastq] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "zip": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:39:29.193136"
+ },
+ "sarscov2 paired-end [bam] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "html": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_fastqc": [
+ [
+ "FASTQC",
+ "fastqc",
+ "0.12.1"
+ ]
+ ],
+ "zip": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-28T16:39:34.144919"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/environment.yml b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/environment.yml
new file mode 100644
index 0000000..7f8c3ca
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::macs2=2.2.9.1=py39hff71179_1  # same version and build string as the container tags in main.nf
+ - conda-forge::python=3.9.19  # presumably pinned to match the py39 macs2 build above -- confirm
+ - conda-forge::setuptools=70.0
diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/main.nf b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/main.nf
index 709500b..730845d 100644
--- a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/main.nf
@@ -2,21 +2,23 @@ process MACS2_CALLPEAK {
 tag "$meta.id"
 label 'process_medium'
- conda "bioconda::macs2=2.2.9.1"
+ conda "${moduleDir}/environment.yml" // conda dependencies now come from the module-local environment file
 container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/macs2:2.2.9.1--py39hf95cd2a_0' :
- 'biocontainers/macs2:2.2.9.1--py39hf95cd2a_0' }"
+ 'https://depot.galaxyproject.org/singularity/macs2:2.2.9.1--py39hff71179_1':
+ 'biocontainers/macs2:2.2.9.1--py39hff71179_1' }"
 input:
- tuple val(meta), path(bam)
- val gsize
+ tuple val(meta), path(ipbam), path(controlbam) // controlbam is passed as [] by the tests that have no control sample
+ val macs2_gsize // forwarded to macs2 as --gsize
 output:
- tuple val(meta), path("*.narrowPeak"), emit: peak
- tuple val(meta), path("*.xls"), emit: xls
- tuple val(meta), path("*.summits.bed"), emit: summits, optional: true
- tuple val(meta), path("*.bdg"), emit: bedgraph, optional: true
- path "versions.yml", emit: versions
+ tuple val(meta), path("*.{narrowPeak,broadPeak}"), emit: peak // required: either peak flavour satisfies this glob
+ tuple val(meta), path("*.xls") , emit: xls
+ path "versions.yml" , emit: versions
+
+ tuple val(meta), path("*.gappedPeak"), optional:true, emit: gapped
+ tuple val(meta), path("*.bed") , optional:true, emit: bed // matches the *_summits.bed files recorded in the test snapshots
+ tuple val(meta), path("*.bdg") , optional:true, emit: bdg // emit name changed from 'bedgraph' to 'bdg'
 when:
 task.ext.when == null || task.ext.when
@@ -24,32 +26,43 @@ process MACS2_CALLPEAK {
 script:
 def args = task.ext.args ?: ''
 def prefix = task.ext.prefix ?: "${meta.id}"
- def format = meta.single_end ? 'BAM' : 'BAMPE'
+ def args_list = args.tokenize() // split ext.args on whitespace so individual options can be inspected
+ def format = meta.single_end ? 'BAM' : 'BAMPE' // default input format, chosen from meta.single_end
+ def control = controlbam ? "--control $controlbam" : '' // empty string when no control file was staged
+ if(args_list.contains('--format')){ // a --format given through ext.args overrides the default above
+ def id = args_list.findIndexOf{args_i -> args_i=='--format'}
+ format = args_list[id+1] // the value is the token that follows --format
+ args_list.remove(id+1) // drop the value first so the index of the flag stays valid
+ args_list.remove(id) // then drop the flag; --format is passed explicitly in the command below
+ }
 """
- macs2 callpeak \\
- $args \\
- -g $gsize \\
- -f $format \\
- -t $bam \\
- -n $prefix \\
- --outdir .
+ macs2 \\
+ callpeak \\
+ ${args_list.join(' ')} \\
+ --gsize $macs2_gsize \\
+ --format $format \\
+ --name $prefix \\
+ --treatment $ipbam \\
+ $control
 cat <<-END_VERSIONS > versions.yml
 "${task.process}":
- macs2: \$(macs2 --version 2>&1 | sed 's/macs2 //')
+ macs2: \$(macs2 --version | sed -e "s/macs2 //g")
 END_VERSIONS
 """
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
- touch ${prefix}_peaks.narrowPeak
- touch ${prefix}_peaks.xls
- touch ${prefix}_summits.bed
+ touch ${prefix}.gappedPeak
+ touch ${prefix}.bed
+ touch ${prefix}.bdg
+ touch ${prefix}.narrowPeak
+ touch ${prefix}.xls
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- macs2: 2.2.9.1
+ macs3: \$(macs3 --version | sed -e "s/macs3 //g")
END_VERSIONS
"""
}
diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/meta.yml b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/meta.yml
new file mode 100644
index 0000000..4e354fa
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/meta.yml
@@ -0,0 +1,106 @@
+name: macs2_callpeak
+description: Peak calling of enriched genomic regions of ChIP-seq and ATAC-seq experiments
+keywords:
+  - alignment
+  - atac-seq
+  - chip-seq
+  - peak-calling
+tools:
+  - macs2:
+      description: Model Based Analysis for ChIP-Seq data
+      documentation: https://docs.csc.fi/apps/macs2/
+      tool_dev_url: https://github.com/macs3-project/MACS
+      doi: "10.1101/496521"
+      licence: ["BSD"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - ipbam:
+        type: file
+        description: The ChIP-seq treatment file
+        ontologies: []
+    - controlbam:
+        type: file
+        description: The control file (optional; pass [] when there is no control)
+        ontologies: []
+  - macs2_gsize:
+      type: string
+      description: Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs'
+        for human (2.7e9), 'mm' for mouse (1.87e9), 'ce' for C. elegans (9e7) and 'dm'
+        for fruitfly (1.2e8)
+output:
+  peak:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.{narrowPeak,broadPeak}":
+          type: file
+          description: BED file containing annotated peaks
+          pattern: "*.{narrowPeak,broadPeak}"
+          ontologies: []
+  xls:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.xls":
+          type: file
+          description: xls file containing annotated peaks
+          pattern: "*.xls"
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software version
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+  gapped:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.gappedPeak":
+          type: file
+          description: Optional BED file containing gapped peak
+          pattern: "*.gappedPeak"
+          ontologies: []
+  bed:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.bed":
+          type: file
+          description: Optional BED file containing peak summits locations for every
+            peak
+          pattern: "*.bed"
+          ontologies: []
+  bdg:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - "*.bdg":
+          type: file
+          description: Optional bedGraph files for input and treatment input samples
+          pattern: "*.bdg"
+          ontologies: []
+authors:
+  - "@ntoda03"
+  - "@JoseEspinosa"
+  - "@jianhong"
+maintainers:
+  - "@ntoda03"
+  - "@JoseEspinosa"
+  - "@jianhong"
diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bam.config b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bam.config
new file mode 100644
index 0000000..17a7d3e
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bam.config
@@ -0,0 +1,5 @@
+process { // loaded by the two BAM callpeak tests in main.nf.test
+ withName: 'MACS2_CALLPEAK' {
+ ext.args = '--qval 0.1' // extra macs2 callpeak options; read in main.nf as task.ext.args
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bed.config b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bed.config
new file mode 100644
index 0000000..aeba9a9
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bed.config
@@ -0,0 +1,5 @@
+process { // loaded by the "callpeak - bed" test in main.nf.test
+ withName: 'MACS2_CALLPEAK' {
+ ext.args = '--format BED --qval 10 --nomodel --extsize 200' // main.nf extracts --format from these args and uses BED instead of BAM/BAMPE
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test
new file mode 100644
index 0000000..b9ebc06
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test
@@ -0,0 +1,125 @@
+nextflow_process { // nf-test suite for the MACS2_CALLPEAK process defined in ../main.nf
+
+ name "Test Process MACS2_CALLPEAK"
+ script "../main.nf"
+ process "MACS2_CALLPEAK"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "macs2"
+ tag "macs2/callpeak"
+
+ test("homo_sapiens - callpeak - bed") { // BED treatment input, no control
+
+ when {
+ config "./bed.config" // supplies --format BED and the other ext.args for this case
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed', checkIfExists: true) ],
+ []
+ ]
+ input[1] = 4000
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ sanitizeOutput(process.out),
+ path(process.out.versions[0]).yaml // parsed versions.yml, stored as the second snapshot element
+ ).match() }
+ )
+ }
+
+ }
+
+ test("homo_sapiens - callpeak - bam") { // paired-end BAM treatment input, no control
+
+ when {
+ config "./bam.config" // sets ext.args = '--qval 0.1'
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam', checkIfExists: true) ],
+ []
+ ]
+ input[1] = 40000
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ sanitizeOutput(process.out),
+ path(process.out.versions[0]).yaml // parsed versions.yml, stored as the second snapshot element
+ ).match() }
+ )
+ }
+
+ }
+
+ test("homo_sapiens - callpeak - control - bam") { // same as the BAM case, plus a control BAM as third tuple element
+
+ when {
+ config "./bam.config" // sets ext.args = '--qval 0.1'
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam', checkIfExists: true) ],
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam', checkIfExists: true) ]
+ ]
+ input[1] = 40000
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ sanitizeOutput(process.out),
+ path(process.out.versions[0]).yaml // parsed versions.yml, stored as the second snapshot element
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam - stub") { // NOTE(review): the recorded snapshot for this case holds "macs3": null as the version
+
+ options "-stub" // forwarded to Nextflow so the process stub block is executed
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) ],
+ []
+ ]
+ input[1] = 40000
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ sanitizeOutput(process.out),
+ path(process.out.versions[0]).yaml // parsed versions.yml, stored as the second snapshot element
+ ).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test.snap
new file mode 100644
index 0000000..a9aa3ce
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test.snap
@@ -0,0 +1,222 @@
+{
+ "homo_sapiens - callpeak - bam": {
+ "content": [
+ {
+ "bdg": [
+
+ ],
+ "bed": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_summits.bed:md5,26f0f97b6c14dbca129e947a58067c82"
+ ]
+ ],
+ "gapped": [
+
+ ],
+ "peak": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_peaks.narrowPeak:md5,2e4da1c1704595e12aaf99cc715ad70c"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ba6bf9efdccff6f86c722ce9b61ce75e"
+ ],
+ "xls": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_peaks.xls:md5,dd0cbdd9520b150b3dd5f7bede0d4a1e"
+ ]
+ ]
+ },
+ {
+ "MACS2_CALLPEAK": {
+ "macs2": "2.2.9.1"
+ }
+ }
+ ],
+ "timestamp": "2026-02-16T13:06:13.194555144",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "bdg": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bdg:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bed": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "gapped": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.gappedPeak:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "peak": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.narrowPeak:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,b0ab3b36d39f9851effaf8b0d5cc0b92"
+ ],
+ "xls": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.xls:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ },
+ {
+ "MACS2_CALLPEAK": {
+ "macs3": null
+ }
+ }
+ ],
+ "timestamp": "2026-02-16T13:06:28.267130581",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "homo_sapiens - callpeak - control - bam": {
+ "content": [
+ {
+ "bdg": [
+
+ ],
+ "bed": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_summits.bed:md5,4f3c7c53a1d730d90d1b3dd9d3197af4"
+ ]
+ ],
+ "gapped": [
+
+ ],
+ "peak": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_peaks.narrowPeak:md5,653e1108cc57ca07d0f60fc0f4fb8ba3"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ba6bf9efdccff6f86c722ce9b61ce75e"
+ ],
+ "xls": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_peaks.xls:md5,ba5c031a290fc98828d7a3b9320863ac"
+ ]
+ ]
+ },
+ {
+ "MACS2_CALLPEAK": {
+ "macs2": "2.2.9.1"
+ }
+ }
+ ],
+ "timestamp": "2026-02-16T13:06:20.925967923",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "homo_sapiens - callpeak - bed": {
+ "content": [
+ {
+ "bdg": [
+
+ ],
+ "bed": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_summits.bed:md5,28833eeb7816688f0d698f51670be946"
+ ]
+ ],
+ "gapped": [
+
+ ],
+ "peak": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_peaks.narrowPeak:md5,10e7d4747f8a2513e5ebb04856a51673"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ba6bf9efdccff6f86c722ce9b61ce75e"
+ ],
+ "xls": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_peaks.xls:md5,0b7c5a46179fe9d3f61c8dbc192a3c3d"
+ ]
+ ]
+ },
+ {
+ "MACS2_CALLPEAK": {
+ "macs2": "2.2.9.1"
+ }
+ }
+ ],
+ "timestamp": "2026-02-16T13:06:05.333162919",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/environment.yml b/pipelines/nf-atacseq/modules/nf-core/multiqc/environment.yml
new file mode 100644
index 0000000..009874d
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::multiqc=1.33  # same MultiQC version as the 1.33 container tag in main.nf
diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/main.nf b/pipelines/nf-atacseq/modules/nf-core/multiqc/main.nf
index b3a9eba..5376aea 100644
--- a/pipelines/nf-atacseq/modules/nf-core/multiqc/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/main.nf
@@ -1,54 +1,50 @@
 process MULTIQC {
+ tag "${meta.id}" // tasks are labelled with the id from the meta map added to the input tuple
 label 'process_single'
- conda "bioconda::multiqc=1.19"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
- 'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
+ conda "${moduleDir}/environment.yml" // conda dependencies now come from the module-local environment file
+ container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+ ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data'
+ : 'community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b'}"
 input:
- path multiqc_files, stageAs: "?/*"
- path multiqc_config
- path extra_multiqc_config
- path multiqc_logo
+ tuple val(meta), path(multiqc_files, stageAs: "?/*"), path(multiqc_config, stageAs: "?/*"), path(multiqc_logo), path(replace_names), path(sample_names) // single tuple input; the tests pass [] for the optional files
 output:
- path "*multiqc_report.html", emit: report
- path "*_data", emit: data
- path "*_plots", emit: plots, optional: true
- path "versions.yml", emit: versions
+ tuple val(meta), path("*.html"), emit: report
+ tuple val(meta), path("*_data"), emit: data
+ tuple val(meta), path("*_plots"), emit: plots, optional: true
+ // MultiQC should not push its versions to the `versions` topic. Its input depends on the versions topic to be resolved thus outputting to the topic will let the pipeline hang forever
+ tuple val("${task.process}"), val('multiqc'), eval('multiqc --version | sed "s/.* //g"'), emit: versions // version is evaluated at run time; no versions.yml is written any more
 when:
 task.ext.when == null || task.ext.when
 script:
 def args = task.ext.args ?: ''
- def config = multiqc_config ? "--config $multiqc_config" : ""
- def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : ""
- def logo = multiqc_logo ? "--cl-config 'custom_logo: $multiqc_logo'" : ""
+ def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' // --filename is only passed when ext.prefix is set
+ def config = multiqc_config ? multiqc_config instanceof List ? "--config ${multiqc_config.join(' --config ')}" : "--config ${multiqc_config}" : "" // a list of configs becomes repeated --config options
+ def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' // logo path is now wrapped in escaped double quotes
+ def replace = replace_names ? "--replace-names ${replace_names}" : ''
+ def samples = sample_names ? "--sample-names ${sample_names}" : ''
 """
 multiqc \\
 --force \\
- $args \\
- $config \\
- $extra_config \\
- $logo \\
+ ${args} \\
+ ${config} \\
+ ${prefix} \\
+ ${logo} \\
+ ${replace} \\
+ ${samples} \\
 .
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- multiqc: \$( multiqc --version | sed 's/multiqc, version //' )
- END_VERSIONS
 """
 stub:
 """
- touch multiqc_report.html
 mkdir multiqc_data
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- multiqc: 1.19
- END_VERSIONS
+ touch multiqc_data/.stub
+ mkdir multiqc_plots
+ touch multiqc_plots/.stub
+ touch multiqc_report.html
 """
 }
diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/meta.yml b/pipelines/nf-atacseq/modules/nf-core/multiqc/meta.yml
new file mode 100644
index 0000000..ef434a9
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/meta.yml
@@ -0,0 +1,133 @@
+name: multiqc
+description: Aggregate results from bioinformatics analyses across many samples
+  into a single report
+keywords:
+  - QC
+  - bioinformatics tools
+  - Beautiful stand-alone HTML report
+tools:
+  - multiqc:
+      description: |
+        MultiQC searches a given directory for analysis logs and compiles a HTML report.
+        It's a general use tool, perfect for summarising the output from numerous bioinformatics tools.
+      homepage: https://multiqc.info/
+      documentation: https://multiqc.info/docs/
+      licence:
+        - "GPL-3.0-or-later"
+      identifier: biotools:multiqc
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'sample1', single_end:false ]
+    - multiqc_files:
+        type: file
+        description: |
+          List of reports / files recognised by MultiQC, for example the html and zip output of FastQC
+        ontologies: []
+    - multiqc_config:
+        type: file
+        description: Optional config yml file(s) for MultiQC; a list is expanded to repeated --config options
+        pattern: "*.{yml,yaml}"
+        ontologies:
+          - edam: http://edamontology.org/format_3750
+    - multiqc_logo:
+        type: file
+        description: Optional logo file for MultiQC
+        pattern: "*.{png}"
+        ontologies: []
+    - replace_names:
+        type: file
+        description: |
+          Optional two-column sample renaming file. First column a set of
+          patterns, second column a set of corresponding replacements. Passed via
+          MultiQC's `--replace-names` option.
+        pattern: "*.{tsv}"
+        ontologies:
+          - edam: http://edamontology.org/format_3475
+    - sample_names:
+        type: file
+        description: |
+          Optional TSV file with headers, passed to the MultiQC `--sample-names`
+          argument.
+        pattern: "*.{tsv}"
+        ontologies:
+          - edam: http://edamontology.org/format_3475
+output:
+  report:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'sample1', single_end:false ]
+      - "*.html":
+          type: file
+          description: MultiQC report file
+          pattern: "*.html"
+          ontologies: []
+  data:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'sample1', single_end:false ]
+      - "*_data":
+          type: directory
+          description: MultiQC data dir
+          pattern: "*_data"
+  plots:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'sample1', single_end:false ]
+      - "*_plots":
+          type: directory
+          description: Directory of plots created by MultiQC
+          pattern: "*_plots"
+          ontologies: []
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the versions were collected from
+      - multiqc:
+          type: string
+          description: The tool name
+      - multiqc --version | sed "s/.* //g":
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@abhi18av"
+  - "@bunop"
+  - "@drpatelh"
+  - "@jfy133"
+maintainers:
+  - "@abhi18av"
+  - "@bunop"
+  - "@drpatelh"
+  - "@jfy133"
+containers:
+  conda:
+    linux/amd64:
+      lock_file: https://wave.seqera.io/v1alpha1/builds/bd-ee7739d47738383b_1/condalock
+    linux/arm64:
+      lock_file: https://wave.seqera.io/v1alpha1/builds/bd-58d7dee710ab3aa8_1/condalock
+  docker:
+    linux/amd64:
+      build_id: bd-ee7739d47738383b_1
+      name: community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b
+      scanId: sc-6ddec592dcadd583_4
+    linux/arm64:
+      build_id: bd-58d7dee710ab3aa8_1
+      name: community.wave.seqera.io/library/multiqc:1.33--58d7dee710ab3aa8
+      scanId: sc-a04c42273e34c55c_2
+  singularity:
+    linux/amd64:
+      build_id: bd-e3576ddf588fa00d_1
+      https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data
+      name: oras://community.wave.seqera.io/library/multiqc:1.33--e3576ddf588fa00d
+    linux/arm64:
+      build_id: bd-2537ca5f8445e3c2_1
+      https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/78/78b89e91d89e9cc99ad5ade5be311f347838cb2acbfb4f13bc343b170be09ce4/data
+      name: oras://community.wave.seqera.io/library/multiqc:1.33--2537ca5f8445e3c2
diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/custom_prefix.config b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/custom_prefix.config
new file mode 100644
index 0000000..b30b135
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/custom_prefix.config
@@ -0,0 +1,5 @@
+process { // loaded by the "custom prefix" test in main.nf.test
+ withName: 'MULTIQC' {
+ ext.prefix = "custom_prefix" // main.nf turns this into --filename custom_prefix.html
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test
new file mode 100644
index 0000000..0e422ea
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test
@@ -0,0 +1,161 @@
+nextflow_process { // nf-test suite for the MULTIQC process defined in ../main.nf
+
+ name "Test Process MULTIQC"
+ script "../main.nf"
+ process "MULTIQC"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "multiqc"
+
+ config "./nextflow.config"
+
+ test("sarscov2 single-end [fastqc]") { // FastQC zip only; every optional input is passed as []
+
+ when {
+ process {
+ """
+ input[0] = channel.of([
+ [ id: 'FASTQC' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true),
+ [],
+ [],
+ [],
+ []
+ ])
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(
+ file(process.out.report[0][1]).name, // only the report/data names are snapshotted, not their contents
+ file(process.out.data[0][1]).name,
+ process.out.findAll { key, val -> key.startsWith("versions")
+ }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastqc] - custom prefix") { // snapshot expects custom_prefix.html and custom_prefix_data
+ config "./custom_prefix.config" // sets ext.prefix = "custom_prefix"
+
+ when {
+ process {
+ """
+ input[0] = channel.of([
+ [ id: 'FASTQC' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true),
+ [],
+ [],
+ [],
+ []
+ ])
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(
+ file(process.out.report[0][1]).name, // only the report/data names are snapshotted, not their contents
+ file(process.out.data[0][1]).name,
+ process.out.findAll { key, val -> key.startsWith("versions")
+ }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastqc] [config]") { // passes one MultiQC config file fetched by URL
+
+ when {
+ process {
+ """
+ input[0] = channel.of([
+ [ id: 'FASTQC' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true),
+ file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true),
+ [],
+ [],
+ []
+ ])
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(
+ file(process.out.report[0][1]).name,
+ file(process.out.data[0][1]).name,
+ file(process.out.plots[0][1]).name, // the plots output is only checked in the tests that pass a config
+ process.out.findAll { key, val -> key.startsWith("versions")
+ }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastqc] [multiple configs]") { // passes the same config twice as a list
+
+ when {
+ process {
+ """
+ input[0] = channel.of([
+ [ id: 'FASTQC' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true),
+ [
+ file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true),
+ file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)
+ ],
+ [],
+ [],
+ []
+ ])
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(
+ file(process.out.report[0][1]).name,
+ file(process.out.data[0][1]).name,
+ file(process.out.plots[0][1]).name, // the plots output is only checked in the tests that pass a config
+ process.out.findAll { key, val -> key.startsWith("versions")
+ }).match() }
+ )
+ }
+ }
+
+ test("sarscov2 single-end [fastqc] - stub") { // stub run; the snapshot holds the empty placeholder files the stub creates
+
+ options "-stub" // forwarded to Nextflow so the process stub block is executed
+
+ when {
+ process {
+ """
+ input[0] = channel.of([
+ [ id: 'FASTQC' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true),
+ [],
+ [],
+ [],
+ []
+ ])
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test.snap
new file mode 100644
index 0000000..c022701
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test.snap
@@ -0,0 +1,130 @@
+{
+ "sarscov2 single-end [fastqc] [multiple configs]": {
+ "content": [
+ "multiqc_report.html",
+ "multiqc_data",
+ "multiqc_plots",
+ {
+ "versions": [
+ [
+ "MULTIQC",
+ "multiqc",
+ "1.33"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.4"
+ },
+ "timestamp": "2026-02-26T20:21:35.851707"
+ },
+ "sarscov2 single-end [fastqc]": {
+ "content": [
+ "multiqc_report.html",
+ "multiqc_data",
+ {
+ "versions": [
+ [
+ "MULTIQC",
+ "multiqc",
+ "1.33"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ },
+ "timestamp": "2026-02-26T15:10:36.019680076"
+ },
+ "sarscov2 single-end [fastqc] - stub": {
+ "content": [
+ {
+ "data": [
+ [
+ {
+ "id": "FASTQC"
+ },
+ [
+ ".stub:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "plots": [
+ [
+ {
+ "id": "FASTQC"
+ },
+ [
+ ".stub:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "report": [
+ [
+ {
+ "id": "FASTQC"
+ },
+ "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ [
+ "MULTIQC",
+ "multiqc",
+ "1.33"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ },
+ "timestamp": "2026-02-26T15:14:39.789193051"
+ },
+ "sarscov2 single-end [fastqc] [config]": {
+ "content": [
+ "multiqc_report.html",
+ "multiqc_data",
+ "multiqc_plots",
+ {
+ "versions": [
+ [
+ "MULTIQC",
+ "multiqc",
+ "1.33"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ },
+ "timestamp": "2026-02-26T15:21:29.116129274"
+ },
+ "sarscov2 single-end [fastqc] - custom prefix": {
+ "content": [
+ "custom_prefix.html",
+ "custom_prefix_data",
+ {
+ "versions": [
+ [
+ "MULTIQC",
+ "multiqc",
+ "1.33"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ },
+ "timestamp": "2026-02-26T15:10:43.419877592"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/nextflow.config b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/nextflow.config
new file mode 100644
index 0000000..c537a6a
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/nextflow.config
@@ -0,0 +1,5 @@
+process { // test-only process configuration
+ withName: 'MULTIQC' {
+ ext.prefix = null // explicitly leave ext.prefix unset for MULTIQC
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/environment.yml b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/environment.yml
new file mode 100644
index 0000000..b4ac4fe
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ # renovate: datasource=conda depName=bioconda/picard
+ - bioconda::picard=3.4.0  # same version as the picard:3.4.0 container referenced in main.nf
diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/main.nf b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/main.nf
index de90d7f..17bcf27 100644
--- a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/main.nf
@@ -1,22 +1,23 @@
process PICARD_MARKDUPLICATES {
- tag "$meta.id"
+ tag "${meta.id}"
label 'process_medium'
- conda "bioconda::picard=3.1.1"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' :
- 'biocontainers/picard:3.1.1--hdfd78af_0' }"
+ conda "${moduleDir}/environment.yml" // the conda pin now lives in the module's environment.yml
+ container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+ ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/08/0861295baa7c01fc593a9da94e82b44a729dcaf8da92be8e565da109aa549b25/data'
+ : 'community.wave.seqera.io/library/picard:3.4.0--e9963040df0a9bf6'}"
input:
- tuple val(meta), path(bam)
- path fasta
- path fasta_fai
+ tuple val(meta), path(reads) // alignment file to process; its extension is the default output suffix
+ tuple val(meta2), path(fasta) // reference FASTA; when empty no --REFERENCE_SEQUENCE is passed
+ tuple val(meta3), path(fai) // FASTA index; staged only, never referenced in the command
output:
- tuple val(meta), path("*.markdup.bam"), emit: bam
- tuple val(meta), path("*.markdup.bam.bai"), emit: bai
- tuple val(meta), path("*.metrics.txt"), emit: metrics
- path "versions.yml", emit: versions
+ tuple val(meta), path("*.bam"), emit: bam, optional: true
+ tuple val(meta), path("*.bai"), emit: bai, optional: true
+ tuple val(meta), path("*.cram"), emit: cram, optional: true
+ tuple val(meta), path("*.metrics.txt"), emit: metrics
+ tuple val("${task.process}"), val('picard'), eval("picard MarkDuplicates --version 2>&1 | sed -n 's/.*Version://p'"), topic: versions, emit: versions_picard // replaces versions.yml: version comes from an eval output sent to the 'versions' topic
when:
task.ext.when == null || task.ext.when
@@ -24,38 +25,39 @@ process PICARD_MARKDUPLICATES {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
+ def suffix = task.ext.suffix ?: "${reads.getExtension()}" // default: output keeps the input file's extension
+ def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" // only passed when a FASTA was supplied
def avail_mem = 3072
if (!task.memory) {
- log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB'
- } else {
- avail_mem = (task.memory.mega*0.8).intValue()
+ log.info('[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.')
+ }
+ else {
+ avail_mem = (task.memory.mega * 0.8).intValue() // 80% of the task memory in MB, used for -Xmx below
+ }
+
+ if ("${reads}" == "${prefix}.${suffix}") { // refuse to run when the output name equals the input name
+ error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
}
"""
picard \\
-Xmx${avail_mem}M \\
MarkDuplicates \\
- $args \\
- --INPUT $bam \\
- --OUTPUT ${prefix}.markdup.bam \\
- --METRICS_FILE ${prefix}.metrics.txt \\
- --CREATE_INDEX true
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- picard: \$(picard MarkDuplicates --version 2>&1 | grep -o 'Version:[0-9.]*' | sed 's/Version://')
- END_VERSIONS
+ ${args} \\
+ --INPUT ${reads} \\
+ --OUTPUT ${prefix}.${suffix} \\
+ ${reference} \\
+ --METRICS_FILE ${prefix}.metrics.txt
"""
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
+ def suffix = task.ext.suffix ?: "${reads.getExtension()}" // same default as in the script block
+ if ("${reads}" == "${prefix}.${suffix}") { // same name-collision guard as in the script block
+ error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
+ }
"""
- touch ${prefix}.markdup.bam
- touch ${prefix}.markdup.bam.bai
+ touch ${prefix}.${suffix}
+ touch ${prefix}.${suffix}.bai
touch ${prefix}.metrics.txt
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- picard: 3.1.1
- END_VERSIONS
"""
}
diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/meta.yml b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/meta.yml
new file mode 100644
index 0000000..aa0ddbd
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/meta.yml
@@ -0,0 +1,124 @@
+name: picard_markduplicates
+description: Locate and tag duplicate reads in a BAM file
+keywords:
+ - markduplicates
+ - pcr
+ - duplicates
+ - bam
+ - sam
+ - cram
+tools:
+ - picard:
+ description: |
+ A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
+ data and formats such as SAM/BAM/CRAM and VCF.
+ homepage: https://broadinstitute.github.io/picard/
+ documentation: https://broadinstitute.github.io/picard/
+ licence: ["MIT"]
+ identifier: biotools:picard_tools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: Sequence reads file, can be SAM/BAM/CRAM format
+ pattern: "*.{bam,cram,sam}"
+ ontologies: []
+ - - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'genome' ]
+ - fasta:
+ type: file
+ description: Reference genome fasta file, required for CRAM input
+ pattern: "*.{fasta,fa}"
+ ontologies: []
+ - - meta3:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'genome' ]
+ - fai:
+ type: file
+ description: Reference genome fasta index
+ pattern: "*.{fai}"
+ ontologies: []
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.bam":
+ type: file
+ description: BAM file with duplicate reads marked/removed
+ pattern: "*.{bam}"
+ ontologies: []
+ bai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.bai":
+ type: file
+ description: An optional BAM index file. If desired, --CREATE_INDEX must be
+ passed as a flag
+ pattern: "*.{bai}"
+ ontologies: []
+ cram:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.cram":
+ type: file
+ description: Output CRAM file
+ pattern: "*.{cram}"
+ ontologies: []
+ metrics:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.metrics.txt":
+ type: file
+ description: Duplicate metrics file generated by picard
+ pattern: "*.{metrics.txt}"
+ ontologies: []
+ versions_picard:
+ - - ${task.process}:
+ type: string
+ description: The process the versions were collected from
+ - picard:
+ type: string
+ description: The tool name
+ - "picard MarkDuplicates --version 2>&1 | sed -n 's/.*Version://p'":
+ type: eval  # produced by the eval(...) output in main.nf, not a literal string
+ description: The command used to generate the version of the tool
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: The process the versions were collected from
+ - picard:
+ type: string
+ description: The tool name
+ - "picard MarkDuplicates --version 2>&1 | sed -n 's/.*Version://p'":
+ type: eval  # produced by the eval(...) output in main.nf, not a literal string
+ description: The command used to generate the version of the tool
+authors:
+ - "@drpatelh"
+ - "@projectoriented"
+ - "@ramprasadn"
+maintainers:
+ - "@drpatelh"
+ - "@projectoriented"
+ - "@ramprasadn"
diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test
new file mode 100644
index 0000000..4d00645
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test
@@ -0,0 +1,173 @@
+nextflow_process {
+
+ name "Test Process PICARD_MARKDUPLICATES"
+ script "../main.nf"
+ process "PICARD_MARKDUPLICATES"
+ config "./nextflow.config"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "picard"
+ tag "picard/markduplicates"
+
+ test("sarscov2 [unsorted bam]") { // BAM input; input[1]/input[2] are empty [meta, file] pairs (no reference)
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = [ [:], [] ]
+ input[2] = [ [:], [] ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.bam[0][1]).name, // output BAM: file name only
+ path(process.out.metrics.get(0).get(1)).readLines()[0..2], // first three lines of the metrics file
+ process.out.findAll { key, val -> key.startsWith("versions") })
+ .match() }
+ )
+ }
+ }
+
+ test("sarscov2 [sorted bam]") { // sorted BAM input; input[1]/input[2] are empty [meta, file] pairs (no reference)
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ input[1] = [ [:], [] ]
+ input[2] = [ [:], [] ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.bam[0][1]).name, // output BAM: file name only
+ path(process.out.metrics.get(0).get(1)).readLines()[0..2], // first three lines of the metrics file
+ process.out.findAll { key, val -> key.startsWith("versions") })
+ .match() }
+ )
+ }
+ }
+
+ test("homo_sapiens [cram]") { // CRAM input with reference FASTA and .fai supplied
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.cram[0][1]).name, // output is read from the cram channel here: file name only
+ path(process.out.metrics.get(0).get(1)).readLines()[0..2], // first three lines of the metrics file
+ process.out.findAll { key, val -> key.startsWith("versions") })
+ .match() }
+ )
+ }
+ }
+
+ test("sarscov2 [unsorted bam] - stub") { // stub counterpart of the unsorted-BAM test
+ options "-stub" // run the process in Nextflow stub mode
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = [ [:], [] ]
+ input[2] = [ [:], [] ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(sanitizeOutput(process.out)).match() } // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("sarscov2 [sorted bam] - stub") { // stub counterpart of the sorted-BAM test
+ options "-stub" // run the process in Nextflow stub mode
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ input[1] = [ [:], [] ]
+ input[2] = [ [:], [] ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(sanitizeOutput(process.out)).match() } // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("homo_sapiens [cram] - stub") { // stub counterpart of the CRAM test
+ options "-stub" // run the process in Nextflow stub mode
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(sanitizeOutput(process.out)).match() } // NOTE: the stub touches <prefix>.<suffix>.bai, so the snapshot holds a test.md.cram.bai under bai
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap
new file mode 100644
index 0000000..4ea479a
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap
@@ -0,0 +1,218 @@
+{
+ "sarscov2 [sorted bam] - stub": {
+ "content": [
+ {
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "cram": [
+
+ ],
+ "metrics": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_picard": [
+ [
+ "PICARD_MARKDUPLICATES",
+ "picard",
+ "3.4.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-19T17:43:13.544887277",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "sarscov2 [unsorted bam] - stub": {
+ "content": [
+ {
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "cram": [
+
+ ],
+ "metrics": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_picard": [
+ [
+ "PICARD_MARKDUPLICATES",
+ "picard",
+ "3.4.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-19T17:43:06.193033248",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "sarscov2 [unsorted bam]": {
+ "content": [
+ "test.md.bam",
+ [
+ "## htsjdk.samtools.metrics.StringHeader",
+ "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.md.bam --METRICS_FILE test.md.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_DUP_STRATEGY FLOW_QUALITY_SUM_STRATEGY --FLOW_USE_END_IN_UNPAIRED_READS false --FLOW_USE_UNPAIRED_CLIPPED_END false --FLOW_UNPAIRED_END_UNCERTAINTY 0 --FLOW_UNPAIRED_START_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false",
+ "## htsjdk.samtools.metrics.StringHeader"
+ ],
+ {
+ "versions_picard": [
+ [
+ "PICARD_MARKDUPLICATES",
+ "picard",
+ "3.4.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-19T17:42:40.574463587",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "sarscov2 [sorted bam]": {
+ "content": [
+ "test.md.bam",
+ [
+ "## htsjdk.samtools.metrics.StringHeader",
+ "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.md.bam --METRICS_FILE test.md.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_DUP_STRATEGY FLOW_QUALITY_SUM_STRATEGY --FLOW_USE_END_IN_UNPAIRED_READS false --FLOW_USE_UNPAIRED_CLIPPED_END false --FLOW_UNPAIRED_END_UNCERTAINTY 0 --FLOW_UNPAIRED_START_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false",
+ "## htsjdk.samtools.metrics.StringHeader"
+ ],
+ {
+ "versions_picard": [
+ [
+ "PICARD_MARKDUPLICATES",
+ "picard",
+ "3.4.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-19T17:42:49.374645492",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "homo_sapiens [cram]": {
+ "content": [
+ "test.md.cram",
+ [
+ "## htsjdk.samtools.metrics.StringHeader",
+ "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.md.cram --METRICS_FILE test.md.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_DUP_STRATEGY FLOW_QUALITY_SUM_STRATEGY --FLOW_USE_END_IN_UNPAIRED_READS false --FLOW_USE_UNPAIRED_CLIPPED_END false --FLOW_UNPAIRED_END_UNCERTAINTY 0 --FLOW_UNPAIRED_START_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false",
+ "## htsjdk.samtools.metrics.StringHeader"
+ ],
+ {
+ "versions_picard": [
+ [
+ "PICARD_MARKDUPLICATES",
+ "picard",
+ "3.4.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-19T17:42:59.07843756",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "homo_sapiens [cram] - stub": {
+ "content": [
+ {
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.cram.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bam": [
+
+ ],
+ "cram": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.cram:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "metrics": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_picard": [
+ [
+ "PICARD_MARKDUPLICATES",
+ "picard",
+ "3.4.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-19T17:43:20.676018462",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/nextflow.config b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/nextflow.config
new file mode 100644
index 0000000..f8dd0f1
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/nextflow.config
@@ -0,0 +1,6 @@
+process { // test-only process configuration
+ withName: PICARD_MARKDUPLICATES {
+ ext.prefix = { "${meta.id}.md" } // outputs are named <id>.md.<ext> and <id>.md.metrics.txt
+ ext.args = '--ASSUME_SORT_ORDER queryname' // inserted into the picard MarkDuplicates command via task.ext.args
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/environment.yml
new file mode 100644
index 0000000..89e12a6
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ # renovate: datasource=conda depName=bioconda/htslib
+ - bioconda::htslib=1.22.1
+ # renovate: datasource=conda depName=bioconda/samtools
+ - bioconda::samtools=1.22.1  # same version as the samtools:1.22.1 container referenced in main.nf
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/main.nf
index bb37a19..97bfb57 100644
--- a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/main.nf
@@ -2,42 +2,48 @@ process SAMTOOLS_FAIDX {
tag "$fasta"
label 'process_single'
- conda "bioconda::samtools=1.19"
+ conda "${moduleDir}/environment.yml" // the conda pin now lives in the module's environment.yml
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' :
- 'biocontainers/samtools:1.19--h50ea8bc_0' }"
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' :
+ 'biocontainers/samtools:1.22.1--h96c455f_0' }"
input:
- tuple val(meta), path(fasta)
+ tuple val(meta), path(fasta), path(fai) // fai: an existing index may be staged next to the FASTA (tests pass [] when there is none)
+ val get_sizes // truthy: also write <fasta>.sizes
output:
- tuple val(meta), path("*.fai"), emit: fai
- tuple val(meta), path("*.gzi"), emit: gzi, optional: true
- path "versions.yml", emit: versions
+ tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true
+ tuple val(meta), path ("*.sizes") , emit: sizes, optional: true
+ tuple val(meta), path ("*.fai") , emit: fai, optional: true
+ tuple val(meta), path ("*.gzi") , emit: gzi, optional: true
+ tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools // replaces versions.yml: version comes from an eval output sent to the 'versions' topic
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
+ def get_sizes_command = get_sizes ? "cut -f 1,2 ${fasta}.fai > ${fasta}.sizes" : '' // columns 1-2 of the .fai become the .sizes file
"""
- samtools faidx \\
- $args \\
- $fasta
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: \$(samtools --version | head -n1 | sed 's/samtools //')
- END_VERSIONS
+ samtools \\
+ faidx \\
+ $fasta \\
+ $args
+
+ ${get_sizes_command}
"""
stub:
+ def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() // capture whatever follows "-o " / "-output " in ext.args
+ def fastacmd = match[0] ? "touch ${match[0][1]}" : '' // touch the captured output name, if one was found
+ def get_sizes_command = get_sizes ? "touch ${fasta}.sizes" : '' // mirror the optional .sizes output
"""
+ ${fastacmd}
touch ${fasta}.fai
+ if [[ "${fasta.extension}" == "gz" ]]; then
+ touch ${fasta}.gzi
+ fi
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: 1.19
- END_VERSIONS
+ ${get_sizes_command}
"""
}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/meta.yml
new file mode 100644
index 0000000..80aae1d
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/meta.yml
@@ -0,0 +1,112 @@
+name: samtools_faidx
+description: Index FASTA file, and optionally generate a file of chromosome
+ sizes
+keywords:
+ - index
+ - fasta
+ - faidx
+ - chromosome
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence:
+ - "MIT"
+ identifier: biotools:samtools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - fasta:
+ type: file
+ description: FASTA file
+ pattern: "*.{fa,fasta}"
+ ontologies: []
+ - fai:
+ type: file
+ description: FASTA index file
+ pattern: "*.{fai}"
+ ontologies: []
+ - get_sizes:
+ type: boolean
+ description: use cut to get the sizes of the index (true) or not (false)
+output:
+ fa:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.{fa,fasta}":
+ type: file
+ description: FASTA file
+ pattern: "*.{fa,fasta}"  # same glob as the fa output declared in main.nf
+ ontologies: []
+ sizes:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.sizes":
+ type: file
+ description: File containing chromosome lengths
+ pattern: "*.{sizes}"
+ ontologies: []
+ fai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.fai":
+ type: file
+ description: FASTA index file
+ pattern: "*.{fai}"
+ ontologies: []
+ gzi:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.gzi":
+ type: file
+ description: Optional gzip index file for compressed inputs
+ pattern: "*.gzi"
+ ontologies: []
+ versions_samtools:
+ - - ${task.process}:
+ type: string
+ description: The process the versions were collected from
+ - samtools:
+ type: string
+ description: The tool name
+ - "samtools version | sed '1!d;s/.* //'":
+ type: eval
+ description: The command used to generate the version of the tool
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: The process the versions were collected from
+ - samtools:
+ type: string
+ description: The tool name
+ - "samtools version | sed '1!d;s/.* //'":
+ type: eval
+ description: The command used to generate the version of the tool
+authors:
+ - "@drpatelh"
+ - "@ewels"
+ - "@phue"
+maintainers:
+ - "@maxulysse"
+ - "@phue"
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test
new file mode 100644
index 0000000..9a86db8
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test
@@ -0,0 +1,253 @@
+nextflow_process {
+
+ name "Test Process SAMTOOLS_FAIDX"
+ script "../main.nf"
+ process "SAMTOOLS_FAIDX"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/faidx"
+ config "./nextflow.config"
+
+ test("test_samtools_faidx") { // plain FASTA, no existing .fai, get_sizes = false
+
+ when {
+ params {
+ module_args = '' // NOTE(review): presumably read by ./nextflow.config as ext.args; that config is not visible here
+ }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+ []
+ ]
+ input[1] = false
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match()} // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("test_samtools_faidx_bgzip") { // .fasta.gz input, no existing .fai, get_sizes = false
+
+ when {
+ params {
+ module_args = '' // NOTE(review): presumably read by ./nextflow.config as ext.args; that config is not visible here
+ }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = false
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match()} // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("test_samtools_faidx_fasta") { // FASTA plus its existing .fai, get_sizes = false
+
+ when {
+ params {
+ module_args = 'MT192765.1 -o extract.fa' // NOTE(review): presumably forwarded as ext.args (region + output file); confirm in ./nextflow.config
+ }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[1] = false
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match()} // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("test_samtools_faidx_stub_fasta") { // stub run: FASTA plus its existing .fai, get_sizes = false
+
+ options "-stub" // run the process in Nextflow stub mode
+ when {
+ params {
+ module_args = '-o extract.fa' // NOTE(review): presumably forwarded as ext.args, which the stub scans for "-o <file>"; confirm in ./nextflow.config
+ }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[1] = false
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match()} // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("test_samtools_faidx_stub_fai") { // stub run: plain FASTA, no existing .fai, get_sizes = false
+
+ options "-stub" // run the process in Nextflow stub mode
+ when {
+ params {
+ module_args = '' // NOTE(review): presumably read by ./nextflow.config as ext.args; that config is not visible here
+ }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+ []
+ ]
+ input[1] = false
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match()} // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("test_samtools_faidx_get_sizes") { // plain FASTA, no existing .fai, get_sizes = true
+
+ when {
+ params {
+ module_args = '' // NOTE(review): presumably read by ./nextflow.config as ext.args; that config is not visible here
+ }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+ []
+ ]
+ input[1] = true
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match()} // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("test_samtools_faidx_get_sizes_bgzip") { // .fasta.gz input, no existing .fai, get_sizes = true
+
+ when {
+ params {
+ module_args = '' // NOTE(review): presumably read by ./nextflow.config as ext.args; that config is not visible here
+ }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = true
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match()} // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("test_samtools_faidx_get_sizes - stub") { // stub run: plain FASTA, no existing .fai, get_sizes = true
+
+ options "-stub" // run the process in Nextflow stub mode
+
+ when {
+ params {
+ module_args = '' // NOTE(review): presumably read by ./nextflow.config as ext.args; that config is not visible here
+ }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+ []
+ ]
+ input[1] = true
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match()} // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+ test("test_samtools_faidx_get_sizes_bgzip - stub") { // stub run: .fasta.gz input, no existing .fai, get_sizes = true
+
+ options "-stub" // run the process in Nextflow stub mode
+
+ when {
+ params {
+ module_args = '' // NOTE(review): presumably read by ./nextflow.config as ext.args; that config is not visible here
+ }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true),
+ []
+ ]
+ input[1] = true
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(sanitizeOutput(process.out)).match()} // the whole (sanitized) output map is snapshotted
+ )
+ }
+ }
+
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test.snap
new file mode 100644
index 0000000..4169744
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test.snap
@@ -0,0 +1,352 @@
+{
+ "test_samtools_faidx": {
+ "content": [
+ {
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "gzi": [
+
+ ],
+ "sizes": [
+
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FAIDX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-10T15:39:12.541649151"
+ },
+ "test_samtools_faidx_get_sizes_bgzip - stub": {
+ "content": [
+ {
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "gzi": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "sizes": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FAIDX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-10T15:41:44.040426987"
+ },
+ "test_samtools_faidx_get_sizes": {
+ "content": [
+ {
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "gzi": [
+
+ ],
+ "sizes": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FAIDX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-10T15:47:03.653912015"
+ },
+ "test_samtools_faidx_bgzip": {
+ "content": [
+ {
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "gzi": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474"
+ ]
+ ],
+ "sizes": [
+
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FAIDX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-10T15:50:04.023566795"
+ },
+ "test_samtools_faidx_fasta": {
+ "content": [
+ {
+ "fa": [
+ [
+ {
+ "id": "test"
+ },
+ "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36"
+ ]
+ ],
+ "fai": [
+
+ ],
+ "gzi": [
+
+ ],
+ "sizes": [
+
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FAIDX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-10T15:39:23.529404162"
+ },
+ "test_samtools_faidx_get_sizes - stub": {
+ "content": [
+ {
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "gzi": [
+
+ ],
+ "sizes": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FAIDX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-10T15:41:39.039834304"
+ },
+ "test_samtools_faidx_stub_fasta": {
+ "content": [
+ {
+ "fa": [
+ [
+ {
+ "id": "test"
+ },
+ "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "fai": [
+
+ ],
+ "gzi": [
+
+ ],
+ "sizes": [
+
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FAIDX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-10T15:39:28.961701609"
+ },
+ "test_samtools_faidx_stub_fai": {
+ "content": [
+ {
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "gzi": [
+
+ ],
+ "sizes": [
+
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FAIDX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-10T15:39:34.471028474"
+ },
+ "test_samtools_faidx_get_sizes_bgzip": {
+ "content": [
+ {
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "gzi": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474"
+ ]
+ ],
+ "sizes": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FAIDX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-10T15:39:45.439016495"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/nextflow.config b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/nextflow.config
new file mode 100644
index 0000000..202c036
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/nextflow.config
@@ -0,0 +1,7 @@
+// Test-only configuration for SAMTOOLS_FAIDX.
+// Each test sets `module_args` in its `params { }` block; this selector hands that
+// value to the process as ext.args (presumably loaded by main.nf.test in this
+// directory -- the `config` directive is outside this view).
+process {
+
+ withName: SAMTOOLS_FAIDX {
+ ext.args = params.module_args
+ }
+
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/environment.yml
new file mode 100644
index 0000000..89e12a6
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ # renovate: datasource=conda depName=bioconda/htslib
+ - bioconda::htslib=1.22.1
+ # renovate: datasource=conda depName=bioconda/samtools
+ - bioconda::samtools=1.22.1
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/main.nf
index 38465a3..0cfb7e8 100644
--- a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/main.nf
@@ -1,46 +1,47 @@
+// Runs `samtools flagstat` on a BAM plus its index and writes ${prefix}.flagstat.
+// The samtools version is captured with eval() and emitted on the `versions` topic.
+// ext.args is not consumed by this module; only ext.prefix is honoured.
 process SAMTOOLS_FLAGSTAT {
 tag "$meta.id"
- label 'process_low'
+ label 'process_single'
- conda "bioconda::samtools=1.19"
+ conda "${moduleDir}/environment.yml"
 container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' :
- 'biocontainers/samtools:1.19--h50ea8bc_0' }"
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' :
+ 'biocontainers/samtools:1.22.1--h96c455f_0' }"
 input:
 tuple val(meta), path(bam), path(bai)
 output:
 tuple val(meta), path("*.flagstat"), emit: flagstat
- path "versions.yml", emit: versions
+ tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions
 when:
 task.ext.when == null || task.ext.when
 script:
- def args = task.ext.args ?: ''
 def prefix = task.ext.prefix ?: "${meta.id}"
 """
- samtools flagstat \\
- $args \\
- -@ $task.cpus \\
+ # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads).
+ samtools \\
+ flagstat \\
+ --threads ${task.cpus-1} \\
 $bam \\
 > ${prefix}.flagstat
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: \$(samtools --version | head -n1 | sed 's/samtools //')
- END_VERSIONS
 """
 stub:
+ // Stub writes a fixed, synthetic flagstat report (no samtools flagstat call).
 def prefix = task.ext.prefix ?: "${meta.id}"
 """
- touch ${prefix}.flagstat
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: 1.19
- END_VERSIONS
+ cat <<-END_FLAGSTAT > ${prefix}.flagstat
+ 1000000 + 0 in total (QC-passed reads + QC-failed reads)
+ 0 + 0 secondary
+ 0 + 0 supplementary
+ 0 + 0 duplicates
+ 900000 + 0 mapped (90.00% : N/A)
+ 1000000 + 0 paired in sequencing
+ 500000 + 0 read1
+ 500000 + 0 read2
+ 800000 + 0 properly paired (80.00% : N/A)
+ 850000 + 0 with mate mapped to a different chr
+ 50000 + 0 with mate mapped to a different chr (mapQ>=5)
+ END_FLAGSTAT
 """
 }
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/meta.yml
new file mode 100644
index 0000000..8caa1bc
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/meta.yml
@@ -0,0 +1,75 @@
+name: samtools_flagstat
+description: Counts the number of alignments in a BAM/CRAM/SAM file for each
+ FLAG type
+keywords:
+ - stats
+ - mapping
+ - counts
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence:
+ - "MIT"
+ identifier: biotools:samtools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bam:
+ type: file
+ description: BAM/CRAM/SAM file
+ pattern: "*.{bam,cram,sam}"
+ ontologies: []
+ - bai:
+ type: file
+ description: Index for BAM/CRAM/SAM file
+ pattern: "*.{bai,crai,sai}"
+ ontologies: []
+output:
+ flagstat:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.flagstat":
+ type: file
+ description: File containing samtools flagstat output
+ pattern: "*.{flagstat}"
+ ontologies: []
+ versions_samtools:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - samtools:
+ type: string
+ description: The name of the tool
+ - samtools version | sed '1!d;s/.* //':
+ type: eval
+ description: The expression to obtain the version of the tool
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - samtools:
+ type: string
+ description: The name of the tool
+ - samtools version | sed '1!d;s/.* //':
+ type: eval
+ description: The expression to obtain the version of the tool
+authors:
+ - "@drpatelh"
+maintainers:
+ - "@drpatelh"
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test
new file mode 100644
index 0000000..3b648a3
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test
@@ -0,0 +1,56 @@
+// nf-test suite for SAMTOOLS_FLAGSTAT: one real run and one -stub run over the
+// same sarscov2 BAM/BAI pair.
+nextflow_process {
+
+ name "Test Process SAMTOOLS_FLAGSTAT"
+ script "../main.nf"
+ process "SAMTOOLS_FLAGSTAT"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/flagstat"
+
+ // Real execution on the sorted paired-end BAM and its .bai index.
+ test("BAM") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ // The whole process.out map is snapshotted, so the stored snapshot carries both
+ // the numeric ("0", "1") and the named (flagstat, versions_samtools) channel keys.
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ // Same inputs, executed with the -stub option.
+ test("BAM - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ // Snapshot of the full process.out map, as in the real-run test.
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap
new file mode 100644
index 0000000..f5c882d
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap
@@ -0,0 +1,88 @@
+{
+ "BAM - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "1": [
+ [
+ "SAMTOOLS_FLAGSTAT",
+ "samtools",
+ "1.22.1"
+ ]
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FLAGSTAT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:14:30.820969684"
+ },
+ "BAM": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783"
+ ]
+ ],
+ "1": [
+ [
+ "SAMTOOLS_FLAGSTAT",
+ "samtools",
+ "1.22.1"
+ ]
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_FLAGSTAT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:14:25.581619424"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/environment.yml
new file mode 100644
index 0000000..89e12a6
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ # renovate: datasource=conda depName=bioconda/htslib
+ - bioconda::htslib=1.22.1
+ # renovate: datasource=conda depName=bioconda/samtools
+ - bioconda::samtools=1.22.1
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/main.nf
index 7b76f0d..d5b70a7 100644
--- a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/main.nf
@@ -2,44 +2,37 @@ process SAMTOOLS_IDXSTATS {
+ // Writes `samtools idxstats` output for an indexed BAM to ${prefix}.idxstats.
 tag "$meta.id"
 label 'process_single'
- conda "bioconda::samtools=1.19"
+ // Conda dependencies are read from the environment.yml next to this module.
+ conda "${moduleDir}/environment.yml"
 container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' :
- 'biocontainers/samtools:1.19--h50ea8bc_0' }"
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' :
+ 'biocontainers/samtools:1.22.1--h96c455f_0' }"
 input:
 tuple val(meta), path(bam), path(bai)
 output:
 tuple val(meta), path("*.idxstats"), emit: idxstats
- path "versions.yml", emit: versions
+ // Tool version is captured with eval() and emitted as a tuple on the `versions` topic.
+ tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions
 when:
 task.ext.when == null || task.ext.when
 script:
- def args = task.ext.args ?: ''
+ // ext.args is not consumed by this module; only ext.prefix is honoured.
 def prefix = task.ext.prefix ?: "${meta.id}"
+
 """
- samtools idxstats \\
- $args \\
+ # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads).
+ samtools \\
+ idxstats \\
+ --threads ${task.cpus-1} \\
 $bam \\
 > ${prefix}.idxstats
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: \$(samtools --version | head -n1 | sed 's/samtools //')
- END_VERSIONS
 """
 stub:
+ // Stub only creates an empty ${prefix}.idxstats placeholder.
 def prefix = task.ext.prefix ?: "${meta.id}"
+
 """
 touch ${prefix}.idxstats
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: 1.19
- END_VERSIONS
 """
 }
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/meta.yml
new file mode 100644
index 0000000..fd15384
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/meta.yml
@@ -0,0 +1,75 @@
+name: samtools_idxstats
+description: Reports alignment summary statistics for a BAM/CRAM/SAM file
+keywords:
+ - stats
+ - mapping
+ - counts
+ - chromosome
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence:
+ - "MIT"
+ identifier: biotools:samtools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bam:
+ type: file
+ description: BAM/CRAM/SAM file
+ pattern: "*.{bam,cram,sam}"
+ ontologies: []
+ - bai:
+ type: file
+ description: Index for BAM/CRAM/SAM file
+ pattern: "*.{bai,crai,sai}"
+ ontologies: []
+output:
+ idxstats:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.idxstats":
+ type: file
+ description: File containing samtools idxstats output
+ pattern: "*.{idxstats}"
+ ontologies: []
+ versions_samtools:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - samtools:
+ type: string
+ description: The name of the tool
+ - samtools version | sed '1!d;s/.* //':
+ type: eval
+ description: The expression to obtain the version of the tool
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - samtools:
+ type: string
+ description: The name of the tool
+ - samtools version | sed '1!d;s/.* //':
+ type: eval
+ description: The expression to obtain the version of the tool
+authors:
+ - "@drpatelh"
+maintainers:
+ - "@drpatelh"
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test
new file mode 100644
index 0000000..c990cd5
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test
@@ -0,0 +1,59 @@
+// nf-test suite for SAMTOOLS_IDXSTATS: one real run and one -stub run over the
+// same sarscov2 BAM/BAI pair.
+nextflow_process {
+
+ name "Test Process SAMTOOLS_IDXSTATS"
+ script "../main.nf"
+ process "SAMTOOLS_IDXSTATS"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/idxstats"
+
+ // Real execution on the sorted paired-end BAM and its .bai index.
+ test("bam") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ // Snapshot holds the idxstats channel plus every output whose name starts with 'versions'.
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.idxstats,
+ process.out.findAll { key, val -> key.startsWith('versions') }
+ ).match() }
+ )
+ }
+ }
+
+ // Same inputs, executed with the -stub option.
+ test("bam - stub") {
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ // Same snapshot shape as the real run: idxstats channel plus 'versions*' outputs.
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.idxstats,
+ process.out.findAll { key, val -> key.startsWith('versions') }
+ ).match() }
+ )
+ }
+ }}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap
new file mode 100644
index 0000000..19a54c7
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap
@@ -0,0 +1,56 @@
+{
+ "bam - stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_IDXSTATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-02T16:21:46.333090477"
+ },
+ "bam": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_IDXSTATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-02T16:21:41.063422521"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/index/environment.yml
new file mode 100644
index 0000000..89e12a6
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ # renovate: datasource=conda depName=bioconda/htslib
+ - bioconda::htslib=1.22.1
+ # renovate: datasource=conda depName=bioconda/samtools
+ - bioconda::samtools=1.22.1
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/index/main.nf
index 343f905..e2a0e56 100644
--- a/pipelines/nf-atacseq/modules/nf-core/samtools/index/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/main.nf
@@ -2,17 +2,19 @@ process SAMTOOLS_INDEX {
tag "$meta.id"
label 'process_low'
- conda "bioconda::samtools=1.19"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' :
- 'biocontainers/samtools:1.19--h50ea8bc_0' }"
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' :
+ 'biocontainers/samtools:1.22.1--h96c455f_0' }"
input:
- tuple val(meta), path(bam)
+ tuple val(meta), path(input)
output:
- tuple val(meta), path("*.bai"), emit: bai
- path "versions.yml", emit: versions
+ tuple val(meta), path("*.bai") , optional:true, emit: bai
+ tuple val(meta), path("*.csi") , optional:true, emit: csi
+ tuple val(meta), path("*.crai"), optional:true, emit: crai
+ tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions
when:
task.ext.when == null || task.ext.when
@@ -20,21 +22,18 @@ process SAMTOOLS_INDEX {
+ // Indexes the input alignment with `samtools index`; ext.args is appended after
+ // the thread option, so a user-supplied value can still override it.
 script:
 def args = task.ext.args ?: ''
 """
- samtools index $args -@ $task.cpus $bam
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: \$(samtools --version | head -n1 | sed 's/samtools //')
- END_VERSIONS
+ # Note: -@ value represents *additional* CPUs to allocate (total CPUs = 1 + -@).
+ samtools \\
+ index \\
+ -@ ${task.cpus-1} \\
+ $args \\
+ $input
 """
 stub:
+ // Stub picks the placeholder's extension: crai for a .cram input,
+ // csi when ext.args contains "-c", otherwise bai.
+ def args = task.ext.args ?: ''
+ def extension = file(input).getExtension() == 'cram' ?
+ "crai" : args.contains("-c") ? "csi" : "bai"
 """
- touch ${bam}.bai
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: 1.19
- END_VERSIONS
+ touch ${input}.${extension}
 """
 }
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/index/meta.yml
new file mode 100644
index 0000000..c6d4ce2
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/meta.yml
@@ -0,0 +1,92 @@
+name: samtools_index
+description: Index SAM/BAM/CRAM file
+keywords:
+ - index
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence:
+ - "MIT"
+ identifier: biotools:samtools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input:
+ type: file
+ description: input file
+ ontologies: []
+output:
+ # Documents the optional `*.bai` output of main.nf; the pattern names that glob only.
+ bai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.bai":
+ type: file
+ description: BAI index file
+ pattern: "*.{bai}"
+ ontologies: []
+ csi:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.csi":
+ type: file
+ description: CSI index file
+ pattern: "*.{csi}"
+ ontologies: []
+ # Documents the optional `*.crai` output of main.nf; the pattern names that glob only.
+ crai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.crai":
+ type: file
+ description: CRAI index file
+ pattern: "*.{crai}"
+ ontologies: []
+ versions_samtools:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - samtools:
+ type: string
+ description: The name of the tool
+ - samtools version | sed '1!d;s/.* //':
+ type: eval
+ description: The expression to obtain the version of the tool
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - samtools:
+ type: string
+ description: The name of the tool
+ - samtools version | sed '1!d;s/.* //':
+ type: eval
+ description: The expression to obtain the version of the tool
+authors:
+ - "@drpatelh"
+ - "@ewels"
+ - "@maxulysse"
+maintainers:
+ - "@drpatelh"
+ - "@ewels"
+ - "@maxulysse"
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/csi.nextflow.config b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/csi.nextflow.config
new file mode 100644
index 0000000..0ed260e
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/csi.nextflow.config
@@ -0,0 +1,7 @@
+// Test-only configuration loaded by the "csi" and "csi - stub" tests:
+// passes '-c' to SAMTOOLS_INDEX as ext.args.
+process {
+
+ withName: SAMTOOLS_INDEX {
+ ext.args = '-c'
+ }
+
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test
new file mode 100644
index 0000000..c96cec8
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test
@@ -0,0 +1,155 @@
+// nf-test suite for SAMTOOLS_INDEX: bai, crai and csi outputs, each as a real run
+// and as a -stub run. Every snapshot also records the outputs whose name starts
+// with 'versions'.
+nextflow_process {
+
+ name "Test Process SAMTOOLS_INDEX"
+ script "../main.nf"
+ process "SAMTOOLS_INDEX"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/index"
+
+ // BAM input with no extra args; the bai channel is snapshotted.
+ test("bai") {
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.bai,
+ process.out.findAll { key, val -> key.startsWith('versions') }
+ ).match() }
+ )
+ }
+ }
+
+ // CRAM input; the crai channel is snapshotted.
+ test("crai") {
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.crai,
+ process.out.findAll { key, val -> key.startsWith('versions') }
+ ).match() }
+ )
+ }
+ }
+
+ // BAM input with csi.nextflow.config, which sets ext.args = '-c'.
+ test("csi") {
+ config "./csi.nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ // Only the file name of the first csi entry is snapshotted, not its checksum
+ // (presumably because the CSI content is not byte-stable -- TODO confirm).
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.csi[0][1]).name,
+ process.out.findAll { key, val -> key.startsWith('versions') }
+ ).match() }
+ )
+ }
+ }
+
+ // Stub counterpart of "bai".
+ test("bai - stub") {
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.bai,
+ process.out.findAll { key, val -> key.startsWith('versions') }
+ ).match() }
+ )
+ }
+ }
+
+ // Stub counterpart of "crai".
+ test("crai - stub") {
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.crai,
+ process.out.findAll { key, val -> key.startsWith('versions') }
+ ).match() }
+ )
+ }
+ }
+
+ // Stub counterpart of "csi"; here the whole csi channel is snapshotted.
+ test("csi - stub") {
+ options "-stub"
+ config "./csi.nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.csi,
+ process.out.findAll { key, val -> key.startsWith('versions') }
+ ).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test.snap
new file mode 100644
index 0000000..afc8a1f
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test.snap
@@ -0,0 +1,156 @@
+{
+ "csi - stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_INDEX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-28T17:52:10.030187"
+ },
+ "crai - stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_INDEX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-28T17:51:59.125484"
+ },
+ "bai - stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_INDEX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-28T17:51:47.277042"
+ },
+ "csi": {
+ "content": [
+ "test.paired_end.sorted.bam.csi",
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_INDEX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-28T17:51:35.758735"
+ },
+ "crai": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_INDEX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-28T17:51:26.561965"
+ },
+ "bai": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_INDEX",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.2"
+ },
+ "timestamp": "2026-01-28T17:51:15.299035"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/environment.yml
new file mode 100644
index 0000000..89e12a6
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ # renovate: datasource=conda depName=bioconda/htslib
+ - bioconda::htslib=1.22.1
+ # renovate: datasource=conda depName=bioconda/samtools
+ - bioconda::samtools=1.22.1
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/main.nf
index 3215395..6b5aa31 100644
--- a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/main.nf
@@ -2,46 +2,77 @@ process SAMTOOLS_SORT {
tag "$meta.id"
label 'process_medium'
- conda "bioconda::samtools=1.19"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' :
- 'biocontainers/samtools:1.19--h50ea8bc_0' }"
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' :
+ 'biocontainers/samtools:1.22.1--h96c455f_0' }"
input:
- tuple val(meta), path(bam)
- path fasta
+ tuple val(meta) , path(bam)
+ tuple val(meta2), path(fasta)
+ val index_format
output:
- tuple val(meta), path("*.sorted.bam"), emit: bam
- path "versions.yml", emit: versions
+ tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true
+ tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true
+ tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true
+ tuple val(meta), path("${prefix}.${extension}.crai"), emit: crai, optional: true
+ tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true
+ tuple val(meta), path("${prefix}.${extension}.bai"), emit: bai, optional: true
+ tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools
when:
task.ext.when == null || task.ext.when
script:
def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
+ prefix = task.ext.prefix ?: "${meta.id}"
+ extension = args.contains("--output-fmt sam") ? "sam" :
+ args.contains("--output-fmt cram") ? "cram" :
+ "bam"
+ def reference = fasta ? "--reference ${fasta}" : ""
+ output_file = index_format ? "${prefix}.${extension}##idx##${prefix}.${extension}.${index_format} --write-index" : "${prefix}.${extension}"
+ if (index_format) {
+ if (!index_format.matches('bai|csi|crai')) {
+ error "Index format not one of bai, csi, crai."
+ } else if (extension == "sam") {
+ error "Indexing not compatible with SAM output"
+ }
+ }
+    // Compare against the real output name: the extension follows --output-fmt (bam/cram/sam), so it is not always ".bam"
+    if ("$bam" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
+ samtools cat \\
+ ${bam} \\
+ | \\
samtools sort \\
$args \\
- -@ $task.cpus \\
- -o ${prefix}.sorted.bam \\
- $bam
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: \$(samtools --version | head -n1 | sed 's/samtools //')
- END_VERSIONS
+ -T ${prefix} \\
+ --threads $task.cpus \\
+ ${reference} \\
+ -o ${output_file} \\
+ -
+
"""
stub:
- def prefix = task.ext.prefix ?: "${meta.id}"
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}"
+ extension = args.contains("--output-fmt sam") ? "sam" :
+ args.contains("--output-fmt cram") ? "cram" :
+ "bam"
+ if (index_format) {
+ if (!index_format.matches('bai|csi|crai')) {
+ error "Index format not one of bai, csi, crai."
+ } else if (extension == "sam") {
+ error "Indexing not compatible with SAM output"
+ }
+ }
+ index = index_format ? "touch ${prefix}.${extension}.${index_format}" : ""
+
"""
- touch ${prefix}.sorted.bam
+ touch ${prefix}.${extension}
+ ${index}
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: 1.19
- END_VERSIONS
"""
}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/meta.yml
new file mode 100644
index 0000000..6996830
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/meta.yml
@@ -0,0 +1,142 @@
+name: samtools_sort
+description: Sort SAM/BAM/CRAM file
+keywords:
+ - sort
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+ identifier: biotools:samtools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bam:
+ type: file
+ description: BAM/CRAM/SAM file(s)
+ pattern: "*.{bam,cram,sam}"
+ ontologies: []
+ - - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'genome' ]
+ - fasta:
+ type: file
+ description: Reference genome FASTA file
+ pattern: "*.{fa,fasta,fna}"
+ optional: true
+ ontologies: []
+ - index_format:
+ type: string
+ description: Index format to use (optional)
+ pattern: "bai|csi|crai"
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "${prefix}.bam":
+ type: file
+ description: Sorted BAM file
+ pattern: "*.{bam}"
+ ontologies: []
+ cram:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "${prefix}.cram":
+ type: file
+ description: Sorted CRAM file
+ pattern: "*.{cram}"
+ ontologies: []
+ sam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "${prefix}.sam":
+ type: file
+ description: Sorted SAM file
+ pattern: "*.{sam}"
+ ontologies: []
+ crai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "${prefix}.${extension}.crai":
+ type: file
+ description: CRAM index file (optional)
+ pattern: "*.crai"
+ ontologies: []
+ csi:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "${prefix}.${extension}.csi":
+ type: file
+ description: BAM index file (optional)
+ pattern: "*.csi"
+ ontologies: []
+ bai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "${prefix}.${extension}.bai":
+ type: file
+ description: BAM index file (optional)
+ pattern: "*.bai"
+ ontologies: []
+  versions_samtools:
+    - - ${task.process}:
+          type: string
+          description: The process the versions were collected from
+      - samtools:
+          type: string
+          description: The tool name
+      - "samtools version | sed '1!d;s/.* //'":
+          type: eval  # emitted through eval(...) in main.nf, not a literal string
+          description: The command used to generate the version of the tool
+
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The process the versions were collected from
+      - samtools:
+          type: string
+          description: The tool name
+      - "samtools version | sed '1!d;s/.* //'":
+          type: eval  # emitted through eval(...) in main.nf, not a literal string
+          description: The command used to generate the version of the tool
+
+authors:
+ - "@drpatelh"
+ - "@ewels"
+ - "@matthdsm"
+maintainers:
+ - "@drpatelh"
+ - "@ewels"
+ - "@matthdsm"
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test
new file mode 100644
index 0000000..df47bb2
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test
@@ -0,0 +1,332 @@
+nextflow_process {
+
+ name "Test Process SAMTOOLS_SORT"
+ script "../main.nf"
+ process "SAMTOOLS_SORT"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/sort"
+
+ test("bam_no_index") {
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = ''
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.bam,
+ process.out.bai,
+ process.out.findAll { key, val -> key.startsWith("versions") }
+ ).match()}
+ )
+ }
+ }
+
+ test("bam_bai_index") {
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = 'bai'
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.bam,
+ process.out.bai,
+ process.out.findAll { key, val -> key.startsWith("versions") }
+ ).match()}
+ )
+ }
+ }
+
+ test("bam_csi_index") {
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = 'csi'
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.bam,
+ process.out.csi,
+ process.out.findAll { key, val -> key.startsWith("versions") }
+ ).match()}
+ )
+ }
+ }
+
+ test("multiple bam") {
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true)
+ ]
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = ''
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.bam,
+ process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } },
+ process.out.findAll { key, val -> key.startsWith("versions") }
+ ).match()}
+ )
+ }
+ }
+
+ test("multiple bam bai index") {
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true)
+ ]
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = 'bai'
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.bam,
+ process.out.bai.collect { it.collect { it instanceof Map ? it : file(it).name } },
+ process.out.findAll { key, val -> key.startsWith("versions") }
+ ).match()}
+ )
+ }
+ }
+
+ test("multiple bam csi index") {
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true)
+ ]
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = 'csi'
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.bam,
+ process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } },
+ process.out.findAll { key, val -> key.startsWith("versions") }
+ ).match()}
+ )
+ }
+ }
+
+ test("cram") {
+
+ config "./nextflow_cram.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = ''
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } },
+ process.out.crai.collect { it.collect { it instanceof Map ? it : file(it).name } },
+ process.out.findAll { key, val -> key.startsWith("versions") }
+ ).match()}
+ )
+ }
+ }
+
+ test("bam - stub") {
+
+ options "-stub"
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = ''
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() }
+ )
+ }
+ }
+
+    test("multiple bam - stub") {
+        options "-stub" // run the stub: block, as the other "- stub" tests in this file do
+        config "./nextflow.config"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true)
+                    ]
+                ])
+                input[1] = Channel.of([
+                    [ id:'fasta' ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+                ])
+                input[2] = ''
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() }
+            )
+        }
+    }
+
+ test("cram - stub") {
+
+ options "-stub"
+ config "./nextflow_cram.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = ''
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test.snap
new file mode 100644
index 0000000..4e618fa
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test.snap
@@ -0,0 +1,296 @@
+{
+ "cram": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.cram"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.cram.crai"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:47:01.171084"
+ },
+ "bam_csi_index": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,72ca1dff5344a5e5e6b892fe5f6b134d"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.csi:md5,01394e702c729cb478df914ffaf9f7f8"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:46:00.961675"
+ },
+ "bam - stub": {
+ "content": [
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:47:12.154354"
+ },
+ "multiple bam bai index": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,3ffa2affc29f0aa6e7b36dded84625fe"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.bai"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:46:25.488622"
+ },
+ "cram - stub": {
+ "content": [
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:47:28.485045"
+ },
+ "multiple bam": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,cd4eb0077f25e9cff395366b8883dd1f"
+ ]
+ ],
+ [
+
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:46:13.168476"
+ },
+ "multiple bam - stub": {
+ "content": [
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:47:21.628088"
+ },
+ "bam_no_index": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,26b27d1f9bcb61c25da21b562349784e"
+ ]
+ ],
+ [
+
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:45:47.139418"
+ },
+ "multiple bam csi index": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,295503ba5342531a3310c33ad0efbc22"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.csi"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:46:51.5531"
+ },
+ "bam_bai_index": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,cae7564cb83bb4a5911205bf94124b54"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.bai:md5,50dd467c169545a4d5d1f709f7e986e0"
+ ]
+ ],
+ {
+ "versions_samtools": [
+ [
+ "SAMTOOLS_SORT",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-10-29T12:45:52.796936"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow.config b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow.config
new file mode 100644
index 0000000..723f62b
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow.config
@@ -0,0 +1,7 @@
+process { // nf-test configuration used by the BAM test cases of SAMTOOLS_SORT
+
+    withName: SAMTOOLS_SORT {
+        ext.prefix = { "${meta.id}.sorted" } // outputs are named <meta.id>.sorted.<ext>, e.g. test.sorted.bam
+    }
+
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow_cram.config
new file mode 100644
index 0000000..3a8c018
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow_cram.config
@@ -0,0 +1,8 @@
+process { // nf-test configuration used by the CRAM test cases of SAMTOOLS_SORT
+
+    withName: SAMTOOLS_SORT {
+        ext.prefix = { "${meta.id}.sorted" } // outputs are named <meta.id>.sorted.<ext>, e.g. test.sorted.cram
+        ext.args = "--write-index --output-fmt cram" // main.nf picks the "cram" extension from this --output-fmt flag
+    }
+
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/environment.yml
new file mode 100644
index 0000000..89e12a6
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ # renovate: datasource=conda depName=bioconda/htslib
+ - bioconda::htslib=1.22.1
+ # renovate: datasource=conda depName=bioconda/samtools
+ - bioconda::samtools=1.22.1
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/main.nf
index 413e8b2..57d2468 100644
--- a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/main.nf
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/main.nf
@@ -1,48 +1,40 @@
process SAMTOOLS_STATS {
tag "$meta.id"
- label 'process_low'
+ label 'process_single'
- conda "bioconda::samtools=1.19"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' :
- 'biocontainers/samtools:1.19--h50ea8bc_0' }"
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' :
+ 'biocontainers/samtools:1.22.1--h96c455f_0' }"
input:
- tuple val(meta), path(bam), path(bai)
- path fasta
+ tuple val(meta), path(input), path(input_index)
+ tuple val(meta2), path(fasta)
output:
tuple val(meta), path("*.stats"), emit: stats
- path "versions.yml", emit: versions
+ tuple val("${task.process}"), val('samtools'), eval('samtools version | sed "1!d;s/.* //"'), emit: versions_samtools, topic: versions
when:
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
def reference = fasta ? "--reference ${fasta}" : ""
"""
- samtools stats \\
- $args \\
- $reference \\
- $bam \\
+ samtools \\
+ stats \\
+ ${args} \\
+ --threads ${task.cpus} \\
+ ${reference} \\
+ ${input} \\
> ${prefix}.stats
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: \$(samtools --version | head -n1 | sed 's/samtools //')
- END_VERSIONS
"""
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.stats
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- samtools: 1.19
- END_VERSIONS
"""
}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/meta.yml
new file mode 100644
index 0000000..5c59cce
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/meta.yml
@@ -0,0 +1,88 @@
+name: samtools_stats
+description: Produces comprehensive statistics from SAM/BAM/CRAM file
+keywords:
+ - statistics
+ - counts
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+ identifier: biotools:samtools
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input:
+ type: file
+ description: BAM/CRAM file from alignment
+ pattern: "*.{bam,cram}"
+ ontologies: []
+ - input_index:
+ type: file
+ description: BAI/CRAI file from alignment
+ pattern: "*.{bai,crai}"
+ ontologies: []
+ - - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'genome' ]
+ - fasta:
+ type: file
+ description: Reference file the CRAM was created with (optional)
+ pattern: "*.{fasta,fa}"
+ ontologies: []
+output:
+ stats:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - "*.stats":
+ type: file
+ description: File containing samtools stats output
+ pattern: "*.{stats}"
+ ontologies: []
+ versions_samtools:
+ - - ${task.process}:
+ type: string
+ description: Name of the process
+ - samtools:
+ type: string
+ description: Name of the tool
+ - samtools version | sed "1!d;s/.* //":
+ type: eval
+ description: The expression to obtain the version of the tool
+
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: Name of the process
+ - samtools:
+ type: string
+ description: Name of the tool
+ - samtools version | sed "1!d;s/.* //":
+ type: eval
+ description: The expression to obtain the version of the tool
+
+authors:
+ - "@drpatelh"
+ - "@FriederikeHanssen"
+ - "@ramprasadn"
+maintainers:
+ - "@drpatelh"
+ - "@FriederikeHanssen"
+ - "@ramprasadn"
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test
new file mode 100644
index 0000000..5bc8930
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test
@@ -0,0 +1,113 @@
+nextflow_process {
+
+ name "Test Process SAMTOOLS_STATS"
+ script "../main.nf"
+ process "SAMTOOLS_STATS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/stats"
+
+ test("bam") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ input[1] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert snapshot(process.out).match()}
+ )
+ }
+ }
+
+ test("cram") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert snapshot(process.out).match()}
+ )
+ }
+ }
+
+ test("bam - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ input[1] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert snapshot(process.out).match()}
+ )
+ }
+ }
+
+ test("cram - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert snapshot(process.out).match()}
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test.snap
new file mode 100644
index 0000000..94d981b
--- /dev/null
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test.snap
@@ -0,0 +1,174 @@
+{
+ "cram": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba"
+ ]
+ ],
+ "1": [
+ [
+ "SAMTOOLS_STATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_STATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-11-01T02:27:18.460724"
+ },
+ "bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ "SAMTOOLS_STATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_STATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-11-01T02:27:30.245839"
+ },
+ "cram - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ "SAMTOOLS_STATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_STATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-11-01T02:27:39.041649"
+ },
+ "bam": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9"
+ ]
+ ],
+ "1": [
+ [
+ "SAMTOOLS_STATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9"
+ ]
+ ],
+ "versions_samtools": [
+ [
+ "SAMTOOLS_STATS",
+ "samtools",
+ "1.22.1"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.0"
+ },
+ "timestamp": "2025-11-01T02:26:55.988241"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/nextflow.config b/pipelines/nf-atacseq/nextflow.config
index e844cbf..1357933 100644
--- a/pipelines/nf-atacseq/nextflow.config
+++ b/pipelines/nf-atacseq/nextflow.config
@@ -15,6 +15,8 @@ plugins {
manifest {
name = 'wasp2/nf-atacseq'
author = 'WASP2 Team'
+ homePage = 'https://github.com/mcvickerlab/WASP2'
+ doi = 'https://doi.org/10.1038/nmeth.3582'
description = 'ATAC-seq Allelic Imbalance Pipeline with WASP2 mapping bias correction'
mainScript = 'main.nf'
-    nextflowVersion = '!>=23.04.0'
+    nextflowVersion = '!>=25.04.0' // modules in this change use eval() outputs and 'topic:' channels
@@ -60,11 +62,6 @@ params {
skip_peak_calling = false // Require peaks parameter if true
skip_multiqc = false
- // Resource limits
- max_cpus = 16
- max_memory = '128.GB'
- max_time = '240.h'
-
// Institutional config support (nf-core compatible)
custom_config_base = 'https://raw.githubusercontent.com/nf-core/configs/master'
custom_config_version = 'master'
@@ -73,17 +70,18 @@ params {
help = false
version = false
tracedir = "${params.outdir}/pipeline_info"
+ validate_params = true
}
// Load configuration files
includeConfig 'conf/base.config'
includeConfig 'conf/modules.config'
-// Load nf-core institutional configs
+// Load nf-core custom profiles from https://github.com/nf-core/configs
try {
- includeConfig "${params.custom_config_base}/nfcore_custom.config"
+ includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"
} catch (Exception e) {
- System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}")
+ System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}")
}
// Execution profiles
@@ -97,7 +95,6 @@ profiles {
conda.enabled = true
docker.enabled = false
singularity.enabled = false
- process.conda = "${projectDir}/../../environment.yml"
}
docker {
docker.enabled = true
@@ -153,15 +150,15 @@ profiles {
def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
timeline {
enabled = true
- file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
+ file = "${params.tracedir}/timeline_${trace_timestamp}.html"
}
report {
enabled = true
- file = "${params.tracedir}/execution_report_${trace_timestamp}.html"
+ file = "${params.tracedir}/report_${trace_timestamp}.html"
}
trace {
enabled = true
- file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
+ file = "${params.tracedir}/trace_${trace_timestamp}.txt"
}
dag {
enabled = true
@@ -184,7 +181,7 @@ process {
withName: 'WASP2_MAKE_READS|WASP2_FILTER_REMAPPED|WASP2_COUNT_VARIANTS|WASP2_FIND_IMBALANCE' {
container = wasp2_container
}
- withName: 'BWA_MEM' {
+ withName: 'BWA_INDEX|BWA_MEM' {
container = bwa_samtools_container
}
withName: 'SAMTOOLS_INDEX|SAMTOOLS_FAIDX|SAMTOOLS_STATS|SAMTOOLS_FLAGSTAT|SAMTOOLS_IDXSTATS|SAMTOOLS_SORT' {
@@ -196,32 +193,33 @@ process {
process.shell = ['/bin/bash', '-euo', 'pipefail']
// Function to ensure resources don't exceed limits
+// Resource capping is handled by process.resourceLimits in conf/base.config.
+// This function is retained for backward compatibility with process label closures.
def check_max(obj, type) {
if (type == 'memory') {
try {
- if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
- return params.max_memory as nextflow.util.MemoryUnit
- else
- return obj
- } catch (all) {
- println "WARNING: Invalid max_memory '${params.max_memory}', using default"
+ def max = (params.max_memory as nextflow.util.MemoryUnit) ?: 128.GB
+ if (obj.compareTo(max) == 1)
+ return max
+ else return obj
+ } catch (Exception e) {
+ log.warn "Invalid memory config: ${e.message}. Using ${obj}"
return obj
}
} else if (type == 'time') {
try {
- if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
- return params.max_time as nextflow.util.Duration
- else
- return obj
- } catch (all) {
- println "WARNING: Invalid max_time '${params.max_time}', using default"
+ def max = (params.max_time as nextflow.util.Duration) ?: 240.h
+ if (obj.compareTo(max) == 1)
+ return max
+ else return obj
+ } catch (Exception e) {
+ log.warn "Invalid time config: ${e.message}. Using ${obj}"
return obj
}
} else if (type == 'cpus') {
- try {
- return Math.min(obj, params.max_cpus as int)
- } catch (all) {
- println "WARNING: Invalid max_cpus '${params.max_cpus}', using default"
+ try { return Math.min(obj, (params.max_cpus ?: 16) as int) }
+ catch (Exception e) {
+ log.warn "Invalid CPU config: ${e.message}. Using ${obj}"
return obj
}
}
diff --git a/pipelines/nf-atacseq/nextflow_schema.json b/pipelines/nf-atacseq/nextflow_schema.json
index d36e09b..57cfc51 100644
--- a/pipelines/nf-atacseq/nextflow_schema.json
+++ b/pipelines/nf-atacseq/nextflow_schema.json
@@ -226,32 +226,25 @@
}
}
},
- "max_job_request_options": {
- "title": "Max resource options",
+ "institutional_config_options": {
+ "title": "Institutional config options",
"type": "object",
- "fa_icon": "fas fa-server",
- "description": "Set the maximum resource limits for pipeline processes.",
+ "fa_icon": "fas fa-university",
+ "description": "Parameters used to describe centralised config profiles. These should not be edited.",
"properties": {
- "max_cpus": {
- "type": "integer",
- "default": 16,
- "minimum": 1,
- "description": "Maximum number of CPUs that can be requested for any single process.",
- "fa_icon": "fas fa-microchip"
- },
- "max_memory": {
+ "custom_config_base": {
"type": "string",
- "default": "128.GB",
- "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
- "description": "Maximum amount of memory that can be requested for any single process.",
- "fa_icon": "fas fa-memory"
+ "default": "https://raw.githubusercontent.com/nf-core/configs/master",
+ "description": "Base URL for loading nf-core custom config profiles.",
+ "hidden": true,
+ "fa_icon": "fas fa-users-cog"
},
- "max_time": {
+ "custom_config_version": {
"type": "string",
- "default": "240.h",
- "pattern": "^(\\d+\\.?\\s*(s|m|h|d)\\.?\\s*)+$",
- "description": "Maximum amount of time that can be requested for any single process.",
- "fa_icon": "fas fa-clock"
+ "default": "master",
+ "description": "Git tag/branch for nf-core custom config profiles.",
+ "hidden": true,
+ "fa_icon": "fas fa-users-cog"
}
}
},
@@ -277,9 +270,16 @@
},
"tracedir": {
"type": "string",
- "default": "${params.outdir}/pipeline_info",
+ "default": "./results/pipeline_info",
"description": "Directory to keep pipeline Nextflow trace, timeline, report, and DAG files.",
"fa_icon": "fas fa-folder"
+ },
+ "validate_params": {
+ "type": "boolean",
+ "default": true,
+ "description": "Boolean whether to validate parameters against the schema at runtime.",
+ "fa_icon": "fas fa-check-square",
+ "hidden": true
}
}
}
@@ -292,7 +292,7 @@
{ "$ref": "#/definitions/aligner_options" },
{ "$ref": "#/definitions/wasp2_options" },
{ "$ref": "#/definitions/processing_options" },
- { "$ref": "#/definitions/max_job_request_options" },
+ { "$ref": "#/definitions/institutional_config_options" },
{ "$ref": "#/definitions/generic_options" }
]
}
diff --git a/pipelines/nf-atacseq/nf-test.config b/pipelines/nf-atacseq/nf-test.config
index 32f4307..d1d396c 100644
--- a/pipelines/nf-atacseq/nf-test.config
+++ b/pipelines/nf-atacseq/nf-test.config
@@ -11,5 +11,6 @@ config {
copy "modules/**"
copy "subworkflows/**"
copy "workflows/**"
+ copy "tests/**"
}
}
diff --git a/pipelines/nf-atacseq/subworkflows/local/prepare_genome/main.nf b/pipelines/nf-atacseq/subworkflows/local/prepare_genome/main.nf
index 965537a..afcc968 100644
--- a/pipelines/nf-atacseq/subworkflows/local/prepare_genome/main.nf
+++ b/pipelines/nf-atacseq/subworkflows/local/prepare_genome/main.nf
@@ -3,7 +3,7 @@
//
include { BWA_INDEX } from '../../../modules/nf-core/bwa/index/main'
-include { BOWTIE2_BUILD } from '../../../modules/nf-core/bowtie2/index/main'
+include { BOWTIE2_BUILD } from '../../../modules/nf-core/bowtie2/build/main'
include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main'
workflow PREPARE_GENOME {
@@ -32,9 +32,8 @@ workflow PREPARE_GENOME {
ch_fasta_fai = Channel.fromPath(params.fasta_fai, checkIfExists: true)
.map { fai -> [[id: file(params.fasta).baseName], fai] }
} else {
- SAMTOOLS_FAIDX ( ch_fasta )
+ SAMTOOLS_FAIDX ( ch_fasta.map { meta, fasta -> [meta, fasta, []] }, false )
ch_fasta_fai = SAMTOOLS_FAIDX.out.fai
- ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions)
}
//
@@ -47,7 +46,6 @@ workflow PREPARE_GENOME {
} else {
BWA_INDEX ( ch_fasta )
ch_bwa_index = BWA_INDEX.out.index.map { meta, index -> index }
- ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
}
}
@@ -61,7 +59,6 @@ workflow PREPARE_GENOME {
} else {
BOWTIE2_BUILD ( ch_fasta )
ch_bowtie2_index = BOWTIE2_BUILD.out.index.map { meta, index -> index }
- ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
}
}
diff --git a/pipelines/nf-atacseq/subworkflows/local/wasp_mapping/main.nf b/pipelines/nf-atacseq/subworkflows/local/wasp_mapping/main.nf
index 298d74c..908d3f6 100644
--- a/pipelines/nf-atacseq/subworkflows/local/wasp_mapping/main.nf
+++ b/pipelines/nf-atacseq/subworkflows/local/wasp_mapping/main.nf
@@ -26,6 +26,10 @@ workflow WASP_MAPPING {
main:
ch_versions = Channel.empty()
+ // Wrap plain path channels with meta for nf-core modules
+ ch_index_meta = ch_aligner_index.map { index -> [[id: 'genome'], index] }
+ ch_fasta_meta = ch_fasta.map { fasta -> [[id: 'genome'], fasta] }
+
//
// MODULE: Generate reads with swapped alleles for remapping
//
@@ -53,29 +57,26 @@ workflow WASP_MAPPING {
if (aligner == 'bwa') {
BWA_MEM(
ch_remap_reads,
- ch_aligner_index,
- ch_fasta,
+ ch_index_meta,
+ ch_fasta_meta,
true // sort_bam
)
ch_remapped_raw = BWA_MEM.out.bam
- ch_versions = ch_versions.mix(BWA_MEM.out.versions.first())
} else {
BOWTIE2_ALIGN(
ch_remap_reads,
- ch_aligner_index,
- ch_fasta,
+ ch_index_meta,
+ ch_fasta_meta,
false, // save_unaligned
true // sort_bam
)
- ch_remapped_raw = BOWTIE2_ALIGN.out.aligned
- ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first())
+ ch_remapped_raw = BOWTIE2_ALIGN.out.bam
}
//
// MODULE: Index remapped BAM (aligners already sort when sort_bam=true)
//
SAMTOOLS_INDEX(ch_remapped_raw)
- ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
// Combine BAM with index
ch_remapped = ch_remapped_raw
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/main.nf
index 03e8241..c33064e 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/main.nf
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/main.nf
@@ -1,49 +1,46 @@
//
-// Mark duplicates with Picard and run BAM stats
+// Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats
//
include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main'
+include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main'
include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main'
workflow BAM_MARKDUPLICATES_PICARD {
+
take:
- ch_bam // channel: [ val(meta), path(bam) ]
- ch_fasta // channel: path(fasta)
- ch_fai // channel: path(fasta_fai)
+ ch_reads // channel: [ val(meta), path(reads) ]
+ ch_fasta // channel: [ val(meta), path(fasta) ]
+ ch_fai // channel: [ val(meta), path(fai) ]
main:
- ch_versions = Channel.empty()
-
- //
- // Mark duplicates with Picard
- //
- PICARD_MARKDUPLICATES (
- ch_bam,
- ch_fasta,
- ch_fai
- )
- ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first())
-
- //
- // Join BAM and BAI for stats
- //
- ch_bam_bai = PICARD_MARKDUPLICATES.out.bam
- .join(PICARD_MARKDUPLICATES.out.bai, by: [0], failOnMismatch: true)
-
- //
- // Run BAM stats
- //
- BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta )
- ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions)
+ PICARD_MARKDUPLICATES ( ch_reads, ch_fasta, ch_fai )
- emit:
- bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ]
- bai = PICARD_MARKDUPLICATES.out.bai // channel: [ val(meta), path(bai) ]
- metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ]
+ ch_markdup = PICARD_MARKDUPLICATES.out.bam.mix(PICARD_MARKDUPLICATES.out.cram)
+
+ SAMTOOLS_INDEX ( ch_markdup )
- stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ]
- flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
- idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
+ ch_reads_index = ch_markdup
+ .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true)
+ .join(SAMTOOLS_INDEX.out.crai, by: [0], remainder: true)
+ .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true)
+ .map{meta, reads, bai, crai, csi ->
+ if (bai) [ meta, reads, bai ]
+ else if (crai) [ meta, reads, crai ]
+ else [ meta, reads, csi ]
+ }
- versions = ch_versions // channel: path(versions.yml)
+ BAM_STATS_SAMTOOLS ( ch_reads_index, ch_fasta )
+
+ emit:
+ bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ]
+ cram = PICARD_MARKDUPLICATES.out.cram // channel: [ val(meta), path(cram) ]
+ metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ]
+ bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ]
+ crai = SAMTOOLS_INDEX.out.crai // channel: [ val(meta), path(crai) ]
+ csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ]
+
+ stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ]
+ flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
+ idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/meta.yml
index 1b08bb0..433d35b 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/meta.yml
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/meta.yml
@@ -1,78 +1,71 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "bam_markduplicates_picard"
-description: Mark duplicates with Picard and collect BAM statistics
+description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats
keywords:
+ - markduplicates
- bam
- - duplicates
- - picard
- - dedup
- - qc
+ - sam
+ - cram
components:
- picard/markduplicates
+ - samtools/index
+ - samtools/stats
+ - samtools/idxstats
+ - samtools/flagstat
- bam_stats_samtools
input:
- - ch_bam:
- type: channel
+ - ch_reads:
description: |
- Channel containing BAM files to deduplicate
- Structure: [ val(meta), path(bam) ]
- pattern: "*.bam"
+ Sequence reads in BAM/CRAM/SAM format
+ Structure: [ val(meta), path(reads) ]
- ch_fasta:
- type: channel
description: |
- Channel containing reference FASTA
- Structure: path(fasta)
- pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
- - ch_fai:
- type: channel
+ Reference genome fasta file required for CRAM input
+ Structure: [ val(meta), path(fasta) ]
+ - ch_fai:
description: |
- Channel containing FASTA index
- Structure: path(fasta.fai)
- pattern: "*.fai"
+ Index of the reference genome fasta file
+ Structure: [ val(meta), path(fai) ]
output:
- bam:
- type: channel
description: |
- Deduplicated BAM file
+ processed BAM/SAM file
Structure: [ val(meta), path(bam) ]
- pattern: "*.markdup.bam"
- bai:
- type: channel
description: |
- BAM index file
+ BAM/SAM samtools index
Structure: [ val(meta), path(bai) ]
- pattern: "*.bai"
- - metrics:
- type: channel
+ - cram:
+ description: |
+ processed CRAM file
+ Structure: [ val(meta), path(cram) ]
+ - crai:
+ description: |
+ CRAM samtools index
+ Structure: [ val(meta), path(crai) ]
+ - csi:
description: |
- Picard MarkDuplicates metrics
- Structure: [ val(meta), path(metrics) ]
- pattern: "*.metrics.txt"
+ CSI samtools index
+ Structure: [ val(meta), path(csi) ]
- stats:
- type: channel
description: |
- Samtools stats output
+ File containing samtools stats output
Structure: [ val(meta), path(stats) ]
- pattern: "*.stats"
- flagstat:
- type: channel
description: |
- Samtools flagstat output
+ File containing samtools flagstat output
Structure: [ val(meta), path(flagstat) ]
- pattern: "*.flagstat"
- idxstats:
- type: channel
description: |
- Samtools idxstats output
+ File containing samtools idxstats output
Structure: [ val(meta), path(idxstats) ]
- pattern: "*.idxstats"
- versions:
- type: channel
description: |
- Version information
+ Files containing software versions
Structure: [ path(versions.yml) ]
- pattern: "versions.yml"
authors:
- - "@jjaureguy760"
+ - "@dmarron"
+ - "@drpatelh"
maintainers:
- - "@jjaureguy760"
+ - "@dmarron"
+ - "@drpatelh"
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test
new file mode 100644
index 0000000..816ff3e
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test
@@ -0,0 +1,155 @@
+nextflow_workflow {
+
+ name "Test Workflow BAM_MARKDUPLICATES_PICARD"
+ script "../main.nf"
+ workflow "BAM_MARKDUPLICATES_PICARD"
+ config "./nextflow.config"
+
+ tag "picard"
+ tag "picard/markduplicates"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "bam_markduplicates_picard"
+ tag "subworkflows/bam_markduplicates_picard"
+ tag "subworkflows/bam_stats_samtools"
+ tag "bam_stats_samtools"
+ tag "samtools"
+ tag "samtools/flagstat"
+ tag "samtools/idxstats"
+ tag "samtools/index"
+ tag "samtools/stats"
+
+ test("sarscov2 - bam") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end: false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("97") },
+ { assert snapshot(
+ path(workflow.out.bam[0][1]),
+ path(workflow.out.bai[0][1]),
+ path(workflow.out.flagstat[0][1]),
+ path(workflow.out.idxstats[0][1]),
+ path(workflow.out.stats[0][1])
+ ).match() }
+ )
+ }
+ }
+
+ test("homo_sapiens - cram") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("0.999986") },
+ { assert snapshot(
+ file(workflow.out.cram[0][1]).name,
+ path(workflow.out.crai[0][1]),
+ path(workflow.out.flagstat[0][1]),
+ path(workflow.out.idxstats[0][1]),
+ path(workflow.out.stats[0][1])
+ ).match() }
+ )
+ }
+ }
+
+ test("sarscov2 - bam - stub") {
+
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end: false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+ }
+
+ test("homo_sapiens - cram - stub") {
+
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[2] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap
new file mode 100644
index 0000000..bfa595e
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap
@@ -0,0 +1,292 @@
+{
+ "homo_sapiens - cram": {
+ "content": [
+ "test.md.cram",
+ "test.md.cram.crai:md5,b641c19be42d4841ec7155c686b70f39",
+ "test.flagstat:md5,93b0ef463df947ede1f42ff60396c34d",
+ "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15",
+ "test.stats:md5,8ec963e4ee888c8cc9d41348cedd5106"
+ ],
+ "timestamp": "2026-02-19T19:00:47.4418381",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "7": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "8": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "crai": [
+
+ ],
+ "cram": [
+
+ ],
+ "csi": [
+
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "idxstats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "metrics": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-19T19:00:56.802484512",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "homo_sapiens - cram - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.md.cram:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+
+ ],
+ "4": [
+ [
+ {
+ "id": "test"
+ },
+ "test.md.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ {
+ "id": "test"
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "7": [
+ [
+ {
+ "id": "test"
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "8": [
+ [
+ {
+ "id": "test"
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bai": [
+
+ ],
+ "bam": [
+
+ ],
+ "crai": [
+ [
+ {
+ "id": "test"
+ },
+ "test.md.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "cram": [
+ [
+ {
+ "id": "test"
+ },
+ "test.md.cram:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test"
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "idxstats": [
+ [
+ {
+ "id": "test"
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "metrics": [
+ [
+ {
+ "id": "test"
+ },
+ "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test"
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-02-19T19:01:05.884074864",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "sarscov2 - bam": {
+ "content": [
+ "test.md.bam:md5,8aa8fc57298588fed0b03aacddd7ea77",
+ "test.md.bam.bai:md5,8973dd987f3ac6c352716ef89139c567",
+ "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783",
+ "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2",
+ "test.stats:md5,950c07a54b20e443105a5391400a4c92"
+ ],
+ "timestamp": "2026-02-19T19:00:36.539092187",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/nextflow.config b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/nextflow.config
new file mode 100644
index 0000000..2427cc4
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: 'PICARD_MARKDUPLICATES' {
+ ext.prefix = { "${meta.id}.md" }
+ }
+}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/main.nf
index 42fa4d6..312c2d2 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/main.nf
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/main.nf
@@ -1,5 +1,5 @@
//
-// Sort, index BAM file and run samtools stats, flagstat
+// Sort, index BAM file and run samtools stats, flagstat and idxstats
//
include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main'
@@ -8,43 +8,35 @@ include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main'
workflow BAM_SORT_STATS_SAMTOOLS {
take:
- ch_bam // channel: [ val(meta), path(bam) ]
- ch_fasta // channel: path(fasta)
+ ch_bam // channel: [ val(meta), [ bam ] ]
+ ch_fasta // channel: [ val(meta), path(fasta) ]
main:
- ch_versions = Channel.empty()
+ SAMTOOLS_SORT ( ch_bam, ch_fasta, '' )
- //
- // Sort BAM file
- //
- SAMTOOLS_SORT ( ch_bam, ch_fasta )
- ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
-
- //
- // Index sorted BAM file
- //
SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
- ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
- //
- // Join BAM and BAI for stats
- //
- ch_bam_bai = SAMTOOLS_SORT.out.bam
- .join(SAMTOOLS_INDEX.out.bai, by: [0], failOnMismatch: true)
+ SAMTOOLS_SORT.out.bam
+ .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true)
+ .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true)
+ .map {
+ meta, bam, bai, csi ->
+ if (bai) {
+ [ meta, bam, bai ]
+ } else {
+ [ meta, bam, csi ]
+ }
+ }
+ .set { ch_bam_bai }
- //
- // Run samtools stats and flagstat
- //
BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta )
- ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions)
emit:
- bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), path(bam) ]
- bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ]
-
- stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ]
- flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
- idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
+ bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ]
+ bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ]
+ csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ]
- versions = ch_versions // channel: path(versions.yml)
+ stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ]
+ flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ]
+ idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ]
}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml
index 08b172a..e01f9cc 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml
@@ -1,66 +1,70 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
-name: "bam_sort_stats_samtools"
-description: Sort BAM files and collect statistics with samtools
+name: bam_sort_stats_samtools
+description: Sort, index BAM file and run samtools stats, flagstat and idxstats
keywords:
- - bam
- sort
- - statistics
- - samtools
+ - bam
+ - sam
+ - cram
components:
- samtools/sort
- samtools/index
+ - samtools/stats
+ - samtools/idxstats
+ - samtools/flagstat
- bam_stats_samtools
input:
- - ch_bam:
- type: channel
- description: |
- Channel containing unsorted BAM files
- Structure: [ val(meta), path(bam) ]
- pattern: "*.bam"
- - ch_fasta:
- type: channel
+ - meta:
+ type: map
description: |
- Channel containing reference FASTA for stats calculation
- Structure: path(fasta)
- pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
-output:
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
- bam:
- type: channel
+ type: file
+ description: BAM/CRAM/SAM file
+ pattern: "*.{bam,cram,sam}"
+ - fasta:
+ type: file
+ description: Reference genome fasta file
+ pattern: "*.{fasta,fa}"
+# TODO Update when we decide on a standard for subworkflow docs
+output:
+ - meta:
+ type: map
description: |
- Sorted BAM file
- Structure: [ val(meta), path(bam) ]
- pattern: "*.sorted.bam"
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bam:
+ type: file
+ description: Sorted BAM/CRAM/SAM file
+ pattern: "*.{bam,cram,sam}"
- bai:
- type: channel
- description: |
- BAM index file
- Structure: [ val(meta), path(bai) ]
- pattern: "*.bai"
+ type: file
+ description: BAM/CRAM/SAM index file
+ pattern: "*.{bai,crai,sai}"
+ - csi:
+ type: file
+ description: CSI index file for the sorted BAM file
+ pattern: "*.csi"
- stats:
- type: channel
- description: |
- Samtools stats output
- Structure: [ val(meta), path(stats) ]
- pattern: "*.stats"
+ type: file
+ description: File containing samtools stats output
+ pattern: "*.{stats}"
- flagstat:
- type: channel
- description: |
- Samtools flagstat output
- Structure: [ val(meta), path(flagstat) ]
- pattern: "*.flagstat"
+ type: file
+ description: File containing samtools flagstat output
+ pattern: "*.{flagstat}"
- idxstats:
- type: channel
- description: |
- Samtools idxstats output with per-chromosome counts
- Structure: [ val(meta), path(idxstats) ]
- pattern: "*.idxstats"
+ type: file
+ description: File containing samtools idxstats output
+ pattern: "*.{idxstats}"
- versions:
- type: channel
- description: |
- Version information
- Structure: path(versions.yml)
+ type: file
+ description: File containing software versions
pattern: "versions.yml"
authors:
- - "@jjaureguy760"
+ - "@drpatelh"
+ - "@ewels"
maintainers:
- - "@jjaureguy760"
+ - "@drpatelh"
+ - "@ewels"
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test
new file mode 100644
index 0000000..c584128
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test
@@ -0,0 +1,132 @@
+nextflow_workflow {
+
+ name "Test Workflow BAM_SORT_STATS_SAMTOOLS"
+ script "../main.nf"
+ workflow "BAM_SORT_STATS_SAMTOOLS"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "subworkflows/bam_sort_stats_samtools"
+ tag "bam_sort_stats_samtools"
+ tag "subworkflows/bam_stats_samtools"
+ tag "bam_stats_samtools"
+ tag "samtools"
+ tag "samtools/index"
+ tag "samtools/sort"
+ tag "samtools/stats"
+ tag "samtools/idxstats"
+ tag "samtools/flagstat"
+
+ test("test_bam_sort_stats_samtools_single_end") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"},
+ { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"},
+ { assert snapshot(
+ workflow.out.flagstat,
+ workflow.out.idxstats,
+ workflow.out.stats).match() }
+ )
+ }
+ }
+
+ test("test_bam_sort_stats_samtools_paired_end") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"},
+ { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"},
+ { assert snapshot(
+ workflow.out.flagstat,
+ workflow.out.idxstats,
+ workflow.out.stats).match() }
+ )
+ }
+ }
+
+ test("test_bam_sort_stats_samtools_single_end - stub") {
+
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+ }
+
+ test("test_bam_sort_stats_samtools_paired_end - stub") {
+
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap
new file mode 100644
index 0000000..f62d68c
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap
@@ -0,0 +1,288 @@
+{
+ "test_bam_sort_stats_samtools_single_end": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,1101fe711c4a389fdb5c4a1532107d1f"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:33:01.647190952"
+ },
+ "test_bam_sort_stats_samtools_paired_end": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,f26c554c244ee86c89d62ebed509fd95"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:33:08.706742267"
+ },
+ "test_bam_sort_stats_samtools_single_end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "5": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "idxstats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:11:02.1412136"
+ },
+ "test_bam_sort_stats_samtools_paired_end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "5": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "idxstats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:11:09.165267895"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/main.nf
index 7ee13e9..34e8fe1 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/main.nf
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/main.nf
@@ -1,32 +1,25 @@
//
-// Run samtools stats, flagstat, and idxstats
+// Run SAMtools stats, flagstat and idxstats
//
include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main'
-include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main'
include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main'
+include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main'
workflow BAM_STATS_SAMTOOLS {
take:
- ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ]
- ch_fasta // channel: path(fasta)
+ ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ]
+ ch_fasta // channel: [ val(meta), path(fasta) ]
main:
- ch_versions = Channel.empty()
-
SAMTOOLS_STATS ( ch_bam_bai, ch_fasta )
- ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first())
SAMTOOLS_FLAGSTAT ( ch_bam_bai )
- ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first())
SAMTOOLS_IDXSTATS ( ch_bam_bai )
- ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first())
emit:
stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ]
flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ]
idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ]
-
- versions = ch_versions // channel: path(versions.yml)
}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/meta.yml
index b1a9700..809bf73 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/meta.yml
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/meta.yml
@@ -1,54 +1,43 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
-name: "bam_stats_samtools"
-description: Run samtools stats, flagstat, and idxstats on BAM files
+name: bam_stats_samtools
+description: Produces comprehensive statistics from SAM/BAM/CRAM file
keywords:
- - bam
- statistics
- - qc
- - samtools
+ - counts
+ - bam
+ - sam
+ - cram
components:
- samtools/stats
- - samtools/flagstat
- samtools/idxstats
+ - samtools/flagstat
input:
- ch_bam_bai:
- type: channel
description: |
- Channel containing BAM and BAI files
+ The input channel containing the BAM/CRAM and its index
Structure: [ val(meta), path(bam), path(bai) ]
- pattern: "*.{bam,bai}"
- ch_fasta:
- type: channel
description: |
- Channel containing reference FASTA for stats calculation
- Structure: path(fasta)
- pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
+ Reference genome fasta file
+ Structure: [ val(meta), path(fasta) ]
output:
- stats:
- type: channel
description: |
- Samtools stats output with alignment metrics
+ File containing samtools stats output
Structure: [ val(meta), path(stats) ]
- pattern: "*.stats"
- flagstat:
- type: channel
description: |
- Samtools flagstat output with flag counts
+ File containing samtools flagstat output
Structure: [ val(meta), path(flagstat) ]
- pattern: "*.flagstat"
- idxstats:
- type: channel
description: |
- Samtools idxstats output with per-chromosome counts
- Structure: [ val(meta), path(idxstats) ]
- pattern: "*.idxstats"
+ File containing samtools idxstats output
+ Structure: [ val(meta), path(idxstats)]
- versions:
- type: channel
description: |
- Version information
- Structure: path(versions.yml)
- pattern: "versions.yml"
+ Files containing software versions
+ Structure: [ path(versions.yml) ]
authors:
- - "@jjaureguy760"
+ - "@drpatelh"
maintainers:
- - "@jjaureguy760"
+ - "@drpatelh"
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test
new file mode 100644
index 0000000..2f32969
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test
@@ -0,0 +1,185 @@
+nextflow_workflow {
+
+ name "Test Workflow BAM_STATS_SAMTOOLS"
+ script "../main.nf"
+ workflow "BAM_STATS_SAMTOOLS"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "bam_stats_samtools"
+ tag "subworkflows/bam_stats_samtools"
+ tag "samtools"
+ tag "samtools/flagstat"
+ tag "samtools/idxstats"
+ tag "samtools/stats"
+
+ test("test_bam_stats_samtools_single_end") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(
+ workflow.out.flagstat,
+ workflow.out.idxstats,
+ workflow.out.stats).match() }
+ )
+ }
+ }
+
+ test("test_bam_stats_samtools_paired_end") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.flagstat,
+ workflow.out.idxstats,
+ workflow.out.stats).match() }
+ )
+ }
+ }
+
+ test("test_bam_stats_samtools_paired_end_cram") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(
+ workflow.out.flagstat,
+ workflow.out.idxstats,
+ workflow.out.stats).match() }
+ )
+ }
+ }
+
+ test ("test_bam_stats_samtools_single_end - stub") {
+
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+ }
+
+ test("test_bam_stats_samtools_paired_end - stub") {
+
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+ }
+
+ test("test_bam_stats_samtools_paired_end_cram - stub") {
+
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'genome' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap
new file mode 100644
index 0000000..9c8ff1b
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap
@@ -0,0 +1,305 @@
+{
+ "test_bam_stats_samtools_paired_end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "idxstats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:10:30.076183827"
+ },
+ "test_bam_stats_samtools_single_end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "idxstats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:10:24.379362883"
+ },
+ "test_bam_stats_samtools_paired_end_cram - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ "idxstats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:10:35.91658956"
+ },
+ "test_bam_stats_samtools_single_end": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.stats:md5,7a05a22bdb17e8df6e8c2d100ff09a31"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:32:20.243663217"
+ },
+ "test_bam_stats_samtools_paired_end": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.stats:md5,a391612b5ef5b181e854ccaad8c8a068"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:32:26.434187887"
+ },
+ "test_bam_stats_samtools_paired_end_cram": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.stats:md5,2b0e31ab01b867a6ff312023ae03838d"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T11:32:32.441454186"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/main.nf
index 4a42b76..8cbc514 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/main.nf
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/main.nf
@@ -3,56 +3,41 @@
//
include { BOWTIE2_ALIGN } from '../../../modules/nf-core/bowtie2/align/main'
-include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main'
-include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main'
+include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main'
workflow FASTQ_ALIGN_BOWTIE2 {
take:
- ch_reads // channel: [ val(meta), path(reads) ]
- ch_index // channel: path(index)
- ch_fasta // channel: path(fasta)
+ ch_reads // channel: [ val(meta), [ reads ] ]
+ ch_index // channel: /path/to/bowtie2/index/
+ save_unaligned // val
+ sort_bam // val
+ ch_fasta // channel: /path/to/reference.fasta
main:
- ch_versions = Channel.empty()
- //
- // Align reads with Bowtie2 (outputs sorted BAM)
- //
- BOWTIE2_ALIGN (
- ch_reads,
- ch_index,
- ch_fasta,
- false, // save_unaligned
- true // sort_bam
- )
- ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first())
+ ch_versions = channel.empty()
//
- // Index BAM file
+ // Map reads with Bowtie2
//
- SAMTOOLS_INDEX ( BOWTIE2_ALIGN.out.aligned )
- ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+ BOWTIE2_ALIGN ( ch_reads, ch_index, ch_fasta, save_unaligned, sort_bam )
//
- // Join BAM and BAI
+ // Sort, index BAM file and run samtools stats, flagstat and idxstats
//
- ch_bam_bai = BOWTIE2_ALIGN.out.aligned
- .join(SAMTOOLS_INDEX.out.bai, by: [0], failOnMismatch: true)
-
- //
- // Run BAM stats
- //
- BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta )
- ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions)
+ BAM_SORT_STATS_SAMTOOLS ( BOWTIE2_ALIGN.out.bam, ch_fasta )
emit:
- bam = BOWTIE2_ALIGN.out.aligned // channel: [ val(meta), path(bam) ]
- bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ]
- log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), path(log) ]
-
- stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ]
- flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
- idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
-
- versions = ch_versions // channel: path(versions.yml)
+ bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), aligned ]
+ log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ]
+ fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ]
+
+ bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ]
+ bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ]
+ csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ]
+ stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ]
+ flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ]
+ idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ]
+
+ versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/meta.yml
index 4434311..b18e405 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/meta.yml
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/meta.yml
@@ -1,79 +1,67 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
-name: "fastq_align_bowtie2"
-description: Align reads with Bowtie2 and collect BAM statistics
+name: fastq_align_bowtie2
+description: Align reads to a reference genome using bowtie2 then sort with samtools
keywords:
- - alignment
- - bowtie2
- - bam
- - map
- - fastq
+ - align
+ - fasta
+ - genome
+ - reference
components:
- bowtie2/align
+ - samtools/sort
- samtools/index
- - bam_stats_samtools
+ - samtools/stats
+ - samtools/idxstats
+ - samtools/flagstat
+ - bam_sort_stats_samtools
input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
- ch_reads:
- type: channel
+ type: file
description: |
- Channel containing FASTQ reads
- Structure: [ val(meta), path(reads) ]
- pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
- ch_index:
- type: channel
+ type: file
+ description: Bowtie2 genome index files
+ pattern: "*.{bt2,bt2l}"
+ - save_unaligned:
+ type: boolean
description: |
- Channel containing Bowtie2 index files
- Structure: path(index)
- pattern: "*.bt2"
- - ch_fasta:
- type: channel
+ Save reads that do not map to the reference (true) or discard them (false)
+ (default: false)
+ - sort_bam:
+ type: boolean
description: |
- Channel containing reference FASTA
- Structure: path(fasta)
- pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
+ Use samtools sort (true) or samtools view (false)
+ (default: false)
+ - ch_fasta:
+ type: file
+ description: Reference fasta file
+ pattern: "*.{fasta,fa}"
+# TODO Update when we decide on a standard for subworkflow docs
output:
- bam:
- type: channel
- description: |
- Aligned BAM file
- Structure: [ val(meta), path(bam) ]
- pattern: "*.bam"
- - bai:
- type: channel
- description: |
- BAM index file
- Structure: [ val(meta), path(bai) ]
- pattern: "*.bai"
- - log_out:
- type: channel
- description: |
- Bowtie2 alignment log
- Structure: [ val(meta), path(log) ]
- pattern: "*.log"
- - stats:
- type: channel
- description: |
- Samtools stats output
- Structure: [ val(meta), path(stats) ]
- pattern: "*.stats"
- - flagstat:
- type: channel
- description: |
- Samtools flagstat output
- Structure: [ val(meta), path(flagstat) ]
- pattern: "*.flagstat"
- - idxstats:
- type: channel
- description: |
- Samtools idxstats output
- Structure: [ val(meta), path(idxstats) ]
- pattern: "*.idxstats"
+ type: file
+ description: Output BAM file containing read alignments
+ pattern: "*.{bam}"
- versions:
- type: channel
- description: |
- Version information
- Structure: path(versions.yml)
+ type: file
+ description: File containing software versions
pattern: "versions.yml"
+ - fastq:
+ type: file
+ description: Unaligned FastQ files
+ pattern: "*.fastq.gz"
+ - log:
+ type: file
+ description: Alignment log
+ pattern: "*.log"
authors:
- - "@jjaureguy760"
+ - "@drpatelh"
maintainers:
- - "@jjaureguy760"
+ - "@drpatelh"
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test
new file mode 100644
index 0000000..6eca398
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test
@@ -0,0 +1,189 @@
+nextflow_workflow {
+
+ name "Test Subworkflow FASTQ_ALIGN_BOWTIE2"
+ script "../main.nf"
+ config "./nextflow.config"
+ workflow "FASTQ_ALIGN_BOWTIE2"
+
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "subworkflows/fastq_align_bowtie2"
+ tag "subworkflows/bam_sort_stats_samtools"
+ tag "bowtie2"
+ tag "bowtie2/build"
+ tag "bowtie2/align"
+
+ test("test_align_bowtie2_single_end") {
+ setup {
+ run("BOWTIE2_BUILD") {
+ script "../../../../modules/nf-core/bowtie2/build/main.nf"
+ process {
+ """
+ input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+ }
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([[ id:'test', single_end:true ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]])
+ input[1] = BOWTIE2_BUILD.out.index
+ input[2] = false
+ input[3] = false
+ input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(
+ file(workflow.out.bam_orig[0][1]).name,
+ workflow.out.fastq,
+ workflow.out.log_out,
+ file(workflow.out.bam[0][1]).name,
+ file(workflow.out.bai[0][1]).name,
+ workflow.out.csi,
+ workflow.out.stats,
+ workflow.out.flagstat,
+ workflow.out.idxstats,
+ workflow.out.versions
+ ).match()}
+ )
+ }
+ }
+
+ test("test_align_bowtie2_paired_end") {
+ setup {
+ run("BOWTIE2_BUILD") {
+ script "../../../../modules/nf-core/bowtie2/build/main.nf"
+ process {
+ """
+ input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+ }
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([[ id:'test', single_end:false ], [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]])
+ input[1] = BOWTIE2_BUILD.out.index
+ input[2] = false
+ input[3] = false
+ input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(
+ file(workflow.out.bam_orig[0][1]).name,
+ workflow.out.fastq,
+ workflow.out.log_out,
+ file(workflow.out.bam[0][1]).name,
+ file(workflow.out.bai[0][1]).name,
+ workflow.out.csi,
+ workflow.out.stats,
+ workflow.out.flagstat,
+ workflow.out.idxstats,
+ workflow.out.versions
+ ).match()}
+ )
+ }
+ }
+
+ test("test_align_bowtie2_single_end - stub") {
+
+ options "-stub"
+
+ setup {
+ run("BOWTIE2_BUILD") {
+ script "../../../../modules/nf-core/bowtie2/build/main.nf"
+ process {
+ """
+ input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+ }
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([[ id:'test', single_end:true ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]])
+ input[1] = BOWTIE2_BUILD.out.index
+ input[2] = false
+ input[3] = false
+ input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(
+ file(workflow.out.bam_orig[0][1]).name,
+ workflow.out.fastq,
+ workflow.out.log_out,
+ file(workflow.out.bam[0][1]).name,
+ file(workflow.out.bai[0][1]).name,
+ workflow.out.csi,
+ workflow.out.stats,
+ workflow.out.flagstat,
+ workflow.out.idxstats,
+ workflow.out.versions
+ ).match()}
+ )
+ }
+ }
+
+ test("test_align_bowtie2_paired_end - stub") {
+
+ options "-stub"
+
+ setup {
+ run("BOWTIE2_BUILD") {
+ script "../../../../modules/nf-core/bowtie2/build/main.nf"
+ process {
+ """
+ input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+ }
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([[ id:'test', single_end:false ], [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]])
+ input[1] = BOWTIE2_BUILD.out.index
+ input[2] = false
+ input[3] = false
+ input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(
+ file(workflow.out.bam_orig[0][1]).name,
+ workflow.out.fastq,
+ workflow.out.log_out,
+ file(workflow.out.bam[0][1]).name,
+ file(workflow.out.bai[0][1]).name,
+ workflow.out.csi,
+ workflow.out.stats,
+ workflow.out.flagstat,
+ workflow.out.idxstats,
+ workflow.out.versions
+ ).match()}
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap
new file mode 100644
index 0000000..2dc8896
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap
@@ -0,0 +1,230 @@
+{
+ "test_align_bowtie2_single_end - stub": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bowtie2.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "test.sorted.bam",
+ "test.sorted.bam.bai",
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T15:14:25.504699933"
+ },
+ "test_align_bowtie2_single_end": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bowtie2.log:md5,7b8a9e61b7646da1089b041333c41a87"
+ ]
+ ],
+ "test.sorted.bam",
+ "test.sorted.bam.bai",
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.stats:md5,48b911852e91d77db59154f7355ede4f"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.flagstat:md5,e9ce9093133116bc54fd335cfe698372"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.idxstats:md5,e16eb632f7f462514b0873c7ac8ac905"
+ ]
+ ],
+ [
+
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T15:14:08.108143527"
+ },
+ "test_align_bowtie2_paired_end": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19"
+ ]
+ ],
+ "test.sorted.bam",
+ "test.sorted.bam.bai",
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.stats:md5,cb422b3fcd4327488cb6bc5ac15a48ff"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.flagstat:md5,49f3d51a8804ce58fe9cecd2549d279b"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.idxstats:md5,29ff2fa56d35b2a47625b8f517f1a947"
+ ]
+ ],
+ [
+
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T15:14:17.07821488"
+ },
+ "test_align_bowtie2_paired_end - stub": {
+ "content": [
+ "test.bam",
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bowtie2.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "test.sorted.bam",
+ "test.sorted.bam.bai",
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.flagstat:md5,67394650dbae96d1a4fcc70484822159"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.3",
+ "nextflow": "25.10.3"
+ },
+ "timestamp": "2026-02-03T15:14:34.088967148"
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config
new file mode 100644
index 0000000..9086ebf
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config
@@ -0,0 +1,8 @@
+process { // test-only process configuration for the FASTQ_ALIGN_BOWTIE2 nf-test suite
+ withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { // regex selector: SAMTOOLS_* processes called directly by BAM_SORT_STATS_SAMTOOLS
+ ext.prefix = { "${meta.id}.sorted" } // closure resolved per task; id 'test' gives "test.sorted" (cf. test.sorted.bam in the snapshot)
+ }
+ withName: '.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { // regex selector: everything nested under BAM_STATS_SAMTOOLS
+ ext.prefix = { "${meta.id}.sorted" } // same prefix, matching test.sorted.stats / .flagstat / .idxstats in the snapshot
+ }
+}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/main.nf
index 2524b46..e06a5fa 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/main.nf
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/main.nf
@@ -1,56 +1,41 @@
//
-// Alignment with BWA-MEM
+// Alignment with BWA
//
include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main'
-include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main'
-include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main'
+include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main'
workflow FASTQ_ALIGN_BWA {
take:
- ch_reads // channel: [ val(meta), path(reads) ]
- ch_index // channel: path(index)
- ch_fasta // channel: path(fasta)
+ ch_reads // channel (mandatory): [ val(meta), [ path(reads) ] ]
+ ch_index // channel (mandatory): [ val(meta2), path(index) ]
+ val_sort_bam // boolean (mandatory): true or false; forwarded unchanged as the last argument of BWA_MEM
+ ch_fasta // channel (optional) : [ val(meta3), path(fasta) ]; passed to both BWA_MEM and BAM_SORT_STATS_SAMTOOLS
main:
- ch_versions = Channel.empty()
+ ch_versions = channel.empty() // nothing is mixed into this channel in this workflow body, so 'versions' is emitted empty
//
- // Align reads with BWA-MEM (outputs sorted BAM)
+ // Map reads with BWA
//
- BWA_MEM (
- ch_reads,
- ch_index,
- ch_fasta,
- true // sort_bam
- )
- ch_versions = ch_versions.mix(BWA_MEM.out.versions.first())
- //
- // Index BAM file
- //
- SAMTOOLS_INDEX ( BWA_MEM.out.bam )
- ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+ BWA_MEM ( ch_reads, ch_index, ch_fasta, val_sort_bam )
//
- // Join BAM and BAI
+ // Sort, index BAM file and run samtools stats, flagstat and idxstats
//
- ch_bam_bai = BWA_MEM.out.bam
- .join(SAMTOOLS_INDEX.out.bai, by: [0], failOnMismatch: true)
- //
- // Run BAM stats
- //
- BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta )
- ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions)
+ BAM_SORT_STATS_SAMTOOLS ( BWA_MEM.out.bam, ch_fasta )
emit:
- bam = BWA_MEM.out.bam // channel: [ val(meta), path(bam) ]
- bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ]
+ bam_orig = BWA_MEM.out.bam // channel: [ val(meta), path(bam) ]; BAM exactly as written by BWA_MEM
- stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ]
- flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
- idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
+ bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ]
+ bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ]
+ csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ]; empty in the committed test snapshots
+ stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ]
+ flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
+ idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]
- versions = ch_versions // channel: path(versions.yml)
+ versions = ch_versions // channel: [ path(versions.yml) ]; empty in the committed test snapshots
}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/meta.yml
index 31ebdc4..fa21840 100644
--- a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/meta.yml
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/meta.yml
@@ -1,73 +1,73 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
-name: "fastq_align_bwa"
-description: Align reads with BWA-MEM and collect BAM statistics
+name: fastq_align_bwa
+description: Align reads to a reference genome using bwa then sort with samtools
keywords:
- - alignment
- - bwa
- - bam
- - map
- - fastq
+ - align
+ - fasta
+ - genome
+ - reference
components:
- bwa/mem
+ - bwa/align # NOTE(review): main.nf includes only BWA_MEM and BAM_SORT_STATS_SAMTOOLS; confirm this component name
+ - samtools/sort
- samtools/index
- - bam_stats_samtools
+ - samtools/stats
+ - samtools/idxstats
+ - samtools/flagstat
+ - bam_sort_stats_samtools # the only subworkflow main.nf includes directly
input:
- ch_reads:
- type: channel
description: |
- Channel containing FASTQ reads
- Structure: [ val(meta), path(reads) ]
- pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ Structure: [ val(meta), [ path(reads) ] ]
- ch_index:
- type: channel
description: |
- Channel containing BWA index files
- Structure: path(index)
- pattern: "*.{amb,ann,bwt,pac,sa}"
+ BWA genome index files
+ Structure: [ val(meta), path(index) ]
+ - val_sort_bam: # third positional input in main.nf; passed unchanged to BWA_MEM
+ type: boolean
+ description: If true bwa modules sort resulting bam files
+ pattern: "true|false"
- ch_fasta:
- type: channel
+ type: file # NOTE(review): main.nf documents ch_fasta as a channel of [ val(meta3), path(fasta) ]
description: |
- Channel containing reference FASTA
- Structure: path(fasta)
- pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
+ Optional reference fasta file. This only needs to be given if val_sort_bam = true.
+ Structure: [ val(meta), path(fasta) ]
output:
+ - bam_orig: # emitted from BWA_MEM.out.bam in main.nf
+ description: |
+ BAM file produced by bwa
+ Structure: [ val(meta), path(bam) ]
- bam:
- type: channel
description: |
- Aligned BAM file
+ BAM file ordered by samtools
Structure: [ val(meta), path(bam) ]
- pattern: "*.bam"
- bai:
- type: channel
description: |
- BAM index file
+ BAI index of the ordered BAM file
Structure: [ val(meta), path(bai) ]
- pattern: "*.bai"
+ - csi: # emitted from BAM_SORT_STATS_SAMTOOLS.out.csi; empty in the committed test snapshots
+ description: |
+ CSI index of the ordered BAM file
+ Structure: [ val(meta), path(csi) ]
- stats:
- type: channel
description: |
- Samtools stats output
+ File containing samtools stats output
Structure: [ val(meta), path(stats) ]
- pattern: "*.stats"
- flagstat:
- type: channel
description: |
- Samtools flagstat output
+ File containing samtools flagstat output
Structure: [ val(meta), path(flagstat) ]
- pattern: "*.flagstat"
- idxstats:
- type: channel
description: |
- Samtools idxstats output
+ File containing samtools idxstats output
Structure: [ val(meta), path(idxstats) ]
- pattern: "*.idxstats"
- versions:
- type: channel
description: |
- Version information
- Structure: path(versions.yml)
- pattern: "versions.yml"
+ Files containing software versions
+ Structure: [ path(versions.yml) ]
authors:
- - "@jjaureguy760"
+ - "@JoseEspinosa"
maintainers:
- - "@jjaureguy760"
+ - "@JoseEspinosa"
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test
new file mode 100644
index 0000000..7262325
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test
@@ -0,0 +1,77 @@
+nextflow_workflow { // nf-test suite for the FASTQ_ALIGN_BWA subworkflow
+
+ name "Test Subworkflow FASTQ_ALIGN_BWA"
+ script "../main.nf"
+ config "./nextflow.config" // sets ext.prefix for the samtools steps, which determines the output file names recorded in the snapshot
+ workflow "FASTQ_ALIGN_BWA"
+
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "subworkflows/fastq_align_bwa"
+ tag "subworkflows/bam_sort_stats_samtools"
+ tag "bwa"
+ tag "bwa/mem"
+ tag "bwa/index"
+
+
+ test("fastq_align_bwa_single_end") { // one FASTQ file, meta.single_end = true
+ setup { // dependency executed before the workflow under test
+ run("BWA_INDEX") { // its output is consumed below as BWA_INDEX.out.index
+ script "../../../../modules/nf-core/bwa/index/main.nf"
+ process {
+ """
+ input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+ }
+ when {
+ workflow { // positional inputs follow take: in ../main.nf: reads, index, val_sort_bam, fasta
+ """
+ input[0] = Channel.of([[ id:'test', single_end:true ],[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]])
+ input[1] = BWA_INDEX.out.index
+ input[2] = false
+ input[3] = Channel.value([[id: 'genome'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+
+ then {
+ assertAll( // evaluates every closure so all failures are reported together
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match()} // snapshots every emitted channel, under both its index and its name, including file md5s
+ )
+ }
+ }
+
+ test("fastq_align_bwa_paired_end") { // two FASTQ files, meta.single_end = false
+ setup { // same index build as the single-end test
+ run("BWA_INDEX") {
+ script "../../../../modules/nf-core/bwa/index/main.nf"
+ process {
+ """
+ input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+ }
+ when {
+ workflow { // positional inputs follow take: in ../main.nf: reads, index, val_sort_bam, fasta
+ """
+ input[0] = Channel.of([[ id:'test', single_end:false ], [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ] )
+ input[1] = BWA_INDEX.out.index
+ input[2] = false
+ input[3] = Channel.value([[id: 'genome'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match()} // snapshots every emitted channel, including file md5s
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test.snap
new file mode 100644
index 0000000..9a16da2
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test.snap
@@ -0,0 +1,264 @@
+{
+ "fastq_align_bwa_paired_end": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,5dbdcfdba65fac634dcbb6984cffe2c4"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,ba4b90f87517a16a6ae6142f37a75d79"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.bai:md5,4c5e6fa0e71327b79034eebd652f2121"
+ ]
+ ],
+ "3": [
+
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.stats:md5,75934f2a51780a80d2ab4674301a018d"
+ ]
+ ],
+ "5": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.flagstat:md5,18d602435a02a4d721b78d1812622159"
+ ]
+ ],
+ "6": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.idxstats:md5,85d20a901eef23ca50c323638a2eb602"
+ ]
+ ],
+ "7": [
+
+ ],
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.bai:md5,4c5e6fa0e71327b79034eebd652f2121"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,ba4b90f87517a16a6ae6142f37a75d79"
+ ]
+ ],
+ "bam_orig": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,5dbdcfdba65fac634dcbb6984cffe2c4"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.flagstat:md5,18d602435a02a4d721b78d1812622159"
+ ]
+ ],
+ "idxstats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.idxstats:md5,85d20a901eef23ca50c323638a2eb602"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.stats:md5,75934f2a51780a80d2ab4674301a018d"
+ ]
+ ],
+ "versions": [
+
+ ]
+ }
+ ],
+ "timestamp": "2026-02-18T12:47:43.306112",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "fastq_align_bwa_single_end": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,f7af092ddd5203f647ba96b926392c3e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam:md5,c406a43adde2d9673e71d8a8c7db7cfd"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam.bai:md5,f79a40341ecfaae11d8621b138d4c2ea"
+ ]
+ ],
+ "3": [
+
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam.stats:md5,0883b19c92a783883b3e11d5bfcc5d6a"
+ ]
+ ],
+ "5": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam.flagstat:md5,2191911d72575a2358b08b1df64ccb53"
+ ]
+ ],
+ "6": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam.idxstats:md5,613e048487662c694aa4a2f73ca96a20"
+ ]
+ ],
+ "7": [
+
+ ],
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam.bai:md5,f79a40341ecfaae11d8621b138d4c2ea"
+ ]
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam:md5,c406a43adde2d9673e71d8a8c7db7cfd"
+ ]
+ ],
+ "bam_orig": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,f7af092ddd5203f647ba96b926392c3e"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "flagstat": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam.flagstat:md5,2191911d72575a2358b08b1df64ccb53"
+ ]
+ ],
+ "idxstats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam.idxstats:md5,613e048487662c694aa4a2f73ca96a20"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.sorted.bam.stats:md5,0883b19c92a783883b3e11d5bfcc5d6a"
+ ]
+ ],
+ "versions": [
+
+ ]
+ }
+ ],
+ "timestamp": "2026-02-18T12:47:30.203617",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ }
+}
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/nextflow.config b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/nextflow.config
new file mode 100644
index 0000000..2f85e80
--- /dev/null
+++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/nextflow.config
@@ -0,0 +1,8 @@
+process { // test-only process configuration for the FASTQ_ALIGN_BWA nf-test suite
+ withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { // regex selector: SAMTOOLS_* processes called directly by BAM_SORT_STATS_SAMTOOLS
+ ext.prefix = { "${meta.id}.sorted" } // closure resolved per task; id 'test' gives "test.sorted" (cf. test.sorted.bam in the snapshot)
+ }
+ withName: '.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { // regex selector: everything nested under BAM_STATS_SAMTOOLS
+ ext.prefix = { "${meta.id}.sorted.bam" } // differs from the selector above; matches test.sorted.bam.stats / .flagstat / .idxstats in the snapshot
+ }
+}
diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa
index 923c055..182b3f7 100644
--- a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa
+++ b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa
@@ -1,331 +1,335 @@
>chr_test
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT
-AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC
-TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC
-TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC
-TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC
-TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC
-TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC
-TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC
-TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC
-TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA
-GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG
+GAAAGGCATAATAAGTAGCACGTACTAACGCGTCTTCGCTGAAAATAGTTAACGGAGATC
+GTGCGAATAACCTGTCTAATAGCTACTAAAGCTATCTCCAGGTAGATTCCATACCTGGAG
+TGTATACCCTACCATAGGATTACTATGATCGTTAATGAAAGACCAAGAACTTGCAATTTG
+GCATTCAATTAACTCTACCCCATATATCAGTTCCTGATCTTGAGTCACAAGGAACAGGTG
+TCAGATGTTGATCCAAACCCTACGGCGACTGCAAATAGGAGATCCATAAGGAGTTAACCT
+CGAATCCCCAAAGCTGACCCCAGTCCCCAGACCACTTCAAATCCAGTCTCACACAATGTG
+TTTAGACTGGGTAGTTCGTTTTATCGCGTTAATTGTTATCCAATGTCGGAAAATCATGAG
+TAGAGGATACTAACTCGCGCCGGTCTCGTAAGGTGAAAATTAAGGATTTATCGGCGTATG
+CCTGTGAATATGTATAGATTAGATATATGTGCAAATCTGGGGCAAAAGTAGGAGGACCAA
+TGCTGAGGAGCGACGTTTTCCACGCGTGCACTTTGACCACATGTACAACTCGAACAGTGG
+GTCAAGTGTTTGTGAAAAGGAATGCTAAAATTACTGACTCTTTAACTCTAGAATTCAGGC
+ATTTCCTGGGCAAGAAAATGTAGGTGCGGGCTTGCCAATGTAAGGCTTAATTAACCTCCG
+AAGTGCAGGTATTGCTGACCTTTTCTTCGTTATGGGATCTGACGAATTACCTACTGTACC
+ATTCTCCACTCTCATGCTATTTTAAGTAGAGGCTGCCTATGCCTTTGTGATCTGGCCCTT
+GGCAAGCCGTAGCTGCACTTATTCAACGACATAAACCGATTGGTACATTATTCTCGATGG
+AGTCACGTGGGGCGCGTTTGATGAATCTCCACTCGTACACCGCCCTCATTGGGCCAAACT
+CAACCTTACTTACATGGCTGATATTCATTCCAGTCTTAACTGGGAGAATAGAACTACACA
+AAAGAAGATAAGTGTGTATCAGCTTCATTGTCAAGAAGTTCTTGAGCGGGATATTTATGT
+ACACAAGCTGTTATGGCGCGTTAGAACTGTCCCCGGACCAAGTTACTTAGAGATTTGGTA
+AAGGAGTTAGATAGTGATGATAAATAGATGTCCACAACCTTGTAATCGCCACAGTTTTAT
+ATCTGCCAAAGGGAGTGGTCTGGCGAATTAATTTACACCGTTTCCTCGTTAAACTGTAAT
+TTATATTGGGAAGAGGCCTGATCGTGCTTCCGCGGTGTTTAACTAAACAGCCATGATACG
+CATTGATAGTTTCTCTCCTATTCCCAAGCTACCAGACATCATTAATACTACCGCAACGAG
+TAAACTGTAATATCTACGATAATGATGACATTCTTTGCAAGTGGGGTATCAGTGGCAGTT
+AAACTCTAGATGCTATCGCTCTTCTCGAGCTTAGTGTGTACTCACCAGTCGCAGGAAGTT
+TGGCTGTTTGAAGTTTAATCACTACTCTAGCTTATCCGCGCTAAACATTCTGATCGTGCA
+CGTGTCGGACTCAAAATGTCCCAGTATTTACAGGGCTCAAGTGGTGTGACTCGTAATTAG
+TGGCCATTTTAAATTGACATTTGTTTTACTCATATCGTTCTCGGTTTATATGACGACTCT
+CGATTAATTTGTTGACGTTCTGTCTGCGCGGATCGGTGGAGGCAGACAATAGTGCCGAAA
+TGTTACTTGGGGAATACTAAGTTCCAAGTCCCCTAGTTATATCGAGGAGTGATGAGATCT
+CCTACTGCATTGCCACACCTTCCCCATACACTTCCTAAATAAGCTGACCCTAGAATAAAG
+CTGAGGAATTTCGTACTGAAAGGTTTTGAAGCATGATATTTATTAAGATCTTTATCGTCG
+TATACCACATGGCGTCTCCTGGTGTATTGAAATGTTCATACGACTGCAAAAGGAGTAACA
+TACGTGGTTAGATACCCGTTCCGGTTATGTCTGCCTCTAAAGCCAGAAGGCAGGTTCTCA
+CCACTAGACTGTTTATTACTCCTTTAAACTTATTCTGGACCGTACAGTCTGAACCGGTCA
+GATTGGGTTATATACACGCCAAAATCATTTTCAGCGCGATTAAATTGTCATAACCTAACC
+TACTCGGGTAAACTCTGACGTCATCTGCTGAACTTCTGGAGCGAAGGGTAATTAAATTTA
+TAGTTTTACCCTATATTATTTAAAGGAATCTGCTTCCCATCATCCTGTTATCTATGTGTC
+TGTTGCCTTGAGGGACTTTCGTCTCTGAGGTGACGTGCTAATTGTTTGGTTAATCACATT
+ATTTGTTCACGGACAAATCATAGTAGAGTGAGCAACATTACTGGGGTCGCGTGAAATAGT
+TATAGGGCTTATTATAACCTTGTCTAAGTATATGGTAAGCTCAGTCACGTCTTCTCGACG
+TGGAAAATATTGAACCGACGCCCACAGCGGTTATTGCATACTCTAGGGTGTATATAACTT
+TTGAAGTACTACAGAGACAGATCATTGAGGATAAGAGCCTAATGATCAGGACATAGTGGA
+TGCAAGGTCTAAATGGGGCGTTTGTACCTATGTCCCACTTGGCGAAAACTGTTGATGATT
+ACTTGCGAGGCAATTGTGGAGGACTGGAAGACGACAAGTATTTTAATGATACATTACCTC
+GTTTGAATTCACCCATACTTAATTGTGTGACGAATATCCCAGCGATATACGACCTGTCAA
+ACATTCAATCGGTAAAGGAATTTCATAAAGCGACTAATTGACATTGATCAACCACTGGGA
+CAACTACCTATATCTAGAAAACAGATTTAAAACTGCCCGTTTCTTATACGACTGCCAGAC
+CACACCTCCAGCGCAGCTTACCTTTAAATACAAGCCTAGCGCCCTCTATAACCCGACGCG
+AGATGAGCCTCCAGCCATCAGACACAGGCTAAAATTGCCTTTATCGGAACTTCAATGTCA
+GGTACACAAAAGGGAAAATCATTTGGAAATACTTTGATACTTATAAAGGATTCGTCCTTC
+TCTACGTCCGGAGACCCATCTCGCACCATTTATCGGTTTAGGCCTAATTTTGAAAGGACT
+AGCCACTATGACACTCATGAACGGCCTATTACCAACCATCGACTGAATGACGTACGGATA
+TCCGGATAGGACGGAACTCGTTTATGCTATGCTGGTAACGCAGCTAGCCCGGGGCATTAG
+TAGATGCGTCCCAAAACGAGTATGTGTATCTCGCACTCTTACAATTCTTGGTGAGAAGAG
+TGAGGTCTAATATCAGGAGTATGACTTGGTCCTCTACCTAGAGGATGACATACGGAGTTT
+TAGGTGGAGACAGAAAATTAGTATACTAGCCGAATGAAACTTAAATCTGAGACGATTGCA
+CATCATCCGCAGACATGCGATTAGCCACATAATGGGTTCGTTGAGATGTCTCAGACCCAT
+ACAAGTATCTCTATGATTAAGGTTAGCTAATTGTGGAGATCCTTGAAAGGAGACTTGGAT
+CCGGTGCATTACCTTCATGATGCTTCCGACCTATGGTGCGCGAGTTGCGCTGTATTTGTG
+CACCTAAGAGAAACGTGACACGCGTAGCAGCTCCTTAAGGCCCGGGTGGCTAGAATTTTA
+GATGAATACGGTTTGTAAATTTAAATTAGTCCCAGTCGGCGTCCTTACCTCTACATCACT
+AAGGCTATGCGGCGATTAACTTAATGTAGTGGGGACAGTAGTTGTTATCTCAGCCGTCTT
+AAGTCTGCTTGTAACAACCCCTTTAAGTTAGAGCTTGTGTTTTAAAGTCAGCTTTTAGCC
+ATACAAATAGTGCTTCTGTAGGTTTTGCCGATTACGCGTTATATAACTTTACTGTCCATA
+GTGCTTCTTCTTGTAAAGAATGAACGTTAACAATAGATAAACGTAGGAATCCACGCCAGA
+GTTGATAACTTAATGAGTATAGCCGGTTATACGTGGGGAATACACTAGGTAAGGTTAGAC
+TTAGGTGTTTATTGGCGGTGAATTTGGACAAACTAAAATCGTGGCCGTAGCAAGTAAAAT
+CGTTGTGAAACCTCAGACTATAATCCCCTGCTGGCTTGAAAGCGATCTACAAGCACTTCA
+CGCTAGCAAAGAACGGGGTATGTCCCTCCAATACTTTTGACGTGAAGTGATATGTTAGTC
+AAATAAAATTACACATCCTGGTTTTGACTGTTTTCAAACCATGAGTGTGCTAGAACTGTC
+AAATTAGATCTGCTAAGGCGAAAACTATGAAAGCTAAGACAGCTTCTATCGAGGGTTGTT
+TCTTATACCTTACCTATTAATTTTAGTTATAGCCGAGCTCAAGGAGAAATAAAGGAATTT
+CCTCTCCAGATACCCAGAGTGATGTCTGTTGACTAGACCAAGTAAAGAAGTGTAAAGCCG
+AGGCAACGGCTAGTACTTTGAATGACCTAATATAGTAACGAGGTTTTGTGATACACATAT
+CGTGATGACATCACATCTTGCAAATCCAGTATAGAGTAGTTGCAATTACTTTCTTGTGGT
+AGCACTTGCGTCTTACACGATTCAATATGACATCGGCACGTCGTGTAAGTCTCCAGGAGT
+TATATAAGTTGTAATAATATATGAATTGAGGAAGTCAGTTTGATCGCTAACATGCAACCC
+CAGATAATATATGAGAGGAAAGGAGATACGCACGATCATCTATTCAATTTATTGACTCGC
+CCATAACGATCGGAAACCTTAATCCTGTACCACCTTCATCGGCTTTCCCAGAAGGATAAG
+TGTTGGTCTAAAGAATGCGACCCTTTATAGTTGGGTCGTTCACTTGTTGATTTCTTGATA
+CTGAGCGATTAGGATAGCCGAATTTTCTCTTGCTGACAGTTGTGAAAGATCTACAGTTAG
+ATGTCAAGACGCTCATAGGGGATTCATTTATTTAGATTGGAGGCTGCCAGTTCTATTGTA
+GGCAAGACCCTTTGAAACTTTAGTGGAATTGCCGTGCTTGTGCTGTTAGCCTCAACGCTT
+GCGGTATTATCATAGGCTATTACGTGACCCGAGTGTACGGATATGTTTCTAATTAAAAGT
+ATTAGAAAGTTATGAATAGGCGGTCGGTCGTACCTTGGTAACGCTGGGCTATTTAGGAAC
+CTGCTTTGTCTTCGGTGTAGACTTGTTCACAACGTTGACCCGAAATTTAGTTCTCTCTAA
+CTATTTAGCTCCAGTTTTGTATCCACGAAAGTTCAGTTGGTATTTTAGTCATTTTCTGAT
+GAGCCGTACATGCAGCTATGTTTGTCCAACGGTATAACCGAATCAAACAAAGATCAGTCC
+TAACATCGATGAGTGGAATTGGTTGTACACTGCGACGCTCCTAAGTGGGGATGATGCAAA
+TAAAACGCCGGACAGCTCCGATCGCATCGTAAGTTACATTCGATAGAGCGAATATCAGCG
+AGCTTCTTCGGTACCTTCTGTGCATCATGGAATAGCGTAGGAAGGTATTTCTCAAGAACG
+TGCATCAAGTCAGAAATCTAGCATCACTCCGTCTACCGGTAATGTTCAACGGATAAAGCT
+CGGAGTTCGAATCGGTAAATATGTAGGAACGCTAGAGATTCGAGCAGTACGGTAGTGTAG
+CTATTCACTTAGGCAAGAACTATCGGGGACCACTCGCAGGATTCGATACATGATTCCTAT
+AGCATGATTGCGATGCTGTTGCACTATACTCGACGACGCATGTATAGACAATCGCAGATA
+GAATTTAGGTTGCCCCACTACACAAGTCTGTCTATTGTACACGTTGTGGCTTAGAATCGA
+TTACGACCGGAAATAAATATTTTATCTTATTAGCTGTACCTATCTGGCATTTCTAAGGAC
+AATTGATATGCCTACTTATCCAGTCCACCTCAGAATCCACGATCTTGGAATTACCTTTAA
+ACCTGCTTGAAACAGGTCGTGATTCAATCAAATCTATCTGAAGTCCGTGGAGCATTTTCA
+AAACGCTTTGATACCTTTCCGGTGACACAAAAGGAGGAACTAAAAGGGCACATACCCTAT
+GATATAAAACTCAATGTGTCATTAAACAAAGGTATAAGTCTTTCAACTGACTATGAATGA
+CCACTGCACGAGGAGGTTGTTAGAATGAAAAGCTGAGAAGGCAGTATCTCATCTTTTATC
+TGTAGTAGGGTTCTTTCGTCTAACTGACTATTTGAGGCATTATTCTCAGGCTTTCAGTTG
+TGTTTCGCTAACTAGACATACTACGTCTTATGTGAAGCTACGTCTGGTTGTTAAGTTTCA
+ATCGAGTAAACTTTGAAAACGACCTACAGCCTTGACGAAGCTCCCACAACTGTGATAACT
+AGTTCTTGCCCTGCACGCGCGGATTCTCACCTCTCAACAACCGCGTACCCTTCGCCCGTT
+GCGTAAGGCATGTAATCCGCGCTTGAGCCATACCCACCGGCCAGATTAATCAGTCTGAGA
+CGATACGCAGTTATAGCTGTAATGGGGAAATACCCCGGAAGTTTCTGATCCATTAAAACC
+GCACGGATCTCGACGCAAAACTCCATGTTCCAACAATACGGCTTTAGGCAGGTGCCAACG
+TCGACGCTGGCTAAGTAACTTACCACAGAGGATTCTGAGCTTCTTTGCGTTATTAGATGT
+TTCTAACCTTAAAATAGTAAATAGAATACTGTGGACCAAGGCATAAATGCCGTGCTGGTT
+AAAACCAGGTGCATTTAAAGCTCGATCAAGGCCGGTTTTGGGCTGTTTACTTTCTGAAAT
+AACTGCGATGCCGGCCCGAGGAAGATCTAAACTACCAATGAAATTACAAGTGGCTTCAAG
+GCCAAGCCATTTGAGTACTTGACTTATGTGAGTACTTTCCTAAACCATCAAGGGCAGGGT
+TTGTTGCAATCGTATGGGCGTATATGGACAATTGAACGAGGCAATGTAGATGTCCCTCGT
+GTAGGGGTATGCTAGCAACTTTTGTTATTTCTCCAAGAGCAATGCTCGTATAATCTTCAG
+ACCACTATCTTTCGTGGGTTTTCTCGTATTCCGGCGTCGTATAGTATATCACAAGAGCTC
+GTACATTCTAAAATATTAGTAATTTTCAAGGTGTAATTTTACACGATGTTAGACTCGTTC
+TATCACACTGCTTGGTAGTTTAATATGCTGTAGTACTTGAGGATCGTCGGTGGAACGGTC
+CTAGGATCTAAACTAGTGATTACGAACTCTTTGTGTAAAATATGAGCGTATTCGCACTCA
+GTTGCAATTAAATAGCTAAATGATCGGTAAATATCCGGGGTAAATCAACTTGAGTTTAGA
+GGATCCGTCGTTAAGAGATGATGTACATTCGTCGATTTAGGATCCTAACGTGGCGTTCGT
+ATGAAAAGAGCTGAACTAAATAGGAAAACGTTAACCAGTGACTACGCCCCAACCATTGCA
+AGATGTACCCCAATGATGGTTTTGGTATCGAAACTTCTCTTAATTGTGTTTCTTAAGTAC
+TGGCAAAATTCGAGCCGGCATCGTTTGTTGATAGTTGGGTCTAGGATTTTACACCTTGTG
+TTAGCACTGGGCCATTAATTCAATAGTAACAAGAATACTAATTACCAATGTGCGTGAAAA
+TCTCCTTGACTGGTGCAACGTCATTCACAGTCGGATCTCAAGTTATTAGGTGCTAACTGT
+ATACACCAAATTTAGGATAAGAGCCGGCTTAAGGCTAATCTAGACCCAATATTAATCAAT
+ATTTTACGTAATGCATCCACGCGGCGTGCTCTTGGTGAGCAGCTGGGATTAAACGCGTAG
+GTCGAACTATCGAGGGTTTACAAGAAAGCCAAGTGAAAATGAGACTATTGGCCATCGCGA
+GATTTGAATAAATGTCCCTTGGTACTTATACGTTGGGCGAACGGGGATGAGCCAGGCTGC
+TATCATCGTTTCGAGGTAGCTTCCAAGTGGATGAACTCAAAGACTGGCATTATGTGAAGA
+GCATAGCGCTTTTCCCCGTATTATGGCAGCAGCTGGTTACCCATACTTGTGATCCCCGTA
+ATTCTACTGTCATAGAAGGATGACCGAATCAATGAGCCGGGTGGTGTCCAAAAGCGATCC
+TAATCCTTGCTGATTTACCTTGAGCGGTCACGTCTGTCTCAGCGACATTCGCCTTGCGTT
+AGACTAGGCCGTAAGTAAGGAGTGCACTCCACAACGGCGTAATGCGTGCGGCGAGTAATG
+TATTAGCATGTTAACCACATTCTTGGCAGCCAGATCAAAATCACTTTTCATCTGGTTGTC
+TTAACAATCCGATAGAATCTAATGTAGCGATGCGTACTAGAAATAGTTACAATCTACAGT
+CTTGCTGCACTTGCTGCTAATAATGAGCGAGGACCTATCCCTCCTTAAGCAAGTTCCTTG
+TTCCGTGCGGGGAGCCCTGGCGCTAACTCTTTACATGATTAGTATCGCATGTTGTTACAT
+ATATAATAGATTTACATCATTTCAAATGCAATGATTCGTGCTCCTAAAATGAGTCGTATG
+AATAGCCACAGCGTACGGAAACCTGAATTGATTTGTAATTTAAAGATCAACTTAATCTGT
+GTTGATCAGAGCGAGCATTGCAGAATACCCCTGCATCTAGGAATCGGTGCCAGTGTAAAA
+GCCTGTTAGTAAAACCACGACTATGTAGTGTGTACCACACTCGGAGTGCGTCAAGCGAAG
+TCAAACATGGAAATGAAACCATGCGTACGGAAAAGACCAGTGATTTATAAGGACATTCAC
+ATAGACTCCAAAACTGACCCGATGGAGTCTACGCCGAACAGTTGGTATCAACATTTGTCT
+CGATTTTCTGTTGGGAACATCCATCCCTACCCACAACGTACTGGACCATAATCAAGGGTT
+TGGAACAGTACGCTCCTGTACTCAAGAAGTCCTTGCACGAAAGCAATAGGTTGAACTTCA
+TCATATAGGCGATGACAGTGCTATCAGCCGGACTGGCTGTTCTCGTAGAAGTCACTCGAA
+TCAATAAGATACGAATACTCCATCCTGTACGGGGACACTATATTATGCTAGCCGATTCTG
+TAAATGTAGTCTTTACCGAGAATTGCTGACACTGATTTGAGTGTAGGAGGTCCGGTATAC
+ACTTATCATCAACTTATTCCTACACTCGGTTTTCAATAGTTCGTAGCCCCAGGTTGCATG
+AATATTATACCTCGGATAACACCTACTAATCCGTCCACAGCCTAGCACTTACTGGCGATC
+AATGGAGCATGATGTACTTAGGGGACGGTATGAACATTCTTAACAGTTCCAAATGACCTG
+TAGCAAATACAATAGCATCTTTGTTTAAGCATGGTCCTCTGCGGTTTGAAATGTCGCTAA
+TCTAGTGATATTCCTTGTAAGCCACTGTTACTCTAATTTAGCCCACTCCAGAACGAGTTT
+GTGTCCATGAAAATGTAACTCCCCAGACATGCAAATACGCCTTATTGCTGAATATCGGAA
+CAAACAAAGTCGTTATCATCCTGAAATCGACGACAAGTACATATTAAAGGTTTGTTTGGC
+AAAATAGGTAGCAAGTAGGATGTTCATAACAATTAAAGCGCGTAACTCCTAAATGTGCAT
+TATGCGCCGAGGACCGATAGCTGACGCCGCTCTAGCTTCTATTGTTCCACTGTACGGTAC
+AAAGATTGAATACGGAAACAGAATTCGTCAATTTGTTGAATTATGTTCTATTCGTTTTAT
+CTGGTATATTTGTTACCTAACGTATTTAGGGAAAGTAGCTTCATGAAGAAATCTAATCCC
+TCGCGTGACGAGTTTGCTGTGATTATTATGCGACCTGACTCTTGTAGTGTGGAGTTCGTT
+GTCGTATCTGTACAAACTGCCGACACGTAGACAGGCCTGTCTAATAAACCAGGGACCTTT
+AAGCGTCTTTGTAATTAAGTAAGTACCAGACCATCCTTAGATCAATATGATGCGCAACCG
+GACCGGATCAAATGTTCCAAGCTCGGTAGGTTATCCTATAAGAGCCTCAGCAAAATGATG
+TAAATTGTCAGCGTGTAGTACGGAAACAGATCACGGTATAATCAAGTCTAAATATTTAGC
+CCCGGTCTTGGAATGGCCTTTTATGCAACCAATTTGTGGCGATTAATTTCTCAACAGTAA
+GACAGAGAAAGCTAGAGAAGCTGGTATTATTCTGCATGTTGTCGAACCAGCTGTGTACAG
+TCAACATTTTGCTATTTACTAAGTTGAAGCTTTCGGTTTCATGTGAAATATCTGGCCAAA
+TCGAATGCACCCTTTGACCGGCAGTTTTCATAAGCCACGTGTTTGCATTTCTCTTTAACG
+CATTGAAAATCACCGCGAACGACCTCACAACTGTCTAGCTTACCGATACGTTAGTGGTCT
+CCTCGCAGAATCGAACGAACCCGAATAATATGGTGATATTCTTTAACGACTGATTAGGGT
+CTTATTCGAGATTTTCAGTCTTTAAGCGTGAGCAGCGTGTTAATCACCTAGCAACATTAT
+AGAAAGGAGAAAGGTACGAGCAGTTTAAAAGTTACTTCTAATTTTAACTATTGTCCAACT
+AAGTGTAGATTATTTAGGCTTGTGTCCAAGTGAGATCATACTGTTTTCGTGTGATAGGTA
+TCCGCATCATAACTAGTTATATTAGCACCGTGTATGAAGAAACGGTGGACCGTAGCACAA
+CTCATTGTTATTTTGTCCCCTCTTGGTTTATTGGATCCTAGATTATATACGAATAGAGCC
+CCTTTCGCAACAGCATCAGAATCAGACCTGCGCTCTCGACTGATAATAGCAATTTGTTAA
+GAGCGGATAGACGCAGAAGAATAACATGATTTGTGCACTTAGTCCAGTCCAGATAAGAAG
+TTGAGGCATTGACTTAACTTTTCATTGTCCGCTTGCTATCCCCACGATCCTGCTAAACTA
+AAAGCTTTTGGCGCGGAAGAGCCGTTATGGAGGTTCGGCGAAATTGTATCACTAGCTAGA
+CCATTTTCTGTAGGCTTTTAGCTTGATCGACGTAAATTCGATTCTATATGGTAGAAAGGT
+ACGACCGTTATACGCTCACGTACAGCCTAAATTCACTTGTGGAGGCGATATAAGCTAATA
+AGCGGTTCATTTTGAGGAACCGTTACTTTGAGATTCACTTACAGCAACTAAGGTTGTGTT
+ACCGTTTCTTCTCAATTTACTGCTGGAGCGGCTATTATGCGTCCATCACCTTCATAGCCC
+TAGTCATCAAGCCCATAGAGGTATGTTCGTGTGTAAACGAATTCCAAGACTAATTGGTGG
+AAATTTCAGTTTGGATTGAATGAGGCTGATACTTCTATACACTTAAGGGTTCCCCGTAAG
+TATATTGCCATAAGGGAGTAGTAACACTAAGGTTGTGAAAATATTGCACGACGTAGGTAT
+TCTCAATTTCCTTCTAATTCTGTAGGATTTATGTAAGGCGACCGGGACTCTATTGTTTTG
+TCTCCGAGAGTTTCTTAATCAATTGTCAGGCTAGTAGATCAAGTGTAATAAATGATTAGA
+GGTCCTCATTTGGAGAATTTATCTATATCCTTGGTCGTCCACGCGGTATCGGAGTTGCTA
+TACAATAAGTTGGTTCCAGAAAGCGTCTTAATTACATACTCTTGGTTTATCAACGAGATG
+GTACCTAATACTCTCCTCTCAGTTCAGTAATAAGGACCGTTAACCGCACAATTGCATGTC
+ACCATGTAACACATCCTAGGTTCAGTGGTGCAAACAAATCAAAGTCGTTCGATGTCACTA
+AAACATTTTGCTTAGTAAGCTCACTTGGTTATGCAATATTCTTCACTTCCACAAGTGACT
+CTACTTAAGGCGACGCACCTCCCTACAATTCGCATACGCCAGGTACACACAGCATGGAAT
+AGTGTAGTACCTACTCATGCGCGAACGGTCGCCTGCAGAATTCCAACATGGAGGTCTTCT
+GGCCTAGTGCTTGTGCTTCCGGGATACACCGCACTCATATCACAGTTTTCCCTGGCACAG
+GTTATAGTCCGCTAGCGTGTTGAAGCTAGTTCACCCTTACTATGATCCAAGAAAAGCTTT
+TCGGCCGGCCATCCTTCACCATACGTTTCGGGGTCTTAGTTCATTATCAGAGTCGGTGCC
+ATTGTTCCATGTAGGTACGTGGAGGAAGTAACTCTTGATATGCTATACGTGTAGCATACT
+ATACTCCAGAATCCGTCGCAACAATCCCTTTATCTGCCCCTTTATTTACATTCCCCGCAT
+GTTTTGATTACTTAAATGTCGGGTACTGCTGGTATACACCGTATGCACCGAAAGACAGCA
+ACCCCTCAAAGCTTCGACGAGTTACCTGGTGTGAGACTATCAGCTTATAACCCTTACTAA
+CAGCAGTAGACGAATTCTCCTAGTATAAAGTCAATTACAGTTGACTAAATTCGAAGTAGC
+CGAGTGGGTCTCATTAGACCCTACATGTATCTCTTGTTTTCAAAACGGCTGTGAAAGTCG
+GAATATTATGTGAGTATGATTCACTCGGCGGAACACTCAAACTCGCTGAATCATTGATTC
+GCCGATGATTAAGCCGACCCTCCCAATTACCGCTGCAGCACTACAATCTCAATTTAGGTA
+TACGGATCTAGGTCCGTTCGTTACCAGTTACCAATACGCAACCGAGCTCGAAGAGAACAC
+AAATTTACGAAGCAAAATTCGGAATCAGGGTATCGTGCAGAATGGCAGGAGAGCTGGAAC
+TGTTGTCAGATTTCCCTCTAGTAATCGTACGAGAATATATTCTATGTCACACATTAACCT
+ATAGGTAAAGCCTCATTATACTCCGTTTAATGCAGACTTATAGGATGCCATGCAACAAGT
+CTAATCGTCGCGAGGACACTCAAAAGGATCAGTGGAAAGTAACACTTTGTGGTTCAATTC
+AGAAAATCAGCTTGTTTGTACCTACAAGTACAAAACTTGGAGTGGTAGAGAGGTCAATCG
+ATTAAGTTAAAAGGTTAACGCATGCGCCTAGTCATTAATTGGTTGCTGCGCAAAATAATG
+CATGCGTAGTAAATCCCAGCCCCAAGTCGAATAGATTATTAACGCCGGAAGCAGCCATCT
+GCGGAATCTTCGTTGTGTCGAGCGTCAAACGTTGCTCCATGGCTCCCTCCCTTTATCGGG
+TTCTCTCATTGAGTCCAACTAAACATCTACAAAAGAACTTTGTTATGTGATATAGCTTAG
+GTCTAATCTTAGGCTGACATGCATAACGCTTTGTCGAGGTCTATTAACATAGCCGAATGC
+ATGCAAGCTTTGATGGATATTAACTTCCCAATGTCTAAGATTAAAGAAGAGGACACCCAT
+TATGTCAATCATCTAGCTAAATCGAGCTGCGAGCCGGAGAGTAAACAGTTTCCTTTTCTT
+CGGCGGTTATTTGAAAATTCCTTTCTTATGGCAGTGTTTCGAGCGAGCAGTATATTAGAC
+CCAACCTCGATAATCGTTAATCACATAGCGACTATGATAGTATCATTACCAGCAGCATAC
+ATAAAATTGTAAAGTGTGTTACTGTTTGCGTGGGTGATTATAGTACAGTCTTTTGCAAAT
+CTACGGCCCTGACAGAACTTCACATTAAAGGCCATCCACAGAACAATGGACAACGTATAA
+AACCTAAAAGGATATCGTTTTCCTGGGGTTTTCAGTTGTTTTAATGACCGGTAAATTTTC
+TTACCCTATTGTGTTTCCTTACACAGAAATATCTGAATATTGAGGTACCTGTGAACATTA
+TCATTCATACAACATATCCTATCGCCCATCCTGTGCGGCGACTACTCCAGCACTCACTAA
+TTGTTAATCATCTCATACAACTCGTCAGAATTAACATTACCGCAAACTGCTTACTAGCGC
+AATCAGGTCAAGAGGAGGACGGCTTTGTCACTTAAAAGAATAAGGTGTAGCTGCATAAAA
+CAATGTGTATCTTCTGAGCTTCACAGCCGTGGGCTATCTATGGTTCCGGTCCTGTTGATT
+GCTCCCGATGTTGAACAATACTTTCCACTTTCCGTGACAGAAACTTTAGAGCAAGAGGTC
+AAACTTTACCCAAGCCCATAGGTAGAAGTTACGCGCGCATTGACGTTTGATCAAGGGACA
+GCTGTGAATATCCGTCCCACGTAATCGTGACTTCTCATCAATATTATATTACTGCCGCTA
+ATCAACAACTTCCTTGTTTCGACTGAAACGATTTTAGTCAAGTCGAAGACCTCATACGAT
+AAGATTTGCAACATGTCTAAAAGAGAACGGGAACTGGCAAAAGGCTTGGTAGATCCGTCT
+ATAGCGTAAAACTGATTAACCCATTAGGTCTGAATAACTTTACACAACCCTCCGCACTGT
+TAAATGACGGGCTTTGCTCTGTTTTGACACATCAGCTAGAAACTCGCCACGAAGGCATAA
+GGCTCCCATATAGCGTAGCTGACAAACATATGAGGTGGCTGCATAAACTAAATTGAGGCT
+CGCGTTCGGATACTTGCCCATGTAGCAAGTCTTGGCAACCAACTATATAATCATCACGAA
+TTGAGTGCTAAAGACATGCGAACAGTTGGGGCTGCTATATAGTATGACAGATATAGAAAT
+TTTATAAAATGTCGTAGGAATCTGGAGGCCAAAATCATTAGACACTCTTGTAAAAGGTAT
+GGTAATGTGTATGACCTCTTGGCATAGTGTCCAATTATTCTCGGTTTACTCTCAGAGACA
+CAGTCATGTAAAAGTGGTGAGGAATTACCGCCGTGTTTTGCCAACCAAGAAGCATTGAAC
+AGTAGATCAATAATGATATTCGGTAGCGTATTTACGCTTTGCGGTTTTCAGAAGAAACTA
+TCACAATTGAAACTCTATTCTTCGCCTCATTCCGTACCGTTAGGAATGACTCGAATCGTA
+CTGTCTGCCGCGGGGCATAGTGTATTGCTCCCCACCAGGTTCAGATAGTTCGAATCAGTG
+CGCTGTACAATTGCCTTACGTGTAGATTTGCATCACCGCTTCACGTAGGCACCCAGAGTG
+CTCACTAAAGCCACTAGAGAGATAGAGTTAGAAATTAAGTATCGGTTACGCCCCTCAGAC
+GACATAACTCACTTCTACCGAATATCCTTTCTATCTTGGATACTACTAATGCTTCCGTTC
+ACGCCGCAATCATGTGGATCCTCCAGTAAGCAGGGTGCTGTCATGACTATACAGTACGGA
+TCCGTAAGCATTTTGAGGATGATAACATAGGGTCGGTTACTGTGGATTTCCGTTACTTAG
+GAGAGCAGCTTTAGCTGACTTTGCTGAGGCTGCGCGTGTTAGACAGCAATTTACGAACGG
+CGCACTCTATAGCAGGCACTCACAGTGGACCAGTAGTCCTATTGCAAGAGTTCATTATGG
+AACATTTTAGTCCTCTATCACACGGACCATTGCAGTAGATAACTCTAATCCTATGTCTTT
+ATTTGGTTGCCTGGAACCCCTTACCACTAGACACCCCAATAAGTAATCTTGCTTCCATGT
+CGAATTGATACTCATCGAAAACATATAAAACTAATTATGCTTGTGTTCCTGTGGTCTGTT
+ATATAGAGGCGCCCTATTGGCCGCGGGATAAGGATCATTTTGGCACACTAACGGGATCCT
+AAAACTTTATCTTTCAACGACTCCTACATGCCTTTTAGGTTAGTACGCGAATCGCCTAAC
+AAGCCAATGGGTATTGGAGAATTAGACAAAATGGTTGAGGAATAAAGTGGCGCAGGATTT
+TGTCCGAGAAAGGGATAGCAAACGGTCGCAGGCAGGAGTAACAATTTTCAACCGACCTTA
+ATAGAGCTCAAAAGCTACCGGAGAAAGCTTCGTCTATGCTTAATACATATGCTAACCTAT
+GAATTTCGTAAGCGTAATATAAACTTATCAGATATTTTAAAAGCATCCTATTCAGTCGTA
+CTTTTGGCAGGAAAGGTCAGGCGAAACAGAGTCTCCCTGCGGAGGCTTTTAAAATAAATA
+GCGGGCCTAGCATCGATTCTAAAAGACGACCCCAGGTGCGTAACCGTGCCTCCCCAAGTC
+TTCTTTTAACAATTACCTAGAGAACGGCGTCAGTCGCGAATGACCTTACGAACGTTTACG
+CGGAGCCGAGTAAGATTAATAACTGCTTATTGATTTGCAATCGTTTGATACGGGTGGCCC
+GAAGCTCAATATCAACATAAATAAAATTAGTCGGAATGGTCGCTTAAATCGCGCGCTGTC
+ACTGTCTTCATATGAGGGAGTTGTGTAAGACTGCATTGATATATAGGTATGATTTCGGTT
+TAGAACTTTGTCTGTTAGCAACTCCGCATGATTGAAGGAAATCCTCGTTGGTAAGATCTC
+TTTAGCATTTGCACAGCTGACTCTAACAGCATAGTATGTGATCGTATTATGTCTGCAGTT
+TGTAACACAGTGGGCGGCATGGATGGTACTTAATGGACGTAATGAGCAGTAGACCACCGG
+TGTTACCTAACCATCATTAGAGTAGGCGAGATTGCGCTTGTACGACTTATATATAAGGGT
+AACCGGAATACCGTTCCTCTTATCAACAACAGTTACTGGTCTTAATTCACATCGGATATT
+GCGATCGCCAAGACTATCCCGTAAGTCGTAAGCTAACCAACTAGCGGTTAGGTTTATTGA
+GGTTTTGATGGGAACTTCTCAGACACGTCGTCAACTACCTAATTTCTTGGATGGAGCTAG
+GCTAACTGTCCCAGAACTTTCTGACACTCGAGATCCTCTAACTAATTGGAATCCAGGAAT
+TCCCTTATTGCATCGCCACAAACGACCATAAATTACAGCATGTTTCATTGTCTAACGTGC
+CTATCCACGAAATTGAATTCGGTTCACATTATATATCCCCTTCTACCGCTAATTTAATGT
+TTAACGTTGATGGGGCAAAGCACATTCGAGAAGTACCGAAAAGTCTCAATCCAAAGACCG
+GAGGAACTGGCTTCGGTAAGAATCGCGAGTATCCTTGGATGCCCTGCCTGATTATAACTT
+GTTCCATGTAGATAGGCGTAGCTAATTCATAGCAATACAATAAACGAGTCAGAACTGTAG
+TCTAACATAACAGCCTGCTCTCCAGGTAACAGCCCATTATTAGATATAGTATCACGATCG
+TCGGTTGTATTAGTGGTGATAACTATCGATTCTGCCACTAATAGAATGTGCAGAAATAAA
+GTATCTGAAAGAAAACGAAGTCACAGAGAATAAAGCTCACTTCATAAAAGTCGGTTGCAG
+TAGACGCATATCAATTTTCCCTGCTGCATTTTAGAGTTCGGAATAGTTAAACATAATACT
+GGAAGCGCTTCCGGCAATCAGGAATAACCCCATATAAACCAACCTTTGTTGCTATTGCCA
+GCGCTATTCTCGTCAAAATTTCTCCCTATGGTCTTCACATCATGCATCACCGGACCCTTT
+GATAGACGATGACCCAATTACAATCACTCCACGGATGAGCATCCCATTTTATACGAGGCC
+CACTGGAAACAATTGCAATCGACGTGACCAAGTAGAGGAGCGTGCTCGAAAGGTGATGAT
+TGCCGAATTCTAACAAGGATACTATAAGCCACGGAACGCTGACGTTGAACAGACCTGGTC
+TCCTGGGCACTTCGCAGCACCTCAGTAGTAATTCCGGTAGATTAGGACTTAGCATTCCGT
+TGATCTTACAGGATTTATAAATAAGGAGATCTGTCTTGTTTAATTAGGAGGACGCTTTTC
+CCGCGTAAGTACGGGAAAACGTTCTTCTGATTTTGTTTGCCACTTGACATTGTAGCTGCT
+AGGAGAAGGGATAATATCCGCGTTTTCTTTTACCGTAACGTCGGAGCATACCATGGTAAT
+TGTCCGTGTCAAAACTAGATATCTAGGTTGCAAAATTCAGTCAGTAAGTCCTGAGGCCTT
+CCGCATTATTAATTCTACAGACATATGAATTTGCTCCACCGGCTAGCACAGTCAACTCAA
+CCCACGATAGGGGAACGAAATCACAAATAGGTTCACATGGTCAATACAAGGCAAACCATT
+CCCCATAACTCACGCACTGACGGTAAGGCCATTTCAGGTCAAGCGGTGAATGCTGTGAAA
+AGCAGCTCGACCACCTGCCGTGGATGGCAAACCGATAACAAAGGACTCCGATACTTCATT
+TGTAAACGTTTGCAGTGCTGACGTAACTCATATCTACAGTCAAACCGAATGGTTTGATCG
+GCATTATGTAAAGGAATCGACACACGTTGCGTCTTCTAGATTATTACACACCTGTCTGCG
+ACGGATATAGGTAAATAAGTCAGCCTCCACTCTGCAGAAGATACTAGAAACGTATCAGTA
+ATAGCTATCAGGATTTCGCCATCCTCGCACTGTGCCCGGATATCACAGCAAGATTCTAGG
+ATGGCACTTGTGTGACTAGAGGTTTTACTCGTTGAGCCATTCTTACTATAGGCATGGGAT
+TACAATGTGCATGTTTGTGATGTTATCCCATATCTTGCATGTATCAGCCTACCAATTAGA
+CATATGACTAGATGTAGTCGATCAACGCAAGGGTGCGGACTTTGATTCCTTTTGAATTGA
+AGTCAACTCAGATGCTCCTTAAGACGTTTTACAGTAGGTATTTTGTGGTACAAACCAGAA
+CCAGTGCCAGTCGGTAGTTATTGTAGTGTGTTCTTAATACATATTTGGTATTGGAGTTTC
+TAACATTTAAAAGGAGCCTATTACACTTACTTAATTTGCGTCTATATTTCTGTTACGATA
+TGTCGTCTGTCGATTTTACGAGTTTCATACGTGCGGGTTCCCTGTTCGCAATGGGCCCCT
+TGCTAATGTCCCGCATCTTTAGGATGCAAACTTACTCACGCCTCCTTTACCGAGACTTGG
+TGGGAGAGAAGACTCCTGTAGAATCCCGATCTGAATGGTTTCAGTGTAAGGGTCCCTTCT
+AGCCATATCATTGAATATTCTTGTACTTTAAGTAACTCGATCCTACCAGTACAATTCTAG
+GTTTGCCTTATAGCCGGAATGAGTATCAGCGTCATTCACCCCGGCCGGATATTATTTGCA
+ATGTCAGGGACACCCAAAATAGACCGGTTAGAAGGCATATGCGATGAGAGTTGGTGCCTA
+AATTAAACGATACAATTGATATGACAAGGACTATACGATGAAATCCATGAGATAATTATC
+GTAACTCGGCCAACCTAAAACCGTGCAAGATAGGAGCGGTCCTAGAAGTACTATCGACAC
+CTTAAATACTCACTTGAGTTTTCCGATCCTATAGTGCCAATCATATGGCGCAGGAATATT
+ACAAACTAAGAAAGTCAACAAAAGATGTAAATTGCAACACCTGGCATCGGTGGGGTTGTC
+CCCTTAAACCCTGAAACCAACTGTTATGCTCAACATTATATCGAGGCTAAAACGCGTATC
+GTGGCACATTAATAACGATCACATAAGCTTTGCGGCTAGCAATAATAATTTAGGACAGCT
+TAGATTTTGACCCGTGCTAATCCTCAGTATGGAGTAATTTTACGGATCTCTCGTTGTAAC
+CGTCCTCAGTCGTGTACATTTTAACCTTTGTAAACTAGTTTACGAACGAGTATTTAGAAG
+GTCCGTACTCTCACCCAACTGACACATTGTACTAGCTCAAGATCGCAAACACTAAGGGTG
+TGAGTCGCGGGATAGCGCTTAAATATGACTGCTAATGGTCAAGAGCACGCGCATAATATT
+CCACTGGTTCTAGGTCACCACTACGGTCAGACGTTGACCTGCATGCCCTACATCCGGCAC
+GGGCTACTAACGGCCTAATATTCTTTGAGCCATATCCATACTCGTCTATGCATATTCAGG
+TATACGGCTATAGTGCGTTATTAACTTCGTCGTGATTAAATCCTTTAATTGTTCCATTAT
+AAGTATACATGCTTAGATGCGTGAACTTGAGGGATATCGTTGCTCTAAAGTTGTCTTATA
+GACTAAATCTAAACAAGCCGTGCAAGACTACTTAAATTACAAATCTTACAGACATCTCGC
+CACTGCGCTAACACTAACAA
diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb
index 0719bfe..5d6da8b 100644
--- a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb
+++ b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb
@@ -1 +1 @@
-19800 1 0
+20000 1 0
diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann
index 01f4a1e..a633aab 100644
--- a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann
+++ b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann
@@ -1,3 +1,3 @@
-19800 1 11
+20000 1 11
0 chr_test (null)
-0 19800 0
+0 20000 0
diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt
index 7b2e7ab..9ed4852 100644
Binary files a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt and b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt differ
diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac
index dd39245..d99d805 100644
Binary files a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac and b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac differ
diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa
index 76e12a6..b19e11c 100644
Binary files a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa and b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa differ
diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa b/pipelines/nf-atacseq/tests/data/chr_test.fa
deleted file mode 120000
index 60a78a3..0000000
--- a/pipelines/nf-atacseq/tests/data/chr_test.fa
+++ /dev/null
@@ -1 +0,0 @@
-../../../../tests/shared_data/chr_test.fa
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa b/pipelines/nf-atacseq/tests/data/chr_test.fa
new file mode 100644
index 0000000..182b3f7
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/data/chr_test.fa
@@ -0,0 +1,335 @@
+>chr_test
+GAAAGGCATAATAAGTAGCACGTACTAACGCGTCTTCGCTGAAAATAGTTAACGGAGATC
+GTGCGAATAACCTGTCTAATAGCTACTAAAGCTATCTCCAGGTAGATTCCATACCTGGAG
+TGTATACCCTACCATAGGATTACTATGATCGTTAATGAAAGACCAAGAACTTGCAATTTG
+GCATTCAATTAACTCTACCCCATATATCAGTTCCTGATCTTGAGTCACAAGGAACAGGTG
+TCAGATGTTGATCCAAACCCTACGGCGACTGCAAATAGGAGATCCATAAGGAGTTAACCT
+CGAATCCCCAAAGCTGACCCCAGTCCCCAGACCACTTCAAATCCAGTCTCACACAATGTG
+TTTAGACTGGGTAGTTCGTTTTATCGCGTTAATTGTTATCCAATGTCGGAAAATCATGAG
+TAGAGGATACTAACTCGCGCCGGTCTCGTAAGGTGAAAATTAAGGATTTATCGGCGTATG
+CCTGTGAATATGTATAGATTAGATATATGTGCAAATCTGGGGCAAAAGTAGGAGGACCAA
+TGCTGAGGAGCGACGTTTTCCACGCGTGCACTTTGACCACATGTACAACTCGAACAGTGG
+GTCAAGTGTTTGTGAAAAGGAATGCTAAAATTACTGACTCTTTAACTCTAGAATTCAGGC
+ATTTCCTGGGCAAGAAAATGTAGGTGCGGGCTTGCCAATGTAAGGCTTAATTAACCTCCG
+AAGTGCAGGTATTGCTGACCTTTTCTTCGTTATGGGATCTGACGAATTACCTACTGTACC
+ATTCTCCACTCTCATGCTATTTTAAGTAGAGGCTGCCTATGCCTTTGTGATCTGGCCCTT
+GGCAAGCCGTAGCTGCACTTATTCAACGACATAAACCGATTGGTACATTATTCTCGATGG
+AGTCACGTGGGGCGCGTTTGATGAATCTCCACTCGTACACCGCCCTCATTGGGCCAAACT
+CAACCTTACTTACATGGCTGATATTCATTCCAGTCTTAACTGGGAGAATAGAACTACACA
+AAAGAAGATAAGTGTGTATCAGCTTCATTGTCAAGAAGTTCTTGAGCGGGATATTTATGT
+ACACAAGCTGTTATGGCGCGTTAGAACTGTCCCCGGACCAAGTTACTTAGAGATTTGGTA
+AAGGAGTTAGATAGTGATGATAAATAGATGTCCACAACCTTGTAATCGCCACAGTTTTAT
+ATCTGCCAAAGGGAGTGGTCTGGCGAATTAATTTACACCGTTTCCTCGTTAAACTGTAAT
+TTATATTGGGAAGAGGCCTGATCGTGCTTCCGCGGTGTTTAACTAAACAGCCATGATACG
+CATTGATAGTTTCTCTCCTATTCCCAAGCTACCAGACATCATTAATACTACCGCAACGAG
+TAAACTGTAATATCTACGATAATGATGACATTCTTTGCAAGTGGGGTATCAGTGGCAGTT
+AAACTCTAGATGCTATCGCTCTTCTCGAGCTTAGTGTGTACTCACCAGTCGCAGGAAGTT
+TGGCTGTTTGAAGTTTAATCACTACTCTAGCTTATCCGCGCTAAACATTCTGATCGTGCA
+CGTGTCGGACTCAAAATGTCCCAGTATTTACAGGGCTCAAGTGGTGTGACTCGTAATTAG
+TGGCCATTTTAAATTGACATTTGTTTTACTCATATCGTTCTCGGTTTATATGACGACTCT
+CGATTAATTTGTTGACGTTCTGTCTGCGCGGATCGGTGGAGGCAGACAATAGTGCCGAAA
+TGTTACTTGGGGAATACTAAGTTCCAAGTCCCCTAGTTATATCGAGGAGTGATGAGATCT
+CCTACTGCATTGCCACACCTTCCCCATACACTTCCTAAATAAGCTGACCCTAGAATAAAG
+CTGAGGAATTTCGTACTGAAAGGTTTTGAAGCATGATATTTATTAAGATCTTTATCGTCG
+TATACCACATGGCGTCTCCTGGTGTATTGAAATGTTCATACGACTGCAAAAGGAGTAACA
+TACGTGGTTAGATACCCGTTCCGGTTATGTCTGCCTCTAAAGCCAGAAGGCAGGTTCTCA
+CCACTAGACTGTTTATTACTCCTTTAAACTTATTCTGGACCGTACAGTCTGAACCGGTCA
+GATTGGGTTATATACACGCCAAAATCATTTTCAGCGCGATTAAATTGTCATAACCTAACC
+TACTCGGGTAAACTCTGACGTCATCTGCTGAACTTCTGGAGCGAAGGGTAATTAAATTTA
+TAGTTTTACCCTATATTATTTAAAGGAATCTGCTTCCCATCATCCTGTTATCTATGTGTC
+TGTTGCCTTGAGGGACTTTCGTCTCTGAGGTGACGTGCTAATTGTTTGGTTAATCACATT
+ATTTGTTCACGGACAAATCATAGTAGAGTGAGCAACATTACTGGGGTCGCGTGAAATAGT
+TATAGGGCTTATTATAACCTTGTCTAAGTATATGGTAAGCTCAGTCACGTCTTCTCGACG
+TGGAAAATATTGAACCGACGCCCACAGCGGTTATTGCATACTCTAGGGTGTATATAACTT
+TTGAAGTACTACAGAGACAGATCATTGAGGATAAGAGCCTAATGATCAGGACATAGTGGA
+TGCAAGGTCTAAATGGGGCGTTTGTACCTATGTCCCACTTGGCGAAAACTGTTGATGATT
+ACTTGCGAGGCAATTGTGGAGGACTGGAAGACGACAAGTATTTTAATGATACATTACCTC
+GTTTGAATTCACCCATACTTAATTGTGTGACGAATATCCCAGCGATATACGACCTGTCAA
+ACATTCAATCGGTAAAGGAATTTCATAAAGCGACTAATTGACATTGATCAACCACTGGGA
+CAACTACCTATATCTAGAAAACAGATTTAAAACTGCCCGTTTCTTATACGACTGCCAGAC
+CACACCTCCAGCGCAGCTTACCTTTAAATACAAGCCTAGCGCCCTCTATAACCCGACGCG
+AGATGAGCCTCCAGCCATCAGACACAGGCTAAAATTGCCTTTATCGGAACTTCAATGTCA
+GGTACACAAAAGGGAAAATCATTTGGAAATACTTTGATACTTATAAAGGATTCGTCCTTC
+TCTACGTCCGGAGACCCATCTCGCACCATTTATCGGTTTAGGCCTAATTTTGAAAGGACT
+AGCCACTATGACACTCATGAACGGCCTATTACCAACCATCGACTGAATGACGTACGGATA
+TCCGGATAGGACGGAACTCGTTTATGCTATGCTGGTAACGCAGCTAGCCCGGGGCATTAG
+TAGATGCGTCCCAAAACGAGTATGTGTATCTCGCACTCTTACAATTCTTGGTGAGAAGAG
+TGAGGTCTAATATCAGGAGTATGACTTGGTCCTCTACCTAGAGGATGACATACGGAGTTT
+TAGGTGGAGACAGAAAATTAGTATACTAGCCGAATGAAACTTAAATCTGAGACGATTGCA
+CATCATCCGCAGACATGCGATTAGCCACATAATGGGTTCGTTGAGATGTCTCAGACCCAT
+ACAAGTATCTCTATGATTAAGGTTAGCTAATTGTGGAGATCCTTGAAAGGAGACTTGGAT
+CCGGTGCATTACCTTCATGATGCTTCCGACCTATGGTGCGCGAGTTGCGCTGTATTTGTG
+CACCTAAGAGAAACGTGACACGCGTAGCAGCTCCTTAAGGCCCGGGTGGCTAGAATTTTA
+GATGAATACGGTTTGTAAATTTAAATTAGTCCCAGTCGGCGTCCTTACCTCTACATCACT
+AAGGCTATGCGGCGATTAACTTAATGTAGTGGGGACAGTAGTTGTTATCTCAGCCGTCTT
+AAGTCTGCTTGTAACAACCCCTTTAAGTTAGAGCTTGTGTTTTAAAGTCAGCTTTTAGCC
+ATACAAATAGTGCTTCTGTAGGTTTTGCCGATTACGCGTTATATAACTTTACTGTCCATA
+GTGCTTCTTCTTGTAAAGAATGAACGTTAACAATAGATAAACGTAGGAATCCACGCCAGA
+GTTGATAACTTAATGAGTATAGCCGGTTATACGTGGGGAATACACTAGGTAAGGTTAGAC
+TTAGGTGTTTATTGGCGGTGAATTTGGACAAACTAAAATCGTGGCCGTAGCAAGTAAAAT
+CGTTGTGAAACCTCAGACTATAATCCCCTGCTGGCTTGAAAGCGATCTACAAGCACTTCA
+CGCTAGCAAAGAACGGGGTATGTCCCTCCAATACTTTTGACGTGAAGTGATATGTTAGTC
+AAATAAAATTACACATCCTGGTTTTGACTGTTTTCAAACCATGAGTGTGCTAGAACTGTC
+AAATTAGATCTGCTAAGGCGAAAACTATGAAAGCTAAGACAGCTTCTATCGAGGGTTGTT
+TCTTATACCTTACCTATTAATTTTAGTTATAGCCGAGCTCAAGGAGAAATAAAGGAATTT
+CCTCTCCAGATACCCAGAGTGATGTCTGTTGACTAGACCAAGTAAAGAAGTGTAAAGCCG
+AGGCAACGGCTAGTACTTTGAATGACCTAATATAGTAACGAGGTTTTGTGATACACATAT
+CGTGATGACATCACATCTTGCAAATCCAGTATAGAGTAGTTGCAATTACTTTCTTGTGGT
+AGCACTTGCGTCTTACACGATTCAATATGACATCGGCACGTCGTGTAAGTCTCCAGGAGT
+TATATAAGTTGTAATAATATATGAATTGAGGAAGTCAGTTTGATCGCTAACATGCAACCC
+CAGATAATATATGAGAGGAAAGGAGATACGCACGATCATCTATTCAATTTATTGACTCGC
+CCATAACGATCGGAAACCTTAATCCTGTACCACCTTCATCGGCTTTCCCAGAAGGATAAG
+TGTTGGTCTAAAGAATGCGACCCTTTATAGTTGGGTCGTTCACTTGTTGATTTCTTGATA
+CTGAGCGATTAGGATAGCCGAATTTTCTCTTGCTGACAGTTGTGAAAGATCTACAGTTAG
+ATGTCAAGACGCTCATAGGGGATTCATTTATTTAGATTGGAGGCTGCCAGTTCTATTGTA
+GGCAAGACCCTTTGAAACTTTAGTGGAATTGCCGTGCTTGTGCTGTTAGCCTCAACGCTT
+GCGGTATTATCATAGGCTATTACGTGACCCGAGTGTACGGATATGTTTCTAATTAAAAGT
+ATTAGAAAGTTATGAATAGGCGGTCGGTCGTACCTTGGTAACGCTGGGCTATTTAGGAAC
+CTGCTTTGTCTTCGGTGTAGACTTGTTCACAACGTTGACCCGAAATTTAGTTCTCTCTAA
+CTATTTAGCTCCAGTTTTGTATCCACGAAAGTTCAGTTGGTATTTTAGTCATTTTCTGAT
+GAGCCGTACATGCAGCTATGTTTGTCCAACGGTATAACCGAATCAAACAAAGATCAGTCC
+TAACATCGATGAGTGGAATTGGTTGTACACTGCGACGCTCCTAAGTGGGGATGATGCAAA
+TAAAACGCCGGACAGCTCCGATCGCATCGTAAGTTACATTCGATAGAGCGAATATCAGCG
+AGCTTCTTCGGTACCTTCTGTGCATCATGGAATAGCGTAGGAAGGTATTTCTCAAGAACG
+TGCATCAAGTCAGAAATCTAGCATCACTCCGTCTACCGGTAATGTTCAACGGATAAAGCT
+CGGAGTTCGAATCGGTAAATATGTAGGAACGCTAGAGATTCGAGCAGTACGGTAGTGTAG
+CTATTCACTTAGGCAAGAACTATCGGGGACCACTCGCAGGATTCGATACATGATTCCTAT
+AGCATGATTGCGATGCTGTTGCACTATACTCGACGACGCATGTATAGACAATCGCAGATA
+GAATTTAGGTTGCCCCACTACACAAGTCTGTCTATTGTACACGTTGTGGCTTAGAATCGA
+TTACGACCGGAAATAAATATTTTATCTTATTAGCTGTACCTATCTGGCATTTCTAAGGAC
+AATTGATATGCCTACTTATCCAGTCCACCTCAGAATCCACGATCTTGGAATTACCTTTAA
+ACCTGCTTGAAACAGGTCGTGATTCAATCAAATCTATCTGAAGTCCGTGGAGCATTTTCA
+AAACGCTTTGATACCTTTCCGGTGACACAAAAGGAGGAACTAAAAGGGCACATACCCTAT
+GATATAAAACTCAATGTGTCATTAAACAAAGGTATAAGTCTTTCAACTGACTATGAATGA
+CCACTGCACGAGGAGGTTGTTAGAATGAAAAGCTGAGAAGGCAGTATCTCATCTTTTATC
+TGTAGTAGGGTTCTTTCGTCTAACTGACTATTTGAGGCATTATTCTCAGGCTTTCAGTTG
+TGTTTCGCTAACTAGACATACTACGTCTTATGTGAAGCTACGTCTGGTTGTTAAGTTTCA
+ATCGAGTAAACTTTGAAAACGACCTACAGCCTTGACGAAGCTCCCACAACTGTGATAACT
+AGTTCTTGCCCTGCACGCGCGGATTCTCACCTCTCAACAACCGCGTACCCTTCGCCCGTT
+GCGTAAGGCATGTAATCCGCGCTTGAGCCATACCCACCGGCCAGATTAATCAGTCTGAGA
+CGATACGCAGTTATAGCTGTAATGGGGAAATACCCCGGAAGTTTCTGATCCATTAAAACC
+GCACGGATCTCGACGCAAAACTCCATGTTCCAACAATACGGCTTTAGGCAGGTGCCAACG
+TCGACGCTGGCTAAGTAACTTACCACAGAGGATTCTGAGCTTCTTTGCGTTATTAGATGT
+TTCTAACCTTAAAATAGTAAATAGAATACTGTGGACCAAGGCATAAATGCCGTGCTGGTT
+AAAACCAGGTGCATTTAAAGCTCGATCAAGGCCGGTTTTGGGCTGTTTACTTTCTGAAAT
+AACTGCGATGCCGGCCCGAGGAAGATCTAAACTACCAATGAAATTACAAGTGGCTTCAAG
+GCCAAGCCATTTGAGTACTTGACTTATGTGAGTACTTTCCTAAACCATCAAGGGCAGGGT
+TTGTTGCAATCGTATGGGCGTATATGGACAATTGAACGAGGCAATGTAGATGTCCCTCGT
+GTAGGGGTATGCTAGCAACTTTTGTTATTTCTCCAAGAGCAATGCTCGTATAATCTTCAG
+ACCACTATCTTTCGTGGGTTTTCTCGTATTCCGGCGTCGTATAGTATATCACAAGAGCTC
+GTACATTCTAAAATATTAGTAATTTTCAAGGTGTAATTTTACACGATGTTAGACTCGTTC
+TATCACACTGCTTGGTAGTTTAATATGCTGTAGTACTTGAGGATCGTCGGTGGAACGGTC
+CTAGGATCTAAACTAGTGATTACGAACTCTTTGTGTAAAATATGAGCGTATTCGCACTCA
+GTTGCAATTAAATAGCTAAATGATCGGTAAATATCCGGGGTAAATCAACTTGAGTTTAGA
+GGATCCGTCGTTAAGAGATGATGTACATTCGTCGATTTAGGATCCTAACGTGGCGTTCGT
+ATGAAAAGAGCTGAACTAAATAGGAAAACGTTAACCAGTGACTACGCCCCAACCATTGCA
+AGATGTACCCCAATGATGGTTTTGGTATCGAAACTTCTCTTAATTGTGTTTCTTAAGTAC
+TGGCAAAATTCGAGCCGGCATCGTTTGTTGATAGTTGGGTCTAGGATTTTACACCTTGTG
+TTAGCACTGGGCCATTAATTCAATAGTAACAAGAATACTAATTACCAATGTGCGTGAAAA
+TCTCCTTGACTGGTGCAACGTCATTCACAGTCGGATCTCAAGTTATTAGGTGCTAACTGT
+ATACACCAAATTTAGGATAAGAGCCGGCTTAAGGCTAATCTAGACCCAATATTAATCAAT
+ATTTTACGTAATGCATCCACGCGGCGTGCTCTTGGTGAGCAGCTGGGATTAAACGCGTAG
+GTCGAACTATCGAGGGTTTACAAGAAAGCCAAGTGAAAATGAGACTATTGGCCATCGCGA
+GATTTGAATAAATGTCCCTTGGTACTTATACGTTGGGCGAACGGGGATGAGCCAGGCTGC
+TATCATCGTTTCGAGGTAGCTTCCAAGTGGATGAACTCAAAGACTGGCATTATGTGAAGA
+GCATAGCGCTTTTCCCCGTATTATGGCAGCAGCTGGTTACCCATACTTGTGATCCCCGTA
+ATTCTACTGTCATAGAAGGATGACCGAATCAATGAGCCGGGTGGTGTCCAAAAGCGATCC
+TAATCCTTGCTGATTTACCTTGAGCGGTCACGTCTGTCTCAGCGACATTCGCCTTGCGTT
+AGACTAGGCCGTAAGTAAGGAGTGCACTCCACAACGGCGTAATGCGTGCGGCGAGTAATG
+TATTAGCATGTTAACCACATTCTTGGCAGCCAGATCAAAATCACTTTTCATCTGGTTGTC
+TTAACAATCCGATAGAATCTAATGTAGCGATGCGTACTAGAAATAGTTACAATCTACAGT
+CTTGCTGCACTTGCTGCTAATAATGAGCGAGGACCTATCCCTCCTTAAGCAAGTTCCTTG
+TTCCGTGCGGGGAGCCCTGGCGCTAACTCTTTACATGATTAGTATCGCATGTTGTTACAT
+ATATAATAGATTTACATCATTTCAAATGCAATGATTCGTGCTCCTAAAATGAGTCGTATG
+AATAGCCACAGCGTACGGAAACCTGAATTGATTTGTAATTTAAAGATCAACTTAATCTGT
+GTTGATCAGAGCGAGCATTGCAGAATACCCCTGCATCTAGGAATCGGTGCCAGTGTAAAA
+GCCTGTTAGTAAAACCACGACTATGTAGTGTGTACCACACTCGGAGTGCGTCAAGCGAAG
+TCAAACATGGAAATGAAACCATGCGTACGGAAAAGACCAGTGATTTATAAGGACATTCAC
+ATAGACTCCAAAACTGACCCGATGGAGTCTACGCCGAACAGTTGGTATCAACATTTGTCT
+CGATTTTCTGTTGGGAACATCCATCCCTACCCACAACGTACTGGACCATAATCAAGGGTT
+TGGAACAGTACGCTCCTGTACTCAAGAAGTCCTTGCACGAAAGCAATAGGTTGAACTTCA
+TCATATAGGCGATGACAGTGCTATCAGCCGGACTGGCTGTTCTCGTAGAAGTCACTCGAA
+TCAATAAGATACGAATACTCCATCCTGTACGGGGACACTATATTATGCTAGCCGATTCTG
+TAAATGTAGTCTTTACCGAGAATTGCTGACACTGATTTGAGTGTAGGAGGTCCGGTATAC
+ACTTATCATCAACTTATTCCTACACTCGGTTTTCAATAGTTCGTAGCCCCAGGTTGCATG
+AATATTATACCTCGGATAACACCTACTAATCCGTCCACAGCCTAGCACTTACTGGCGATC
+AATGGAGCATGATGTACTTAGGGGACGGTATGAACATTCTTAACAGTTCCAAATGACCTG
+TAGCAAATACAATAGCATCTTTGTTTAAGCATGGTCCTCTGCGGTTTGAAATGTCGCTAA
+TCTAGTGATATTCCTTGTAAGCCACTGTTACTCTAATTTAGCCCACTCCAGAACGAGTTT
+GTGTCCATGAAAATGTAACTCCCCAGACATGCAAATACGCCTTATTGCTGAATATCGGAA
+CAAACAAAGTCGTTATCATCCTGAAATCGACGACAAGTACATATTAAAGGTTTGTTTGGC
+AAAATAGGTAGCAAGTAGGATGTTCATAACAATTAAAGCGCGTAACTCCTAAATGTGCAT
+TATGCGCCGAGGACCGATAGCTGACGCCGCTCTAGCTTCTATTGTTCCACTGTACGGTAC
+AAAGATTGAATACGGAAACAGAATTCGTCAATTTGTTGAATTATGTTCTATTCGTTTTAT
+CTGGTATATTTGTTACCTAACGTATTTAGGGAAAGTAGCTTCATGAAGAAATCTAATCCC
+TCGCGTGACGAGTTTGCTGTGATTATTATGCGACCTGACTCTTGTAGTGTGGAGTTCGTT
+GTCGTATCTGTACAAACTGCCGACACGTAGACAGGCCTGTCTAATAAACCAGGGACCTTT
+AAGCGTCTTTGTAATTAAGTAAGTACCAGACCATCCTTAGATCAATATGATGCGCAACCG
+GACCGGATCAAATGTTCCAAGCTCGGTAGGTTATCCTATAAGAGCCTCAGCAAAATGATG
+TAAATTGTCAGCGTGTAGTACGGAAACAGATCACGGTATAATCAAGTCTAAATATTTAGC
+CCCGGTCTTGGAATGGCCTTTTATGCAACCAATTTGTGGCGATTAATTTCTCAACAGTAA
+GACAGAGAAAGCTAGAGAAGCTGGTATTATTCTGCATGTTGTCGAACCAGCTGTGTACAG
+TCAACATTTTGCTATTTACTAAGTTGAAGCTTTCGGTTTCATGTGAAATATCTGGCCAAA
+TCGAATGCACCCTTTGACCGGCAGTTTTCATAAGCCACGTGTTTGCATTTCTCTTTAACG
+CATTGAAAATCACCGCGAACGACCTCACAACTGTCTAGCTTACCGATACGTTAGTGGTCT
+CCTCGCAGAATCGAACGAACCCGAATAATATGGTGATATTCTTTAACGACTGATTAGGGT
+CTTATTCGAGATTTTCAGTCTTTAAGCGTGAGCAGCGTGTTAATCACCTAGCAACATTAT
+AGAAAGGAGAAAGGTACGAGCAGTTTAAAAGTTACTTCTAATTTTAACTATTGTCCAACT
+AAGTGTAGATTATTTAGGCTTGTGTCCAAGTGAGATCATACTGTTTTCGTGTGATAGGTA
+TCCGCATCATAACTAGTTATATTAGCACCGTGTATGAAGAAACGGTGGACCGTAGCACAA
+CTCATTGTTATTTTGTCCCCTCTTGGTTTATTGGATCCTAGATTATATACGAATAGAGCC
+CCTTTCGCAACAGCATCAGAATCAGACCTGCGCTCTCGACTGATAATAGCAATTTGTTAA
+GAGCGGATAGACGCAGAAGAATAACATGATTTGTGCACTTAGTCCAGTCCAGATAAGAAG
+TTGAGGCATTGACTTAACTTTTCATTGTCCGCTTGCTATCCCCACGATCCTGCTAAACTA
+AAAGCTTTTGGCGCGGAAGAGCCGTTATGGAGGTTCGGCGAAATTGTATCACTAGCTAGA
+CCATTTTCTGTAGGCTTTTAGCTTGATCGACGTAAATTCGATTCTATATGGTAGAAAGGT
+ACGACCGTTATACGCTCACGTACAGCCTAAATTCACTTGTGGAGGCGATATAAGCTAATA
+AGCGGTTCATTTTGAGGAACCGTTACTTTGAGATTCACTTACAGCAACTAAGGTTGTGTT
+ACCGTTTCTTCTCAATTTACTGCTGGAGCGGCTATTATGCGTCCATCACCTTCATAGCCC
+TAGTCATCAAGCCCATAGAGGTATGTTCGTGTGTAAACGAATTCCAAGACTAATTGGTGG
+AAATTTCAGTTTGGATTGAATGAGGCTGATACTTCTATACACTTAAGGGTTCCCCGTAAG
+TATATTGCCATAAGGGAGTAGTAACACTAAGGTTGTGAAAATATTGCACGACGTAGGTAT
+TCTCAATTTCCTTCTAATTCTGTAGGATTTATGTAAGGCGACCGGGACTCTATTGTTTTG
+TCTCCGAGAGTTTCTTAATCAATTGTCAGGCTAGTAGATCAAGTGTAATAAATGATTAGA
+GGTCCTCATTTGGAGAATTTATCTATATCCTTGGTCGTCCACGCGGTATCGGAGTTGCTA
+TACAATAAGTTGGTTCCAGAAAGCGTCTTAATTACATACTCTTGGTTTATCAACGAGATG
+GTACCTAATACTCTCCTCTCAGTTCAGTAATAAGGACCGTTAACCGCACAATTGCATGTC
+ACCATGTAACACATCCTAGGTTCAGTGGTGCAAACAAATCAAAGTCGTTCGATGTCACTA
+AAACATTTTGCTTAGTAAGCTCACTTGGTTATGCAATATTCTTCACTTCCACAAGTGACT
+CTACTTAAGGCGACGCACCTCCCTACAATTCGCATACGCCAGGTACACACAGCATGGAAT
+AGTGTAGTACCTACTCATGCGCGAACGGTCGCCTGCAGAATTCCAACATGGAGGTCTTCT
+GGCCTAGTGCTTGTGCTTCCGGGATACACCGCACTCATATCACAGTTTTCCCTGGCACAG
+GTTATAGTCCGCTAGCGTGTTGAAGCTAGTTCACCCTTACTATGATCCAAGAAAAGCTTT
+TCGGCCGGCCATCCTTCACCATACGTTTCGGGGTCTTAGTTCATTATCAGAGTCGGTGCC
+ATTGTTCCATGTAGGTACGTGGAGGAAGTAACTCTTGATATGCTATACGTGTAGCATACT
+ATACTCCAGAATCCGTCGCAACAATCCCTTTATCTGCCCCTTTATTTACATTCCCCGCAT
+GTTTTGATTACTTAAATGTCGGGTACTGCTGGTATACACCGTATGCACCGAAAGACAGCA
+ACCCCTCAAAGCTTCGACGAGTTACCTGGTGTGAGACTATCAGCTTATAACCCTTACTAA
+CAGCAGTAGACGAATTCTCCTAGTATAAAGTCAATTACAGTTGACTAAATTCGAAGTAGC
+CGAGTGGGTCTCATTAGACCCTACATGTATCTCTTGTTTTCAAAACGGCTGTGAAAGTCG
+GAATATTATGTGAGTATGATTCACTCGGCGGAACACTCAAACTCGCTGAATCATTGATTC
+GCCGATGATTAAGCCGACCCTCCCAATTACCGCTGCAGCACTACAATCTCAATTTAGGTA
+TACGGATCTAGGTCCGTTCGTTACCAGTTACCAATACGCAACCGAGCTCGAAGAGAACAC
+AAATTTACGAAGCAAAATTCGGAATCAGGGTATCGTGCAGAATGGCAGGAGAGCTGGAAC
+TGTTGTCAGATTTCCCTCTAGTAATCGTACGAGAATATATTCTATGTCACACATTAACCT
+ATAGGTAAAGCCTCATTATACTCCGTTTAATGCAGACTTATAGGATGCCATGCAACAAGT
+CTAATCGTCGCGAGGACACTCAAAAGGATCAGTGGAAAGTAACACTTTGTGGTTCAATTC
+AGAAAATCAGCTTGTTTGTACCTACAAGTACAAAACTTGGAGTGGTAGAGAGGTCAATCG
+ATTAAGTTAAAAGGTTAACGCATGCGCCTAGTCATTAATTGGTTGCTGCGCAAAATAATG
+CATGCGTAGTAAATCCCAGCCCCAAGTCGAATAGATTATTAACGCCGGAAGCAGCCATCT
+GCGGAATCTTCGTTGTGTCGAGCGTCAAACGTTGCTCCATGGCTCCCTCCCTTTATCGGG
+TTCTCTCATTGAGTCCAACTAAACATCTACAAAAGAACTTTGTTATGTGATATAGCTTAG
+GTCTAATCTTAGGCTGACATGCATAACGCTTTGTCGAGGTCTATTAACATAGCCGAATGC
+ATGCAAGCTTTGATGGATATTAACTTCCCAATGTCTAAGATTAAAGAAGAGGACACCCAT
+TATGTCAATCATCTAGCTAAATCGAGCTGCGAGCCGGAGAGTAAACAGTTTCCTTTTCTT
+CGGCGGTTATTTGAAAATTCCTTTCTTATGGCAGTGTTTCGAGCGAGCAGTATATTAGAC
+CCAACCTCGATAATCGTTAATCACATAGCGACTATGATAGTATCATTACCAGCAGCATAC
+ATAAAATTGTAAAGTGTGTTACTGTTTGCGTGGGTGATTATAGTACAGTCTTTTGCAAAT
+CTACGGCCCTGACAGAACTTCACATTAAAGGCCATCCACAGAACAATGGACAACGTATAA
+AACCTAAAAGGATATCGTTTTCCTGGGGTTTTCAGTTGTTTTAATGACCGGTAAATTTTC
+TTACCCTATTGTGTTTCCTTACACAGAAATATCTGAATATTGAGGTACCTGTGAACATTA
+TCATTCATACAACATATCCTATCGCCCATCCTGTGCGGCGACTACTCCAGCACTCACTAA
+TTGTTAATCATCTCATACAACTCGTCAGAATTAACATTACCGCAAACTGCTTACTAGCGC
+AATCAGGTCAAGAGGAGGACGGCTTTGTCACTTAAAAGAATAAGGTGTAGCTGCATAAAA
+CAATGTGTATCTTCTGAGCTTCACAGCCGTGGGCTATCTATGGTTCCGGTCCTGTTGATT
+GCTCCCGATGTTGAACAATACTTTCCACTTTCCGTGACAGAAACTTTAGAGCAAGAGGTC
+AAACTTTACCCAAGCCCATAGGTAGAAGTTACGCGCGCATTGACGTTTGATCAAGGGACA
+GCTGTGAATATCCGTCCCACGTAATCGTGACTTCTCATCAATATTATATTACTGCCGCTA
+ATCAACAACTTCCTTGTTTCGACTGAAACGATTTTAGTCAAGTCGAAGACCTCATACGAT
+AAGATTTGCAACATGTCTAAAAGAGAACGGGAACTGGCAAAAGGCTTGGTAGATCCGTCT
+ATAGCGTAAAACTGATTAACCCATTAGGTCTGAATAACTTTACACAACCCTCCGCACTGT
+TAAATGACGGGCTTTGCTCTGTTTTGACACATCAGCTAGAAACTCGCCACGAAGGCATAA
+GGCTCCCATATAGCGTAGCTGACAAACATATGAGGTGGCTGCATAAACTAAATTGAGGCT
+CGCGTTCGGATACTTGCCCATGTAGCAAGTCTTGGCAACCAACTATATAATCATCACGAA
+TTGAGTGCTAAAGACATGCGAACAGTTGGGGCTGCTATATAGTATGACAGATATAGAAAT
+TTTATAAAATGTCGTAGGAATCTGGAGGCCAAAATCATTAGACACTCTTGTAAAAGGTAT
+GGTAATGTGTATGACCTCTTGGCATAGTGTCCAATTATTCTCGGTTTACTCTCAGAGACA
+CAGTCATGTAAAAGTGGTGAGGAATTACCGCCGTGTTTTGCCAACCAAGAAGCATTGAAC
+AGTAGATCAATAATGATATTCGGTAGCGTATTTACGCTTTGCGGTTTTCAGAAGAAACTA
+TCACAATTGAAACTCTATTCTTCGCCTCATTCCGTACCGTTAGGAATGACTCGAATCGTA
+CTGTCTGCCGCGGGGCATAGTGTATTGCTCCCCACCAGGTTCAGATAGTTCGAATCAGTG
+CGCTGTACAATTGCCTTACGTGTAGATTTGCATCACCGCTTCACGTAGGCACCCAGAGTG
+CTCACTAAAGCCACTAGAGAGATAGAGTTAGAAATTAAGTATCGGTTACGCCCCTCAGAC
+GACATAACTCACTTCTACCGAATATCCTTTCTATCTTGGATACTACTAATGCTTCCGTTC
+ACGCCGCAATCATGTGGATCCTCCAGTAAGCAGGGTGCTGTCATGACTATACAGTACGGA
+TCCGTAAGCATTTTGAGGATGATAACATAGGGTCGGTTACTGTGGATTTCCGTTACTTAG
+GAGAGCAGCTTTAGCTGACTTTGCTGAGGCTGCGCGTGTTAGACAGCAATTTACGAACGG
+CGCACTCTATAGCAGGCACTCACAGTGGACCAGTAGTCCTATTGCAAGAGTTCATTATGG
+AACATTTTAGTCCTCTATCACACGGACCATTGCAGTAGATAACTCTAATCCTATGTCTTT
+ATTTGGTTGCCTGGAACCCCTTACCACTAGACACCCCAATAAGTAATCTTGCTTCCATGT
+CGAATTGATACTCATCGAAAACATATAAAACTAATTATGCTTGTGTTCCTGTGGTCTGTT
+ATATAGAGGCGCCCTATTGGCCGCGGGATAAGGATCATTTTGGCACACTAACGGGATCCT
+AAAACTTTATCTTTCAACGACTCCTACATGCCTTTTAGGTTAGTACGCGAATCGCCTAAC
+AAGCCAATGGGTATTGGAGAATTAGACAAAATGGTTGAGGAATAAAGTGGCGCAGGATTT
+TGTCCGAGAAAGGGATAGCAAACGGTCGCAGGCAGGAGTAACAATTTTCAACCGACCTTA
+ATAGAGCTCAAAAGCTACCGGAGAAAGCTTCGTCTATGCTTAATACATATGCTAACCTAT
+GAATTTCGTAAGCGTAATATAAACTTATCAGATATTTTAAAAGCATCCTATTCAGTCGTA
+CTTTTGGCAGGAAAGGTCAGGCGAAACAGAGTCTCCCTGCGGAGGCTTTTAAAATAAATA
+GCGGGCCTAGCATCGATTCTAAAAGACGACCCCAGGTGCGTAACCGTGCCTCCCCAAGTC
+TTCTTTTAACAATTACCTAGAGAACGGCGTCAGTCGCGAATGACCTTACGAACGTTTACG
+CGGAGCCGAGTAAGATTAATAACTGCTTATTGATTTGCAATCGTTTGATACGGGTGGCCC
+GAAGCTCAATATCAACATAAATAAAATTAGTCGGAATGGTCGCTTAAATCGCGCGCTGTC
+ACTGTCTTCATATGAGGGAGTTGTGTAAGACTGCATTGATATATAGGTATGATTTCGGTT
+TAGAACTTTGTCTGTTAGCAACTCCGCATGATTGAAGGAAATCCTCGTTGGTAAGATCTC
+TTTAGCATTTGCACAGCTGACTCTAACAGCATAGTATGTGATCGTATTATGTCTGCAGTT
+TGTAACACAGTGGGCGGCATGGATGGTACTTAATGGACGTAATGAGCAGTAGACCACCGG
+TGTTACCTAACCATCATTAGAGTAGGCGAGATTGCGCTTGTACGACTTATATATAAGGGT
+AACCGGAATACCGTTCCTCTTATCAACAACAGTTACTGGTCTTAATTCACATCGGATATT
+GCGATCGCCAAGACTATCCCGTAAGTCGTAAGCTAACCAACTAGCGGTTAGGTTTATTGA
+GGTTTTGATGGGAACTTCTCAGACACGTCGTCAACTACCTAATTTCTTGGATGGAGCTAG
+GCTAACTGTCCCAGAACTTTCTGACACTCGAGATCCTCTAACTAATTGGAATCCAGGAAT
+TCCCTTATTGCATCGCCACAAACGACCATAAATTACAGCATGTTTCATTGTCTAACGTGC
+CTATCCACGAAATTGAATTCGGTTCACATTATATATCCCCTTCTACCGCTAATTTAATGT
+TTAACGTTGATGGGGCAAAGCACATTCGAGAAGTACCGAAAAGTCTCAATCCAAAGACCG
+GAGGAACTGGCTTCGGTAAGAATCGCGAGTATCCTTGGATGCCCTGCCTGATTATAACTT
+GTTCCATGTAGATAGGCGTAGCTAATTCATAGCAATACAATAAACGAGTCAGAACTGTAG
+TCTAACATAACAGCCTGCTCTCCAGGTAACAGCCCATTATTAGATATAGTATCACGATCG
+TCGGTTGTATTAGTGGTGATAACTATCGATTCTGCCACTAATAGAATGTGCAGAAATAAA
+GTATCTGAAAGAAAACGAAGTCACAGAGAATAAAGCTCACTTCATAAAAGTCGGTTGCAG
+TAGACGCATATCAATTTTCCCTGCTGCATTTTAGAGTTCGGAATAGTTAAACATAATACT
+GGAAGCGCTTCCGGCAATCAGGAATAACCCCATATAAACCAACCTTTGTTGCTATTGCCA
+GCGCTATTCTCGTCAAAATTTCTCCCTATGGTCTTCACATCATGCATCACCGGACCCTTT
+GATAGACGATGACCCAATTACAATCACTCCACGGATGAGCATCCCATTTTATACGAGGCC
+CACTGGAAACAATTGCAATCGACGTGACCAAGTAGAGGAGCGTGCTCGAAAGGTGATGAT
+TGCCGAATTCTAACAAGGATACTATAAGCCACGGAACGCTGACGTTGAACAGACCTGGTC
+TCCTGGGCACTTCGCAGCACCTCAGTAGTAATTCCGGTAGATTAGGACTTAGCATTCCGT
+TGATCTTACAGGATTTATAAATAAGGAGATCTGTCTTGTTTAATTAGGAGGACGCTTTTC
+CCGCGTAAGTACGGGAAAACGTTCTTCTGATTTTGTTTGCCACTTGACATTGTAGCTGCT
+AGGAGAAGGGATAATATCCGCGTTTTCTTTTACCGTAACGTCGGAGCATACCATGGTAAT
+TGTCCGTGTCAAAACTAGATATCTAGGTTGCAAAATTCAGTCAGTAAGTCCTGAGGCCTT
+CCGCATTATTAATTCTACAGACATATGAATTTGCTCCACCGGCTAGCACAGTCAACTCAA
+CCCACGATAGGGGAACGAAATCACAAATAGGTTCACATGGTCAATACAAGGCAAACCATT
+CCCCATAACTCACGCACTGACGGTAAGGCCATTTCAGGTCAAGCGGTGAATGCTGTGAAA
+AGCAGCTCGACCACCTGCCGTGGATGGCAAACCGATAACAAAGGACTCCGATACTTCATT
+TGTAAACGTTTGCAGTGCTGACGTAACTCATATCTACAGTCAAACCGAATGGTTTGATCG
+GCATTATGTAAAGGAATCGACACACGTTGCGTCTTCTAGATTATTACACACCTGTCTGCG
+ACGGATATAGGTAAATAAGTCAGCCTCCACTCTGCAGAAGATACTAGAAACGTATCAGTA
+ATAGCTATCAGGATTTCGCCATCCTCGCACTGTGCCCGGATATCACAGCAAGATTCTAGG
+ATGGCACTTGTGTGACTAGAGGTTTTACTCGTTGAGCCATTCTTACTATAGGCATGGGAT
+TACAATGTGCATGTTTGTGATGTTATCCCATATCTTGCATGTATCAGCCTACCAATTAGA
+CATATGACTAGATGTAGTCGATCAACGCAAGGGTGCGGACTTTGATTCCTTTTGAATTGA
+AGTCAACTCAGATGCTCCTTAAGACGTTTTACAGTAGGTATTTTGTGGTACAAACCAGAA
+CCAGTGCCAGTCGGTAGTTATTGTAGTGTGTTCTTAATACATATTTGGTATTGGAGTTTC
+TAACATTTAAAAGGAGCCTATTACACTTACTTAATTTGCGTCTATATTTCTGTTACGATA
+TGTCGTCTGTCGATTTTACGAGTTTCATACGTGCGGGTTCCCTGTTCGCAATGGGCCCCT
+TGCTAATGTCCCGCATCTTTAGGATGCAAACTTACTCACGCCTCCTTTACCGAGACTTGG
+TGGGAGAGAAGACTCCTGTAGAATCCCGATCTGAATGGTTTCAGTGTAAGGGTCCCTTCT
+AGCCATATCATTGAATATTCTTGTACTTTAAGTAACTCGATCCTACCAGTACAATTCTAG
+GTTTGCCTTATAGCCGGAATGAGTATCAGCGTCATTCACCCCGGCCGGATATTATTTGCA
+ATGTCAGGGACACCCAAAATAGACCGGTTAGAAGGCATATGCGATGAGAGTTGGTGCCTA
+AATTAAACGATACAATTGATATGACAAGGACTATACGATGAAATCCATGAGATAATTATC
+GTAACTCGGCCAACCTAAAACCGTGCAAGATAGGAGCGGTCCTAGAAGTACTATCGACAC
+CTTAAATACTCACTTGAGTTTTCCGATCCTATAGTGCCAATCATATGGCGCAGGAATATT
+ACAAACTAAGAAAGTCAACAAAAGATGTAAATTGCAACACCTGGCATCGGTGGGGTTGTC
+CCCTTAAACCCTGAAACCAACTGTTATGCTCAACATTATATCGAGGCTAAAACGCGTATC
+GTGGCACATTAATAACGATCACATAAGCTTTGCGGCTAGCAATAATAATTTAGGACAGCT
+TAGATTTTGACCCGTGCTAATCCTCAGTATGGAGTAATTTTACGGATCTCTCGTTGTAAC
+CGTCCTCAGTCGTGTACATTTTAACCTTTGTAAACTAGTTTACGAACGAGTATTTAGAAG
+GTCCGTACTCTCACCCAACTGACACATTGTACTAGCTCAAGATCGCAAACACTAAGGGTG
+TGAGTCGCGGGATAGCGCTTAAATATGACTGCTAATGGTCAAGAGCACGCGCATAATATT
+CCACTGGTTCTAGGTCACCACTACGGTCAGACGTTGACCTGCATGCCCTACATCCGGCAC
+GGGCTACTAACGGCCTAATATTCTTTGAGCCATATCCATACTCGTCTATGCATATTCAGG
+TATACGGCTATAGTGCGTTATTAACTTCGTCGTGATTAAATCCTTTAATTGTTCCATTAT
+AAGTATACATGCTTAGATGCGTGAACTTGAGGGATATCGTTGCTCTAAAGTTGTCTTATA
+GACTAAATCTAAACAAGCCGTGCAAGACTACTTAAATTACAAATCTTACAGACATCTCGC
+CACTGCGCTAACACTAACAA
diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa.fai b/pipelines/nf-atacseq/tests/data/chr_test.fa.fai
deleted file mode 120000
index 8158c3c..0000000
--- a/pipelines/nf-atacseq/tests/data/chr_test.fa.fai
+++ /dev/null
@@ -1 +0,0 @@
-../../../../tests/shared_data/chr_test.fa.fai
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa.fai b/pipelines/nf-atacseq/tests/data/chr_test.fa.fai
new file mode 100644
index 0000000..4e99d5b
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/data/chr_test.fa.fai
@@ -0,0 +1 @@
+chr_test 20000 10 60 61
diff --git a/pipelines/nf-atacseq/tests/data/generate_realistic_reference.py b/pipelines/nf-atacseq/tests/data/generate_realistic_reference.py
new file mode 100644
index 0000000..cb9d937
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/data/generate_realistic_reference.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+"""
+Generate a realistic ~20kb non-repetitive reference sequence for ATAC-seq testing.
+
+Properties:
+ - ~42% GC content (human-like)
+ - No homopolymer runs > 5bp
+ - High k-mer uniqueness (>99% unique 20-mers)
+ - Deterministic (seeded RNG)
+
+Output: chr_test.fa with contig name 'chr_test'
+"""
+
+import random
+import sys
+
+SEED = 42
+LENGTH = 20000
+CONTIG = "chr_test"
+LINE_WIDTH = 60
+MAX_HOMOPOLYMER = 5
+
+# Target base frequencies for ~42% GC
+# A=29%, T=29%, G=21%, C=21%
+BASES = "ATGC"
+WEIGHTS = [0.29, 0.29, 0.21, 0.21]
+
+
+def generate_sequence(length, seed=SEED):
+ """Generate a non-repetitive sequence with controlled GC content.
+
+ Draws ``length`` bases one at a time from BASES, weighted by WEIGHTS,
+ using a ``random.Random(seed)`` generator, and returns them joined into
+ a single string. The same ``seed`` yields the same sequence.
+ """
+ rng = random.Random(seed)
+
+ seq = []
+ # Length of the run of identical bases ending at the last emitted base
+ homopolymer_count = 0
+ last_base = None
+
+ # Every rng call consumes generator state, so adding, removing or
+ # reordering draws in this loop changes all bases emitted afterwards.
+ for _ in range(length):
+ # Pick a base using weighted random
+ base = rng.choices(BASES, weights=WEIGHTS, k=1)[0]
+
+ # Prevent long homopolymers
+ if base == last_base:
+ homopolymer_count += 1
+ # NOTE(review): the redraw fires as soon as the count reaches
+ # MAX_HOMOPOLYMER, so emitted runs appear to top out at
+ # MAX_HOMOPOLYMER - 1 bases, which is stricter than the
+ # "> 5bp" wording in the module docstring. Confirm this is intended.
+ if homopolymer_count >= MAX_HOMOPOLYMER:
+ # Force a different base
+ alternatives = [b for b in BASES if b != base]
+ alt_weights = [WEIGHTS[BASES.index(b)] for b in alternatives]
+ # Renormalise the remaining weights so they sum to 1
+ total = sum(alt_weights)
+ alt_weights = [w / total for w in alt_weights]
+ base = rng.choices(alternatives, weights=alt_weights, k=1)[0]
+ homopolymer_count = 1
+ else:
+ homopolymer_count = 1
+
+ seq.append(base)
+ last_base = base
+
+ return "".join(seq)
+
+
+def validate_sequence(seq):
+    """Compute summary statistics for a generated sequence.
+
+    Returns a dict holding the sequence length, the G+C fraction, the number
+    of distinct and total overlapping 20-mers, the 20-mer uniqueness as a
+    percentage, and the longest run of identical consecutive bases.
+    """
+    window = 20
+    window_count = len(seq) - (window - 1)
+
+    gc_fraction = sum(1 for base in seq if base in "GC") / len(seq)
+
+    # Collect every overlapping 20-mer in a set; its size is the distinct count
+    distinct_count = len({seq[start : start + window] for start in range(window_count)})
+    unique_fraction = distinct_count / window_count
+
+    # Track the longest stretch of identical neighbouring bases
+    longest_run = 1
+    run = 1
+    for previous, current in zip(seq, seq[1:]):
+        if current != previous:
+            run = 1
+            continue
+        run += 1
+        if run > longest_run:
+            longest_run = run
+
+    return {
+        "length": len(seq),
+        "gc_content": gc_fraction,
+        "unique_20mers": distinct_count,
+        "total_20mers": window_count,
+        "uniqueness_pct": unique_fraction * 100,
+        "max_homopolymer": longest_run,
+    }
+
+
+def write_fasta(seq, contig, filepath, line_width=LINE_WIDTH):
+    """Write ``seq`` to ``filepath`` as a single-record FASTA file.
+
+    The header line is ``>contig``; the sequence follows wrapped at
+    ``line_width`` bases per line, each line newline-terminated.
+    """
+    with open(filepath, "w") as handle:
+        handle.write(f">{contig}\n")
+        handle.writelines(
+            seq[offset : offset + line_width] + "\n"
+            for offset in range(0, len(seq), line_width)
+        )
+
+
+def main():
+    """Generate, validate and write the test reference FASTA.
+
+    The output path is taken from the first command-line argument and
+    defaults to ``chr_test.fa``. Sequence statistics are printed, then
+    checked against the expected GC range, 20-mer uniqueness and homopolymer
+    limit; the FASTA is written only if every check passes.
+
+    Raises:
+        AssertionError: if any property check fails.
+    """
+    output = sys.argv[1] if len(sys.argv) > 1 else "chr_test.fa"
+
+    print(f"Generating {LENGTH}bp non-repetitive reference sequence...")
+    seq = generate_sequence(LENGTH)
+
+    stats = validate_sequence(seq)
+    print(f" Length: {stats['length']}bp")
+    print(f" GC content: {stats['gc_content']:.1%}")
+    print(f" Unique 20-mers: {stats['unique_20mers']}/{stats['total_20mers']} ({stats['uniqueness_pct']:.1f}%)")
+    print(f" Max homopolymer: {stats['max_homopolymer']}bp")
+
+    # Validate with explicit raises rather than `assert` statements: asserts
+    # are compiled out under `python -O`, which would let an out-of-spec
+    # reference be written silently. The exception type is unchanged.
+    if not (stats["gc_content"] > 0.38 and stats["gc_content"] < 0.46):
+        raise AssertionError(f"GC content out of range: {stats['gc_content']}")
+    if not stats["uniqueness_pct"] > 99.0:
+        raise AssertionError(f"Uniqueness too low: {stats['uniqueness_pct']}")
+    if not stats["max_homopolymer"] <= MAX_HOMOPOLYMER:
+        raise AssertionError(f"Homopolymer too long: {stats['max_homopolymer']}")
+
+    write_fasta(seq, CONTIG, output)
+    print(f" Wrote {output}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/pipelines/nf-atacseq/tests/data/generate_test_data.sh b/pipelines/nf-atacseq/tests/data/generate_test_data.sh
index f5cb288..9cb2eb4 100755
--- a/pipelines/nf-atacseq/tests/data/generate_test_data.sh
+++ b/pipelines/nf-atacseq/tests/data/generate_test_data.sh
@@ -1,11 +1,18 @@
#!/bin/bash
# =============================================================================
-# WASP2 nf-atacseq Test Data Generator
+# WASP2 nf-atacseq Test Data Generator (v2 — realistic reference)
# =============================================================================
-# Creates ATAC-seq-like test data by symlinking shared core data and generating
-# pipeline-specific files (shorter fragment FASTQs, BWA index, samplesheet).
+# Generates self-contained ATAC-seq test data with a non-repetitive reference
+# so BWA alignment produces meaningful mapping rates (>80%).
#
-# Prerequisites: samtools, bgzip, tabix, wgsim, bwa (WASP2_dev2 conda env)
+# Previous version used the shared chr_test.fa which is a repetitive ATGC
+# pattern yielding ~0% mapping. This version generates its own reference.
+#
+# To produce non-zero allele counts, reads are simulated from BOTH haplotypes:
+# half from the REF haplotype, half from an ALT haplotype with het SNPs applied.
+#
+# Prerequisites: python3, samtools, bgzip, tabix, wgsim, bwa
+# (all available in WASP2_dev2 conda env or WASP2 micromamba env)
#
# Usage:
# cd pipelines/nf-atacseq/tests/data
@@ -17,106 +24,307 @@ set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"
-SHARED_DATA="../../../../tests/shared_data"
+# If bwa is not already resolvable, look for it in the usual conda/micromamba
+# environment directories and prepend the first executable hit to PATH.
+if ! command -v bwa &>/dev/null; then
+    for candidate in \
+        /usr/local/Cellar/micromamba/*/envs/WASP2/bin/bwa \
+        /usr/local/Cellar/micromamba/*/envs/WASP2_dev2/bin/bwa \
+        "${HOME}/miniforge3/envs/WASP2/bin/bwa" \
+        "${HOME}/miniconda3/envs/WASP2/bin/bwa"; do
+        # Unmatched globs stay literal and simply fail the -x test
+        [[ -x "$candidate" ]] || continue
+        export PATH="$(dirname "$candidate"):$PATH"
+        break
+    done
+fi
echo "==================================================================="
-echo " WASP2 nf-atacseq Test Data Generator"
+echo " WASP2 nf-atacseq Test Data Generator (v2)"
echo "==================================================================="
-# Validate shared core data exists
-if [[ ! -f "$SHARED_DATA/chr_test.fa" ]]; then
- echo "ERROR: Shared core data not found at $SHARED_DATA"
- echo " Run: cd tests/shared_data && bash generate_core_data.sh"
- exit 1
-fi
-
# -----------------------------------------------------------------------------
-# Symlink shared reference and variants
+# Check prerequisites
# -----------------------------------------------------------------------------
-echo "[1/4] Symlinking shared reference data..."
+echo "[0/7] Checking prerequisites..."
+
+# check_tool NAME
+# Prints an OK line when NAME resolves via `command -v`; otherwise prints an
+# error plus an activation hint and terminates the script with status 1.
+check_tool() {
+    if command -v "$1" &>/dev/null; then
+        echo " OK: $1"
+        return
+    fi
+    echo "ERROR: $1 is required but not found in PATH"
+    echo " Try: conda activate WASP2_dev2"
+    exit 1
+}
+
+check_tool python3
+check_tool samtools
+check_tool bwa
+check_tool wgsim
+check_tool bgzip
+check_tool tabix
+echo ""
+# -----------------------------------------------------------------------------
+# Clean stale symlinks and old data (one-time migration from v1)
+# -----------------------------------------------------------------------------
+echo "[1/7] Cleaning stale data..."
for f in chr_test.fa chr_test.fa.fai variants.vcf.gz variants.vcf.gz.tbi annotation.gtf regions.bed; do
- if [[ ! -e "$f" ]]; then
- ln -sf "$SHARED_DATA/$f" "$f"
- echo " ✓ Linked $f"
- else
- echo " - $f already exists"
+ if [[ -L "$f" ]]; then
+ rm -f "$f"
+ echo " Removed symlink: $f"
fi
done
+rm -rf bwa_index
+rm -f sample1_R1.fq.gz sample1_R2.fq.gz
+rm -f chr_test.fa chr_test.fa.fai variants.vcf variants.vcf.gz variants.vcf.gz.tbi regions.bed
+echo " Cleaned previous outputs"
+echo ""
+
+# -----------------------------------------------------------------------------
+# Generate realistic non-repetitive reference
+# -----------------------------------------------------------------------------
+echo "[2/7] Generating realistic reference genome..."
+python3 "${SCRIPT_DIR}/generate_realistic_reference.py" chr_test.fa
+samtools faidx chr_test.fa
+echo " Created chr_test.fa + .fai"
+echo ""
+# -----------------------------------------------------------------------------
+# Generate VCF with ~30 het SNPs + ALT haplotype reference
+# -----------------------------------------------------------------------------
+echo "[3/7] Creating VCF with 30 het SNPs and ALT haplotype..."
+
+python3 - <<'PYEOF'
+# Writes variants.vcf (30 phased het SNPs for sample1) and chr_test_alt.fa
+# (the reference with every ALT allele substituted in).
+import random
+
+# Read reference (single record: skip the header line, join the rest)
+with open("chr_test.fa") as f:
+    lines = f.readlines()
+seq = "".join(l.strip() for l in lines[1:])
+
+# Deterministic SNP positions (1-based) spread across the reference
+rng = random.Random(99)
+positions = sorted(rng.sample(range(200, 19800), 30))
+
+# Transition mapping for plausible variants
+transitions = {"A": "G", "G": "A", "T": "C", "C": "T"}
+
+# --- Write VCF ---
+# Structured meta lines must carry their <...> payload: the records below use
+# INFO/DP and FORMAT GT:DP, so each of those IDs has to be declared, and the
+# contig line names the sequence the records refer to.
+vcf_lines = [
+    "##fileformat=VCFv4.2",
+    "##source=WASP2_nf_atacseq_test_data_v2",
+    "##reference=chr_test.fa",
+    f"##contig=<ID=chr_test,length={len(seq)}>",
+    '##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">',
+    '##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">',
+    '##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">',
+    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample1",
+]
+
+snp_map = {}  # pos -> (ref, alt)
+for i, pos in enumerate(positions):
+    ref = seq[pos - 1]  # VCF POS is 1-based, Python indexing is 0-based
+    alt = transitions[ref]
+    snp_id = f"snp{i+1:03d}"
+    vcf_lines.append(
+        f"chr_test\t{pos}\t{snp_id}\t{ref}\t{alt}\t100\tPASS\tDP=50\tGT:DP\t0|1:50"
+    )
+    snp_map[pos] = (ref, alt)
+
+with open("variants.vcf", "w") as f:
+    f.write("\n".join(vcf_lines) + "\n")
+
+print(f" Created variants.vcf with {len(positions)} het SNPs")
+
+# --- Write ALT haplotype reference ---
+alt_seq = list(seq)
+for pos, (ref, alt) in snp_map.items():
+    assert alt_seq[pos - 1] == ref, f"Mismatch at {pos}: expected {ref}, got {alt_seq[pos-1]}"
+    alt_seq[pos - 1] = alt
+
+# Same contig name and 60-column wrapping as the REF FASTA
+with open("chr_test_alt.fa", "w") as f:
+    f.write(">chr_test\n")
+    alt_str = "".join(alt_seq)
+    for i in range(0, len(alt_str), 60):
+        f.write(alt_str[i:i+60] + "\n")
+
+print(f" Created chr_test_alt.fa (ALT haplotype with {len(snp_map)} substitutions)")
+PYEOF
+
+# Compress and index
+rm -f variants.vcf.gz variants.vcf.gz.tbi
+bgzip -c variants.vcf > variants.vcf.gz
+tabix -p vcf variants.vcf.gz
+echo " Created variants.vcf.gz + .tbi"
echo ""
# -----------------------------------------------------------------------------
-# Simulate ATAC-seq-like reads (shorter fragments, 150-250bp)
+# Create regions BED covering all SNP positions
# -----------------------------------------------------------------------------
-echo "[2/4] Simulating ATAC-seq reads..."
+echo "[4/7] Creating regions BED..."
+
+python3 - <<'PYEOF'
+# Builds regions.bed: ~500bp peaks around every SNP in variants.vcf, with
+# overlapping peaks merged. Positions are read back from the VCF written in
+# the previous step (instead of re-running the seeded sampling) and the contig
+# length comes from the faidx index, so the BED cannot drift out of sync with
+# the variants or the reference.
+HALF_WIDTH = 250
+
+# Second column of the .fai line is the contig length
+with open("chr_test.fa.fai") as f:
+    contig_length = int(f.readline().split()[1])
+
+# POS (second column) of every non-header VCF record
+with open("variants.vcf") as f:
+    positions = sorted(
+        int(line.split("\t")[1])
+        for line in f
+        if line.strip() and not line.startswith("#")
+    )
-NUM_READS=500
+
+# Create ~500bp regions centered on each SNP, clamped to the contig
+regions = [
+    (max(0, pos - HALF_WIDTH), min(contig_length, pos + HALF_WIDTH))
+    for pos in positions
+]
+
+# Merge overlapping regions (regions are already sorted by start)
+merged = []
+for start, end in regions:
+    if merged and start <= merged[-1][1]:
+        merged[-1] = (merged[-1][0], max(merged[-1][1], end))
+    else:
+        merged.append((start, end))
+
+with open("regions.bed", "w") as f:
+    for i, (start, end) in enumerate(merged):
+        f.write(f"chr_test\t{start}\t{end}\tpeak_{i+1}\n")
+
+print(f" Created regions.bed with {len(merged)} peaks covering {len(positions)} SNPs")
+PYEOF
+echo ""
+
+# -----------------------------------------------------------------------------
+# Simulate ATAC-seq reads from BOTH haplotypes (REF + ALT)
+# -----------------------------------------------------------------------------
+echo "[5/7] Simulating ATAC-seq paired-end reads (dual haplotype)..."
+
+# 20kb genome, 75bp reads, ~20x total coverage
+# Split: ~1350 pairs from REF, ~1350 pairs from ALT
+NUM_READS_PER_HAP=1350
READ_LEN=75
FRAG_SIZE=180
FRAG_STD=30
ERROR_RATE=0.001
-SEED=100
-
-if [[ -f "sample1_R1.fq.gz" && -f "sample1_R2.fq.gz" ]]; then
- echo " FASTQs already exist, skipping"
-else
- wgsim -N $NUM_READS \
- -1 $READ_LEN \
- -2 $READ_LEN \
- -r 0 -R 0 -X 0 \
- -e $ERROR_RATE \
- -S $SEED \
- -d $FRAG_SIZE \
- -s $FRAG_STD \
- "$SHARED_DATA/chr_test.fa" \
- sample1_R1.fq \
- sample1_R2.fq \
- > /dev/null 2>&1
-
- gzip -f sample1_R1.fq
- gzip -f sample1_R2.fq
- echo " ✓ Created sample1_R{1,2}.fq.gz (${NUM_READS} pairs, ${READ_LEN}bp, ${FRAG_SIZE}bp frags)"
-fi
+# simulate_haplotype SEED REFERENCE PREFIX LABEL
+# Runs wgsim on REFERENCE with the shared read/fragment settings and the given
+# SEED, writing PREFIX_R1.fq and PREFIX_R2.fq, then reports the pair count.
+simulate_haplotype() {
+    wgsim -N "$NUM_READS_PER_HAP" \
+        -1 "$READ_LEN" \
+        -2 "$READ_LEN" \
+        -r 0 -R 0 -X 0 \
+        -e "$ERROR_RATE" \
+        -S "$1" \
+        -d "$FRAG_SIZE" \
+        -s "$FRAG_STD" \
+        "$2" \
+        "${3}_R1.fq" \
+        "${3}_R2.fq" \
+        > /dev/null 2>&1
+    echo " Simulated ${NUM_READS_PER_HAP} pairs from ${4} haplotype"
+}
+
+# REF haplotype first, then the ALT haplotype with the het SNPs applied
+simulate_haplotype 100 chr_test.fa ref REF
+
+simulate_haplotype 200 chr_test_alt.fa alt ALT
+
+# Concatenate REF then ALT reads for each mate and compress
+for mate in R1 R2; do
+    cat "ref_${mate}.fq" "alt_${mate}.fq" | gzip -c > "sample1_${mate}.fq.gz"
+done
+echo " Combined into sample1_R{1,2}.fq.gz ($((NUM_READS_PER_HAP * 2)) total pairs)"
+
+# Remove the per-haplotype intermediates and the ALT reference
+rm -f ref_R1.fq ref_R2.fq alt_R1.fq alt_R2.fq chr_test_alt.fa
echo ""
# -----------------------------------------------------------------------------
-# Build BWA index (for local testing)
+# Build BWA index
# -----------------------------------------------------------------------------
-echo "[3/4] Building BWA index..."
+echo "[6/7] Building BWA index..."
BWA_INDEX_DIR="bwa_index"
-if [[ -f "${BWA_INDEX_DIR}/chr_test.fa.bwt" ]]; then
- echo " BWA index already exists, skipping"
-else
- mkdir -p "$BWA_INDEX_DIR"
- cp "$SHARED_DATA/chr_test.fa" "$BWA_INDEX_DIR/"
- bwa index "$BWA_INDEX_DIR/chr_test.fa" 2>&1 | tail -2
- echo " ✓ Created BWA index ($(du -sh $BWA_INDEX_DIR | cut -f1))"
-fi
-
+mkdir -p "$BWA_INDEX_DIR"
+cp chr_test.fa "$BWA_INDEX_DIR/"
+bwa index "$BWA_INDEX_DIR/chr_test.fa" 2>&1 | tail -2
+echo " Created BWA index"
echo ""
# -----------------------------------------------------------------------------
-# Create test samplesheet
+# Create samplesheets (both test and local variants)
# -----------------------------------------------------------------------------
-echo "[4/4] Creating test samplesheet..."
+echo "[7/7] Creating samplesheets..."
-SAMPLESHEET="samplesheet_test.csv"
-if [[ -f "$SAMPLESHEET" ]]; then
- echo " $SAMPLESHEET already exists, skipping"
-else
- cat > "$SAMPLESHEET" << EOF
+# test samplesheet uses absolute paths
+cat > samplesheet_test.csv << EOF
sample,fastq_1,fastq_2,sample_name
-test_sample1,${SCRIPT_DIR}/sample1_R1.fq.gz,${SCRIPT_DIR}/sample1_R2.fq.gz,SAMPLE1
+test_sample1,${SCRIPT_DIR}/sample1_R1.fq.gz,${SCRIPT_DIR}/sample1_R2.fq.gz,sample1
EOF
- echo " ✓ Created $SAMPLESHEET"
-fi
+echo " Created samplesheet_test.csv"
+
+# local samplesheet uses ${projectDir} relative paths (for nextflow)
+cat > samplesheet_local.csv << 'EOF'
+sample,fastq_1,fastq_2,sample_name
+test_sample1,${projectDir}/tests/data/sample1_R1.fq.gz,${projectDir}/tests/data/sample1_R2.fq.gz,sample1
+EOF
+echo " Created samplesheet_local.csv"
+
+# -----------------------------------------------------------------------------
+# Quick validation
+# -----------------------------------------------------------------------------
+echo ""
+echo "==================================================================="
+echo " Validation"
+echo "==================================================================="
+
+# Check BWA alignment quality
+echo ""
+echo "--- Quick alignment test (first 100 pairs) ---"
+bwa mem -t 2 \
+ -R "@RG\tID:sample1\tSM:sample1\tPL:ILLUMINA\tLB:lib1" \
+ "$BWA_INDEX_DIR/chr_test.fa" \
+ <(gunzip -c sample1_R1.fq.gz | head -400) \
+ <(gunzip -c sample1_R2.fq.gz | head -400) \
+ 2>/dev/null \
+| samtools flagstat - 2>/dev/null
+
+echo ""
+
+# Check VCF REF alleles match reference
+echo "--- VCF REF allele validation ---"
+python3 - <<'PYEOF'
+# Confirms that the REF allele of every record in variants.vcf equals the base
+# at that 1-based position in chr_test.fa. Exits with status 1 on any
+# mismatch so that, under `set -euo pipefail`, the generator stops here
+# instead of going on to print its SUCCESS banner.
+import sys
+
+# Single-record FASTA: skip the header line, join the sequence lines
+with open("chr_test.fa") as f:
+    seq_lines = f.readlines()
+seq = "".join(l.strip() for l in seq_lines[1:])
+
+errors = 0
+total = 0
+with open("variants.vcf") as f:
+    for line in f:
+        if line.startswith("#"):
+            continue
+        fields = line.strip().split("\t")
+        pos = int(fields[1])
+        ref = fields[3]
+        actual = seq[pos - 1]  # VCF POS is 1-based
+        total += 1
+        if ref != actual:
+            print(f" MISMATCH at pos {pos}: VCF REF={ref}, actual={actual}")
+            errors += 1
+
+if errors == 0:
+    print(f" All {total} REF alleles match reference")
+else:
+    print(f" {errors}/{total} mismatches found!")
+    sys.exit(1)
+PYEOF
echo ""
echo "==================================================================="
-echo " SUCCESS! nf-atacseq test data generated."
+echo " SUCCESS! nf-atacseq test data generated (v2)."
echo "==================================================================="
echo "Total: $(du -sh . | cut -f1)"
echo ""
diff --git a/pipelines/nf-atacseq/tests/data/real_counts.tsv b/pipelines/nf-atacseq/tests/data/real_counts.tsv
new file mode 120000
index 0000000..08351b0
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/data/real_counts.tsv
@@ -0,0 +1 @@
+../../../../tests/shared_data/expected_counts.tsv
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/tests/data/real_test.bam b/pipelines/nf-atacseq/tests/data/real_test.bam
new file mode 120000
index 0000000..21f7b54
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/data/real_test.bam
@@ -0,0 +1 @@
+../../../../tests/shared_data/sample1.bam
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/tests/data/real_test.bam.bai b/pipelines/nf-atacseq/tests/data/real_test.bam.bai
new file mode 120000
index 0000000..0037730
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/data/real_test.bam.bai
@@ -0,0 +1 @@
+../../../../tests/shared_data/sample1.bam.bai
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/tests/data/real_wasp_data.json b/pipelines/nf-atacseq/tests/data/real_wasp_data.json
new file mode 120000
index 0000000..bd05953
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/data/real_wasp_data.json
@@ -0,0 +1 @@
+../../../../tests/shared_data/wasp_data.json
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/tests/data/regions.bed b/pipelines/nf-atacseq/tests/data/regions.bed
deleted file mode 120000
index da6c378..0000000
--- a/pipelines/nf-atacseq/tests/data/regions.bed
+++ /dev/null
@@ -1 +0,0 @@
-../../../../tests/shared_data/regions.bed
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/tests/data/regions.bed b/pipelines/nf-atacseq/tests/data/regions.bed
new file mode 100644
index 0000000..9b399eb
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/data/regions.bed
@@ -0,0 +1,15 @@
+chr_test 2668 3393 peak_1
+chr_test 4316 4816 peak_2
+chr_test 4939 5439 peak_3
+chr_test 5808 6315 peak_4
+chr_test 6486 7995 peak_5
+chr_test 8090 8679 peak_6
+chr_test 11052 11552 peak_7
+chr_test 12204 13167 peak_8
+chr_test 13187 13687 peak_9
+chr_test 13766 14266 peak_10
+chr_test 15071 15819 peak_11
+chr_test 15982 16482 peak_12
+chr_test 17274 18099 peak_13
+chr_test 18526 19026 peak_14
+chr_test 19230 19730 peak_15
diff --git a/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz b/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz
index 2d8e601..88debd7 100644
Binary files a/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz and b/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz differ
diff --git a/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz b/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz
index 76535bc..8767790 100644
Binary files a/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz and b/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz differ
diff --git a/pipelines/nf-atacseq/tests/data/samplesheet_test.csv b/pipelines/nf-atacseq/tests/data/samplesheet_test.csv
index d50c362..cf1884a 100644
--- a/pipelines/nf-atacseq/tests/data/samplesheet_test.csv
+++ b/pipelines/nf-atacseq/tests/data/samplesheet_test.csv
@@ -1,2 +1,2 @@
sample,fastq_1,fastq_2,sample_name
-test_sample1,${projectDir}/tests/data/sample1_R1.fq.gz,${projectDir}/tests/data/sample1_R2.fq.gz,sample1
+test_sample1,${projectDir}/tests/data/sample1_R1.fq.gz,${projectDir}/tests/data/sample1_R2.fq.gz,sample1
diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf b/pipelines/nf-atacseq/tests/data/variants.vcf
new file mode 100644
index 0000000..e3e67f2
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/data/variants.vcf
@@ -0,0 +1,38 @@
+##fileformat=VCFv4.2
+##source=WASP2_nf_atacseq_test_data_v2
+##reference=chr_test.fa
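+##contig=<ID=chr_test,length=20000>
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">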
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1
+chr_test 2918 snp001 A G 100 PASS DP=50 GT:DP 0|1:50
+chr_test 3037 snp002 A G 100 PASS DP=50 GT:DP 0|1:50
+chr_test 3077 snp003 C T 100 PASS DP=50 GT:DP 0|1:50
+chr_test 3143 snp004 G A 100 PASS DP=50 GT:DP 0|1:50
+chr_test 4566 snp005 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 5189 snp006 A G 100 PASS DP=50 GT:DP 0|1:50
+chr_test 6058 snp007 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 6065 snp008 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 6736 snp009 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 6756 snp010 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 7166 snp011 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 7300 snp012 G A 100 PASS DP=50 GT:DP 0|1:50
+chr_test 7745 snp013 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 8340 snp014 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 8429 snp015 C T 100 PASS DP=50 GT:DP 0|1:50
+chr_test 11302 snp016 G A 100 PASS DP=50 GT:DP 0|1:50
+chr_test 12454 snp017 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 12676 snp018 A G 100 PASS DP=50 GT:DP 0|1:50
+chr_test 12752 snp019 A G 100 PASS DP=50 GT:DP 0|1:50
+chr_test 12917 snp020 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 13437 snp021 A G 100 PASS DP=50 GT:DP 0|1:50
+chr_test 14016 snp022 G A 100 PASS DP=50 GT:DP 0|1:50
+chr_test 15321 snp023 C T 100 PASS DP=50 GT:DP 0|1:50
+chr_test 15569 snp024 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 16232 snp025 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 17524 snp026 C T 100 PASS DP=50 GT:DP 0|1:50
+chr_test 17593 snp027 C T 100 PASS DP=50 GT:DP 0|1:50
+chr_test 17849 snp028 T C 100 PASS DP=50 GT:DP 0|1:50
+chr_test 18776 snp029 C T 100 PASS DP=50 GT:DP 0|1:50
+chr_test 19480 snp030 T C 100 PASS DP=50 GT:DP 0|1:50
diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz b/pipelines/nf-atacseq/tests/data/variants.vcf.gz
deleted file mode 120000
index 380b7aa..0000000
--- a/pipelines/nf-atacseq/tests/data/variants.vcf.gz
+++ /dev/null
@@ -1 +0,0 @@
-../../../../tests/shared_data/variants.vcf.gz
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz b/pipelines/nf-atacseq/tests/data/variants.vcf.gz
new file mode 100644
index 0000000..d7a7f83
Binary files /dev/null and b/pipelines/nf-atacseq/tests/data/variants.vcf.gz differ
diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi b/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi
deleted file mode 120000
index 7a95bbe..0000000
--- a/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi
+++ /dev/null
@@ -1 +0,0 @@
-../../../../tests/shared_data/variants.vcf.gz.tbi
\ No newline at end of file
diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi b/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi
new file mode 100644
index 0000000..98e77de
Binary files /dev/null and b/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi differ
diff --git a/pipelines/nf-atacseq/tests/default.nf.test b/pipelines/nf-atacseq/tests/default.nf.test
new file mode 100644
index 0000000..bcba730
--- /dev/null
+++ b/pipelines/nf-atacseq/tests/default.nf.test
@@ -0,0 +1,28 @@
+// Pipeline-level nf-test for ../main.nf: runs the whole workflow with the
+// "-stub" option and snapshots the collected versions.yml.
+nextflow_pipeline {
+
+ name "Test default pipeline run"
+ script "../main.nf"
+
+ tag "pipeline"
+ tag "default"
+
+ test("Should run default stub-run mode") {
+
+ // Passed through to the nextflow command line for this test
+ options "-stub"
+
+ when {
+ params {
+ // Publish directly into nf-test's per-test output directory
+ outdir = "$outputDir"
+ }
+ }
+
+ then {
+ assertAll(
+ // The run must finish without error
+ { assert workflow.success },
+ // versions.yml must match the stored snapshot for this test
+ { assert snapshot(
+ path("${outputDir}/pipeline_info/versions.yml")
+ ).match() }
+ )
+ }
+ }
+}
diff --git a/pipelines/nf-atacseq/tests/main.nf.test b/pipelines/nf-atacseq/tests/main.nf.test
index 48607fd..d47aeb7 100644
--- a/pipelines/nf-atacseq/tests/main.nf.test
+++ b/pipelines/nf-atacseq/tests/main.nf.test
@@ -13,12 +13,17 @@ nextflow_pipeline {
when {
params {
- outdir = "$outputDir/results"
+ outdir = "$outputDir"
}
}
then {
- assert workflow.success
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ path("${outputDir}/pipeline_info/versions.yml")
+ ).match("stub_versions") }
+ )
}
}
diff --git a/pipelines/nf-atacseq/tests/modules/local/wasp2_filter_remapped.nf.test b/pipelines/nf-atacseq/tests/modules/local/wasp2_filter_remapped.nf.test
index 886c80a..e916334 100644
--- a/pipelines/nf-atacseq/tests/modules/local/wasp2_filter_remapped.nf.test
+++ b/pipelines/nf-atacseq/tests/modules/local/wasp2_filter_remapped.nf.test
@@ -7,8 +7,72 @@ nextflow_process {
tag "modules"
tag "modules_local"
tag "wasp2"
+ tag "wasp2_filter_remapped"
- test("Should filter remapped reads and produce WASP-corrected BAM - stub") {
+ // -------------------------------------------------------------------------
+ // Real test: requires WASP2 container with wasp2-map and samtools.
+ // Input files must be actual BAM/JSON from a prior make-reads + remap step.
+ // To generate these, run the pipeline test profile or use:
+ // cd tests/data && bash generate_test_data.sh
+ // then run make-reads + bwa remap manually on the outputs.
+ //
+ // Until real intermediate files are generated, this test is tagged
+ // 'wasp2_filter_remapped_real' so it can be selectively skipped.
+ // -------------------------------------------------------------------------
+
+ test("wasp2_filter_remapped - real") {
+
+ tag "wasp2_filter_remapped_real"
+
+ when {
+ process {
+ """
+ // Uses shared test data symlinked into tests/data/
+ // real_test.bam -> shared_data/sample1.bam (aligned reads)
+ // These serve as stand-ins; a full integration test requires
+ // actual make-reads output. The process will validate via
+ // wasp2-map filter-remapped + samtools index + samtools flagstat.
+ input[0] = [
+ [ id:'test_real', single_end:false ],
+ file("${projectDir}/tests/data/real_test.bam"),
+ file("${projectDir}/tests/data/real_test.bam.bai"),
+ file("${projectDir}/tests/data/real_test.bam"),
+ file("${projectDir}/tests/data/real_test.bam"),
+ file("${projectDir}/tests/data/real_wasp_data.json")
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ // Process completes successfully
+ { assert process.success },
+
+ // BAM output: filtered WASP-corrected BAM + index
+ { assert process.out.bam.size() == 1 },
+ { assert path(process.out.bam[0][1]).exists() },
+ { assert path(process.out.bam[0][1]).toFile().size() > 0 },
+ { assert path(process.out.bam[0][2]).exists() },
+ { assert path(process.out.bam[0][2]).toFile().size() > 0 },
+
+ // Stats output: samtools flagstat results
+ { assert process.out.stats.size() == 1 },
+ { assert path(process.out.stats[0][1]).exists() },
+ { assert path(process.out.stats[0][1]).text.contains("mapped") },
+
+ // versions.yml emitted with both tools
+ { assert process.out.versions.size() == 1 },
+ { assert path(process.out.versions[0]).text.contains("wasp2") },
+ { assert path(process.out.versions[0]).text.contains("samtools") },
+
+ // meta map preserved
+ { assert process.out.bam[0][0].id == 'test_real' }
+ )
+ }
+ }
+
+ test("wasp2_filter_remapped - stub") {
options "-stub-run"
@@ -28,10 +92,23 @@ nextflow_process {
}
then {
- assert process.success
- assert process.out.bam
- assert process.out.stats
- assert process.out.versions
+ assertAll(
+ { assert process.success },
+
+ // All output channels emitted
+ { assert process.out.bam.size() == 1 },
+ { assert process.out.stats.size() == 1 },
+ { assert process.out.versions.size() == 1 },
+
+ // Stub versions contain expected tools
+ { assert snapshot(process.out.versions).match("versions_stub") },
+
+ // Stats file exists with stub content
+ { assert path(process.out.stats[0][1]).exists() },
+
+ // meta map preserved through stub
+ { assert process.out.bam[0][0].id == 'test_sample' }
+ )
}
}
}
diff --git a/pipelines/nf-atacseq/tests/modules/local/wasp2_make_reads.nf.test b/pipelines/nf-atacseq/tests/modules/local/wasp2_make_reads.nf.test
index 1ceadaa..79e7969 100644
--- a/pipelines/nf-atacseq/tests/modules/local/wasp2_make_reads.nf.test
+++ b/pipelines/nf-atacseq/tests/modules/local/wasp2_make_reads.nf.test
@@ -7,14 +7,15 @@ nextflow_process {
tag "modules"
tag "modules_local"
tag "wasp2"
+ tag "wasp2_make_reads"
- test("Should generate swapped-allele reads for remapping - real") {
+ test("wasp2_make_reads - paired_end - real") {
when {
process {
"""
input[0] = [
- [ id:'test_real', single_end:false, sample_name:'SAMPLE1' ],
+ [ id:'test_real', single_end:false, sample_name:'sample1' ],
file("${projectDir}/tests/data/real_test.bam"),
file("${projectDir}/tests/data/real_test.bam.bai")
]
@@ -25,16 +26,42 @@ nextflow_process {
then {
assertAll(
+ // Process completes successfully
{ assert process.success },
+
+ // FASTQ outputs: paired-end swapped-allele reads
+ { assert process.out.fastq.size() == 1 },
+ { assert path(process.out.fastq[0][1]).exists() },
+ { assert path(process.out.fastq[0][2]).exists() },
+ { assert path(process.out.fastq[0][1]).toFile().size() > 0 },
+ { assert path(process.out.fastq[0][2]).toFile().size() > 0 },
+
+ // to_remap BAM: reads that overlap variants and need remapping
{ assert process.out.to_remap_bam.size() == 1 },
+ { assert path(process.out.to_remap_bam[0][1]).exists() },
+ { assert path(process.out.to_remap_bam[0][1]).toFile().size() > 0 },
+
+ // keep BAM: reads that don't overlap variants (pass through)
{ assert process.out.keep_bam.size() == 1 },
+ { assert path(process.out.keep_bam[0][1]).exists() },
+
+ // JSON: WASP data tracking file for filter-remapped step
{ assert process.out.json.size() == 1 },
- { assert process.out.versions.size() == 1 }
+ { assert path(process.out.json[0][1]).exists() },
+ { assert path(process.out.json[0][1]).text.length() > 2 },
+
+ // versions.yml emitted
+ { assert process.out.versions.size() == 1 },
+ { assert path(process.out.versions[0]).text.contains("wasp2") },
+
+ // meta map preserved
+ { assert process.out.fastq[0][0].id == 'test_real' },
+ { assert process.out.to_remap_bam[0][0].id == 'test_real' }
)
}
}
- test("Should generate swapped-allele reads for remapping - stub") {
+ test("wasp2_make_reads - paired_end - stub") {
options "-stub-run"
@@ -42,7 +69,7 @@ nextflow_process {
process {
"""
input[0] = [
- [ id:'test_sample', single_end:false, sample_name:'NA12878' ],
+ [ id:'test_sample', single_end:false, sample_name:'sample1' ],
file("${projectDir}/tests/data/stub_test.bam"),
file("${projectDir}/tests/data/stub_test.bam.bai")
]
@@ -52,12 +79,22 @@ nextflow_process {
}
then {
- assert process.success
- assert process.out.fastq
- assert process.out.to_remap_bam
- assert process.out.keep_bam
- assert process.out.json
- assert process.out.versions
+ assertAll(
+ { assert process.success },
+
+ // All output channels emitted
+ { assert process.out.fastq.size() == 1 },
+ { assert process.out.to_remap_bam.size() == 1 },
+ { assert process.out.keep_bam.size() == 1 },
+ { assert process.out.json.size() == 1 },
+ { assert process.out.versions.size() == 1 },
+
+ // Stub versions contain expected tool
+ { assert snapshot(process.out.versions).match("versions_stub") },
+
+ // meta map preserved through stub
+ { assert process.out.fastq[0][0].id == 'test_sample' }
+ )
}
}
}
diff --git a/pipelines/nf-atacseq/tests/nextflow.config b/pipelines/nf-atacseq/tests/nextflow.config
index 979614b..a87dc0f 100644
--- a/pipelines/nf-atacseq/tests/nextflow.config
+++ b/pipelines/nf-atacseq/tests/nextflow.config
@@ -3,7 +3,14 @@
* Loaded via nf-test.config configFile directive
*/
params {
- max_cpus = 2
- max_memory = '6.GB'
- max_time = '6.h'
+ modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
+ pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/'
+}
+
+process { // resourceLimits caps per-task requests; supersedes the params.max_* settings removed above
+ resourceLimits = [
+ cpus: 2,
+ memory: 6.GB,
+ time: 6.h
+ ]
}
diff --git a/pipelines/nf-atacseq/workflows/atacseq.nf b/pipelines/nf-atacseq/workflows/atacseq.nf
index caeff59..2972171 100644
--- a/pipelines/nf-atacseq/workflows/atacseq.nf
+++ b/pipelines/nf-atacseq/workflows/atacseq.nf
@@ -53,6 +53,7 @@ workflow ATACSEQ {
ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
ch_fasta = PREPARE_GENOME.out.fasta
+ ch_fasta_meta = ch_fasta.map { fasta -> [[id: 'genome'], fasta] }
ch_vcf = params.vcf ? Channel.fromPath(params.vcf, checkIfExists: true).collect() : Channel.empty()
//
@@ -60,7 +61,6 @@ workflow ATACSEQ {
//
if (!params.skip_fastqc) {
FASTQC ( ch_samplesheet )
- ch_versions = ch_versions.mix(FASTQC.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect { it[1] })
}
@@ -69,13 +69,12 @@ workflow ATACSEQ {
//
if (!params.skip_trimming) {
FASTP (
- ch_samplesheet,
- [], // adapter_fasta
+ ch_samplesheet.map { meta, reads -> [meta, reads, []] },
+ false, // discard_trimmed_pass
false, // save_trimmed_fail
false // save_merged
)
ch_reads = FASTP.out.reads
- ch_versions = ch_versions.mix(FASTP.out.versions.first())
ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json.collect { it[1] })
} else {
ch_reads = ch_samplesheet
@@ -94,8 +93,9 @@ workflow ATACSEQ {
if (params.aligner == 'bwa') {
FASTQ_ALIGN_BWA (
ch_reads,
- PREPARE_GENOME.out.bwa_index,
- ch_fasta
+ PREPARE_GENOME.out.bwa_index.map { index -> [[id: 'genome'], index] },
+ true,
+ ch_fasta_meta
)
ch_aligned_bam = FASTQ_ALIGN_BWA.out.bam
ch_aligned_bai = FASTQ_ALIGN_BWA.out.bai
@@ -107,6 +107,8 @@ workflow ATACSEQ {
FASTQ_ALIGN_BOWTIE2 (
ch_reads,
PREPARE_GENOME.out.bowtie2_index,
+ false,
+ true, // NOTE(review): next arg is plain ch_fasta while the BWA branch now passes ch_fasta_meta — confirm this subworkflow accepts a meta-less fasta/index
ch_fasta
)
ch_aligned_bam = FASTQ_ALIGN_BOWTIE2.out.bam
@@ -130,16 +132,16 @@ workflow ATACSEQ {
// SUBWORKFLOW: Mark duplicates with Picard and run BAM stats (optional)
//
ch_fasta_fai = PREPARE_GENOME.out.fasta_fai
+ ch_fasta_fai_meta = ch_fasta_fai.map { fai -> [[id: 'genome'], fai] }
if (!params.skip_dedup) {
BAM_MARKDUPLICATES_PICARD (
ch_bam_indexed.map { meta, bam, bai -> [meta, bam] },
- ch_fasta,
- ch_fasta_fai
+ ch_fasta_meta,
+ ch_fasta_fai_meta
)
ch_bam_dedup = BAM_MARKDUPLICATES_PICARD.out.bam
.join(BAM_MARKDUPLICATES_PICARD.out.bai, by: [0], failOnMismatch: true)
- ch_versions = ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions)
// Add deduplication stats to MultiQC
ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.metrics.collect { it[1] })
@@ -155,7 +157,7 @@ workflow ATACSEQ {
//
if (!params.skip_peak_calling) {
MACS2_CALLPEAK (
- ch_bam_dedup.map { meta, bam, bai -> [meta, bam] },
+ ch_bam_dedup.map { meta, bam, bai -> [meta, bam, []] },
params.macs_gsize
)
ch_peaks = MACS2_CALLPEAK.out.peak
@@ -238,16 +240,14 @@ workflow ATACSEQ {
//
ch_multiqc_report = Channel.empty()
if (!params.skip_multiqc) {
- ch_multiqc_config = Channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: false).ifEmpty([])
+ def multiqc_config_file = file("${projectDir}/assets/multiqc_config.yml")
MULTIQC (
- ch_multiqc_files.collect(),
- ch_multiqc_config.toList(),
- [], // extra_multiqc_config
- [] // multiqc_logo
+ ch_multiqc_files.collect().map { files ->
+ [ [id: 'multiqc'], files, multiqc_config_file.exists() ? [multiqc_config_file] : [], [], [], [] ]
+ }
)
- ch_multiqc_report = MULTIQC.out.report
- ch_versions = ch_versions.mix(MULTIQC.out.versions)
+ ch_multiqc_report = MULTIQC.out.report.map { meta, report -> report }
}
emit:
diff --git a/pipelines/nf-outrider/conf/test_local.config b/pipelines/nf-outrider/conf/test_local.config
index b988b75..7f22492 100644
--- a/pipelines/nf-outrider/conf/test_local.config
+++ b/pipelines/nf-outrider/conf/test_local.config
@@ -26,3 +26,12 @@ params {
outrider_min_samples = 3
outrider_min_count = 1 // Low threshold for simulated test data (~5x coverage)
}
+
+// Override base.config resourceLimits so local workstations don't OOM
+process {
+ resourceLimits = [
+ cpus: 2,
+ memory: 6.GB,
+ time: 1.h
+ ]
+}
diff --git a/pipelines/nf-outrider/nextflow.config b/pipelines/nf-outrider/nextflow.config
index a70e9d3..03e66cd 100644
--- a/pipelines/nf-outrider/nextflow.config
+++ b/pipelines/nf-outrider/nextflow.config
@@ -6,10 +6,17 @@
----------------------------------------------------------------------------------------
*/
+// Plugin configuration
+plugins {
+ id 'nf-validation@1.1.3'
+}
+
// Pipeline metadata
manifest {
name = 'wasp2/nf-outrider'
author = 'WASP2 Team'
+ homePage = 'https://github.com/mcvickerlab/WASP2'
+ doi = 'https://doi.org/10.1038/nmeth.3582'
description = 'WASP2 + OUTRIDER for aberrant expression and mono-allelic expression detection'
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
@@ -65,11 +72,6 @@ params {
// Processing options
skip_multiqc = false
- // Resource limits
- max_cpus = 16
- max_memory = '128.GB'
- max_time = '240.h'
-
// Institutional config support (nf-core compatible)
custom_config_base = 'https://raw.githubusercontent.com/nf-core/configs/master'
custom_config_version = 'master'
@@ -77,6 +79,7 @@ params {
// Generic options
help = false
version = false
+ validate_params = true
tracedir = "${params.outdir}/pipeline_info"
}
@@ -84,11 +87,11 @@ params {
includeConfig 'conf/base.config'
includeConfig 'conf/modules.config'
-// Load nf-core institutional configs
+// Load nf-core custom profiles from https://github.com/nf-core/configs
try {
- includeConfig "${params.custom_config_base}/nfcore_custom.config"
+ includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"
} catch (Exception e) {
- System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}")
+ System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}")
}
// Execution profiles
@@ -102,7 +105,6 @@ profiles {
conda.enabled = true
docker.enabled = false
singularity.enabled = false
- process.conda = "${projectDir}/../../environment.yml"
}
docker {
docker.enabled = true
@@ -158,15 +160,15 @@ profiles {
def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
timeline {
enabled = true
- file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
+ file = "${params.tracedir}/timeline_${trace_timestamp}.html"
}
report {
enabled = true
- file = "${params.tracedir}/execution_report_${trace_timestamp}.html"
+ file = "${params.tracedir}/report_${trace_timestamp}.html"
}
trace {
enabled = true
- file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
+ file = "${params.tracedir}/trace_${trace_timestamp}.txt"
}
dag {
enabled = true
@@ -196,32 +198,33 @@ process {
process.shell = ['/bin/bash', '-euo', 'pipefail']
// Function to ensure resources don't exceed limits
+// NOTE: primary resource capping is now handled by process.resourceLimits in conf/base.config.
+// check_max is retained only for backward compatibility with process label closures; it caps at params.max_* when set, else at 128.GB / 240.h / 16 CPUs.
def check_max(obj, type) {
if (type == 'memory') {
try {
- if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
- return params.max_memory as nextflow.util.MemoryUnit
- else
- return obj
- } catch (all) {
- println "WARNING: Invalid max_memory '${params.max_memory}', using default"
+ def max = (params.max_memory as nextflow.util.MemoryUnit) ?: 128.GB
+ if (obj.compareTo(max) == 1)
+ return max
+ else return obj
+ } catch (Exception e) {
+ log.warn "Invalid memory config: ${e.message}. Using ${obj}"
return obj
}
} else if (type == 'time') {
try {
- if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
- return params.max_time as nextflow.util.Duration
- else
- return obj
- } catch (all) {
- println "WARNING: Invalid max_time '${params.max_time}', using default"
+ def max = (params.max_time as nextflow.util.Duration) ?: 240.h
+ if (obj.compareTo(max) == 1)
+ return max
+ else return obj
+ } catch (Exception e) {
+ log.warn "Invalid time config: ${e.message}. Using ${obj}"
return obj
}
} else if (type == 'cpus') {
- try {
- return Math.min(obj, params.max_cpus as int)
- } catch (all) {
- println "WARNING: Invalid max_cpus '${params.max_cpus}', using default"
+ try { return Math.min(obj, (params.max_cpus ?: 16) as int) }
+ catch (Exception e) {
+ log.warn "Invalid CPU config: ${e.message}. Using ${obj}"
return obj
}
}
diff --git a/pipelines/nf-rnaseq/conf/test_local.config b/pipelines/nf-rnaseq/conf/test_local.config
index 5eaae60..e8063d6 100644
--- a/pipelines/nf-rnaseq/conf/test_local.config
+++ b/pipelines/nf-rnaseq/conf/test_local.config
@@ -25,3 +25,12 @@ params {
// Lower thresholds for small test dataset
min_count = 1
}
+
+// Override base.config resourceLimits so local workstations don't OOM
+process {
+ resourceLimits = [
+ cpus: 2,
+ memory: 4.GB,
+ time: 2.h
+ ]
+}
diff --git a/pipelines/nf-rnaseq/nextflow.config b/pipelines/nf-rnaseq/nextflow.config
index f26865d..5625145 100644
--- a/pipelines/nf-rnaseq/nextflow.config
+++ b/pipelines/nf-rnaseq/nextflow.config
@@ -4,6 +4,22 @@
========================================================================================
*/
+plugins {
+ id 'nf-validation@1.1.3'
+}
+
+// Pipeline manifest
+manifest {
+ name = 'wasp2/nf-rnaseq'
+ author = 'WASP2 Team'
+ homePage = 'https://github.com/mcvickerlab/WASP2'
+ doi = 'https://doi.org/10.1038/nmeth.3582'
+ description = 'RNA-seq Allele-Specific Expression (ASE) pipeline with WASP2'
+ mainScript = 'main.nf'
+ nextflowVersion = '!>=23.04.0' // NOTE(review): process.resourceLimits is documented as new in Nextflow 24.04 — confirm this minimum is still intended
+ version = '1.0.0'
+}
+
// Global default params
params {
// Pipeline options
@@ -39,10 +55,8 @@ params {
// ML Output options
output_format = null // ML output formats: zarr,parquet,anndata (comma-separated)
- // Resource limits
- max_cpus = 16
- max_memory = '128.GB'
- max_time = '240.h'
+ // Validation
+ validate_params = true
// Trace directory
tracedir = "${params.outdir}/pipeline_info"
@@ -56,11 +70,11 @@ params {
includeConfig 'conf/base.config'
includeConfig 'conf/modules.config'
-// Load nf-core institutional configs
+// Load nf-core custom profiles from https://github.com/nf-core/configs
try {
- includeConfig "${params.custom_config_base}/nfcore_custom.config"
+ includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"
} catch (Exception e) {
- System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}")
+ System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}")
}
profiles {
@@ -74,7 +88,6 @@ profiles {
conda.enabled = true
docker.enabled = false
singularity.enabled = false
- process.conda = "${projectDir}/../../environment.yml"
}
docker {
@@ -84,6 +97,13 @@ profiles {
singularity.enabled = false
}
+ arm {
+ // Apple Silicon / ARM64 compatibility — forces linux/amd64 containers
+ // via Rosetta 2 emulation. Combine with a container profile:
+ // nextflow run main.nf -profile docker,arm [options]
+ includeConfig 'conf/arm.config'
+ }
+
singularity {
singularity.enabled = true
singularity.autoMounts = true
@@ -136,37 +156,22 @@ profiles {
}
}
-// Container overrides
-def wasp2_container = 'ghcr.io/mcvickerlab/wasp2:1.4.0'
-def star_container = 'community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4'
-process {
- withName: 'WASP2_UNIFIED_MAKE_READS|WASP2_FILTER_REMAPPED|WASP2_COUNT_ALLELES|WASP2_ANALYZE_IMBALANCE|WASP2_ML_OUTPUT' {
- container = wasp2_container
- }
- withName: 'STAR_ALIGN.*' {
- container = star_container
- }
-}
-
-// Capture exit codes from upstream processes when piping
-process.shell = ['/bin/bash', '-euo', 'pipefail']
-
// Execution reports
def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')
timeline {
enabled = true
- file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
+ file = "${params.tracedir}/timeline_${trace_timestamp}.html"
}
report {
enabled = true
- file = "${params.tracedir}/execution_report_${trace_timestamp}.html"
+ file = "${params.tracedir}/report_${trace_timestamp}.html"
}
trace {
enabled = true
- file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
+ file = "${params.tracedir}/trace_${trace_timestamp}.txt"
}
dag {
@@ -181,44 +186,49 @@ env {
R_ENVIRON_USER = "/.Renviron"
}
-// Pipeline manifest
-manifest {
- name = 'wasp2/nf-rnaseq'
- author = 'WASP2 Team'
- homePage = 'https://github.com/mcvickerlab/WASP2'
- description = 'RNA-seq Allele-Specific Expression (ASE) pipeline with WASP2'
- mainScript = 'main.nf'
- nextflowVersion = '!>=23.04.0'
- version = '1.0.0'
+// Container overrides
+def wasp2_container = 'ghcr.io/mcvickerlab/wasp2:1.4.0'
+def star_container = 'community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4'
+process {
+ withName: 'WASP2_UNIFIED_MAKE_READS|WASP2_FILTER_REMAPPED|WASP2_COUNT_ALLELES|WASP2_ANALYZE_IMBALANCE|WASP2_ML_OUTPUT' {
+ container = wasp2_container
+ }
+ withName: 'STAR_ALIGN.*' {
+ container = star_container
+ }
}
-// Function to check max resource limits
+// Capture exit codes from upstream processes when piping
+process.shell = ['/bin/bash', '-euo', 'pipefail']
+
+// Legacy helper: caps a requested resource at params.max_* (falling back to 128.GB / 240.h / 16 CPUs when unset).
+// Primary resource capping is now handled by process.resourceLimits in conf/base.config;
+// check_max is retained only for backward compatibility with process label closures.
def check_max(obj, type) {
if (type == 'memory') {
try {
- if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
- return params.max_memory as nextflow.util.MemoryUnit
- else
- return obj
- } catch (all) {
- println " ### ERROR ### Max memory '${params.max_memory}' is not valid!"
+ def max = (params.max_memory as nextflow.util.MemoryUnit) ?: 128.GB
+ if (obj.compareTo(max) == 1)
+ return max
+ else return obj
+ } catch (Exception e) {
+ log.warn "Invalid memory config: ${e.message}. Using ${obj}"
return obj
}
} else if (type == 'time') {
try {
- if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
- return params.max_time as nextflow.util.Duration
- else
- return obj
- } catch (all) {
- println " ### ERROR ### Max time '${params.max_time}' is not valid!"
+ def max = (params.max_time as nextflow.util.Duration) ?: 240.h
+ if (obj.compareTo(max) == 1)
+ return max
+ else return obj
+ } catch (Exception e) {
+ log.warn "Invalid time config: ${e.message}. Using ${obj}"
return obj
}
} else if (type == 'cpus') {
- try {
- return Math.min(obj, params.max_cpus as int)
- } catch (all) {
- println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid!"
+ try { return Math.min(obj, (params.max_cpus ?: 16) as int) }
+ catch (Exception e) {
+ log.warn "Invalid CPU config: ${e.message}. Using ${obj}"
return obj
}
}
diff --git a/pipelines/nf-scatac/conf/test_local.config b/pipelines/nf-scatac/conf/test_local.config
index e6bbcd9..a4f54cb 100644
--- a/pipelines/nf-scatac/conf/test_local.config
+++ b/pipelines/nf-scatac/conf/test_local.config
@@ -23,3 +23,12 @@ params {
skip_anndata = false
create_zarr = false
}
+
+// Override base.config resourceLimits so local workstations don't OOM
+process {
+ resourceLimits = [
+ cpus: 2,
+ memory: 6.GB,
+ time: 1.h
+ ]
+}
diff --git a/pipelines/nf-scatac/nextflow.config b/pipelines/nf-scatac/nextflow.config
index 679d953..0a5352e 100644
--- a/pipelines/nf-scatac/nextflow.config
+++ b/pipelines/nf-scatac/nextflow.config
@@ -4,9 +4,16 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
+// Plugin configuration
+plugins {
+ id 'nf-validation@1.1.3'
+}
+
manifest {
name = 'wasp2/nf-scatac'
author = 'WASP2 Team'
+ homePage = 'https://github.com/mcvickerlab/WASP2'
+ doi = 'https://doi.org/10.1038/nmeth.3582'
description = 'Single-Cell ATAC-seq Allelic Imbalance Pipeline'
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
@@ -36,11 +43,6 @@ params {
// ML Output options
output_format = null // ML output formats: zarr,parquet,anndata (comma-separated)
- // Resource limits
- max_cpus = 16
- max_memory = '128.GB'
- max_time = '240.h'
-
// Institutional config support (nf-core compatible)
custom_config_base = 'https://raw.githubusercontent.com/nf-core/configs/master'
custom_config_version = 'master'
@@ -55,11 +57,11 @@ params {
includeConfig 'conf/base.config'
includeConfig 'conf/modules.config'
-// Load nf-core institutional configs
+// Load nf-core custom profiles from https://github.com/nf-core/configs
try {
- includeConfig "${params.custom_config_base}/nfcore_custom.config"
+ includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"
} catch (Exception e) {
- System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}")
+ System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}")
}
// Container version - override all WASP2/SCATAC processes to use 1.4.0
@@ -77,6 +79,11 @@ profiles {
process.beforeScript = 'echo $HOSTNAME'
cleanup = false
}
+ conda {
+ conda.enabled = true
+ docker.enabled = false
+ singularity.enabled = false
+ }
docker {
docker.enabled = true
conda.enabled = false
@@ -92,12 +99,6 @@ profiles {
conda.enabled = false
docker.enabled = false
}
- conda {
- conda.enabled = true
- docker.enabled = false
- singularity.enabled = false
- process.conda = "${projectDir}/../../environment.yml"
- }
test {
includeConfig 'conf/test.config'
}
@@ -145,32 +146,43 @@ dag {
file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
}
+// Export these variables to prevent local Python/R libs from conflicting with those in the container/conda environment
+env {
+ PYTHONNOUSERSITE = 1
+ R_PROFILE_USER = "/.Rprofile"
+ R_ENVIRON_USER = "/.Renviron"
+}
+
process.shell = ['/bin/bash', '-euo', 'pipefail']
-// Resource limit checker with logging for configuration errors
+// Legacy helper: caps a requested resource at params.max_* (falling back to 128.GB / 240.h / 16 CPUs when unset).
+// Primary resource capping is now handled by process.resourceLimits in conf/base.config;
+// check_max is retained only for backward compatibility with process label closures.
def check_max(obj, type) {
if (type == 'memory') {
try {
- if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
- return params.max_memory as nextflow.util.MemoryUnit
+ def max = (params.max_memory as nextflow.util.MemoryUnit) ?: 128.GB
+ if (obj.compareTo(max) == 1)
+ return max
else return obj
} catch (Exception e) {
- log.warn "Invalid memory config (${obj}, max=${params.max_memory}): ${e.message}. Using ${obj}"
+ log.warn "Invalid memory config: ${e.message}. Using ${obj}"
return obj
}
} else if (type == 'time') {
try {
- if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
- return params.max_time as nextflow.util.Duration
+ def max = (params.max_time as nextflow.util.Duration) ?: 240.h
+ if (obj.compareTo(max) == 1)
+ return max
else return obj
} catch (Exception e) {
- log.warn "Invalid time config (${obj}, max=${params.max_time}): ${e.message}. Using ${obj}"
+ log.warn "Invalid time config: ${e.message}. Using ${obj}"
return obj
}
} else if (type == 'cpus') {
- try { return Math.min(obj, params.max_cpus as int) }
+ try { return Math.min(obj, (params.max_cpus ?: 16) as int) }
catch (Exception e) {
- log.warn "Invalid CPU config (${obj}, max=${params.max_cpus}): ${e.message}. Using ${obj}"
+ log.warn "Invalid CPU config: ${e.message}. Using ${obj}"
return obj
}
}