diff --git a/pipelines/nf-atacseq/.github/.dockstore.yml b/pipelines/nf-atacseq/.github/.dockstore.yml new file mode 100644 index 0000000..8c3f31e --- /dev/null +++ b/pipelines/nf-atacseq/.github/.dockstore.yml @@ -0,0 +1,5 @@ +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /pipelines/nf-atacseq/main.nf + publish: true diff --git a/pipelines/nf-atacseq/.github/CONTRIBUTING.md b/pipelines/nf-atacseq/.github/CONTRIBUTING.md new file mode 100644 index 0000000..a5995f2 --- /dev/null +++ b/pipelines/nf-atacseq/.github/CONTRIBUTING.md @@ -0,0 +1,29 @@ +# Contributing to nf-atacseq + +## Getting help + +For questions, bugs, or feature requests, please open an issue on [GitHub](https://github.com/mcvickerlab/WASP2/issues). + +## Development workflow + +1. Fork the repository +2. Create a feature branch from `dev` +3. Make your changes +4. Run `nf-core pipelines lint` to verify compliance +5. Submit a pull request to `dev` + +## Code style + +- Follow nf-core module conventions for new modules +- Use `tuple val(meta), path(...)` for all process inputs/outputs +- Include `stub:` blocks in all processes +- Add `versions.yml` output to all processes +- Write `meta.yml` documentation for new modules + +## Testing + +Run the test profile before submitting changes: + +```bash +nextflow run main.nf -profile test,docker --outdir test_results +``` diff --git a/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/config.yml b/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..77ae53a --- /dev/null +++ b/pipelines/nf-atacseq/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: WASP2 Documentation + url: https://wasp2.readthedocs.io + about: Check the documentation for usage help diff --git a/pipelines/nf-atacseq/.github/actions/get-shards/action.yml b/pipelines/nf-atacseq/.github/actions/get-shards/action.yml new file mode 100644 index 0000000..08ec9a5 --- /dev/null +++ 
b/pipelines/nf-atacseq/.github/actions/get-shards/action.yml
@@ -0,0 +1,22 @@
+name: Get test shards
+description: Get nf-test shards for parallel execution
+inputs:
+  test-path:
+    description: Path to test files
+    required: false
+    default: tests
+outputs:
+  shards:
+    description: JSON array of test shards
+    value: ${{ steps.get-shards.outputs.shards }}
+runs:
+  using: composite
+  steps:
+    - id: get-shards
+      shell: bash
+      # Hand the input over via the environment so it is never spliced into the script text.
+      env:
+        TEST_PATH: ${{ inputs.test-path }}
+      run: |
+        shards=$(find "$TEST_PATH" -name "*.nf.test" | jq -R -s -c 'split("\n") | map(select(. != ""))')
+        echo "shards=$shards" >> "$GITHUB_OUTPUT"
diff --git a/pipelines/nf-atacseq/.github/actions/nf-test/action.yml b/pipelines/nf-atacseq/.github/actions/nf-test/action.yml
new file mode 100644
index 0000000..8187d63
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/actions/nf-test/action.yml
@@ -0,0 +1,19 @@
+name: Run nf-test
+description: Run nf-test for a specific test file
+inputs:
+  test-file:
+    description: Path to the nf-test file
+    required: true
+  profile:
+    description: Nextflow profile to use
+    required: false
+    default: test,docker
+runs:
+  using: composite
+  steps:
+    - shell: bash
+      # Inputs travel through env vars and are quoted, so paths with spaces or shell metacharacters stay inert.
+      env:
+        TEST_FILE: ${{ inputs.test-file }}
+        PROFILE: ${{ inputs.profile }}
+      run: nf-test test "$TEST_FILE" --profile "$PROFILE"
diff --git a/pipelines/nf-atacseq/.github/workflows/branch.yml b/pipelines/nf-atacseq/.github/workflows/branch.yml
new file mode 100644
index 0000000..27d29aa
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/workflows/branch.yml
@@ -0,0 +1,16 @@
+name: nf-core branch protection
+on:
+  pull_request_target:
+    branches: [master, main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check PRs
+        if: github.repository == 'mcvickerlab/WASP2'
+        # The head repo name comes from the pull request event; keep it out of the script text.
+        env:
+          HEAD_REPO: ${{ github.event.pull_request.head.repo.full_name }}
+        run: |
+          { [[ "$HEAD_REPO" == "mcvickerlab/WASP2" ]] && [[ "$GITHUB_HEAD_REF" == "dev" ]]; } || echo "This PR is not from dev. It will be reviewed manually."
diff --git a/pipelines/nf-atacseq/.github/workflows/linting_comment.yml b/pipelines/nf-atacseq/.github/workflows/linting_comment.yml
new file mode 100644
index 0000000..86e12a3
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/workflows/linting_comment.yml
@@ -0,0 +1,13 @@
+name: nf-core linting comment
+on:
+  workflow_run:
+    workflows: ["nf-core linting"]
+    types: [completed]
+
+jobs:
+  linting-comment:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Post linting comment
+        run: echo "Linting workflow completed"
diff --git a/pipelines/nf-atacseq/.github/workflows/nf-test.yml b/pipelines/nf-atacseq/.github/workflows/nf-test.yml
new file mode 100644
index 0000000..fa7474d
--- /dev/null
+++ b/pipelines/nf-atacseq/.github/workflows/nf-test.yml
@@ -0,0 +1,42 @@
+name: Run nf-test
+on:
+  pull_request:
+    branches: [dev, master]
+  release:
+    types: [published]
+  merge_group:
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  NXF_ANSI_LOG: false
+  NFT_VER: "0.9.2"
+
+jobs:
+  nf-test:
+    name: "nf-test | NXF ${{ matrix.NXF_VER }}"
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        NXF_VER:
+          # NOTE(review): vendored modules use eval()/topic outputs; confirm 23.04.0 can still parse them.
+          - "23.04.0"
+          - "latest-everything"
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: nf-core/setup-nextflow@v2
+        with:
+          version: "${{ matrix.NXF_VER }}"
+
+      # Install the release pinned in NFT_VER instead of whatever the installer currently defaults to.
+      - name: Install nf-test
+        run: |
+          wget -qO- https://code.askimed.com/install/nf-test | bash -s "$NFT_VER"
+          sudo mv nf-test /usr/local/bin/
+
+      - name: Run nf-test
+        run: nf-test test --profile test,docker
diff --git a/pipelines/nf-atacseq/README.md b/pipelines/nf-atacseq/README.md
index a0f718a..64d62d0 100644
--- a/pipelines/nf-atacseq/README.md
+++ b/pipelines/nf-atacseq/README.md
@@ -1,6 +1,6 @@
 # nf-atacseq
 
-[![nf-atacseq Tests](https://github.com/your-org/WASP2/actions/workflows/nf-atacseq-tests.yml/badge.svg)](https://github.com/your-org/WASP2/actions/workflows/nf-atacseq-tests.yml)
+[![nf-atacseq 
CI](https://github.com/mcvickerlab/WASP2/actions/workflows/ci.yml/badge.svg)](https://github.com/mcvickerlab/WASP2/actions/workflows/ci.yml) ATAC-seq Allelic Imbalance (AI) Pipeline with WASP2 mapping bias correction. @@ -60,6 +60,12 @@ nextflow run pipelines/nf-atacseq -profile test,docker nextflow run pipelines/nf-atacseq -profile test,docker -stub-run # Workflow validation only ``` +### Local Test (chr21 data) + +```bash +nextflow run pipelines/nf-atacseq -profile test_local,docker +``` + ## Samplesheet Format ```csv @@ -112,6 +118,19 @@ results/ See [docs/output.md](docs/output.md) for detailed output descriptions. +## Validation with chr21 1000 Genomes Data + +Run a quick validation using chr21 data from the 1000 Genomes Project: + +```bash +# Uses pre-configured chr21 test data (NA12878, HG00096) +nextflow run pipelines/nf-atacseq -profile test_local,docker + +# Expect: ~2-5 min runtime, allele counts at chr21 het SNPs +``` + +This profile uses downsampled chr21 FASTQ reads and a chr21-only VCF, providing a fast end-to-end validation without downloading full genomes. + ## Testing ### Run nf-test Suite @@ -148,6 +167,7 @@ nextflow run . -profile test -stub-run | `singularity` | Run with Singularity containers | | `conda` | Run with Conda environments | | `test` | Minimal test configuration | +| `test_local` | Local test with chr21 1000 Genomes data | | `test_full` | Full test with real data | ## Pipeline DAG @@ -168,7 +188,7 @@ FASTQ → FastQC → Fastp → BWA/Bowtie2 → Samtools → Picard → MACS2 → If you use nf-atacseq, please cite: -- **WASP2**: [GitHub Repository](https://github.com/your-org/WASP2) +- **WASP2**: [GitHub Repository](https://github.com/mcvickerlab/WASP2) - **Nextflow**: Di Tommaso, P., et al. (2017). Nextflow enables reproducible computational workflows. *Nature Biotechnology*. ## License @@ -177,5 +197,5 @@ MIT License - see [LICENSE](../../LICENSE) for details. 
## Support -- [Issues](https://github.com/your-org/WASP2/issues) +- [Issues](https://github.com/mcvickerlab/WASP2/issues) - [Documentation](docs/) diff --git a/pipelines/nf-atacseq/assets/email_template.html b/pipelines/nf-atacseq/assets/email_template.html new file mode 100644 index 0000000..161c1e4 --- /dev/null +++ b/pipelines/nf-atacseq/assets/email_template.html @@ -0,0 +1,48 @@ + + + + + + ${workflow.manifest.name} Pipeline Report + + +
+ + + +

${workflow.manifest.name} v${workflow.manifest.version}

+

Run Name: $runName

+ +<% if (!success) { %> +
+

⚠️ ${workflow.manifest.name} execution completed unsuccessfully!

+

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

+

The full error message was:

+
${errorReport}
+
+<% } else { %> +
+${workflow.manifest.name} execution completed successfully! +
+<% } %> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
+ +

Pipeline Configuration:

+ + + + +<% if (summary.size() > 0) { %> +<% for (e in summary) { %> +<% } %> +<% } %> +
Nextflow Version$nextflowVersion
Run Name$runName
Session ID$sessionId
${e.key}${e.value}
+ +

--- ${workflow.manifest.name}

+ +
+ + diff --git a/pipelines/nf-atacseq/assets/email_template.txt b/pipelines/nf-atacseq/assets/email_template.txt new file mode 100644 index 0000000..17cb4f4 --- /dev/null +++ b/pipelines/nf-atacseq/assets/email_template.txt @@ -0,0 +1,34 @@ +---------------------------------------------------- + ${workflow.manifest.name} v${workflow.manifest.version} +---------------------------------------------------- +Run Name: $runName + +<% if (success) { + out << "## ${workflow.manifest.name} execution completed successfully! ##" +} else { + out << """#################################################### +## ${workflow.manifest.name} execution completed unsuccessfully! ## +#################################################### +The exit status of the task that caused the workflow execution to fail was: $exitStatus. +The full error message was: + +${errorReport} +""" +} %> + + +The workflow was completed at $dateComplete (duration: $duration) + +The command used to launch the workflow was as follows: + + $commandLine + + + +Pipeline Configuration: +----------------------- +<% for (e in summary) { %> ${e.key.padRight(30)}: ${e.value} +<% } %> + +-- +${workflow.manifest.name} diff --git a/pipelines/nf-atacseq/assets/multiqc_config.yml b/pipelines/nf-atacseq/assets/multiqc_config.yml index ecb2e54..2b9c9d4 100644 --- a/pipelines/nf-atacseq/assets/multiqc_config.yml +++ b/pipelines/nf-atacseq/assets/multiqc_config.yml @@ -1,14 +1,17 @@ # MultiQC configuration for nf-atacseq report_comment: > - This report has been generated by the nf-atacseq - pipeline. It summarizes QC metrics from ATAC-seq allelic imbalance analysis with WASP2. + This report has been generated by the nf-core/nf-atacseq + analysis pipeline. For information about how to interpret these results, please see the + documentation. 
report_section_order: - software_versions: - order: -1000 nf-atacseq-methods-description: + order: -1000 + software_versions: order: -1001 + wasp2-nf-atacseq-summary: + order: -1002 export_plots: true diff --git a/pipelines/nf-atacseq/assets/nf-core-nf-atacseq_logo_light.png b/pipelines/nf-atacseq/assets/nf-core-nf-atacseq_logo_light.png new file mode 100644 index 0000000..11ceb4e Binary files /dev/null and b/pipelines/nf-atacseq/assets/nf-core-nf-atacseq_logo_light.png differ diff --git a/pipelines/nf-atacseq/assets/nf-core-pipeline_logo_light.png b/pipelines/nf-atacseq/assets/nf-core-pipeline_logo_light.png new file mode 100644 index 0000000..476ce66 Binary files /dev/null and b/pipelines/nf-atacseq/assets/nf-core-pipeline_logo_light.png differ diff --git a/pipelines/nf-atacseq/conf/modules.config b/pipelines/nf-atacseq/conf/modules.config index d5fb2ba..f05ba6a 100644 --- a/pipelines/nf-atacseq/conf/modules.config +++ b/pipelines/nf-atacseq/conf/modules.config @@ -45,7 +45,6 @@ process { // withName: 'BWA_MEM' { ext.args = '-M' - ext.args2 = '-bhS' publishDir = [ path: { "${params.outdir}/alignment" }, mode: params.publish_dir_mode, @@ -63,6 +62,10 @@ process { ] } + withName: 'SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + } + withName: 'SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/alignment" }, diff --git a/pipelines/nf-atacseq/conf/test_local.config b/pipelines/nf-atacseq/conf/test_local.config index d201f80..b462e8e 100644 --- a/pipelines/nf-atacseq/conf/test_local.config +++ b/pipelines/nf-atacseq/conf/test_local.config @@ -37,3 +37,12 @@ params { wasp_min_count = 1 wasp_pseudocount = 1 } + +// Override base.config resourceLimits so local workstations don't OOM +process { + resourceLimits = [ + cpus: 2, + memory: 6.GB, + time: 1.h + ] +} diff --git a/pipelines/nf-atacseq/docs/images/nf-core-nf-atacseq_logo_dark.png b/pipelines/nf-atacseq/docs/images/nf-core-nf-atacseq_logo_dark.png new file mode 100644 index 0000000..a1910c4 Binary 
files /dev/null and b/pipelines/nf-atacseq/docs/images/nf-core-nf-atacseq_logo_dark.png differ diff --git a/pipelines/nf-atacseq/docs/images/nf-core-nf-atacseq_logo_light.png b/pipelines/nf-atacseq/docs/images/nf-core-nf-atacseq_logo_light.png new file mode 100644 index 0000000..11ceb4e Binary files /dev/null and b/pipelines/nf-atacseq/docs/images/nf-core-nf-atacseq_logo_light.png differ diff --git a/pipelines/nf-atacseq/environment.yml b/pipelines/nf-atacseq/environment.yml new file mode 100644 index 0000000..8ba8147 --- /dev/null +++ b/pipelines/nf-atacseq/environment.yml @@ -0,0 +1,23 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +# Conda environment for nf-atacseq local Python/WASP2 modules +# (wasp2_make_reads, wasp2_filter_remapped, wasp2_count_variants, wasp2_find_imbalance) +channels: + - conda-forge + - bioconda +dependencies: + - python>=3.10 + - numpy>=1.21,<2.0 + - pandas>=2.0 + - polars>=0.19 + - scipy>=1.10 + - pysam + - pybedtools + - samtools + - bcftools + - bedtools + - typer + - rich + - pip + - pip: + - wasp2==1.2.1 diff --git a/pipelines/nf-atacseq/modules.json b/pipelines/nf-atacseq/modules.json index 0d78430..9efc78d 100644 --- a/pipelines/nf-atacseq/modules.json +++ b/pipelines/nf-atacseq/modules.json @@ -1,5 +1,126 @@ { "name": "wasp2/nf-atacseq", - "homePage": "", - "repos": {} + "homePage": "https://github.com/mcvickerlab/WASP2", + "repos": { + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "bowtie2/align": { + "branch": "master", + "git_sha": "92b8df948fd8cdb223e051f5f5e414818a073ee0", + "installed_by": ["fastq_align_bowtie2"] + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf", + "installed_by": ["modules"] + }, + "bwa/index": { + "branch": "master", + "git_sha": "966ba9887e2b04d89d64db06c01508873bde13b1", + "installed_by": ["modules"] + }, + "bwa/mem": { + "branch": 
"master", + "git_sha": "707241c72951f24fd89982c4c80c5983a4c437ef", + "installed_by": ["fastq_align_bwa"] + }, + "fastp": { + "branch": "master", + "git_sha": "a331ecfd1aa48b2b2298aab23bb4516c800e410b", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "3009f27c4e4b6e99da4eeebe82799e13924a4a1f", + "installed_by": ["modules"] + }, + "macs2/callpeak": { + "branch": "master", + "git_sha": "fe0ec4b67b1abd71ff9b5ece41fd5a4d8abadad5", + "installed_by": ["modules"] + }, + "multiqc": { + "branch": "master", + "git_sha": "2c73cc8fa92cf48de3da0b643fdf357a8a290b36", + "installed_by": ["modules"] + }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "a631e12055f6c23ba2c942d3902b3ed1b9eed859", + "installed_by": ["bam_markduplicates_picard"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "b2e78932ef01165fd85829513eaca29eff8e640a", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", + "installed_by": ["bam_stats_samtools"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", + "installed_by": ["bam_stats_samtools"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", + "installed_by": [ + "bam_markduplicates_picard", + "bam_sort_stats_samtools", + "fastq_align_bowtie2", + "fastq_align_bwa" + ] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "5cb9a8694da0a0e550921636bb60bc8c56445fd7", + "installed_by": ["bam_sort_stats_samtools"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "fe93fde0845f907fc91ad7cc7d797930408824df", + "installed_by": ["bam_stats_samtools"] + } + } + }, + "subworkflows": { + "nf-core": { + "bam_markduplicates_picard": { + "branch": "master", + "git_sha": "a631e12055f6c23ba2c942d3902b3ed1b9eed859", + "installed_by": ["subworkflows"] + }, + "bam_sort_stats_samtools": { + "branch": 
"master", + "git_sha": "7ac6cbe7c17c2dad685da7f70496c8f48ea48687", + "installed_by": ["fastq_align_bowtie2", "fastq_align_bwa"] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "7ac6cbe7c17c2dad685da7f70496c8f48ea48687", + "installed_by": [ + "bam_markduplicates_picard", + "bam_sort_stats_samtools", + "fastq_align_bowtie2", + "fastq_align_bwa" + ] + }, + "fastq_align_bowtie2": { + "branch": "master", + "git_sha": "9afa0584136287aa20fc18296f45f103c0c4e69a", + "installed_by": ["subworkflows"] + }, + "fastq_align_bwa": { + "branch": "master", + "git_sha": "9afa0584136287aa20fc18296f45f103c0c4e69a", + "installed_by": ["subworkflows"] + } + } + } + } + } } diff --git a/pipelines/nf-atacseq/modules/local/wasp2_make_reads/main.nf b/pipelines/nf-atacseq/modules/local/wasp2_make_reads/main.nf index 7d0245e..998477e 100644 --- a/pipelines/nf-atacseq/modules/local/wasp2_make_reads/main.nf +++ b/pipelines/nf-atacseq/modules/local/wasp2_make_reads/main.nf @@ -65,6 +65,12 @@ process WASP2_MAKE_READS { [ -f "\$f" ] && [ "\$f" != "${prefix}_keep.bam" ] && mv "\$f" ${prefix}_keep.bam && break done + # Update JSON to reflect renamed files + bam_prefix=\$(basename ${bam} .bam) + if [ "\$bam_prefix" != "${prefix}" ]; then + sed -i "s|\$bam_prefix|${prefix}|g" ${prefix}_wasp_data_files.json + fi + # Validate outputs for expected in ${prefix}_remap_r1.fq.gz ${prefix}_remap_r2.fq.gz ${prefix}_to_remap.bam ${prefix}_keep.bam ${prefix}_wasp_data_files.json; do if [ ! 
-f "\$expected" ]; then diff --git a/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/environment.yml b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 0000000..066ff52 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/bowtie2 + - bioconda::bowtie2=2.5.4 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 + - conda-forge::pigz=2.8 diff --git a/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/main.nf b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/main.nf index f2a1dd9..0a8c1a0 100644 --- a/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/main.nf @@ -2,62 +2,105 @@ process BOWTIE2_ALIGN { tag "$meta.id" label 'process_high' - conda "bioconda::bowtie2=2.5.2 bioconda::samtools=1.19" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6' : - 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b41b403e81883126c3227fc45840015538e8e2212f13abc9ae84e4b98891d51c/data' : + 'community.wave.seqera.io/library/bowtie2_htslib_samtools_pigz:edeb13799090a2a6' }" input: - tuple val(meta), path(reads) - path index - path fasta + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) val save_unaligned val sort_bam output: - tuple val(meta), path("*.bam"), emit: aligned - tuple val(meta), path("*.log"), emit: log - path "versions.yml", emit: versions + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true + tuple val("${task.process}"), val('bowtie2'), eval("bowtie2 --version 2>&1 | sed -n '1s/.*bowtie2-align-s version //p'"), emit: versions_bowtie2, topic: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions + tuple val("${task.process}"), val('pigz'), eval("pigz --version 2>&1 | sed 's/pigz //'"), emit: versions_pigz, topic: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" def prefix = task.ext.prefix ?: "${meta.id}" + def rg = args.contains("--rg-id") ? 
"" : "--rg-id ${prefix} --rg SM:${prefix}" - def read_inputs = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" - def samtools_command = sort_bam ? "samtools sort -@ ${task.cpus} -o ${prefix}.bam -" : "samtools view -@ ${task.cpus} -bS -o ${prefix}.bam -" + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" """ - INDEX=`find -L ./ -name "*.1.bt2" | sed 's/\\.1.bt2\$//'` + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 bowtie2 \\ - $args \\ - --threads $task.cpus \\ -x \$INDEX \\ - $read_inputs \\ - 2> ${prefix}.bowtie2.log \\ - | $samtools_command - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: \$(bowtie2 --version | head -n1 | sed 's/.*version //') - samtools: \$(samtools --version | head -n1 | sed 's/samtools //') - END_VERSIONS + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $rg \\ + $args \\ + 2>| >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ 
-f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi """ stub: + def args2 = task.ext.args2 ?: "" def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + """ - touch ${prefix}.bam + touch ${prefix}.${extension} + ${create_index} touch ${prefix}.bowtie2.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: 2.5.2 - samtools: 1.19 - END_VERSIONS + ${create_unmapped} """ + } diff --git a/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/meta.yml b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/meta.yml new file mode 100644 index 0000000..2d8051d --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/meta.yml @@ -0,0 +1,192 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. 
+ homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1186/gb-2009-10-3-r25 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Bowtie2 genome fasta file + pattern: "*.fasta" + ontologies: [] + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + sam: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + ontologies: [] + bam: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + ontologies: [] + cram: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + ontologies: [] + csi: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.csi": + type: file + description: 
Output SAM/BAM index for large inputs + pattern: "*.csi" + ontologies: [] + crai: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.crai": + type: file + description: Output CRAM index + pattern: "*.crai" + ontologies: [] + log: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.log": + type: file + description: Alignment log + pattern: "*.log" + ontologies: [] + fastq: + - - meta: + type: map + description: Groovy Map containing sample information + - "*fastq.gz": + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + versions_bowtie2: + - - ${task.process}: + type: string + description: The name of the process + - bowtie2: + type: string + description: The name of the tool + - "bowtie2 --version 2>&1 | sed -n '1s/.*bowtie2-align-s version //p'": + type: eval + description: The expression to obtain the version of bowtie2 + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The expression to obtain the version of samtools + versions_pigz: + - - ${task.process}: + type: string + description: The name of the process + - pigz: + type: string + description: The name of the tool + - "pigz --version 2>&1 | sed 's/pigz //'": + type: eval + description: The expression to obtain the version of pigz + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bowtie2: + type: string + description: The name of the tool + - "bowtie2 --version 2>&1 | sed -n '1s/.*bowtie2-align-s version //p'": + type: eval + description: The expression to obtain the version of bowtie2 + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The 
name of the tool + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The expression to obtain the version of samtools + - - ${task.process}: + type: string + description: The name of the process + - pigz: + type: string + description: The name of the tool + - "pigz --version 2>&1 | sed 's/pigz //'": + type: eval + description: The expression to obtain the version of pigz + +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/cram_crai.config b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 0000000..03f1d5e --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/large_index.config b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 0000000..b2f0c40 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 0000000..1705b66 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { 
+ script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - 
bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + 
assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = 
BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', 
single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + 
process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + + } + +} diff --git a/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 0000000..b1df41e --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,551 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + { + "versions_bowtie2": [ + [ + "BOWTIE2_ALIGN", + "bowtie2", + "2.5.4" + ] + ], + "versions_pigz": [ + [ + "BOWTIE2_ALIGN", + "pigz", + "2.8" + ] + ], + "versions_samtools": [ + [ + "BOWTIE2_ALIGN", + "samtools", + "1.21" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T15:18:12.706444258" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA&1 | sed -n "s/^Version: //p"'), topic: versions, emit: versions_bwa when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${fasta.baseName}" + def args = task.ext.args ?: '' """ mkdir bwa - bwa index $args -p bwa/${fasta.baseName} $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(bwa 2>&1 | grep -o 'Version: [0-9.]*' | sed 's/Version: //') - END_VERSIONS + bwa \\ + index \\ + $args \\ + -p bwa/${prefix} \\ + $fasta """ stub: + def prefix = task.ext.prefix ?: "${fasta.baseName}" """ mkdir bwa - touch bwa/${fasta.baseName}.amb - touch bwa/${fasta.baseName}.ann - touch 
bwa/${fasta.baseName}.bwt - touch bwa/${fasta.baseName}.pac - touch bwa/${fasta.baseName}.sa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: 0.7.18 - END_VERSIONS + touch bwa/${prefix}.amb + touch bwa/${prefix}.ann + touch bwa/${prefix}.bwt + touch bwa/${prefix}.pac + touch bwa/${prefix}.sa """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/index/meta.yml b/pipelines/nf-atacseq/modules/nf-core/bwa/index/meta.yml new file mode 100644 index 0000000..f5bf7f5 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bwa/index/meta.yml @@ -0,0 +1,71 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: https://bio-bwa.sourceforge.net/bwa.shtml + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] + identifier: "biotools:bwa" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1929" # FASTA +output: + index: + - - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - bwa: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + pattern: "*.{amb,ann,bwt,pac,sa}" + ontologies: + - edam: "http://edamontology.org/data_3210" # Genome index + versions_bwa: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bwa: + type: string + description: The tool name + - 'bwa 2>&1 | sed -n "s/^Version: //p"': + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bwa: + type: string + description: The tool name + - 'bwa 2>&1 | sed -n "s/^Version: //p"': + type: string + description: The command used to generate the version of the tool +authors: + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@maxulysse" + - "@gallvp" diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test new file mode 100644 index 0000000..f0fba82 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test @@ -0,0 +1,57 @@ +nextflow_process { + + name "Test Process BWA_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/index" + script "../main.nf" + process "BWA_INDEX" + + test("BWA index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match() } + ) + } + + } + + test("BWA index - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git 
a/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test.snap new file mode 100644 index 0000000..21a6f73 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bwa/index/tests/main.nf.test.snap @@ -0,0 +1,108 @@ +{ + "BWA index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.bwt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.pac:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.sa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BWA_INDEX", + "bwa", + "0.7.19-r1273" + ] + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.bwt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.pac:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.sa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bwa": [ + [ + "BWA_INDEX", + "bwa", + "0.7.19-r1273" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T16:58:59.966558606" + }, + "BWA index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "1": [ + [ + "BWA_INDEX", + "bwa", + "0.7.19-r1273" + ] + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "versions_bwa": [ 
+ [ + "BWA_INDEX", + "bwa", + "0.7.19-r1273" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T16:58:53.330725134" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/environment.yml b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/environment.yml new file mode 100644 index 0000000..54e6794 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/bwa + - bioconda::bwa=0.7.19 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/main.nf b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/main.nf index c94299b..e373267 100644 --- a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/main.nf @@ -2,59 +2,62 @@ process BWA_MEM { tag "$meta.id" label 'process_high' - conda "bioconda::bwa=0.7.18 bioconda::samtools=1.19" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3571' : - 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3571' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/d7/d7e24dc1e4d93ca4d3a76a78d4c834a7be3985b0e1e56fddd61662e047863a8a/data' : + 'community.wave.seqera.io/library/bwa_htslib_samtools:83b50ff84ead50d0' }" input: - tuple val(meta), path(reads) - path index - path fasta + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) val sort_bam output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml", emit: versions + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.cram") , emit: cram, optional: true + tuple val(meta), path("*.csi") , emit: csi, optional: true + tuple val(meta), path("*.crai") , emit: crai, optional: true + tuple val("${task.process}"), val('bwa'), eval('bwa 2>&1 | sed -n "s/^Version: //p"'), topic: versions, emit: versions_bwa + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def read_group = "@RG\\tID:${meta.id}\\tSM:${meta.id}\\tPL:ILLUMINA" - - def samtools_command = sort_bam ? "samtools sort -@ ${task.cpus} -o ${prefix}.bam -" : "samtools view -@ ${task.cpus} $args2 -o ${prefix}.bam -" - + def samtools_command = sort_bam ? 'sort' : 'view' + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? "cram": + sort_bam && args2.contains("-O cram")? "cram": + !sort_bam && args2.contains("-C") ? "cram": + "bam" + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" """ INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` bwa mem \\ $args \\ - -R "$read_group" \\ -t $task.cpus \\ \$INDEX \\ $reads \\ - | $samtools_command - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(bwa 2>&1 | grep -o 'Version: [0-9.]*' | sed 's/Version: //') - samtools: \$(samtools --version | head -n1 | sed 's/samtools //') - END_VERSIONS + | samtools $samtools_command $args2 ${reference} --threads $task.cpus -o ${prefix}.${extension} - """ stub: + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? "cram": + sort_bam && args2.contains("-O cram")? "cram": + !sort_bam && args2.contains("-C") ? "cram": + "bam" """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: 0.7.18 - samtools: 1.19 - END_VERSIONS + touch ${prefix}.${extension} + touch ${prefix}.csi + touch ${prefix}.crai """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/meta.yml b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/meta.yml new file mode 100644 index 0000000..450a3fe --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/meta.yml @@ -0,0 +1,149 @@ +name: bwa_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: https://bio-bwa.sourceforge.net/bwa.shtml + arxiv: arXiv:1303.3997 + licence: + - "GPL-3.0-or-later" + identifier: "biotools:bwa" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: + - edam: "http://edamontology.org/data_2044" + - edam: "http://edamontology.org/format_1930" + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + ontologies: + - edam: "http://edamontology.org/data_3210" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fasta,fa}" + ontologies: + - edam: "http://edamontology.org/data_2044" + - edam: "http://edamontology.org/format_1929" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + bam: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" + cram: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + ontologies: + - edam: "http://edamontology.org/format_3462" + csi: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.csi": + type: file + description: Optional index file for BAM file + pattern: "*.{csi}" + ontologies: [] + crai: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.crai": + type: file + description: Optional index file for CRAM file + pattern: "*.{crai}" + ontologies: [] + versions_bwa: + - - 
${task.process}: + type: string + description: The name of the process + - bwa: + type: string + description: The name of the tool + - 'bwa 2>&1 | sed -n "s/^Version: //p"': + type: eval + description: The expression to obtain the version of the tool + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bwa: + type: string + description: The name of the tool + - 'bwa 2>&1 | sed -n "s/^Version: //p"': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@drpatelh" + - "@jeremy1805" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@jeremy1805" + - "@matthdsm" diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test new file mode 100644 index 0000000..6486ab0 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test @@ -0,0 +1,255 @@ +nextflow_process { + + name "Test Process BWA_MEM" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/mem" + tag "bwa/index" + script "../main.nf" + process "BWA_MEM" + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + test("Single-End") { + + when { + process { + """ + input[0] = [ + [ id:'test', 
single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.findAll { key, val -> key.startsWith("versions") }, + bam(process.out.bam[0][1]).getReadsMD5() + ).match() + } + ) + } + + } + + test("Single-End Sort") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.findAll { key, val -> key.startsWith("versions") }, + bam(process.out.bam[0][1]).getReadsMD5() + ).match() + } + ) + } + + } + + test("Paired-End") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, 
+ process.out.csi, + process.out.crai, + process.out.findAll { key, val -> key.startsWith("versions") }, + bam(process.out.bam[0][1]).getReadsMD5() + ).match() + } + ) + } + + } + + test("Paired-End Sort") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.findAll { key, val -> key.startsWith("versions") }, + bam(process.out.bam[0][1]).getReadsMD5() + ).match() + } + ) + } + + } + + test("Paired-End - no fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[:],[]] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.findAll { key, val -> key.startsWith("versions") }, + bam(process.out.bam[0][1]).getReadsMD5() + ).match() + } + ) + } + + } + + test("Single-end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', 
checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("Paired-end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test.snap new file mode 100644 index 0000000..8aca4b2 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/bwa/mem/tests/main.nf.test.snap @@ -0,0 +1,375 @@ +{ + "Single-End": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + { + "versions_bwa": [ + [ + "BWA_MEM", + "bwa", + "0.7.19-r1273" + ] + ], + "versions_samtools": [ + [ + "BWA_MEM", + "samtools", + "1.22.1" + ] + ] + }, + "798439cbd7fd81cbcc5078022dc5479d" + ], + "timestamp": "2026-02-18T12:42:52.901827", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "Single-End Sort": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + { + "versions_bwa": [ + [ + "BWA_MEM", + "bwa", + "0.7.19-r1273" + ] + ], + "versions_samtools": [ + [ + "BWA_MEM", + "samtools", + "1.22.1" + ] + ] + }, + "94fcf617f5b994584c4e8d4044e16b4f" + ], + "timestamp": 
"2026-02-18T12:43:01.149915", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "Paired-End": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + { + "versions_bwa": [ + [ + "BWA_MEM", + "bwa", + "0.7.19-r1273" + ] + ], + "versions_samtools": [ + [ + "BWA_MEM", + "samtools", + "1.22.1" + ] + ] + }, + "57aeef88ed701a8ebc8e2f0a381b2a6" + ], + "timestamp": "2026-02-18T12:43:09.528042", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "Paired-End Sort": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + { + "versions_bwa": [ + [ + "BWA_MEM", + "bwa", + "0.7.19-r1273" + ] + ], + "versions_samtools": [ + [ + "BWA_MEM", + "samtools", + "1.22.1" + ] + ] + }, + "af8628d9df18b2d3d4f6fd47ef2bb872" + ], + "timestamp": "2026-02-18T12:43:17.876121", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "Single-end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "BWA_MEM", + "bwa", + "0.7.19-r1273" + ] + ], + "5": [ + [ + "BWA_MEM", + "samtools", + "1.22.1" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + [ + { + "id": "test", + "single_end": true + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bwa": [ + [ + "BWA_MEM", + "bwa", + "0.7.19-r1273" + ] + ], + "versions_samtools": [ + [ + "BWA_MEM", + "samtools", + "1.22.1" + ] + ] + } + ], + "timestamp": "2026-02-18T12:43:33.853248", + "meta": { + "nf-test": 
"0.9.4", + "nextflow": "25.10.4" + } + }, + "Paired-End - no fasta": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + { + "versions_bwa": [ + [ + "BWA_MEM", + "bwa", + "0.7.19-r1273" + ] + ], + "versions_samtools": [ + [ + "BWA_MEM", + "samtools", + "1.22.1" + ] + ] + }, + "57aeef88ed701a8ebc8e2f0a381b2a6" + ], + "timestamp": "2026-02-18T12:43:26.121474", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "Paired-end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "BWA_MEM", + "bwa", + "0.7.19-r1273" + ] + ], + "5": [ + [ + "BWA_MEM", + "samtools", + "1.22.1" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bwa": [ + [ + "BWA_MEM", + "bwa", + "0.7.19-r1273" + ] + ], + "versions_samtools": [ + [ + "BWA_MEM", + "samtools", + "1.22.1" + ] + ] + } + ], + "timestamp": "2026-02-18T12:43:42.119907", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/environment.yml b/pipelines/nf-atacseq/modules/nf-core/fastp/environment.yml new file mode 100644 index 0000000..0c36eed --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/fastp/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/fastp + - bioconda::fastp=1.0.1 diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/main.nf b/pipelines/nf-atacseq/modules/nf-core/fastp/main.nf index c9b1380..e13509c 100644 --- a/pipelines/nf-atacseq/modules/nf-core/fastp/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/fastp/main.nf @@ -2,23 +2,25 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda "bioconda::fastp=0.23.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : - 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/527b18847a97451091dba07a886b24f17f742a861f9f6c9a6bfb79d4f1f3bf9d/data' : + 'community.wave.seqera.io/library/fastp:1.0.1--c8b87fe62dcc103c' }" input: - tuple val(meta), path(reads) - path adapter_fasta + tuple val(meta), path(reads), path(adapter_fasta) + val discard_trimmed_pass val save_trimmed_fail val save_merged output: - tuple val(meta), path('*.fastp.fastq.gz'), emit: reads - tuple val(meta), path('*.json'), emit: json - tuple val(meta), path('*.html'), emit: html - tuple val(meta), path('*.log'), emit: log - path "versions.yml", emit: versions + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + tuple val("${task.process}"), val('fastp'), eval('fastp --version 2>&1 | sed -e "s/fastp //g"'), emit: versions_fastp, topic: 
versions when: task.ext.when == null || task.ext.when @@ -27,58 +29,76 @@ process FASTP { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" - - if (meta.single_end) { + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_R1.fail.fastq.gz --unpaired2 ${prefix}_R2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_R1.fastp.fastq.gz" ) + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_R2.fastp.fastq.gz" + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + fastp \\ - --in1 ${reads[0]} \\ - --out1 ${prefix}.fastp.fastq.gz \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $adapter_list \\ + $fail_fastq \\ $args \\ - 2> >(tee ${prefix}.fastp.log >&2) + 2>| >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed 's/fastp //') - END_VERSIONS + fastp \\ + --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2>| >(tee ${prefix}.fastp.log >&2) """ } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' """ + [ ! 
-f ${prefix}_R1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_R1.fastq.gz + [ ! -f ${prefix}_R2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_R2.fastq.gz fastp \\ - --in1 ${reads[0]} \\ - --in2 ${reads[1]} \\ - --out1 ${prefix}_1.fastp.fastq.gz \\ - --out2 ${prefix}_2.fastp.fastq.gz \\ - --thread $task.cpus \\ + --in1 ${prefix}_R1.fastq.gz \\ + --in2 ${prefix}_R2.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ - --detect_adapter_for_pe \\ $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ $args \\ - 2> >(tee ${prefix}.fastp.log >&2) - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: \$(fastp --version 2>&1 | sed 's/fastp //') - END_VERSIONS + 2>| >(tee ${prefix}.fastp.log >&2) """ } stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? "echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_R1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_R2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : "" + def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? 
"echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_R1.fail.fastq.gz ; echo '' | gzip > ${prefix}_R2.fail.fastq.gz" """ - touch ${prefix}_1.fastp.fastq.gz - touch ${prefix}_2.fastp.fastq.gz - echo '{}' > ${prefix}.fastp.json - touch ${prefix}.fastp.html - touch ${prefix}.fastp.log - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastp: 0.23.4 - END_VERSIONS + $touch_reads + $touch_fail_fastq + $touch_merged + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/meta.yml b/pipelines/nf-atacseq/modules/nf-core/fastp/meta.yml new file mode 100644 index 0000000..a67be39 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/fastp/meta.yml @@ -0,0 +1,144 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] + identifier: biotools:fastp +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + ontologies: [] + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. 
+ pattern: "*.{fasta,fna,fas,fa}" + ontologies: [] + - discard_trimmed_pass: + type: boolean + description: | + Specify true to not write any reads that pass trimming thresholds. + This can be used to use fastp for the output report only. + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds + ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastp.fastq.gz": + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + json: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: Results in JSON format + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: Results in HTML format + pattern: "*.html" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: fastp log file + pattern: "*.log" + ontologies: [] + reads_fail: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "*.fail.fastq.gz": + type: file + description: Reads that failed the preprocessing + pattern: "*fail.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + reads_merged: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" + ontologies: [] + versions_fastp: + - - "${task.process}": + type: string + description: The name of the process + - fastp: + type: string + description: The name of the tool + - 'fastp --version 2>&1 | sed -e "s/fastp //g"': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - "${task.process}": + type: string + description: The name of the process + - fastp: + type: string + description: The name of the tool + - 'fastp --version 2>&1 | sed -e "s/fastp //g"': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@drpatelh" + - "@kevinmenden" + - "@eit-maxlcummins" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 0000000..b790157 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,661 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + + process { + """ + adapter_fasta = [] // empty list for no adapter file!
+ discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() + } + ) + } + } + + test("test_fastp_paired_end") { + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("fastp test_fastp_interleaved") { + + config 
'./nextflow.interleaved.config' + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("paired end (151 cycles + 151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert process.out.reads_fail == [] }, + { assert process.out.reads_merged == [] }, + { assert snapshot( + process.out.reads, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + config './nextflow.save_failed.config' + when { + 
process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total reads: 75") }, + { assert snapshot( + 
process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() }, + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + process { + """ + adapter_fasta = file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = false + input[2] = false + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("
") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total bases: 13683") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_single_end_qc_only") { + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only") { + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.findAll { key, val -> key.startsWith('versions') }).match() } + ) + } + } + + test("test_fastp_single_end - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fastp - stub test_fastp_interleaved") { + + options "-stub" + + config './nextflow.interleaved.config' + when { + process { + """ 
+ adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail - stub") { + + options "-stub" + + config './nextflow.save_failed.config' + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + 
test("test_fastp_paired_end_merged - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta 
+ ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 0000000..5677235 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,1376 @@ +{ + "test_fastp_single_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, 
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:52.14535813" + }, + "test_fastp_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_R2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ], + [ + + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:26.421773402" + }, + "test_fastp_paired_end_merged_adapterlist": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_R2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:59.832295907" + }, + "test_fastp_single_end_qc_only": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:47:06.486959565" + }, + "test_fastp_paired_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7", 
+ "test_R2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366", + "test_R1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6", + "test_R2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995" + ] + ] + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:46.736511024" + }, + "fastp - stub test_fastp_interleaved": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:16.097071654" + }, + "test_fastp_single_end - stub": { + 
"content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:03.317192706" + }, + "test_fastp_paired_end_merged_adapterlist - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:44.851708205" + }, + "test_fastp_paired_end_merged - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" 
+ ] + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:37.581047713" + }, + "test_fastp_paired_end_merged": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_R2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:53.190202914" + }, + "test_fastp_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + 
{ + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:09.585957282" + }, + "test_fastp_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:19.624824985" + }, + "test_fastp_single_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ 
+ { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:22.800659826" + }, + "test_fastp_paired_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + 
"test_R1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:30.271734068" + }, + "fastp test_fastp_interleaved": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,217d62dc13a23e92513a1bd8e1bcea39" + ] + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:33.4628687" + }, + "test_fastp_single_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,3e4aaadb66a5b8fc9b881bf39c227abd" + ] + ], + [ + + ], + { + 
"versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:46:39.895973372" + }, + "test_fastp_paired_end_qc_only": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + { + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-23T09:47:13.015833707" + }, + "test_fastp_paired_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions_fastp": [ + [ + "FASTP", + "fastp", + "1.0.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-22T13:00:59.670106791" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.interleaved.config b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.interleaved.config new file mode 100644 index 0000000..4be8dbd --- /dev/null +++ 
b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.interleaved.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "--interleaved_in -e 30" + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.save_failed.config b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.save_failed.config new file mode 100644 index 0000000..53b61b0 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/fastp/tests/nextflow.save_failed.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "-e 30" + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/environment.yml b/pipelines/nf-atacseq/modules/nf-core/fastqc/environment.yml new file mode 100644 index 0000000..f9f54ee --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/main.nf b/pipelines/nf-atacseq/modules/nf-core/fastqc/main.nf index 40d10a5..f562952 100644 --- a/pipelines/nf-atacseq/modules/nf-core/fastqc/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/main.nf @@ -1,8 +1,8 @@ process FASTQC { - tag "$meta.id" - label 'process_medium' + tag "${meta.id}" + label 'process_low' - conda "bioconda::fastqc=0.12.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" @@ -11,34 +11,44 @@ process FASTQC { tuple val(meta), path(reads) output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip"), emit: zip - path "versions.yml", emit: versions + tuple val(meta) , path("*.html") , emit: html + tuple val(meta) , path("*.zip") , emit: zip + tuple val("${task.process}"), val('fastqc'), eval('fastqc --version | sed "/FastQC v/!d; s/.*v//"'), emit: versions_fastqc, topic: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - fastqc $args --threads $task.cpus $reads + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ _old_name, new_name -> new_name }.join(' ') + + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = task.memory ? task.memory.toUnit('MB') / task.cpus : null + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS + """ + printf "%s %s\\n" ${rename_to} | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + + fastqc \\ + ${args} \\ + --threads ${task.cpus} \\ + --memory ${fastqc_memory} \\ + ${renamed_files} """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_fastqc.html - touch ${prefix}_fastqc.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: 0.12.1 - END_VERSIONS + touch ${prefix}.html + touch ${prefix}.zip """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/meta.yml b/pipelines/nf-atacseq/modules/nf-core/fastqc/meta.yml new file mode 100644 index 0000000..49164c8 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/meta.yml @@ -0,0 +1,111 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] + identifier: biotools:fastqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: [] +output: + html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + ontologies: [] + zip: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + ontologies: [] + versions_fastqc: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fastqc: + type: string + description: The tool name + - fastqc --version | sed "/FastQC v/!d; s/.*v//": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fastqc: + type: string + description: The tool name + - fastqc --version | sed "/FastQC v/!d; s/.*v//": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" +containers: + conda: + linux_amd64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-af7a5314d5015c29_1/condalock + linux_arm64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-df99cb252670875a_2/condalock + docker: + linux_amd64: + build_id: bd-af7a5314d5015c29_1 + name: community.wave.seqera.io/library/fastqc:0.12.1--af7a5314d5015c29 + scanId: sc-a618548acbee5a8a_30 + linux_arm64: + build_id: bd-df99cb252670875a_2 + name: community.wave.seqera.io/library/fastqc:0.12.1--df99cb252670875a + scanId: sc-b5913ed5d42b22d2_18 + singularity: + linux_amd64: + build_id: bd-104d26ddd9519960_1 + name: oras://community.wave.seqera.io/library/fastqc:0.12.1--104d26ddd9519960 + https: https://community.wave.seqera.io/v2/library/fastqc/blobs/sha256:e0c976cb2eca5fee72618a581537a4f8ea42fcae24c9b201e2e0f764fd28648a + linux_arm64: + build_id: bd-d56b505a93aef38a_1 + 
name: oras://community.wave.seqera.io/library/fastqc:0.12.1--d56b505a93aef38a + https: https://community.wave.seqera.io/v2/library/fastqc/blobs/sha256:fd39534bf298698cbe3ee4d4a6f1e73330ec4bca44c38dd9a4d06cb5ea838017 diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 0000000..66c44da --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,309 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023
test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File 
typeConventional base calls") }, + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, 
+ { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 interleaved [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 multiple [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 custom_prefix - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 0000000..c8ee120 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,476 @@ +{ + "sarscov2 custom_prefix": { + "content": [ + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:39:14.518503" + }, + "sarscov2 single-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:39:19.309008" + }, + "sarscov2 custom_prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "html": [ + [ + { + "id": "mysample", + "single_end": 
true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "zip": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:39:44.94888" + }, + "sarscov2 interleaved [fastq]": { + "content": [ + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:38:45.168496" + }, + "sarscov2 paired-end [bam]": { + "content": [ + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:38:53.268919" + }, + "sarscov2 multiple [fastq]": { + "content": [ + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:39:05.050305" + }, + "sarscov2 paired-end [fastq]": { + "content": [ + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:38:37.2373" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:39:24.450398" + }, + "sarscov2 multiple [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:39:39.758762" + }, + "sarscov2 single-end [fastq]": { + "content": [ + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:38:29.555068" + }, + "sarscov2 interleaved [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:39:29.193136" 
+ }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-28T16:39:34.144919" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/environment.yml b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/environment.yml new file mode 100644 index 0000000..7f8c3ca --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::macs2=2.2.9.1=py39hff71179_1 + - conda-forge::python=3.9.19 + - conda-forge::setuptools=70.0 diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/main.nf b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/main.nf index 709500b..730845d 100644 --- a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/main.nf @@ -2,21 +2,23 @@ process MACS2_CALLPEAK { tag "$meta.id" label 'process_medium' - conda "bioconda::macs2=2.2.9.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/macs2:2.2.9.1--py39hf95cd2a_0' : - 'biocontainers/macs2:2.2.9.1--py39hf95cd2a_0' }" + 'https://depot.galaxyproject.org/singularity/macs2:2.2.9.1--py39hff71179_1': + 'biocontainers/macs2:2.2.9.1--py39hff71179_1' }" input: - tuple val(meta), path(bam) - val gsize + tuple val(meta), path(ipbam), path(controlbam) + val macs2_gsize output: - tuple val(meta), path("*.narrowPeak"), emit: peak - tuple val(meta), path("*.xls"), emit: xls - tuple val(meta), path("*.summits.bed"), emit: summits, optional: true - tuple val(meta), path("*.bdg"), emit: bedgraph, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("*.{narrowPeak,broadPeak}"), emit: peak + tuple val(meta), path("*.xls") , emit: xls + path "versions.yml" , emit: versions + + tuple val(meta), path("*.gappedPeak"), optional:true, emit: gapped + tuple val(meta), path("*.bed") , optional:true, emit: bed + tuple val(meta), path("*.bdg") , optional:true, emit: bdg when: task.ext.when == null || task.ext.when @@ -24,32 +26,43 @@ process MACS2_CALLPEAK { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def format = meta.single_end ? 'BAM' : 'BAMPE' + def args_list = args.tokenize() + def format = meta.single_end ? 'BAM' : 'BAMPE' + def control = controlbam ? "--control $controlbam" : '' + if(args_list.contains('--format')){ + def id = args_list.findIndexOf{args_i -> args_i=='--format'} + format = args_list[id+1] + args_list.remove(id+1) + args_list.remove(id) + } """ - macs2 callpeak \\ - $args \\ - -g $gsize \\ - -f $format \\ - -t $bam \\ - -n $prefix \\ - --outdir . 
+ macs2 \\ + callpeak \\ + ${args_list.join(' ')} \\ + --gsize $macs2_gsize \\ + --format $format \\ + --name $prefix \\ + --treatment $ipbam \\ + $control cat <<-END_VERSIONS > versions.yml "${task.process}": - macs2: \$(macs2 --version 2>&1 | sed 's/macs2 //') + macs2: \$(macs2 --version | sed -e "s/macs2 //g") END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_peaks.narrowPeak - touch ${prefix}_peaks.xls - touch ${prefix}_summits.bed + touch ${prefix}.gappedPeak + touch ${prefix}.bed + touch ${prefix}.bdg + touch ${prefix}.narrowPeak + touch ${prefix}.xls cat <<-END_VERSIONS > versions.yml "${task.process}": - macs2: 2.2.9.1 + macs2: \$(macs2 --version | sed -e "s/macs2 //g") END_VERSIONS """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/meta.yml b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/meta.yml new file mode 100644 index 0000000..4e354fa --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/meta.yml @@ -0,0 +1,106 @@ +name: macs2_callpeak +description: Peak calling of enriched genomic regions of ChIP-seq and ATAC-seq experiments +keywords: + - alignment + - atac-seq + - chip-seq + - peak-calling +tools: + - macs2: + description: Model Based Analysis for ChIP-Seq data + documentation: https://docs.csc.fi/apps/macs2/ + tool_dev_url: https://github.com/macs3-project/MACS + doi: "10.1101/496521" + licence: ["BSD"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ipbam: + type: file + description: The ChIP-seq treatment file + ontologies: [] + - controlbam: + type: file + description: The control file + ontologies: [] + - macs2_gsize: + type: string + description: Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs' + for human (2.7e9), 'mm' for mouse (1.87e9), 'ce' for C. 
elegans (9e7) and 'dm' + for fruitfly (1.2e8) +output: + peak: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{narrowPeak,broadPeak}": + type: file + description: BED file containing annotated peaks + pattern: "*.{narrowPeak,broadPeak}" + ontologies: [] + xls: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.xls": + type: file + description: xls file containing annotated peaks + pattern: "*.xls" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software version + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + gapped: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gappedPeak": + type: file + description: Optional BED file containing gapped peak + pattern: "*.gappedPeak" + ontologies: [] + bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: Optional BED file containing peak summits locations for every + peak + pattern: "*.bed" + ontologies: [] + bdg: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bdg": + type: file + description: Optional bedGraph files for input and treatment input samples + pattern: "*.bdg" + ontologies: [] +authors: + - "@ntoda03" + - "@JoseEspinosa" + - "@jianhong" +maintainers: + - "@ntoda03" + - "@JoseEspinosa" + - "@jianhong" diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bam.config b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bam.config new file mode 100644 index 0000000..17a7d3e --- /dev/null +++ 
b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bam.config @@ -0,0 +1,5 @@ +process { + withName: 'MACS2_CALLPEAK' { + ext.args = '--qval 0.1' + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bed.config b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bed.config new file mode 100644 index 0000000..aeba9a9 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/bed.config @@ -0,0 +1,5 @@ +process { + withName: 'MACS2_CALLPEAK' { + ext.args = '--format BED --qval 10 --nomodel --extsize 200' + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test new file mode 100644 index 0000000..b9ebc06 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test @@ -0,0 +1,125 @@ +nextflow_process { + + name "Test Process MACS2_CALLPEAK" + script "../main.nf" + process "MACS2_CALLPEAK" + + tag "modules" + tag "modules_nfcore" + tag "macs2" + tag "macs2/callpeak" + + test("homo_sapiens - callpeak - bed") { + + when { + config "./bed.config" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bed/alz.ccs.fl.NEB_5p--NEB_Clontech_3p.flnc.clustered.singletons.merged.aligned_tc.bed', checkIfExists: true) ], + [] + ] + input[1] = 4000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + sanitizeOutput(process.out), + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + + test("homo_sapiens - callpeak - bam") { + + when { + config "./bam.config" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = 40000 + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert snapshot( + sanitizeOutput(process.out), + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + + test("homo_sapiens - callpeak - control - bam") { + + when { + config "./bam.config" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.name.sorted.bam', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.name.sorted.bam', checkIfExists: true) ] + ] + input[1] = 40000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + sanitizeOutput(process.out), + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) ], + [] + ] + input[1] = 40000 + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + sanitizeOutput(process.out), + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + +} diff --git a/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test.snap new file mode 100644 index 0000000..a9aa3ce --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/macs2/callpeak/tests/main.nf.test.snap @@ -0,0 +1,222 @@ +{ + "homo_sapiens - callpeak - bam": { + "content": [ + { + "bdg": [ + + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,26f0f97b6c14dbca129e947a58067c82" + ] + ], + "gapped": [ + + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,2e4da1c1704595e12aaf99cc715ad70c" + ] + ], + "versions": [ + 
"versions.yml:md5,ba6bf9efdccff6f86c722ce9b61ce75e" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,dd0cbdd9520b150b3dd5f7bede0d4a1e" + ] + ] + }, + { + "MACS2_CALLPEAK": { + "macs2": "2.2.9.1" + } + } + ], + "timestamp": "2026-02-16T13:06:13.194555144", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 - bam - stub": { + "content": [ + { + "bdg": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bdg:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gapped": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gappedPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test.narrowPeak:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b0ab3b36d39f9851effaf8b0d5cc0b92" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test.xls:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + }, + { + "MACS2_CALLPEAK": { + "macs3": null + } + } + ], + "timestamp": "2026-02-16T13:06:28.267130581", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "homo_sapiens - callpeak - control - bam": { + "content": [ + { + "bdg": [ + + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,4f3c7c53a1d730d90d1b3dd9d3197af4" + ] + ], + "gapped": [ + + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,653e1108cc57ca07d0f60fc0f4fb8ba3" + ] + ], + "versions": [ + "versions.yml:md5,ba6bf9efdccff6f86c722ce9b61ce75e" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,ba5c031a290fc98828d7a3b9320863ac" + ] + ] + }, + { + "MACS2_CALLPEAK": { + "macs2": "2.2.9.1" + } + } + ], + "timestamp": "2026-02-16T13:06:20.925967923", + "meta": { + "nf-test": "0.9.4", 
+ "nextflow": "25.10.4" + } + }, + "homo_sapiens - callpeak - bed": { + "content": [ + { + "bdg": [ + + ], + "bed": [ + [ + { + "id": "test", + "single_end": false + }, + "test_summits.bed:md5,28833eeb7816688f0d698f51670be946" + ] + ], + "gapped": [ + + ], + "peak": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.narrowPeak:md5,10e7d4747f8a2513e5ebb04856a51673" + ] + ], + "versions": [ + "versions.yml:md5,ba6bf9efdccff6f86c722ce9b61ce75e" + ], + "xls": [ + [ + { + "id": "test", + "single_end": false + }, + "test_peaks.xls:md5,0b7c5a46179fe9d3f61c8dbc192a3c3d" + ] + ] + }, + { + "MACS2_CALLPEAK": { + "macs2": "2.2.9.1" + } + } + ], + "timestamp": "2026-02-16T13:06:05.333162919", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/environment.yml b/pipelines/nf-atacseq/modules/nf-core/multiqc/environment.yml new file mode 100644 index 0000000..009874d --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::multiqc=1.33 diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/main.nf b/pipelines/nf-atacseq/modules/nf-core/multiqc/main.nf index b3a9eba..5376aea 100644 --- a/pipelines/nf-atacseq/modules/nf-core/multiqc/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/main.nf @@ -1,54 +1,50 @@ process MULTIQC { + tag "${meta.id}" label 'process_single' - conda "bioconda::multiqc=1.19" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : - 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data' + : 'community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b'}" input: - path multiqc_files, stageAs: "?/*" - path multiqc_config - path extra_multiqc_config - path multiqc_logo + tuple val(meta), path(multiqc_files, stageAs: "?/*"), path(multiqc_config, stageAs: "?/*"), path(multiqc_logo), path(replace_names), path(sample_names) output: - path "*multiqc_report.html", emit: report - path "*_data", emit: data - path "*_plots", emit: plots, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("*.html"), emit: report + tuple val(meta), path("*_data"), emit: data + tuple val(meta), path("*_plots"), emit: plots, optional: true + // MultiQC should not push its versions to the `versions` topic. Its input depends on the versions topic to be resolved thus outputting to the topic will let the pipeline hang forever + tuple val("${task.process}"), val('multiqc'), eval('multiqc --version | sed "s/.* //g"'), emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def config = multiqc_config ? "--config $multiqc_config" : "" - def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : "" - def logo = multiqc_logo ? "--cl-config 'custom_logo: $multiqc_logo'" : "" + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' + def config = multiqc_config ? multiqc_config instanceof List ? "--config ${multiqc_config.join(' --config ')}" : "--config ${multiqc_config}" : "" + def logo = multiqc_logo ? 
"--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ - $args \\ - $config \\ - $extra_config \\ - $logo \\ + ${args} \\ + ${config} \\ + ${prefix} \\ + ${logo} \\ + ${replace} \\ + ${samples} \\ . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed 's/multiqc, version //' ) - END_VERSIONS """ stub: """ - touch multiqc_report.html mkdir multiqc_data - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: 1.19 - END_VERSIONS + touch multiqc_data/.stub + mkdir multiqc_plots + touch multiqc_plots/.stub + touch multiqc_report.html """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/meta.yml b/pipelines/nf-atacseq/modules/nf-core/multiqc/meta.yml new file mode 100644 index 0000000..ef434a9 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/meta.yml @@ -0,0 +1,133 @@ +name: multiqc +description: Aggregate results from bioinformatics analyses across many samples + into a single report +keywords: + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report +tools: + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: + - "GPL-3.0-or-later" + identifier: biotools:multiqc +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1', single_end:false ] + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + ontologies: [] + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + ontologies: [] + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample-names + argument. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 +output: + report: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*.html": + type: file + description: MultiQC report file + pattern: "*.html" + ontologies: [] + data: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1', single_end:false ] + - "*_data": + type: directory + description: MultiQC data dir + pattern: "*_data" + plots: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1', single_end:false ] + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_plots" + ontologies: [] + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - multiqc: + type: string + description: The tool name + - multiqc --version | sed "s/.* //g": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" +containers: + conda: + linux/amd64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-ee7739d47738383b_1/condalock + linux/arm64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-58d7dee710ab3aa8_1/condalock + docker: + linux/amd64: + build_id: bd-ee7739d47738383b_1 + name: community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b + scanId: sc-6ddec592dcadd583_4 + linux/arm64: + build_id: bd-58d7dee710ab3aa8_1 + name: community.wave.seqera.io/library/multiqc:1.33--58d7dee710ab3aa8 + scanId: sc-a04c42273e34c55c_2 + singularity: + linux/amd64: + build_id: bd-e3576ddf588fa00d_1 + https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data + name: oras://community.wave.seqera.io/library/multiqc:1.33--e3576ddf588fa00d + linux/arm64: + build_id: bd-2537ca5f8445e3c2_1 + https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/78/78b89e91d89e9cc99ad5ade5be311f347838cb2acbfb4f13bc343b170be09ce4/data + name: oras://community.wave.seqera.io/library/multiqc:1.33--2537ca5f8445e3c2 diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/custom_prefix.config b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/custom_prefix.config new file mode 100644 index 0000000..b30b135 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/custom_prefix.config @@ -0,0 +1,5 @@ +process { + 
withName: 'MULTIQC' { + ext.prefix = "custom_prefix" + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 0000000..0e422ea --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,161 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + config "./nextflow.config" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = channel.of([ + [ id: 'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + [], + [], + [], + [] + ]) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.data[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") + }).match() } + ) + } + } + + test("sarscov2 single-end [fastqc] - custom prefix") { + config "./custom_prefix.config" + + when { + process { + """ + input[0] = channel.of([ + [ id: 'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + [], + [], + [], + [] + ]) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.data[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") + }).match() } + ) + } + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = channel.of([ + [ id: 'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true), + [], + [], + [] + 
]) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.data[0][1]).name, + file(process.out.plots[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") + }).match() } + ) + } + } + + test("sarscov2 single-end [fastqc] [multiple configs]") { + + when { + process { + """ + input[0] = channel.of([ + [ id: 'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + [ + file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true), + file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true) + ], + [], + [], + [] + ]) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.data[0][1]).name, + file(process.out.plots[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") + }).match() } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = channel.of([ + [ id: 'FASTQC' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true), + [], + [], + [], + [] + ]) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 0000000..c022701 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,130 @@ +{ + "sarscov2 single-end [fastqc] [multiple configs]": { + "content": [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + { + "versions": 
[ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-26T20:21:35.851707" + }, + "sarscov2 single-end [fastqc]": { + "content": [ + "multiqc_report.html", + "multiqc_data", + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-26T15:10:36.019680076" + }, + "sarscov2 single-end [fastqc] - stub": { + "content": [ + { + "data": [ + [ + { + "id": "FASTQC" + }, + [ + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "plots": [ + [ + { + "id": "FASTQC" + }, + [ + ".stub:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "report": [ + [ + { + "id": "FASTQC" + }, + "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-26T15:14:39.789193051" + }, + "sarscov2 single-end [fastqc] [config]": { + "content": [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-26T15:21:29.116129274" + }, + "sarscov2 single-end [fastqc] - custom prefix": { + "content": [ + "custom_prefix.html", + "custom_prefix_data", + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-26T15:10:43.419877592" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/nextflow.config b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 0000000..c537a6a --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 
'MULTIQC' { + ext.prefix = null + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/environment.yml b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/environment.yml new file mode 100644 index 0000000..b4ac4fe --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/picard + - bioconda::picard=3.4.0 diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/main.nf b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/main.nf index de90d7f..17bcf27 100644 --- a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/main.nf @@ -1,22 +1,23 @@ process PICARD_MARKDUPLICATES { - tag "$meta.id" + tag "${meta.id}" label 'process_medium' - conda "bioconda::picard=3.1.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : - 'biocontainers/picard:3.1.1--hdfd78af_0' }" + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/08/0861295baa7c01fc593a9da94e82b44a729dcaf8da92be8e565da109aa549b25/data' + : 'community.wave.seqera.io/library/picard:3.4.0--e9963040df0a9bf6'}" input: - tuple val(meta), path(bam) - path fasta - path fasta_fai + tuple val(meta), path(reads) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: - tuple val(meta), path("*.markdup.bam"), emit: bam - tuple val(meta), path("*.markdup.bam.bai"), emit: bai - tuple val(meta), path("*.metrics.txt"), emit: metrics - path "versions.yml", emit: versions + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.metrics.txt"), emit: metrics + tuple val("${task.process}"), val('picard'), eval("picard MarkDuplicates --version 2>&1 | sed -n 's/.*Version://p'"), topic: versions, emit: versions_picard when: task.ext.when == null || task.ext.when @@ -24,38 +25,39 @@ process PICARD_MARKDUPLICATES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" def avail_mem = 3072 if (!task.memory) { - log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB' - } else { - avail_mem = (task.memory.mega*0.8).intValue() + log.info('[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.') + } + else { + avail_mem = (task.memory.mega * 0.8).intValue() + } + + if ("${reads}" == "${prefix}.${suffix}") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") } """ picard \\ -Xmx${avail_mem}M \\ MarkDuplicates \\ - $args \\ - --INPUT $bam \\ - --OUTPUT ${prefix}.markdup.bam \\ - --METRICS_FILE ${prefix}.metrics.txt \\ - --CREATE_INDEX true - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: \$(picard MarkDuplicates --version 2>&1 | grep -o 'Version:[0-9.]*' | sed 's/Version://') - END_VERSIONS + ${args} \\ + --INPUT ${reads} \\ + --OUTPUT ${prefix}.${suffix} \\ + ${reference} \\ + --METRICS_FILE ${prefix}.metrics.txt """ stub: def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + if ("${reads}" == "${prefix}.${suffix}") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } """ - touch ${prefix}.markdup.bam - touch ${prefix}.markdup.bam.bai + touch ${prefix}.${suffix} + touch ${prefix}.${suffix}.bai touch ${prefix}.metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - picard: 3.1.1 - END_VERSIONS """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/meta.yml b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/meta.yml new file mode 100644 index 0000000..aa0ddbd --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/meta.yml @@ -0,0 +1,124 @@ +name: picard_markduplicates +description: Locate and tag duplicate reads in a BAM file +keywords: + - markduplicates + - pcr + - duplicates + - bam + - sam + - cram +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. 
+ homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] + identifier: biotools:picard_tools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Sequence reads file, can be SAM/BAM/CRAM format + pattern: "*.{bam,cram,sam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome fasta file, required for CRAM input + pattern: "*.{fasta,fa}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Reference genome fasta index + pattern: "*.{fai}" + ontologies: [] +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: BAM file with duplicate reads marked/removed + pattern: "*.{bam}" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: An optional BAM index file. If desired, --CREATE_INDEX must be + passed as a flag + pattern: "*.{bai}" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Output CRAM file + pattern: "*.{cram}" + ontologies: [] + metrics: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.metrics.txt": + type: file + description: Duplicate metrics file generated by picard + pattern: "*.{metrics.txt}" + ontologies: [] + versions_picard: + - - ${task.process}: + type: string + description: The process the versions were collected from + - picard: + type: string + description: The tool name + - "picard MarkDuplicates --version 2>&1 | sed -n 's/.*Version://p'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - picard: + type: string + description: The tool name + - "picard MarkDuplicates --version 2>&1 | sed -n 's/.*Version://p'": + type: string + description: The command used to generate the version of the tool +authors: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test new file mode 100644 index 0000000..4d00645 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test @@ -0,0 +1,173 @@ +nextflow_process { + + name "Test Process PICARD_MARKDUPLICATES" + script "../main.nf" + process "PICARD_MARKDUPLICATES" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/markduplicates" + + test("sarscov2 [unsorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + 
path(process.out.metrics.get(0).get(1)).readLines()[0..2], + process.out.findAll { key, val -> key.startsWith("versions") }) + .match() } + ) + } + } + + test("sarscov2 [sorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + path(process.out.metrics.get(0).get(1)).readLines()[0..2], + process.out.findAll { key, val -> key.startsWith("versions") }) + .match() } + ) + } + } + + test("homo_sapiens [cram]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + path(process.out.metrics.get(0).get(1)).readLines()[0..2], + process.out.findAll { key, val -> key.startsWith("versions") }) + .match() } + ) + } + } + + test("sarscov2 [unsorted bam] - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + } + + test("sarscov2 [sorted bam] - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + } + + test("homo_sapiens [cram] - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap new file mode 100644 index 0000000..4ea479a --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap @@ -0,0 +1,218 @@ +{ + "sarscov2 [sorted bam] - stub": { + "content": [ + { + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"cram": [ + + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "timestamp": "2026-02-19T17:43:13.544887277", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 [unsorted bam] - stub": { + "content": [ + { + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "timestamp": "2026-02-19T17:43:06.193033248", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 [unsorted bam]": { + "content": [ + "test.md.bam", + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.md.bam --METRICS_FILE test.md.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_DUP_STRATEGY FLOW_QUALITY_SUM_STRATEGY --FLOW_USE_END_IN_UNPAIRED_READS false --FLOW_USE_UNPAIRED_CLIPPED_END false --FLOW_UNPAIRED_END_UNCERTAINTY 0 --FLOW_UNPAIRED_START_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates 
--PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ], + { + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "timestamp": "2026-02-19T17:42:40.574463587", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 [sorted bam]": { + "content": [ + "test.md.bam", + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.md.bam --METRICS_FILE test.md.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_DUP_STRATEGY FLOW_QUALITY_SUM_STRATEGY --FLOW_USE_END_IN_UNPAIRED_READS false --FLOW_USE_UNPAIRED_CLIPPED_END false --FLOW_UNPAIRED_END_UNCERTAINTY 0 --FLOW_UNPAIRED_START_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false 
--USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ], + { + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "timestamp": "2026-02-19T17:42:49.374645492", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "homo_sapiens [cram]": { + "content": [ + "test.md.cram", + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.md.cram --METRICS_FILE test.md.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_DUP_STRATEGY FLOW_QUALITY_SUM_STRATEGY --FLOW_USE_END_IN_UNPAIRED_READS false --FLOW_USE_UNPAIRED_CLIPPED_END false --FLOW_UNPAIRED_END_UNCERTAINTY 0 --FLOW_UNPAIRED_START_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ], + { + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "timestamp": "2026-02-19T17:42:59.07843756", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "homo_sapiens [cram] - stub": { + "content": [ + { + 
"bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.cram.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_picard": [ + [ + "PICARD_MARKDUPLICATES", + "picard", + "3.4.0" + ] + ] + } + ], + "timestamp": "2026-02-19T17:43:20.676018462", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/nextflow.config b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/nextflow.config new file mode 100644 index 0000000..f8dd0f1 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/picard/markduplicates/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: PICARD_MARKDUPLICATES { + ext.prefix = { "${meta.id}.md" } + ext.args = '--ASSUME_SORT_ORDER queryname' + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 0000000..89e12a6 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/main.nf index bb37a19..97bfb57 100644 --- a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/main.nf 
+++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/main.nf @@ -2,42 +2,48 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.19" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' : - 'biocontainers/samtools:1.19--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: - tuple val(meta), path(fasta) + tuple val(meta), path(fasta), path(fai) + val get_sizes output: - tuple val(meta), path("*.fai"), emit: fai - tuple val(meta), path("*.gzi"), emit: gzi, optional: true - path "versions.yml", emit: versions + tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true + tuple val(meta), path ("*.sizes") , emit: sizes, optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def get_sizes_command = get_sizes ? "cut -f 1,2 ${fasta}.fai > ${fasta}.sizes" : '' """ - samtools faidx \\ - $args \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(samtools --version | head -n1 | sed 's/samtools //') - END_VERSIONS + samtools \\ + faidx \\ + $fasta \\ + $args + + ${get_sizes_command} """ stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + def get_sizes_command = get_sizes ? 
"touch ${fasta}.sizes" : '' """ + ${fastacmd} touch ${fasta}.fai + if [[ "${fasta.extension}" == "gz" ]]; then + touch ${fasta}.gzi + fi - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: 1.19 - END_VERSIONS + ${get_sizes_command} """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 0000000..80aae1d --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,112 @@ +name: samtools_faidx +description: Index FASTA file, and optionally generate a file of chromosome + sizes +keywords: + - index + - fasta + - faidx + - chromosome +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: + - "MIT" + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + - get_sizes: + type: boolean + description: use cut to get the sizes of the index (true) or not (false) +output: + fa: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" + ontologies: [] + sizes: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.sizes": + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + ontologies: [] + fai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + gzi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: eval + description: The command used to generate the version of the tool +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@maxulysse" + - "@phue" diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 0000000..9a86db8 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,253 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + config "./nextflow.config" + + test("test_samtools_faidx") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_fasta") { + + when { + params { + module_args = 'MT192765.1 -o extract.fa' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + options "-stub" + when { + params { + module_args = '-o extract.fa' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + input[1] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + options "-stub" + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: 
true), + [] + ] + input[1] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes_bgzip") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + [] + ] + input[1] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes_bgzip - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true), + [] + ] + input[1] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match()} + ) + } + } + +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test.snap 
b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 0000000..4169744 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,352 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:39:12.541649151" + }, + "test_samtools_faidx_get_sizes_bgzip - stub": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:41:44.040426987" + }, + "test_samtools_faidx_get_sizes": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:47:03.653912015" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test" 
+ }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:50:04.023566795" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "fa": [ + [ + { + "id": "test" + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:39:23.529404162" + }, + "test_samtools_faidx_get_sizes - stub": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:41:39.039834304" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "fa": [ + [ + { + "id": "test" + }, + "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:39:28.961701609" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + 
"nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:39:34.471028474" + }, + "test_samtools_faidx_get_sizes_bgzip": { + "content": [ + { + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-10T15:39:45.439016495" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/nextflow.config b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 0000000..202c036 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = params.module_args + } + +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 0000000..89e12a6 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/main.nf index 38465a3..0cfb7e8 100644 --- 
a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/main.nf @@ -1,46 +1,47 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" - label 'process_low' + label 'process_single' - conda "bioconda::samtools=1.19" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' : - 'biocontainers/samtools:1.19--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(bam), path(bai) output: tuple val(meta), path("*.flagstat"), emit: flagstat - path "versions.yml", emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - samtools flagstat \\ - $args \\ - -@ $task.cpus \\ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ $bam \\ > ${prefix}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(samtools --version | head -n1 | sed 's/samtools //') - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.flagstat - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: 1.19 - END_VERSIONS + cat <<-END_FLAGSTAT > ${prefix}.flagstat + 1000000 + 0 in total (QC-passed reads + QC-failed reads) + 0 + 0 secondary + 0 + 0 supplementary + 0 + 0 duplicates + 900000 + 0 mapped (90.00% : N/A) + 1000000 + 0 paired in sequencing + 500000 + 0 read1 + 500000 + 0 read2 + 800000 + 0 properly paired (80.00% : N/A) + 850000 + 0 with mate mapped to a different chr + 50000 + 0 with mate mapped to a different chr (mapQ>=5) + END_FLAGSTAT """ } diff --git 
a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 0000000..8caa1bc --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,75 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each + FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: + - "MIT" + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + ontologies: [] +output: + flagstat: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.flagstat": + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 0000000..3b648a3 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("BAM - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 0000000..f5c882d --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "BAM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "1": [ + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:14:30.820969684" + }, + "BAM": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "1": [ + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FLAGSTAT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:14:25.581619424" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/environment.yml 
b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 0000000..89e12a6 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/main.nf index 7b76f0d..d5b70a7 100644 --- a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/main.nf @@ -2,44 +2,37 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.19" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' : - 'biocontainers/samtools:1.19--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: tuple val(meta), path(bam), path(bai) output: tuple val(meta), path("*.idxstats"), emit: idxstats - path "versions.yml", emit: versions + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + """ - samtools idxstats \\ - $args \\ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). 
+ samtools \\ + idxstats \\ + --threads ${task.cpus-1} \\ $bam \\ > ${prefix}.idxstats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(samtools --version | head -n1 | sed 's/samtools //') - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" + """ touch ${prefix}.idxstats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: 1.19 - END_VERSIONS """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/meta.yml new file mode 100644 index 0000000..fd15384 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/meta.yml @@ -0,0 +1,75 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: + - "MIT" + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + ontologies: [] +output: + idxstats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.idxstats": + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 0000000..c990cd5 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.idxstats, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("bam - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', 
single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.idxstats, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + }} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 0000000..19a54c7 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,56 @@ +{ + "bam - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_IDXSTATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-02T16:21:46.333090477" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_IDXSTATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-02T16:21:41.063422521" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 0000000..89e12a6 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/index/main.nf index 343f905..e2a0e56 100644 --- a/pipelines/nf-atacseq/modules/nf-core/samtools/index/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/main.nf @@ -2,17 +2,19 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.19" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' : - 'biocontainers/samtools:1.19--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(input) output: - tuple val(meta), path("*.bai"), emit: bai - path "versions.yml", emit: versions + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when @@ -20,21 +22,18 @@ process SAMTOOLS_INDEX { script: def args = task.ext.args ?: '' """ - samtools index $args -@ $task.cpus $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(samtools --version | head -n1 | sed 's/samtools //') - END_VERSIONS + samtools \\ + index \\ + -@ ${task.cpus} \\ + $args \\ + $input """ stub: + def 
args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" """ - touch ${bam}.bai - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: 1.19 - END_VERSIONS + touch ${input}.${extension} """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000..c6d4ce2 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,92 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: + - "MIT" + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file + ontologies: [] +output: + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools version | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/csi.nextflow.config b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 0000000..0ed260e --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 0000000..c96cec8 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,155 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + 
assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.crai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + 
process.out.crai, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.csi, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 0000000..afc8a1f --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,156 @@ +{ + "csi - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:52:10.030187" + }, + "crai - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:59.125484" + }, + "bai - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + 
"nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:47.277042" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:35.758735" + }, + "crai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:26.561965" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_INDEX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-28T17:51:15.299035" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 0000000..89e12a6 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/main.nf index 3215395..6b5aa31 100644 --- 
a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/main.nf @@ -2,46 +2,77 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda "bioconda::samtools=1.19" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' : - 'biocontainers/samtools:1.19--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: - tuple val(meta), path(bam) - path fasta + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) + val index_format output: - tuple val(meta), path("*.sorted.bam"), emit: bam - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${extension}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${extension}.bai"), emit: bai, optional: true + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" + output_file = index_format ? 
"${prefix}.${extension}##idx##${prefix}.${extension}.${index_format} --write-index" : "${prefix}.${extension}" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (extension == "sam") { + error "Indexing not compatible with SAM output" + } + } + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools cat \\ + ${bam} \\ + | \\ samtools sort \\ $args \\ - -@ $task.cpus \\ - -o ${prefix}.sorted.bam \\ - $bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(samtools --version | head -n1 | sed 's/samtools //') - END_VERSIONS + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${output_file} \\ + - + """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (extension == "sam") { + error "Indexing not compatible with SAM output" + } + } + index = index_format ? 
"touch ${prefix}.${extension}.${index_format}" : "" + """ - touch ${prefix}.sorted.bam + touch ${prefix}.${extension} + ${index} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: 1.19 - END_VERSIONS """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 0000000..6996830 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,142 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true + ontologies: [] + - index_format: + type: string + description: Index format to use (optional) + pattern: "bai|csi|crai" +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.bam": + type: file + description: Sorted BAM file + pattern: "*.{bam}" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "${prefix}.cram": + type: file + description: Sorted CRAM file + pattern: "*.{cram}" + ontologies: [] + sam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.sam": + type: file + description: Sorted SAM file + pattern: "*.{sam}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.bai": + type: file + description: BAM index file (optional) + pattern: "*.bai" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +authors: + - "@drpatelh" + - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test 
b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 0000000..df47bb2 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,332 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam_no_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("bam_bai_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'bai' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("bam_csi_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'csi' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam bai index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'bai' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam csi index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'csi' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("cram") { + + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.crai.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } + ) + } + } + + test("multiple bam - stub") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } + ) + } + } + + test("cram - stub") { + + options "-stub" + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 0000000..4e618fa --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,296 @@ +{ + "cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:01.171084" + }, + "bam_csi_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.sorted.bam:md5,72ca1dff5344a5e5e6b892fe5f6b134d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,01394e702c729cb478df914ffaf9f7f8" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:00.961675" + }, + "bam - stub": { + "content": [ + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:12.154354" + }, + "multiple bam bai index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,3ffa2affc29f0aa6e7b36dded84625fe" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:25.488622" + }, + "cram - stub": { + "content": [ + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:28.485045" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,cd4eb0077f25e9cff395366b8883dd1f" + ] + ], + [ + + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:13.168476" + }, + "multiple bam - stub": { + "content": [ + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:21.628088" + }, + "bam_no_index": { + "content": [ + [ + [ + 
{ + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,26b27d1f9bcb61c25da21b562349784e" + ] + ], + [ + + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:45:47.139418" + }, + "multiple bam csi index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,295503ba5342531a3310c33ad0efbc22" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:51.5531" + }, + "bam_bai_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,cae7564cb83bb4a5911205bf94124b54" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,50dd467c169545a4d5d1f709f7e986e0" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:45:52.796936" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow.config b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 0000000..723f62b --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + } + +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow_cram.config new file mode 100644 index 0000000..3a8c018 --- /dev/null +++ 
b/pipelines/nf-atacseq/modules/nf-core/samtools/sort/tests/nextflow_cram.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index --output-fmt cram" + } + +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/environment.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 0000000..89e12a6 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/main.nf b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/main.nf index 413e8b2..57d2468 100644 --- a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/main.nf +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/main.nf @@ -1,48 +1,40 @@ process SAMTOOLS_STATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda "bioconda::samtools=1.19" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.19--h50ea8bc_0' : - 'biocontainers/samtools:1.19--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" input: - tuple val(meta), path(bam), path(bai) - path fasta + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.stats"), emit: stats - path "versions.yml", emit: versions + tuple val("${task.process}"), val('samtools'), eval('samtools version | sed "1!d;s/.* //"'), emit: versions_samtools, topic: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" """ - samtools stats \\ - $args \\ - $reference \\ - $bam \\ + samtools \\ + stats \\ + ${args} \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ > ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(samtools --version | head -n1 | sed 's/samtools //') - END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.stats - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: 1.19 - END_VERSIONS """ } diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/meta.yml b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 0000000..5c59cce --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,88 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM 
formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + ontologies: [] + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + ontologies: [] +output: + stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.stats": + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test 
new file mode 100644 index 0000000..5bc8930 --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + 
input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } +} diff --git a/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 0000000..94d981b --- /dev/null +++ b/pipelines/nf-atacseq/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,174 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba" + ] + ], + "1": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:27:18.460724" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ], + 
"stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:27:30.245839" + }, + "cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:27:39.041649" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9" + ] + ], + "1": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:26:55.988241" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/nextflow.config b/pipelines/nf-atacseq/nextflow.config index e844cbf..1357933 100644 --- a/pipelines/nf-atacseq/nextflow.config +++ b/pipelines/nf-atacseq/nextflow.config @@ -15,6 +15,8 @@ plugins { manifest { name = 'wasp2/nf-atacseq' author = 'WASP2 Team' + homePage = 'https://github.com/mcvickerlab/WASP2' + doi = 'https://doi.org/10.1038/nmeth.3582' description = 'ATAC-seq Allelic Imbalance Pipeline with WASP2 mapping bias correction' mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' @@ -60,11 
+62,6 @@ params { skip_peak_calling = false // Require peaks parameter if true skip_multiqc = false - // Resource limits - max_cpus = 16 - max_memory = '128.GB' - max_time = '240.h' - // Institutional config support (nf-core compatible) custom_config_base = 'https://raw.githubusercontent.com/nf-core/configs/master' custom_config_version = 'master' @@ -73,17 +70,18 @@ params { help = false version = false tracedir = "${params.outdir}/pipeline_info" + validate_params = true } // Load configuration files includeConfig 'conf/base.config' includeConfig 'conf/modules.config' -// Load nf-core institutional configs +// Load nf-core custom profiles from https://github.com/nf-core/configs try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}") } // Execution profiles @@ -97,7 +95,6 @@ profiles { conda.enabled = true docker.enabled = false singularity.enabled = false - process.conda = "${projectDir}/../../environment.yml" } docker { docker.enabled = true @@ -153,15 +150,15 @@ profiles { def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.tracedir}/timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.tracedir}/report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.tracedir}/trace_${trace_timestamp}.txt" } dag { enabled = true @@ 
-184,7 +181,7 @@ process { withName: 'WASP2_MAKE_READS|WASP2_FILTER_REMAPPED|WASP2_COUNT_VARIANTS|WASP2_FIND_IMBALANCE' { container = wasp2_container } - withName: 'BWA_MEM' { + withName: 'BWA_INDEX|BWA_MEM' { container = bwa_samtools_container } withName: 'SAMTOOLS_INDEX|SAMTOOLS_FAIDX|SAMTOOLS_STATS|SAMTOOLS_FLAGSTAT|SAMTOOLS_IDXSTATS|SAMTOOLS_SORT' { @@ -196,32 +193,33 @@ process { process.shell = ['/bin/bash', '-euo', 'pipefail'] // Function to ensure resources don't exceed limits +// Resource capping is handled by process.resourceLimits in conf/base.config. +// This function is retained for backward compatibility with process label closures. def check_max(obj, type) { if (type == 'memory') { try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println "WARNING: Invalid max_memory '${params.max_memory}', using default" + def max = (params.max_memory as nextflow.util.MemoryUnit) ?: 128.GB + if (obj.compareTo(max) == 1) + return max + else return obj + } catch (Exception e) { + log.warn "Invalid memory config: ${e.message}. Using ${obj}" return obj } } else if (type == 'time') { try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println "WARNING: Invalid max_time '${params.max_time}', using default" + def max = (params.max_time as nextflow.util.Duration) ?: 240.h + if (obj.compareTo(max) == 1) + return max + else return obj + } catch (Exception e) { + log.warn "Invalid time config: ${e.message}. Using ${obj}" return obj } } else if (type == 'cpus') { - try { - return Math.min(obj, params.max_cpus as int) - } catch (all) { - println "WARNING: Invalid max_cpus '${params.max_cpus}', using default" + try { return Math.min(obj, (params.max_cpus ?: 16) as int) } + catch (Exception e) { + log.warn "Invalid CPU config: ${e.message}. 
Using ${obj}" return obj } } diff --git a/pipelines/nf-atacseq/nextflow_schema.json b/pipelines/nf-atacseq/nextflow_schema.json index d36e09b..57cfc51 100644 --- a/pipelines/nf-atacseq/nextflow_schema.json +++ b/pipelines/nf-atacseq/nextflow_schema.json @@ -226,32 +226,25 @@ } } }, - "max_job_request_options": { - "title": "Max resource options", + "institutional_config_options": { + "title": "Institutional config options", "type": "object", - "fa_icon": "fas fa-server", - "description": "Set the maximum resource limits for pipeline processes.", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", "properties": { - "max_cpus": { - "type": "integer", - "default": 16, - "minimum": 1, - "description": "Maximum number of CPUs that can be requested for any single process.", - "fa_icon": "fas fa-microchip" - }, - "max_memory": { + "custom_config_base": { "type": "string", - "default": "128.GB", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "description": "Maximum amount of memory that can be requested for any single process.", - "fa_icon": "fas fa-memory" + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "description": "Base URL for loading nf-core custom config profiles.", + "hidden": true, + "fa_icon": "fas fa-users-cog" }, - "max_time": { + "custom_config_version": { "type": "string", - "default": "240.h", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d)\\.?\\s*)+$", - "description": "Maximum amount of time that can be requested for any single process.", - "fa_icon": "fas fa-clock" + "default": "master", + "description": "Git tag/branch for nf-core custom config profiles.", + "hidden": true, + "fa_icon": "fas fa-users-cog" } } }, @@ -277,9 +270,16 @@ }, "tracedir": { "type": "string", - "default": "${params.outdir}/pipeline_info", + "default": "./results/pipeline_info", "description": "Directory to keep pipeline Nextflow trace, timeline, report, and DAG files.", 
"fa_icon": "fas fa-folder" + }, + "validate_params": { + "type": "boolean", + "default": true, + "description": "Boolean whether to validate parameters against the schema at runtime.", + "fa_icon": "fas fa-check-square", + "hidden": true } } } @@ -292,7 +292,7 @@ { "$ref": "#/definitions/aligner_options" }, { "$ref": "#/definitions/wasp2_options" }, { "$ref": "#/definitions/processing_options" }, - { "$ref": "#/definitions/max_job_request_options" }, + { "$ref": "#/definitions/institutional_config_options" }, { "$ref": "#/definitions/generic_options" } ] } diff --git a/pipelines/nf-atacseq/nf-test.config b/pipelines/nf-atacseq/nf-test.config index 32f4307..d1d396c 100644 --- a/pipelines/nf-atacseq/nf-test.config +++ b/pipelines/nf-atacseq/nf-test.config @@ -11,5 +11,6 @@ config { copy "modules/**" copy "subworkflows/**" copy "workflows/**" + copy "tests/**" } } diff --git a/pipelines/nf-atacseq/subworkflows/local/prepare_genome/main.nf b/pipelines/nf-atacseq/subworkflows/local/prepare_genome/main.nf index 965537a..afcc968 100644 --- a/pipelines/nf-atacseq/subworkflows/local/prepare_genome/main.nf +++ b/pipelines/nf-atacseq/subworkflows/local/prepare_genome/main.nf @@ -3,7 +3,7 @@ // include { BWA_INDEX } from '../../../modules/nf-core/bwa/index/main' -include { BOWTIE2_BUILD } from '../../../modules/nf-core/bowtie2/index/main' +include { BOWTIE2_BUILD } from '../../../modules/nf-core/bowtie2/build/main' include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' workflow PREPARE_GENOME { @@ -32,9 +32,8 @@ workflow PREPARE_GENOME { ch_fasta_fai = Channel.fromPath(params.fasta_fai, checkIfExists: true) .map { fai -> [[id: file(params.fasta).baseName], fai] } } else { - SAMTOOLS_FAIDX ( ch_fasta ) + SAMTOOLS_FAIDX ( ch_fasta.map { meta, fasta -> [meta, fasta, []] }, false ) ch_fasta_fai = SAMTOOLS_FAIDX.out.fai - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) } // @@ -47,7 +46,6 @@ workflow PREPARE_GENOME { } else { BWA_INDEX ( ch_fasta 
) ch_bwa_index = BWA_INDEX.out.index.map { meta, index -> index } - ch_versions = ch_versions.mix(BWA_INDEX.out.versions) } } @@ -61,7 +59,6 @@ workflow PREPARE_GENOME { } else { BOWTIE2_BUILD ( ch_fasta ) ch_bowtie2_index = BOWTIE2_BUILD.out.index.map { meta, index -> index } - ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) } } diff --git a/pipelines/nf-atacseq/subworkflows/local/wasp_mapping/main.nf b/pipelines/nf-atacseq/subworkflows/local/wasp_mapping/main.nf index 298d74c..908d3f6 100644 --- a/pipelines/nf-atacseq/subworkflows/local/wasp_mapping/main.nf +++ b/pipelines/nf-atacseq/subworkflows/local/wasp_mapping/main.nf @@ -26,6 +26,10 @@ workflow WASP_MAPPING { main: ch_versions = Channel.empty() + // Wrap plain path channels with meta for nf-core modules + ch_index_meta = ch_aligner_index.map { index -> [[id: 'genome'], index] } + ch_fasta_meta = ch_fasta.map { fasta -> [[id: 'genome'], fasta] } + // // MODULE: Generate reads with swapped alleles for remapping // @@ -53,29 +57,26 @@ workflow WASP_MAPPING { if (aligner == 'bwa') { BWA_MEM( ch_remap_reads, - ch_aligner_index, - ch_fasta, + ch_index_meta, + ch_fasta_meta, true // sort_bam ) ch_remapped_raw = BWA_MEM.out.bam - ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) } else { BOWTIE2_ALIGN( ch_remap_reads, - ch_aligner_index, - ch_fasta, + ch_index_meta, + ch_fasta_meta, false, // save_unaligned true // sort_bam ) - ch_remapped_raw = BOWTIE2_ALIGN.out.aligned - ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) + ch_remapped_raw = BOWTIE2_ALIGN.out.bam } // // MODULE: Index remapped BAM (aligners already sort when sort_bam=true) // SAMTOOLS_INDEX(ch_remapped_raw) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) // Combine BAM with index ch_remapped = ch_remapped_raw diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/main.nf index 
03e8241..c33064e 100644 --- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/main.nf +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -1,49 +1,46 @@ // -// Mark duplicates with Picard and run BAM stats +// Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats // include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' workflow BAM_MARKDUPLICATES_PICARD { + take: - ch_bam // channel: [ val(meta), path(bam) ] - ch_fasta // channel: path(fasta) - ch_fai // channel: path(fasta_fai) + ch_reads // channel: [ val(meta), path(reads) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + ch_fai // channel: [ val(meta), path(fai) ] main: - ch_versions = Channel.empty() - - // - // Mark duplicates with Picard - // - PICARD_MARKDUPLICATES ( - ch_bam, - ch_fasta, - ch_fai - ) - ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) - - // - // Join BAM and BAI for stats - // - ch_bam_bai = PICARD_MARKDUPLICATES.out.bam - .join(PICARD_MARKDUPLICATES.out.bai, by: [0], failOnMismatch: true) - - // - // Run BAM stats - // - BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + PICARD_MARKDUPLICATES ( ch_reads, ch_fasta, ch_fai ) - emit: - bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] - bai = PICARD_MARKDUPLICATES.out.bai // channel: [ val(meta), path(bai) ] - metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ] + ch_markdup = PICARD_MARKDUPLICATES.out.bam.mix(PICARD_MARKDUPLICATES.out.cram) + + SAMTOOLS_INDEX ( ch_markdup ) - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] - 
idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + ch_reads_index = ch_markdup + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.crai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map{meta, reads, bai, crai, csi -> + if (bai) [ meta, reads, bai ] + else if (crai) [ meta, reads, crai ] + else [ meta, reads, csi ] + } - versions = ch_versions // channel: path(versions.yml) + BAM_STATS_SAMTOOLS ( ch_reads_index, ch_fasta ) + + emit: + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] + cram = PICARD_MARKDUPLICATES.out.cram // channel: [ val(meta), path(cram) ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + crai = SAMTOOLS_INDEX.out.crai // channel: [ val(meta), path(crai) ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] } diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/meta.yml index 1b08bb0..433d35b 100644 --- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/meta.yml +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -1,78 +1,71 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: "bam_markduplicates_picard" -description: Mark duplicates with Picard and collect BAM statistics +description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats keywords: + - markduplicates - bam - - 
duplicates - - picard - - dedup - - qc + - sam + - cram components: - picard/markduplicates + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat - bam_stats_samtools input: - - ch_bam: - type: channel + - ch_reads: description: | - Channel containing BAM files to deduplicate - Structure: [ val(meta), path(bam) ] - pattern: "*.bam" + Sequence reads in BAM/CRAM/SAM format + Structure: [ val(meta), path(reads) ] - ch_fasta: - type: channel description: | - Channel containing reference FASTA - Structure: path(fasta) - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" - - ch_fai: - type: channel + Reference genome fasta file required for CRAM input + Structure: [ path(fasta) ] + - ch_fasta: description: | - Channel containing FASTA index - Structure: path(fasta.fai) - pattern: "*.fai" + Index of the reference genome fasta file + Structure: [ path(fai) ] output: - bam: - type: channel description: | - Deduplicated BAM file + processed BAM/SAM file Structure: [ val(meta), path(bam) ] - pattern: "*.markdup.bam" - bai: - type: channel description: | - BAM index file + BAM/SAM samtools index Structure: [ val(meta), path(bai) ] - pattern: "*.bai" - - metrics: - type: channel + - cram: + description: | + processed CRAM file + Structure: [ val(meta), path(cram) ] + - crai: + description: | + CRAM samtools index + Structure: [ val(meta), path(crai) ] + - csi: description: | - Picard MarkDuplicates metrics - Structure: [ val(meta), path(metrics) ] - pattern: "*.metrics.txt" + CSI samtools index + Structure: [ val(meta), path(csi) ] - stats: - type: channel description: | - Samtools stats output + File containing samtools stats output Structure: [ val(meta), path(stats) ] - pattern: "*.stats" - flagstat: - type: channel description: | - Samtools flagstat output + File containing samtools flagstat output Structure: [ val(meta), path(flagstat) ] - pattern: "*.flagstat" - idxstats: - type: channel description: | - Samtools idxstats output + File containing samtools 
idxstats output Structure: [ val(meta), path(idxstats) ] - pattern: "*.idxstats" - versions: - type: channel description: | - Version information + Files containing software versions Structure: [ path(versions.yml) ] - pattern: "versions.yml" authors: - - "@jjaureguy760" + - "@dmarron" + - "@drpatelh" maintainers: - - "@jjaureguy760" + - "@dmarron" + - "@drpatelh" diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test new file mode 100644 index 0000000..816ff3e --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test @@ -0,0 +1,155 @@ +nextflow_workflow { + + name "Test Workflow BAM_MARKDUPLICATES_PICARD" + script "../main.nf" + workflow "BAM_MARKDUPLICATES_PICARD" + config "./nextflow.config" + + tag "picard" + tag "picard/markduplicates" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_markduplicates_picard" + tag "subworkflows/bam_markduplicates_picard" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/index" + tag "samtools/stats" + + test("sarscov2 - bam") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("97") }, + { assert snapshot( + 
path(workflow.out.bam[0][1]), + path(workflow.out.bai[0][1]), + path(workflow.out.flagstat[0][1]), + path(workflow.out.idxstats[0][1]), + path(workflow.out.stats[0][1]) + ).match() } + ) + } + } + + test("homo_sapiens - cram") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("0.999986") }, + { assert snapshot( + file(workflow.out.cram[0][1]).name, + path(workflow.out.crai[0][1]), + path(workflow.out.flagstat[0][1]), + path(workflow.out.idxstats[0][1]), + path(workflow.out.stats[0][1]) + ).match() } + ) + } + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("homo_sapiens - cram - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ 
id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap new file mode 100644 index 0000000..bfa595e --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap @@ -0,0 +1,292 @@ +{ + "homo_sapiens - cram": { + "content": [ + "test.md.cram", + "test.md.cram.crai:md5,b641c19be42d4841ec7155c686b70f39", + "test.flagstat:md5,93b0ef463df947ede1f42ff60396c34d", + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15", + "test.stats:md5,8ec963e4ee888c8cc9d41348cedd5106" + ], + "timestamp": "2026-02-19T19:00:47.4418381", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { 
+ "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false + }, + "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "timestamp": "2026-02-19T19:00:56.802484512", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "homo_sapiens - cram - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.md.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test" + }, + "test.md.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + + ], + "6": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + [ + { + "id": "test" + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "8": [ + [ + { + "id": "test" + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bai": [ + + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test" + }, + 
"test.md.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test" + }, + "test.md.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test" + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test" + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "metrics": [ + [ + { + "id": "test" + }, + "test.md.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test" + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "timestamp": "2026-02-19T19:01:05.884074864", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "sarscov2 - bam": { + "content": [ + "test.md.bam:md5,8aa8fc57298588fed0b03aacddd7ea77", + "test.md.bam.bai:md5,8973dd987f3ac6c352716ef89139c567", + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783", + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2", + "test.stats:md5,950c07a54b20e443105a5391400a4c92" + ], + "timestamp": "2026-02-19T19:00:36.539092187", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/nextflow.config b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/nextflow.config new file mode 100644 index 0000000..2427cc4 --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_markduplicates_picard/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'PICARD_MARKDUPLICATES' { + ext.prefix = { "${meta.id}.md" } + } +} diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/main.nf index 42fa4d6..312c2d2 100644 --- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/main.nf +++ 
b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -1,5 +1,5 @@ // -// Sort, index BAM file and run samtools stats, flagstat +// Sort, index BAM file and run samtools stats, flagstat and idxstats // include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' @@ -8,43 +8,35 @@ include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' workflow BAM_SORT_STATS_SAMTOOLS { take: - ch_bam // channel: [ val(meta), path(bam) ] - ch_fasta // channel: path(fasta) + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] main: - ch_versions = Channel.empty() + SAMTOOLS_SORT ( ch_bam, ch_fasta, '' ) - // - // Sort BAM file - // - SAMTOOLS_SORT ( ch_bam, ch_fasta ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) - - // - // Index sorted BAM file - // SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - // - // Join BAM and BAI for stats - // - ch_bam_bai = SAMTOOLS_SORT.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], failOnMismatch: true) + SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + .set { ch_bam_bai } - // - // Run samtools stats and flagstat - // BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), path(bam) ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] - - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai 
= SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] - versions = ch_versions // channel: path(versions.yml) + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] } diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml index 08b172a..e01f9cc 100644 --- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -1,66 +1,70 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "bam_sort_stats_samtools" -description: Sort BAM files and collect statistics with samtools +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file keywords: - - bam - sort - - statistics - - samtools + - bam + - sam + - cram components: - samtools/sort - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat - bam_stats_samtools input: - - ch_bam: - type: channel - description: | - Channel containing unsorted BAM files - Structure: [ val(meta), path(bam) ] - pattern: "*.bam" - - ch_fasta: - type: channel + - meta: + type: map description: | - Channel containing reference FASTA for stats calculation - Structure: path(fasta) - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" -output: + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - bam: - type: channel + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map description: | - Sorted BAM file - Structure: [ val(meta), path(bam) ] - pattern: "*.sorted.bam" + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" - bai: - type: channel - description: | - BAM index file - Structure: [ val(meta), path(bai) ] - pattern: "*.bai" + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" - stats: - type: channel - description: | - Samtools stats output - Structure: [ val(meta), path(stats) ] - pattern: "*.stats" + type: file + description: File containing samtools stats output + pattern: "*.{stats}" - flagstat: - type: channel - description: | - Samtools flagstat output - Structure: [ val(meta), path(flagstat) ] - pattern: "*.flagstat" + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" - idxstats: - type: channel - description: | - Samtools idxstats output with per-chromosome counts - Structure: [ val(meta), path(idxstats) ] - pattern: "*.idxstats" + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" - versions: - type: channel - description: | - Version information - Structure: path(versions.yml) + type: file + description: File containing software versions pattern: "versions.yml" authors: - - "@jjaureguy760" + - "@drpatelh" + - "@ewels" maintainers: - - "@jjaureguy760" + - "@drpatelh" + - "@ewels" diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test 
b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test new file mode 100644 index 0000000..c584128 --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -0,0 +1,132 @@ +nextflow_workflow { + + name "Test Workflow BAM_SORT_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_SORT_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_sort_stats_samtools" + tag "bam_sort_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/index" + tag "samtools/sort" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + + test("test_bam_sort_stats_samtools_single_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats).match() } + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ 
".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats).match() } + ) + } + } + + test("test_bam_sort_stats_samtools_single_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 0000000..f62d68c --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,288 @@ +{ + "test_bam_sort_stats_samtools_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,1101fe711c4a389fdb5c4a1532107d1f" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:33:01.647190952" + }, + "test_bam_sort_stats_samtools_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f26c554c244ee86c89d62ebed509fd95" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:33:08.706742267" + }, + "test_bam_sort_stats_samtools_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:11:02.1412136" + }, + "test_bam_sort_stats_samtools_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:11:09.165267895" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/main.nf index 7ee13e9..34e8fe1 100644 --- 
a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/main.nf +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -1,32 +1,25 @@ // -// Run samtools stats, flagstat, and idxstats +// Run SAMtools stats, flagstat and idxstats // include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' -include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' workflow BAM_STATS_SAMTOOLS { take: - ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] - ch_fasta // channel: path(fasta) + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] main: - ch_versions = Channel.empty() - SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) SAMTOOLS_FLAGSTAT ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) SAMTOOLS_IDXSTATS ( ch_bam_bai ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) emit: stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] - - versions = ch_versions // channel: path(versions.yml) } diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/meta.yml index b1a9700..809bf73 100644 --- a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/meta.yml +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -1,54 +1,43 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: 
"bam_stats_samtools" -description: Run samtools stats, flagstat, and idxstats on BAM files +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file keywords: - - bam - statistics - - qc - - samtools + - counts + - bam + - sam + - cram components: - samtools/stats - - samtools/flagstat - samtools/idxstats + - samtools/flagstat input: - ch_bam_bai: - type: channel description: | - Channel containing BAM and BAI files + The input channel containing the BAM/CRAM and it's index Structure: [ val(meta), path(bam), path(bai) ] - pattern: "*.{bam,bai}" - ch_fasta: - type: channel description: | - Channel containing reference FASTA for stats calculation - Structure: path(fasta) - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + Reference genome fasta file + Structure: [ path(fasta) ] output: - stats: - type: channel description: | - Samtools stats output with alignment metrics + File containing samtools stats output Structure: [ val(meta), path(stats) ] - pattern: "*.stats" - flagstat: - type: channel description: | - Samtools flagstat output with flag counts + File containing samtools flagstat output Structure: [ val(meta), path(flagstat) ] - pattern: "*.flagstat" - idxstats: - type: channel description: | - Samtools idxstats output with per-chromosome counts - Structure: [ val(meta), path(idxstats) ] - pattern: "*.idxstats" + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] - versions: - type: channel description: | - Version information - Structure: path(versions.yml) - pattern: "versions.yml" + Files containing software versions + Structure: [ path(versions.yml) ] authors: - - "@jjaureguy760" + - "@drpatelh" maintainers: - - "@jjaureguy760" + - "@drpatelh" diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test new file mode 100644 index 0000000..2f32969 --- /dev/null +++ 
b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -0,0 +1,185 @@ +nextflow_workflow { + + name "Test Workflow BAM_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/stats" + + test("test_bam_stats_samtools_single_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram") { + + when { + 
workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats).match() } + ) + } + } + + test ("test_bam_stats_samtools_single_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', 
checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 0000000..9c8ff1b --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,305 @@ +{ + "test_bam_stats_samtools_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": 
true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:10:30.076183827" + }, + "test_bam_stats_samtools_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:10:24.379362883" + }, + "test_bam_stats_samtools_paired_end_cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": 
"0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:10:35.91658956" + }, + "test_bam_stats_samtools_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,7a05a22bdb17e8df6e8c2d100ff09a31" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:32:20.243663217" + }, + "test_bam_stats_samtools_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,a391612b5ef5b181e854ccaad8c8a068" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:32:26.434187887" + }, + "test_bam_stats_samtools_paired_end_cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,2b0e31ab01b867a6ff312023ae03838d" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T11:32:32.441454186" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/main.nf index 4a42b76..8cbc514 100644 --- a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/main.nf +++ 
b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/main.nf @@ -3,56 +3,41 @@ // include { BOWTIE2_ALIGN } from '../../../modules/nf-core/bowtie2/align/main' -include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' workflow FASTQ_ALIGN_BOWTIE2 { take: - ch_reads // channel: [ val(meta), path(reads) ] - ch_index // channel: path(index) - ch_fasta // channel: path(fasta) + ch_reads // channel: [ val(meta), [ reads ] ] + ch_index // channel: /path/to/bowtie2/index/ + save_unaligned // val + sort_bam // val + ch_fasta // channel: /path/to/reference.fasta main: - ch_versions = Channel.empty() - // - // Align reads with Bowtie2 (outputs sorted BAM) - // - BOWTIE2_ALIGN ( - ch_reads, - ch_index, - ch_fasta, - false, // save_unaligned - true // sort_bam - ) - ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) + ch_versions = channel.empty() // - // Index BAM file + // Map reads with Bowtie2 // - SAMTOOLS_INDEX ( BOWTIE2_ALIGN.out.aligned ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + BOWTIE2_ALIGN ( ch_reads, ch_index, ch_fasta, save_unaligned, sort_bam ) // - // Join BAM and BAI + // Sort, index BAM file and run samtools stats, flagstat and idxstats // - ch_bam_bai = BOWTIE2_ALIGN.out.aligned - .join(SAMTOOLS_INDEX.out.bai, by: [0], failOnMismatch: true) - - // - // Run BAM stats - // - BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + BAM_SORT_STATS_SAMTOOLS ( BOWTIE2_ALIGN.out.bam, ch_fasta ) emit: - bam = BOWTIE2_ALIGN.out.aligned // channel: [ val(meta), path(bam) ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] - log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), path(log) ] - - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] - flagstat = 
BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] - - versions = ch_versions // channel: path(versions.yml) + bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), aligned ] + log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] + fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] } diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/meta.yml index 4434311..b18e405 100644 --- a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/meta.yml +++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/meta.yml @@ -1,79 +1,67 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "fastq_align_bowtie2" -description: Align reads with Bowtie2 and collect BAM statistics +name: fastq_align_bowtie2 +description: Align reads to a reference genome using bowtie2 then sort with samtools keywords: - - alignment - - bowtie2 - - bam - - map - - fastq + - align + - fasta + - genome + - reference components: - bowtie2/align + - samtools/sort - samtools/index - - bam_stats_samtools + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools input: + - meta: + type: map + description: | + Groovy Map 
containing sample information + e.g. [ id:'test', single_end:false ] - ch_reads: - type: channel + type: file description: | - Channel containing FASTQ reads - Structure: [ val(meta), path(reads) ] - pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. - ch_index: - type: channel + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean description: | - Channel containing Bowtie2 index files - Structure: path(index) - pattern: "*.bt2" - - ch_fasta: - type: channel + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean description: | - Channel containing reference FASTA - Structure: path(fasta) - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + Use samtools sort (true) or samtools view (false) + default: false + - ch_fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs output: - bam: - type: channel - description: | - Aligned BAM file - Structure: [ val(meta), path(bam) ] - pattern: "*.bam" - - bai: - type: channel - description: | - BAM index file - Structure: [ val(meta), path(bai) ] - pattern: "*.bai" - - log_out: - type: channel - description: | - Bowtie2 alignment log - Structure: [ val(meta), path(log) ] - pattern: "*.log" - - stats: - type: channel - description: | - Samtools stats output - Structure: [ val(meta), path(stats) ] - pattern: "*.stats" - - flagstat: - type: channel - description: | - Samtools flagstat output - Structure: [ val(meta), path(flagstat) ] - pattern: "*.flagstat" - - idxstats: - type: channel - description: | - Samtools idxstats output - Structure: [ val(meta), path(idxstats) ] - pattern: "*.idxstats" + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" - versions: - type: channel - description: | - Version 
information - Structure: path(versions.yml) + type: file + description: File containing software versions pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Alignment log + pattern: "*.log" authors: - - "@jjaureguy760" + - "@drpatelh" maintainers: - - "@jjaureguy760" + - "@drpatelh" diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test new file mode 100644 index 0000000..6eca398 --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test @@ -0,0 +1,189 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_BOWTIE2" + script "../main.nf" + config "./nextflow.config" + workflow "FASTQ_ALIGN_BOWTIE2" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_bowtie2" + tag "subworkflows/bam_sort_stats_samtools" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("test_align_bowtie2_single_end") { + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:true ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + 
workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match()} + ) + } + } + + test("test_align_bowtie2_paired_end") { + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:false ], [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match()} + ) + } + } + + test("test_align_bowtie2_single_end - stub") { + + options "-stub" + + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:true ], [ file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match()} + ) + } + } + + test("test_align_bowtie2_paired_end - stub") { + + options "-stub" + + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:false ], [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match()} + ) + } + } +} diff --git 
a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap new file mode 100644 index 0000000..2dc8896 --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap @@ -0,0 +1,230 @@ +{ + "test_align_bowtie2_single_end - stub": { + "content": [ + "test.bam", + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.bowtie2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "test.sorted.bam", + "test.sorted.bam.bai", + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T15:14:25.504699933" + }, + "test_align_bowtie2_single_end": { + "content": [ + "test.bam", + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.bowtie2.log:md5,7b8a9e61b7646da1089b041333c41a87" + ] + ], + "test.sorted.bam", + "test.sorted.bam.bai", + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.stats:md5,48b911852e91d77db59154f7355ede4f" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.flagstat:md5,e9ce9093133116bc54fd335cfe698372" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.idxstats:md5,e16eb632f7f462514b0873c7ac8ac905" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T15:14:08.108143527" + }, + "test_align_bowtie2_paired_end": { + "content": [ + "test.bam", + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, 
+ "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + "test.sorted.bam", + "test.sorted.bam.bai", + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.stats:md5,cb422b3fcd4327488cb6bc5ac15a48ff" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.flagstat:md5,49f3d51a8804ce58fe9cecd2549d279b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.idxstats:md5,29ff2fa56d35b2a47625b8f517f1a947" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T15:14:17.07821488" + }, + "test_align_bowtie2_paired_end - stub": { + "content": [ + "test.bam", + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "test.sorted.bam", + "test.sorted.bam.bai", + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T15:14:34.088967148" + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config new file mode 100644 index 0000000..9086ebf --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { + ext.prefix = { "${meta.id}.sorted" } + } + withName: '.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted" } + } +} diff 
--git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/main.nf b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/main.nf index 2524b46..e06a5fa 100644 --- a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/main.nf +++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/main.nf @@ -1,56 +1,41 @@ // -// Alignment with BWA-MEM +// Alignment with BWA // include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main' -include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' workflow FASTQ_ALIGN_BWA { take: - ch_reads // channel: [ val(meta), path(reads) ] - ch_index // channel: path(index) - ch_fasta // channel: path(fasta) + ch_reads // channel (mandatory): [ val(meta), [ path(reads) ] ] + ch_index // channel (mandatory): [ val(meta2), path(index) ] + val_sort_bam // boolean (mandatory): true or false + ch_fasta // channel (optional) : [ val(meta3), path(fasta) ] main: - ch_versions = Channel.empty() + ch_versions = channel.empty() // - // Align reads with BWA-MEM (outputs sorted BAM) + // Map reads with BWA // - BWA_MEM ( - ch_reads, - ch_index, - ch_fasta, - true // sort_bam - ) - ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) - // - // Index BAM file - // - SAMTOOLS_INDEX ( BWA_MEM.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + BWA_MEM ( ch_reads, ch_index, ch_fasta, val_sort_bam ) // - // Join BAM and BAI + // Sort, index BAM file and run samtools stats, flagstat and idxstats // - ch_bam_bai = BWA_MEM.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], failOnMismatch: true) - // - // Run BAM stats - // - BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) - ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + BAM_SORT_STATS_SAMTOOLS ( BWA_MEM.out.bam, ch_fasta ) emit: - bam = BWA_MEM.out.bam // channel: [ val(meta), 
path(bam) ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + bam_orig = BWA_MEM.out.bam // channel: [ val(meta), path(bam) ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] - versions = ch_versions // channel: path(versions.yml) + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/meta.yml b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/meta.yml index 31ebdc4..fa21840 100644 --- a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/meta.yml +++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/meta.yml @@ -1,73 +1,73 @@ # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json -name: "fastq_align_bwa" -description: Align reads with BWA-MEM and collect BAM statistics +name: fastq_align_bwa +description: Align reads to a reference genome using bwa then sort with samtools keywords: - - alignment - - bwa - - bam - - map - - fastq + - align + - fasta + - genome + - reference components: - bwa/mem + - bwa/align + - samtools/sort - samtools/index - - bam_stats_samtools + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools input: - ch_reads: - type: channel 
description: | - Channel containing FASTQ reads - Structure: [ val(meta), path(reads) ] - pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + Structure: [ val(meta), [ path(reads) ] ] - ch_index: - type: channel description: | - Channel containing BWA index files - Structure: path(index) - pattern: "*.{amb,ann,bwt,pac,sa}" + BWA genome index files + Structure: [ val(meta), path(index) ] + - val_sort_bam: + type: boolean + description: If true bwa modules sort resulting bam files + pattern: "true|false" - ch_fasta: - type: channel + type: file description: | - Channel containing reference FASTA - Structure: path(fasta) - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + Optional reference fasta file. This only needs to be given if val_sort_bam = true. + Structure: [ val(meta), path(fasta) ] output: + - bam_orig: + description: | + BAM file produced by bwa + Structure: [ val(meta), path(bam) ] - bam: - type: channel description: | - Aligned BAM file + BAM file ordered by samtools Structure: [ val(meta), path(bam) ] - pattern: "*.bam" - bai: - type: channel description: | - BAM index file + BAI index of the ordered BAM file Structure: [ val(meta), path(bai) ] - pattern: "*.bai" + - csi: + description: | + CSI index of the ordered BAM file + Structure: [ val(meta), path(csi) ] - stats: - type: channel description: | - Samtools stats output + File containing samtools stats output Structure: [ val(meta), path(stats) ] - pattern: "*.stats" - flagstat: - type: channel description: | - Samtools flagstat output + File containing samtools flagstat output Structure: [ val(meta), path(flagstat) ] - pattern: "*.flagstat" - idxstats: - type: channel description: | - Samtools idxstats output + File containing samtools idxstats output Structure: [ val(meta), path(idxstats) ] - pattern: "*.idxstats" - versions: - type: channel description: | - Version information - Structure: path(versions.yml) - pattern: 
"versions.yml" + Files containing software versions + Structure: [ path(versions.yml) ] authors: - - "@jjaureguy760" + - "@JoseEspinosa" maintainers: - - "@jjaureguy760" + - "@JoseEspinosa" diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test new file mode 100644 index 0000000..7262325 --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test @@ -0,0 +1,77 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_BWA" + script "../main.nf" + config "./nextflow.config" + workflow "FASTQ_ALIGN_BWA" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_bwa" + tag "subworkflows/bam_sort_stats_samtools" + tag "bwa" + tag "bwa/mem" + tag "bwa/index" + + + test("fastq_align_bwa_single_end") { + setup { + run("BWA_INDEX") { + script "../../../../modules/nf-core/bwa/index/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:true ],[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]]) + input[1] = BWA_INDEX.out.index + input[2] = false + input[3] = Channel.value([[id: 'genome'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } + + test("fastq_align_bwa_paired_end") { + setup { + run("BWA_INDEX") { + script "../../../../modules/nf-core/bwa/index/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + 
} + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:false ], [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ] ) + input[1] = BWA_INDEX.out.index + input[2] = false + input[3] = Channel.value([[id: 'genome'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + ) + } + } +} diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test.snap b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test.snap new file mode 100644 index 0000000..9a16da2 --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/main.nf.test.snap @@ -0,0 +1,264 @@ +{ + "fastq_align_bwa_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,5dbdcfdba65fac634dcbb6984cffe2c4" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,ba4b90f87517a16a6ae6142f37a75d79" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,4c5e6fa0e71327b79034eebd652f2121" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.stats:md5,75934f2a51780a80d2ab4674301a018d" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.flagstat:md5,18d602435a02a4d721b78d1812622159" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,85d20a901eef23ca50c323638a2eb602" + ] + ], + "7": [ + + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,4c5e6fa0e71327b79034eebd652f2121" + ] + ], + 
"bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,ba4b90f87517a16a6ae6142f37a75d79" + ] + ], + "bam_orig": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,5dbdcfdba65fac634dcbb6984cffe2c4" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.flagstat:md5,18d602435a02a4d721b78d1812622159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,85d20a901eef23ca50c323638a2eb602" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.stats:md5,75934f2a51780a80d2ab4674301a018d" + ] + ], + "versions": [ + + ] + } + ], + "timestamp": "2026-02-18T12:47:43.306112", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "fastq_align_bwa_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,f7af092ddd5203f647ba96b926392c3e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam:md5,c406a43adde2d9673e71d8a8c7db7cfd" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.bai:md5,f79a40341ecfaae11d8621b138d4c2ea" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.stats:md5,0883b19c92a783883b3e11d5bfcc5d6a" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + "7": [ + + ], + "bai": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.bai:md5,f79a40341ecfaae11d8621b138d4c2ea" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam:md5,c406a43adde2d9673e71d8a8c7db7cfd" + ] + ], + "bam_orig": [ + [ + { + "id": "test", + 
"single_end": true + }, + "test.bam:md5,f7af092ddd5203f647ba96b926392c3e" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.stats:md5,0883b19c92a783883b3e11d5bfcc5d6a" + ] + ], + "versions": [ + + ] + } + ], + "timestamp": "2026-02-18T12:47:30.203617", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/nextflow.config b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/nextflow.config new file mode 100644 index 0000000..2f85e80 --- /dev/null +++ b/pipelines/nf-atacseq/subworkflows/nf-core/fastq_align_bwa/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { + ext.prefix = { "${meta.id}.sorted" } + } + withName: '.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } + } +} diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa index 923c055..182b3f7 100644 --- a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa +++ b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa @@ -1,331 +1,335 @@ >chr_test -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC 
-ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -AAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTTAAACCCGGGTTT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -TGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGACTGAC -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT 
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCA -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC 
-AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -AGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTC -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT 
-GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA 
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT 
-ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT 
-GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -GATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATC -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -CGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGAT -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA 
-TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -TCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGA -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -GTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG 
-TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG -TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG +GAAAGGCATAATAAGTAGCACGTACTAACGCGTCTTCGCTGAAAATAGTTAACGGAGATC +GTGCGAATAACCTGTCTAATAGCTACTAAAGCTATCTCCAGGTAGATTCCATACCTGGAG +TGTATACCCTACCATAGGATTACTATGATCGTTAATGAAAGACCAAGAACTTGCAATTTG +GCATTCAATTAACTCTACCCCATATATCAGTTCCTGATCTTGAGTCACAAGGAACAGGTG +TCAGATGTTGATCCAAACCCTACGGCGACTGCAAATAGGAGATCCATAAGGAGTTAACCT +CGAATCCCCAAAGCTGACCCCAGTCCCCAGACCACTTCAAATCCAGTCTCACACAATGTG +TTTAGACTGGGTAGTTCGTTTTATCGCGTTAATTGTTATCCAATGTCGGAAAATCATGAG +TAGAGGATACTAACTCGCGCCGGTCTCGTAAGGTGAAAATTAAGGATTTATCGGCGTATG +CCTGTGAATATGTATAGATTAGATATATGTGCAAATCTGGGGCAAAAGTAGGAGGACCAA +TGCTGAGGAGCGACGTTTTCCACGCGTGCACTTTGACCACATGTACAACTCGAACAGTGG +GTCAAGTGTTTGTGAAAAGGAATGCTAAAATTACTGACTCTTTAACTCTAGAATTCAGGC +ATTTCCTGGGCAAGAAAATGTAGGTGCGGGCTTGCCAATGTAAGGCTTAATTAACCTCCG +AAGTGCAGGTATTGCTGACCTTTTCTTCGTTATGGGATCTGACGAATTACCTACTGTACC +ATTCTCCACTCTCATGCTATTTTAAGTAGAGGCTGCCTATGCCTTTGTGATCTGGCCCTT +GGCAAGCCGTAGCTGCACTTATTCAACGACATAAACCGATTGGTACATTATTCTCGATGG +AGTCACGTGGGGCGCGTTTGATGAATCTCCACTCGTACACCGCCCTCATTGGGCCAAACT +CAACCTTACTTACATGGCTGATATTCATTCCAGTCTTAACTGGGAGAATAGAACTACACA +AAAGAAGATAAGTGTGTATCAGCTTCATTGTCAAGAAGTTCTTGAGCGGGATATTTATGT +ACACAAGCTGTTATGGCGCGTTAGAACTGTCCCCGGACCAAGTTACTTAGAGATTTGGTA +AAGGAGTTAGATAGTGATGATAAATAGATGTCCACAACCTTGTAATCGCCACAGTTTTAT +ATCTGCCAAAGGGAGTGGTCTGGCGAATTAATTTACACCGTTTCCTCGTTAAACTGTAAT +TTATATTGGGAAGAGGCCTGATCGTGCTTCCGCGGTGTTTAACTAAACAGCCATGATACG +CATTGATAGTTTCTCTCCTATTCCCAAGCTACCAGACATCATTAATACTACCGCAACGAG +TAAACTGTAATATCTACGATAATGATGACATTCTTTGCAAGTGGGGTATCAGTGGCAGTT +AAACTCTAGATGCTATCGCTCTTCTCGAGCTTAGTGTGTACTCACCAGTCGCAGGAAGTT +TGGCTGTTTGAAGTTTAATCACTACTCTAGCTTATCCGCGCTAAACATTCTGATCGTGCA +CGTGTCGGACTCAAAATGTCCCAGTATTTACAGGGCTCAAGTGGTGTGACTCGTAATTAG +TGGCCATTTTAAATTGACATTTGTTTTACTCATATCGTTCTCGGTTTATATGACGACTCT +CGATTAATTTGTTGACGTTCTGTCTGCGCGGATCGGTGGAGGCAGACAATAGTGCCGAAA 
+TGTTACTTGGGGAATACTAAGTTCCAAGTCCCCTAGTTATATCGAGGAGTGATGAGATCT +CCTACTGCATTGCCACACCTTCCCCATACACTTCCTAAATAAGCTGACCCTAGAATAAAG +CTGAGGAATTTCGTACTGAAAGGTTTTGAAGCATGATATTTATTAAGATCTTTATCGTCG +TATACCACATGGCGTCTCCTGGTGTATTGAAATGTTCATACGACTGCAAAAGGAGTAACA +TACGTGGTTAGATACCCGTTCCGGTTATGTCTGCCTCTAAAGCCAGAAGGCAGGTTCTCA +CCACTAGACTGTTTATTACTCCTTTAAACTTATTCTGGACCGTACAGTCTGAACCGGTCA +GATTGGGTTATATACACGCCAAAATCATTTTCAGCGCGATTAAATTGTCATAACCTAACC +TACTCGGGTAAACTCTGACGTCATCTGCTGAACTTCTGGAGCGAAGGGTAATTAAATTTA +TAGTTTTACCCTATATTATTTAAAGGAATCTGCTTCCCATCATCCTGTTATCTATGTGTC +TGTTGCCTTGAGGGACTTTCGTCTCTGAGGTGACGTGCTAATTGTTTGGTTAATCACATT +ATTTGTTCACGGACAAATCATAGTAGAGTGAGCAACATTACTGGGGTCGCGTGAAATAGT +TATAGGGCTTATTATAACCTTGTCTAAGTATATGGTAAGCTCAGTCACGTCTTCTCGACG +TGGAAAATATTGAACCGACGCCCACAGCGGTTATTGCATACTCTAGGGTGTATATAACTT +TTGAAGTACTACAGAGACAGATCATTGAGGATAAGAGCCTAATGATCAGGACATAGTGGA +TGCAAGGTCTAAATGGGGCGTTTGTACCTATGTCCCACTTGGCGAAAACTGTTGATGATT +ACTTGCGAGGCAATTGTGGAGGACTGGAAGACGACAAGTATTTTAATGATACATTACCTC +GTTTGAATTCACCCATACTTAATTGTGTGACGAATATCCCAGCGATATACGACCTGTCAA +ACATTCAATCGGTAAAGGAATTTCATAAAGCGACTAATTGACATTGATCAACCACTGGGA +CAACTACCTATATCTAGAAAACAGATTTAAAACTGCCCGTTTCTTATACGACTGCCAGAC +CACACCTCCAGCGCAGCTTACCTTTAAATACAAGCCTAGCGCCCTCTATAACCCGACGCG +AGATGAGCCTCCAGCCATCAGACACAGGCTAAAATTGCCTTTATCGGAACTTCAATGTCA +GGTACACAAAAGGGAAAATCATTTGGAAATACTTTGATACTTATAAAGGATTCGTCCTTC +TCTACGTCCGGAGACCCATCTCGCACCATTTATCGGTTTAGGCCTAATTTTGAAAGGACT +AGCCACTATGACACTCATGAACGGCCTATTACCAACCATCGACTGAATGACGTACGGATA +TCCGGATAGGACGGAACTCGTTTATGCTATGCTGGTAACGCAGCTAGCCCGGGGCATTAG +TAGATGCGTCCCAAAACGAGTATGTGTATCTCGCACTCTTACAATTCTTGGTGAGAAGAG +TGAGGTCTAATATCAGGAGTATGACTTGGTCCTCTACCTAGAGGATGACATACGGAGTTT +TAGGTGGAGACAGAAAATTAGTATACTAGCCGAATGAAACTTAAATCTGAGACGATTGCA +CATCATCCGCAGACATGCGATTAGCCACATAATGGGTTCGTTGAGATGTCTCAGACCCAT +ACAAGTATCTCTATGATTAAGGTTAGCTAATTGTGGAGATCCTTGAAAGGAGACTTGGAT +CCGGTGCATTACCTTCATGATGCTTCCGACCTATGGTGCGCGAGTTGCGCTGTATTTGTG +CACCTAAGAGAAACGTGACACGCGTAGCAGCTCCTTAAGGCCCGGGTGGCTAGAATTTTA 
+GATGAATACGGTTTGTAAATTTAAATTAGTCCCAGTCGGCGTCCTTACCTCTACATCACT +AAGGCTATGCGGCGATTAACTTAATGTAGTGGGGACAGTAGTTGTTATCTCAGCCGTCTT +AAGTCTGCTTGTAACAACCCCTTTAAGTTAGAGCTTGTGTTTTAAAGTCAGCTTTTAGCC +ATACAAATAGTGCTTCTGTAGGTTTTGCCGATTACGCGTTATATAACTTTACTGTCCATA +GTGCTTCTTCTTGTAAAGAATGAACGTTAACAATAGATAAACGTAGGAATCCACGCCAGA +GTTGATAACTTAATGAGTATAGCCGGTTATACGTGGGGAATACACTAGGTAAGGTTAGAC +TTAGGTGTTTATTGGCGGTGAATTTGGACAAACTAAAATCGTGGCCGTAGCAAGTAAAAT +CGTTGTGAAACCTCAGACTATAATCCCCTGCTGGCTTGAAAGCGATCTACAAGCACTTCA +CGCTAGCAAAGAACGGGGTATGTCCCTCCAATACTTTTGACGTGAAGTGATATGTTAGTC +AAATAAAATTACACATCCTGGTTTTGACTGTTTTCAAACCATGAGTGTGCTAGAACTGTC +AAATTAGATCTGCTAAGGCGAAAACTATGAAAGCTAAGACAGCTTCTATCGAGGGTTGTT +TCTTATACCTTACCTATTAATTTTAGTTATAGCCGAGCTCAAGGAGAAATAAAGGAATTT +CCTCTCCAGATACCCAGAGTGATGTCTGTTGACTAGACCAAGTAAAGAAGTGTAAAGCCG +AGGCAACGGCTAGTACTTTGAATGACCTAATATAGTAACGAGGTTTTGTGATACACATAT +CGTGATGACATCACATCTTGCAAATCCAGTATAGAGTAGTTGCAATTACTTTCTTGTGGT +AGCACTTGCGTCTTACACGATTCAATATGACATCGGCACGTCGTGTAAGTCTCCAGGAGT +TATATAAGTTGTAATAATATATGAATTGAGGAAGTCAGTTTGATCGCTAACATGCAACCC +CAGATAATATATGAGAGGAAAGGAGATACGCACGATCATCTATTCAATTTATTGACTCGC +CCATAACGATCGGAAACCTTAATCCTGTACCACCTTCATCGGCTTTCCCAGAAGGATAAG +TGTTGGTCTAAAGAATGCGACCCTTTATAGTTGGGTCGTTCACTTGTTGATTTCTTGATA +CTGAGCGATTAGGATAGCCGAATTTTCTCTTGCTGACAGTTGTGAAAGATCTACAGTTAG +ATGTCAAGACGCTCATAGGGGATTCATTTATTTAGATTGGAGGCTGCCAGTTCTATTGTA +GGCAAGACCCTTTGAAACTTTAGTGGAATTGCCGTGCTTGTGCTGTTAGCCTCAACGCTT +GCGGTATTATCATAGGCTATTACGTGACCCGAGTGTACGGATATGTTTCTAATTAAAAGT +ATTAGAAAGTTATGAATAGGCGGTCGGTCGTACCTTGGTAACGCTGGGCTATTTAGGAAC +CTGCTTTGTCTTCGGTGTAGACTTGTTCACAACGTTGACCCGAAATTTAGTTCTCTCTAA +CTATTTAGCTCCAGTTTTGTATCCACGAAAGTTCAGTTGGTATTTTAGTCATTTTCTGAT +GAGCCGTACATGCAGCTATGTTTGTCCAACGGTATAACCGAATCAAACAAAGATCAGTCC +TAACATCGATGAGTGGAATTGGTTGTACACTGCGACGCTCCTAAGTGGGGATGATGCAAA +TAAAACGCCGGACAGCTCCGATCGCATCGTAAGTTACATTCGATAGAGCGAATATCAGCG +AGCTTCTTCGGTACCTTCTGTGCATCATGGAATAGCGTAGGAAGGTATTTCTCAAGAACG +TGCATCAAGTCAGAAATCTAGCATCACTCCGTCTACCGGTAATGTTCAACGGATAAAGCT 
+CGGAGTTCGAATCGGTAAATATGTAGGAACGCTAGAGATTCGAGCAGTACGGTAGTGTAG +CTATTCACTTAGGCAAGAACTATCGGGGACCACTCGCAGGATTCGATACATGATTCCTAT +AGCATGATTGCGATGCTGTTGCACTATACTCGACGACGCATGTATAGACAATCGCAGATA +GAATTTAGGTTGCCCCACTACACAAGTCTGTCTATTGTACACGTTGTGGCTTAGAATCGA +TTACGACCGGAAATAAATATTTTATCTTATTAGCTGTACCTATCTGGCATTTCTAAGGAC +AATTGATATGCCTACTTATCCAGTCCACCTCAGAATCCACGATCTTGGAATTACCTTTAA +ACCTGCTTGAAACAGGTCGTGATTCAATCAAATCTATCTGAAGTCCGTGGAGCATTTTCA +AAACGCTTTGATACCTTTCCGGTGACACAAAAGGAGGAACTAAAAGGGCACATACCCTAT +GATATAAAACTCAATGTGTCATTAAACAAAGGTATAAGTCTTTCAACTGACTATGAATGA +CCACTGCACGAGGAGGTTGTTAGAATGAAAAGCTGAGAAGGCAGTATCTCATCTTTTATC +TGTAGTAGGGTTCTTTCGTCTAACTGACTATTTGAGGCATTATTCTCAGGCTTTCAGTTG +TGTTTCGCTAACTAGACATACTACGTCTTATGTGAAGCTACGTCTGGTTGTTAAGTTTCA +ATCGAGTAAACTTTGAAAACGACCTACAGCCTTGACGAAGCTCCCACAACTGTGATAACT +AGTTCTTGCCCTGCACGCGCGGATTCTCACCTCTCAACAACCGCGTACCCTTCGCCCGTT +GCGTAAGGCATGTAATCCGCGCTTGAGCCATACCCACCGGCCAGATTAATCAGTCTGAGA +CGATACGCAGTTATAGCTGTAATGGGGAAATACCCCGGAAGTTTCTGATCCATTAAAACC +GCACGGATCTCGACGCAAAACTCCATGTTCCAACAATACGGCTTTAGGCAGGTGCCAACG +TCGACGCTGGCTAAGTAACTTACCACAGAGGATTCTGAGCTTCTTTGCGTTATTAGATGT +TTCTAACCTTAAAATAGTAAATAGAATACTGTGGACCAAGGCATAAATGCCGTGCTGGTT +AAAACCAGGTGCATTTAAAGCTCGATCAAGGCCGGTTTTGGGCTGTTTACTTTCTGAAAT +AACTGCGATGCCGGCCCGAGGAAGATCTAAACTACCAATGAAATTACAAGTGGCTTCAAG +GCCAAGCCATTTGAGTACTTGACTTATGTGAGTACTTTCCTAAACCATCAAGGGCAGGGT +TTGTTGCAATCGTATGGGCGTATATGGACAATTGAACGAGGCAATGTAGATGTCCCTCGT +GTAGGGGTATGCTAGCAACTTTTGTTATTTCTCCAAGAGCAATGCTCGTATAATCTTCAG +ACCACTATCTTTCGTGGGTTTTCTCGTATTCCGGCGTCGTATAGTATATCACAAGAGCTC +GTACATTCTAAAATATTAGTAATTTTCAAGGTGTAATTTTACACGATGTTAGACTCGTTC +TATCACACTGCTTGGTAGTTTAATATGCTGTAGTACTTGAGGATCGTCGGTGGAACGGTC +CTAGGATCTAAACTAGTGATTACGAACTCTTTGTGTAAAATATGAGCGTATTCGCACTCA +GTTGCAATTAAATAGCTAAATGATCGGTAAATATCCGGGGTAAATCAACTTGAGTTTAGA +GGATCCGTCGTTAAGAGATGATGTACATTCGTCGATTTAGGATCCTAACGTGGCGTTCGT +ATGAAAAGAGCTGAACTAAATAGGAAAACGTTAACCAGTGACTACGCCCCAACCATTGCA +AGATGTACCCCAATGATGGTTTTGGTATCGAAACTTCTCTTAATTGTGTTTCTTAAGTAC 
+TGGCAAAATTCGAGCCGGCATCGTTTGTTGATAGTTGGGTCTAGGATTTTACACCTTGTG +TTAGCACTGGGCCATTAATTCAATAGTAACAAGAATACTAATTACCAATGTGCGTGAAAA +TCTCCTTGACTGGTGCAACGTCATTCACAGTCGGATCTCAAGTTATTAGGTGCTAACTGT +ATACACCAAATTTAGGATAAGAGCCGGCTTAAGGCTAATCTAGACCCAATATTAATCAAT +ATTTTACGTAATGCATCCACGCGGCGTGCTCTTGGTGAGCAGCTGGGATTAAACGCGTAG +GTCGAACTATCGAGGGTTTACAAGAAAGCCAAGTGAAAATGAGACTATTGGCCATCGCGA +GATTTGAATAAATGTCCCTTGGTACTTATACGTTGGGCGAACGGGGATGAGCCAGGCTGC +TATCATCGTTTCGAGGTAGCTTCCAAGTGGATGAACTCAAAGACTGGCATTATGTGAAGA +GCATAGCGCTTTTCCCCGTATTATGGCAGCAGCTGGTTACCCATACTTGTGATCCCCGTA +ATTCTACTGTCATAGAAGGATGACCGAATCAATGAGCCGGGTGGTGTCCAAAAGCGATCC +TAATCCTTGCTGATTTACCTTGAGCGGTCACGTCTGTCTCAGCGACATTCGCCTTGCGTT +AGACTAGGCCGTAAGTAAGGAGTGCACTCCACAACGGCGTAATGCGTGCGGCGAGTAATG +TATTAGCATGTTAACCACATTCTTGGCAGCCAGATCAAAATCACTTTTCATCTGGTTGTC +TTAACAATCCGATAGAATCTAATGTAGCGATGCGTACTAGAAATAGTTACAATCTACAGT +CTTGCTGCACTTGCTGCTAATAATGAGCGAGGACCTATCCCTCCTTAAGCAAGTTCCTTG +TTCCGTGCGGGGAGCCCTGGCGCTAACTCTTTACATGATTAGTATCGCATGTTGTTACAT +ATATAATAGATTTACATCATTTCAAATGCAATGATTCGTGCTCCTAAAATGAGTCGTATG +AATAGCCACAGCGTACGGAAACCTGAATTGATTTGTAATTTAAAGATCAACTTAATCTGT +GTTGATCAGAGCGAGCATTGCAGAATACCCCTGCATCTAGGAATCGGTGCCAGTGTAAAA +GCCTGTTAGTAAAACCACGACTATGTAGTGTGTACCACACTCGGAGTGCGTCAAGCGAAG +TCAAACATGGAAATGAAACCATGCGTACGGAAAAGACCAGTGATTTATAAGGACATTCAC +ATAGACTCCAAAACTGACCCGATGGAGTCTACGCCGAACAGTTGGTATCAACATTTGTCT +CGATTTTCTGTTGGGAACATCCATCCCTACCCACAACGTACTGGACCATAATCAAGGGTT +TGGAACAGTACGCTCCTGTACTCAAGAAGTCCTTGCACGAAAGCAATAGGTTGAACTTCA +TCATATAGGCGATGACAGTGCTATCAGCCGGACTGGCTGTTCTCGTAGAAGTCACTCGAA +TCAATAAGATACGAATACTCCATCCTGTACGGGGACACTATATTATGCTAGCCGATTCTG +TAAATGTAGTCTTTACCGAGAATTGCTGACACTGATTTGAGTGTAGGAGGTCCGGTATAC +ACTTATCATCAACTTATTCCTACACTCGGTTTTCAATAGTTCGTAGCCCCAGGTTGCATG +AATATTATACCTCGGATAACACCTACTAATCCGTCCACAGCCTAGCACTTACTGGCGATC +AATGGAGCATGATGTACTTAGGGGACGGTATGAACATTCTTAACAGTTCCAAATGACCTG +TAGCAAATACAATAGCATCTTTGTTTAAGCATGGTCCTCTGCGGTTTGAAATGTCGCTAA +TCTAGTGATATTCCTTGTAAGCCACTGTTACTCTAATTTAGCCCACTCCAGAACGAGTTT 
+GTGTCCATGAAAATGTAACTCCCCAGACATGCAAATACGCCTTATTGCTGAATATCGGAA +CAAACAAAGTCGTTATCATCCTGAAATCGACGACAAGTACATATTAAAGGTTTGTTTGGC +AAAATAGGTAGCAAGTAGGATGTTCATAACAATTAAAGCGCGTAACTCCTAAATGTGCAT +TATGCGCCGAGGACCGATAGCTGACGCCGCTCTAGCTTCTATTGTTCCACTGTACGGTAC +AAAGATTGAATACGGAAACAGAATTCGTCAATTTGTTGAATTATGTTCTATTCGTTTTAT +CTGGTATATTTGTTACCTAACGTATTTAGGGAAAGTAGCTTCATGAAGAAATCTAATCCC +TCGCGTGACGAGTTTGCTGTGATTATTATGCGACCTGACTCTTGTAGTGTGGAGTTCGTT +GTCGTATCTGTACAAACTGCCGACACGTAGACAGGCCTGTCTAATAAACCAGGGACCTTT +AAGCGTCTTTGTAATTAAGTAAGTACCAGACCATCCTTAGATCAATATGATGCGCAACCG +GACCGGATCAAATGTTCCAAGCTCGGTAGGTTATCCTATAAGAGCCTCAGCAAAATGATG +TAAATTGTCAGCGTGTAGTACGGAAACAGATCACGGTATAATCAAGTCTAAATATTTAGC +CCCGGTCTTGGAATGGCCTTTTATGCAACCAATTTGTGGCGATTAATTTCTCAACAGTAA +GACAGAGAAAGCTAGAGAAGCTGGTATTATTCTGCATGTTGTCGAACCAGCTGTGTACAG +TCAACATTTTGCTATTTACTAAGTTGAAGCTTTCGGTTTCATGTGAAATATCTGGCCAAA +TCGAATGCACCCTTTGACCGGCAGTTTTCATAAGCCACGTGTTTGCATTTCTCTTTAACG +CATTGAAAATCACCGCGAACGACCTCACAACTGTCTAGCTTACCGATACGTTAGTGGTCT +CCTCGCAGAATCGAACGAACCCGAATAATATGGTGATATTCTTTAACGACTGATTAGGGT +CTTATTCGAGATTTTCAGTCTTTAAGCGTGAGCAGCGTGTTAATCACCTAGCAACATTAT +AGAAAGGAGAAAGGTACGAGCAGTTTAAAAGTTACTTCTAATTTTAACTATTGTCCAACT +AAGTGTAGATTATTTAGGCTTGTGTCCAAGTGAGATCATACTGTTTTCGTGTGATAGGTA +TCCGCATCATAACTAGTTATATTAGCACCGTGTATGAAGAAACGGTGGACCGTAGCACAA +CTCATTGTTATTTTGTCCCCTCTTGGTTTATTGGATCCTAGATTATATACGAATAGAGCC +CCTTTCGCAACAGCATCAGAATCAGACCTGCGCTCTCGACTGATAATAGCAATTTGTTAA +GAGCGGATAGACGCAGAAGAATAACATGATTTGTGCACTTAGTCCAGTCCAGATAAGAAG +TTGAGGCATTGACTTAACTTTTCATTGTCCGCTTGCTATCCCCACGATCCTGCTAAACTA +AAAGCTTTTGGCGCGGAAGAGCCGTTATGGAGGTTCGGCGAAATTGTATCACTAGCTAGA +CCATTTTCTGTAGGCTTTTAGCTTGATCGACGTAAATTCGATTCTATATGGTAGAAAGGT +ACGACCGTTATACGCTCACGTACAGCCTAAATTCACTTGTGGAGGCGATATAAGCTAATA +AGCGGTTCATTTTGAGGAACCGTTACTTTGAGATTCACTTACAGCAACTAAGGTTGTGTT +ACCGTTTCTTCTCAATTTACTGCTGGAGCGGCTATTATGCGTCCATCACCTTCATAGCCC +TAGTCATCAAGCCCATAGAGGTATGTTCGTGTGTAAACGAATTCCAAGACTAATTGGTGG +AAATTTCAGTTTGGATTGAATGAGGCTGATACTTCTATACACTTAAGGGTTCCCCGTAAG 
+TATATTGCCATAAGGGAGTAGTAACACTAAGGTTGTGAAAATATTGCACGACGTAGGTAT +TCTCAATTTCCTTCTAATTCTGTAGGATTTATGTAAGGCGACCGGGACTCTATTGTTTTG +TCTCCGAGAGTTTCTTAATCAATTGTCAGGCTAGTAGATCAAGTGTAATAAATGATTAGA +GGTCCTCATTTGGAGAATTTATCTATATCCTTGGTCGTCCACGCGGTATCGGAGTTGCTA +TACAATAAGTTGGTTCCAGAAAGCGTCTTAATTACATACTCTTGGTTTATCAACGAGATG +GTACCTAATACTCTCCTCTCAGTTCAGTAATAAGGACCGTTAACCGCACAATTGCATGTC +ACCATGTAACACATCCTAGGTTCAGTGGTGCAAACAAATCAAAGTCGTTCGATGTCACTA +AAACATTTTGCTTAGTAAGCTCACTTGGTTATGCAATATTCTTCACTTCCACAAGTGACT +CTACTTAAGGCGACGCACCTCCCTACAATTCGCATACGCCAGGTACACACAGCATGGAAT +AGTGTAGTACCTACTCATGCGCGAACGGTCGCCTGCAGAATTCCAACATGGAGGTCTTCT +GGCCTAGTGCTTGTGCTTCCGGGATACACCGCACTCATATCACAGTTTTCCCTGGCACAG +GTTATAGTCCGCTAGCGTGTTGAAGCTAGTTCACCCTTACTATGATCCAAGAAAAGCTTT +TCGGCCGGCCATCCTTCACCATACGTTTCGGGGTCTTAGTTCATTATCAGAGTCGGTGCC +ATTGTTCCATGTAGGTACGTGGAGGAAGTAACTCTTGATATGCTATACGTGTAGCATACT +ATACTCCAGAATCCGTCGCAACAATCCCTTTATCTGCCCCTTTATTTACATTCCCCGCAT +GTTTTGATTACTTAAATGTCGGGTACTGCTGGTATACACCGTATGCACCGAAAGACAGCA +ACCCCTCAAAGCTTCGACGAGTTACCTGGTGTGAGACTATCAGCTTATAACCCTTACTAA +CAGCAGTAGACGAATTCTCCTAGTATAAAGTCAATTACAGTTGACTAAATTCGAAGTAGC +CGAGTGGGTCTCATTAGACCCTACATGTATCTCTTGTTTTCAAAACGGCTGTGAAAGTCG +GAATATTATGTGAGTATGATTCACTCGGCGGAACACTCAAACTCGCTGAATCATTGATTC +GCCGATGATTAAGCCGACCCTCCCAATTACCGCTGCAGCACTACAATCTCAATTTAGGTA +TACGGATCTAGGTCCGTTCGTTACCAGTTACCAATACGCAACCGAGCTCGAAGAGAACAC +AAATTTACGAAGCAAAATTCGGAATCAGGGTATCGTGCAGAATGGCAGGAGAGCTGGAAC +TGTTGTCAGATTTCCCTCTAGTAATCGTACGAGAATATATTCTATGTCACACATTAACCT +ATAGGTAAAGCCTCATTATACTCCGTTTAATGCAGACTTATAGGATGCCATGCAACAAGT +CTAATCGTCGCGAGGACACTCAAAAGGATCAGTGGAAAGTAACACTTTGTGGTTCAATTC +AGAAAATCAGCTTGTTTGTACCTACAAGTACAAAACTTGGAGTGGTAGAGAGGTCAATCG +ATTAAGTTAAAAGGTTAACGCATGCGCCTAGTCATTAATTGGTTGCTGCGCAAAATAATG +CATGCGTAGTAAATCCCAGCCCCAAGTCGAATAGATTATTAACGCCGGAAGCAGCCATCT +GCGGAATCTTCGTTGTGTCGAGCGTCAAACGTTGCTCCATGGCTCCCTCCCTTTATCGGG +TTCTCTCATTGAGTCCAACTAAACATCTACAAAAGAACTTTGTTATGTGATATAGCTTAG +GTCTAATCTTAGGCTGACATGCATAACGCTTTGTCGAGGTCTATTAACATAGCCGAATGC 
+ATGCAAGCTTTGATGGATATTAACTTCCCAATGTCTAAGATTAAAGAAGAGGACACCCAT +TATGTCAATCATCTAGCTAAATCGAGCTGCGAGCCGGAGAGTAAACAGTTTCCTTTTCTT +CGGCGGTTATTTGAAAATTCCTTTCTTATGGCAGTGTTTCGAGCGAGCAGTATATTAGAC +CCAACCTCGATAATCGTTAATCACATAGCGACTATGATAGTATCATTACCAGCAGCATAC +ATAAAATTGTAAAGTGTGTTACTGTTTGCGTGGGTGATTATAGTACAGTCTTTTGCAAAT +CTACGGCCCTGACAGAACTTCACATTAAAGGCCATCCACAGAACAATGGACAACGTATAA +AACCTAAAAGGATATCGTTTTCCTGGGGTTTTCAGTTGTTTTAATGACCGGTAAATTTTC +TTACCCTATTGTGTTTCCTTACACAGAAATATCTGAATATTGAGGTACCTGTGAACATTA +TCATTCATACAACATATCCTATCGCCCATCCTGTGCGGCGACTACTCCAGCACTCACTAA +TTGTTAATCATCTCATACAACTCGTCAGAATTAACATTACCGCAAACTGCTTACTAGCGC +AATCAGGTCAAGAGGAGGACGGCTTTGTCACTTAAAAGAATAAGGTGTAGCTGCATAAAA +CAATGTGTATCTTCTGAGCTTCACAGCCGTGGGCTATCTATGGTTCCGGTCCTGTTGATT +GCTCCCGATGTTGAACAATACTTTCCACTTTCCGTGACAGAAACTTTAGAGCAAGAGGTC +AAACTTTACCCAAGCCCATAGGTAGAAGTTACGCGCGCATTGACGTTTGATCAAGGGACA +GCTGTGAATATCCGTCCCACGTAATCGTGACTTCTCATCAATATTATATTACTGCCGCTA +ATCAACAACTTCCTTGTTTCGACTGAAACGATTTTAGTCAAGTCGAAGACCTCATACGAT +AAGATTTGCAACATGTCTAAAAGAGAACGGGAACTGGCAAAAGGCTTGGTAGATCCGTCT +ATAGCGTAAAACTGATTAACCCATTAGGTCTGAATAACTTTACACAACCCTCCGCACTGT +TAAATGACGGGCTTTGCTCTGTTTTGACACATCAGCTAGAAACTCGCCACGAAGGCATAA +GGCTCCCATATAGCGTAGCTGACAAACATATGAGGTGGCTGCATAAACTAAATTGAGGCT +CGCGTTCGGATACTTGCCCATGTAGCAAGTCTTGGCAACCAACTATATAATCATCACGAA +TTGAGTGCTAAAGACATGCGAACAGTTGGGGCTGCTATATAGTATGACAGATATAGAAAT +TTTATAAAATGTCGTAGGAATCTGGAGGCCAAAATCATTAGACACTCTTGTAAAAGGTAT +GGTAATGTGTATGACCTCTTGGCATAGTGTCCAATTATTCTCGGTTTACTCTCAGAGACA +CAGTCATGTAAAAGTGGTGAGGAATTACCGCCGTGTTTTGCCAACCAAGAAGCATTGAAC +AGTAGATCAATAATGATATTCGGTAGCGTATTTACGCTTTGCGGTTTTCAGAAGAAACTA +TCACAATTGAAACTCTATTCTTCGCCTCATTCCGTACCGTTAGGAATGACTCGAATCGTA +CTGTCTGCCGCGGGGCATAGTGTATTGCTCCCCACCAGGTTCAGATAGTTCGAATCAGTG +CGCTGTACAATTGCCTTACGTGTAGATTTGCATCACCGCTTCACGTAGGCACCCAGAGTG +CTCACTAAAGCCACTAGAGAGATAGAGTTAGAAATTAAGTATCGGTTACGCCCCTCAGAC +GACATAACTCACTTCTACCGAATATCCTTTCTATCTTGGATACTACTAATGCTTCCGTTC +ACGCCGCAATCATGTGGATCCTCCAGTAAGCAGGGTGCTGTCATGACTATACAGTACGGA 
+TCCGTAAGCATTTTGAGGATGATAACATAGGGTCGGTTACTGTGGATTTCCGTTACTTAG +GAGAGCAGCTTTAGCTGACTTTGCTGAGGCTGCGCGTGTTAGACAGCAATTTACGAACGG +CGCACTCTATAGCAGGCACTCACAGTGGACCAGTAGTCCTATTGCAAGAGTTCATTATGG +AACATTTTAGTCCTCTATCACACGGACCATTGCAGTAGATAACTCTAATCCTATGTCTTT +ATTTGGTTGCCTGGAACCCCTTACCACTAGACACCCCAATAAGTAATCTTGCTTCCATGT +CGAATTGATACTCATCGAAAACATATAAAACTAATTATGCTTGTGTTCCTGTGGTCTGTT +ATATAGAGGCGCCCTATTGGCCGCGGGATAAGGATCATTTTGGCACACTAACGGGATCCT +AAAACTTTATCTTTCAACGACTCCTACATGCCTTTTAGGTTAGTACGCGAATCGCCTAAC +AAGCCAATGGGTATTGGAGAATTAGACAAAATGGTTGAGGAATAAAGTGGCGCAGGATTT +TGTCCGAGAAAGGGATAGCAAACGGTCGCAGGCAGGAGTAACAATTTTCAACCGACCTTA +ATAGAGCTCAAAAGCTACCGGAGAAAGCTTCGTCTATGCTTAATACATATGCTAACCTAT +GAATTTCGTAAGCGTAATATAAACTTATCAGATATTTTAAAAGCATCCTATTCAGTCGTA +CTTTTGGCAGGAAAGGTCAGGCGAAACAGAGTCTCCCTGCGGAGGCTTTTAAAATAAATA +GCGGGCCTAGCATCGATTCTAAAAGACGACCCCAGGTGCGTAACCGTGCCTCCCCAAGTC +TTCTTTTAACAATTACCTAGAGAACGGCGTCAGTCGCGAATGACCTTACGAACGTTTACG +CGGAGCCGAGTAAGATTAATAACTGCTTATTGATTTGCAATCGTTTGATACGGGTGGCCC +GAAGCTCAATATCAACATAAATAAAATTAGTCGGAATGGTCGCTTAAATCGCGCGCTGTC +ACTGTCTTCATATGAGGGAGTTGTGTAAGACTGCATTGATATATAGGTATGATTTCGGTT +TAGAACTTTGTCTGTTAGCAACTCCGCATGATTGAAGGAAATCCTCGTTGGTAAGATCTC +TTTAGCATTTGCACAGCTGACTCTAACAGCATAGTATGTGATCGTATTATGTCTGCAGTT +TGTAACACAGTGGGCGGCATGGATGGTACTTAATGGACGTAATGAGCAGTAGACCACCGG +TGTTACCTAACCATCATTAGAGTAGGCGAGATTGCGCTTGTACGACTTATATATAAGGGT +AACCGGAATACCGTTCCTCTTATCAACAACAGTTACTGGTCTTAATTCACATCGGATATT +GCGATCGCCAAGACTATCCCGTAAGTCGTAAGCTAACCAACTAGCGGTTAGGTTTATTGA +GGTTTTGATGGGAACTTCTCAGACACGTCGTCAACTACCTAATTTCTTGGATGGAGCTAG +GCTAACTGTCCCAGAACTTTCTGACACTCGAGATCCTCTAACTAATTGGAATCCAGGAAT +TCCCTTATTGCATCGCCACAAACGACCATAAATTACAGCATGTTTCATTGTCTAACGTGC +CTATCCACGAAATTGAATTCGGTTCACATTATATATCCCCTTCTACCGCTAATTTAATGT +TTAACGTTGATGGGGCAAAGCACATTCGAGAAGTACCGAAAAGTCTCAATCCAAAGACCG +GAGGAACTGGCTTCGGTAAGAATCGCGAGTATCCTTGGATGCCCTGCCTGATTATAACTT +GTTCCATGTAGATAGGCGTAGCTAATTCATAGCAATACAATAAACGAGTCAGAACTGTAG +TCTAACATAACAGCCTGCTCTCCAGGTAACAGCCCATTATTAGATATAGTATCACGATCG 
+TCGGTTGTATTAGTGGTGATAACTATCGATTCTGCCACTAATAGAATGTGCAGAAATAAA +GTATCTGAAAGAAAACGAAGTCACAGAGAATAAAGCTCACTTCATAAAAGTCGGTTGCAG +TAGACGCATATCAATTTTCCCTGCTGCATTTTAGAGTTCGGAATAGTTAAACATAATACT +GGAAGCGCTTCCGGCAATCAGGAATAACCCCATATAAACCAACCTTTGTTGCTATTGCCA +GCGCTATTCTCGTCAAAATTTCTCCCTATGGTCTTCACATCATGCATCACCGGACCCTTT +GATAGACGATGACCCAATTACAATCACTCCACGGATGAGCATCCCATTTTATACGAGGCC +CACTGGAAACAATTGCAATCGACGTGACCAAGTAGAGGAGCGTGCTCGAAAGGTGATGAT +TGCCGAATTCTAACAAGGATACTATAAGCCACGGAACGCTGACGTTGAACAGACCTGGTC +TCCTGGGCACTTCGCAGCACCTCAGTAGTAATTCCGGTAGATTAGGACTTAGCATTCCGT +TGATCTTACAGGATTTATAAATAAGGAGATCTGTCTTGTTTAATTAGGAGGACGCTTTTC +CCGCGTAAGTACGGGAAAACGTTCTTCTGATTTTGTTTGCCACTTGACATTGTAGCTGCT +AGGAGAAGGGATAATATCCGCGTTTTCTTTTACCGTAACGTCGGAGCATACCATGGTAAT +TGTCCGTGTCAAAACTAGATATCTAGGTTGCAAAATTCAGTCAGTAAGTCCTGAGGCCTT +CCGCATTATTAATTCTACAGACATATGAATTTGCTCCACCGGCTAGCACAGTCAACTCAA +CCCACGATAGGGGAACGAAATCACAAATAGGTTCACATGGTCAATACAAGGCAAACCATT +CCCCATAACTCACGCACTGACGGTAAGGCCATTTCAGGTCAAGCGGTGAATGCTGTGAAA +AGCAGCTCGACCACCTGCCGTGGATGGCAAACCGATAACAAAGGACTCCGATACTTCATT +TGTAAACGTTTGCAGTGCTGACGTAACTCATATCTACAGTCAAACCGAATGGTTTGATCG +GCATTATGTAAAGGAATCGACACACGTTGCGTCTTCTAGATTATTACACACCTGTCTGCG +ACGGATATAGGTAAATAAGTCAGCCTCCACTCTGCAGAAGATACTAGAAACGTATCAGTA +ATAGCTATCAGGATTTCGCCATCCTCGCACTGTGCCCGGATATCACAGCAAGATTCTAGG +ATGGCACTTGTGTGACTAGAGGTTTTACTCGTTGAGCCATTCTTACTATAGGCATGGGAT +TACAATGTGCATGTTTGTGATGTTATCCCATATCTTGCATGTATCAGCCTACCAATTAGA +CATATGACTAGATGTAGTCGATCAACGCAAGGGTGCGGACTTTGATTCCTTTTGAATTGA +AGTCAACTCAGATGCTCCTTAAGACGTTTTACAGTAGGTATTTTGTGGTACAAACCAGAA +CCAGTGCCAGTCGGTAGTTATTGTAGTGTGTTCTTAATACATATTTGGTATTGGAGTTTC +TAACATTTAAAAGGAGCCTATTACACTTACTTAATTTGCGTCTATATTTCTGTTACGATA +TGTCGTCTGTCGATTTTACGAGTTTCATACGTGCGGGTTCCCTGTTCGCAATGGGCCCCT +TGCTAATGTCCCGCATCTTTAGGATGCAAACTTACTCACGCCTCCTTTACCGAGACTTGG +TGGGAGAGAAGACTCCTGTAGAATCCCGATCTGAATGGTTTCAGTGTAAGGGTCCCTTCT +AGCCATATCATTGAATATTCTTGTACTTTAAGTAACTCGATCCTACCAGTACAATTCTAG +GTTTGCCTTATAGCCGGAATGAGTATCAGCGTCATTCACCCCGGCCGGATATTATTTGCA 
+ATGTCAGGGACACCCAAAATAGACCGGTTAGAAGGCATATGCGATGAGAGTTGGTGCCTA +AATTAAACGATACAATTGATATGACAAGGACTATACGATGAAATCCATGAGATAATTATC +GTAACTCGGCCAACCTAAAACCGTGCAAGATAGGAGCGGTCCTAGAAGTACTATCGACAC +CTTAAATACTCACTTGAGTTTTCCGATCCTATAGTGCCAATCATATGGCGCAGGAATATT +ACAAACTAAGAAAGTCAACAAAAGATGTAAATTGCAACACCTGGCATCGGTGGGGTTGTC +CCCTTAAACCCTGAAACCAACTGTTATGCTCAACATTATATCGAGGCTAAAACGCGTATC +GTGGCACATTAATAACGATCACATAAGCTTTGCGGCTAGCAATAATAATTTAGGACAGCT +TAGATTTTGACCCGTGCTAATCCTCAGTATGGAGTAATTTTACGGATCTCTCGTTGTAAC +CGTCCTCAGTCGTGTACATTTTAACCTTTGTAAACTAGTTTACGAACGAGTATTTAGAAG +GTCCGTACTCTCACCCAACTGACACATTGTACTAGCTCAAGATCGCAAACACTAAGGGTG +TGAGTCGCGGGATAGCGCTTAAATATGACTGCTAATGGTCAAGAGCACGCGCATAATATT +CCACTGGTTCTAGGTCACCACTACGGTCAGACGTTGACCTGCATGCCCTACATCCGGCAC +GGGCTACTAACGGCCTAATATTCTTTGAGCCATATCCATACTCGTCTATGCATATTCAGG +TATACGGCTATAGTGCGTTATTAACTTCGTCGTGATTAAATCCTTTAATTGTTCCATTAT +AAGTATACATGCTTAGATGCGTGAACTTGAGGGATATCGTTGCTCTAAAGTTGTCTTATA +GACTAAATCTAAACAAGCCGTGCAAGACTACTTAAATTACAAATCTTACAGACATCTCGC +CACTGCGCTAACACTAACAA diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb index 0719bfe..5d6da8b 100644 --- a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb +++ b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.amb @@ -1 +1 @@ -19800 1 0 +20000 1 0 diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann index 01f4a1e..a633aab 100644 --- a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann +++ b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.ann @@ -1,3 +1,3 @@ -19800 1 11 +20000 1 11 0 chr_test (null) -0 19800 0 +0 20000 0 diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt index 7b2e7ab..9ed4852 100644 Binary files a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt and 
b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.bwt differ diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac index dd39245..d99d805 100644 Binary files a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac and b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.pac differ diff --git a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa index 76e12a6..b19e11c 100644 Binary files a/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa and b/pipelines/nf-atacseq/tests/data/bwa_index/chr_test.fa.sa differ diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa b/pipelines/nf-atacseq/tests/data/chr_test.fa deleted file mode 120000 index 60a78a3..0000000 --- a/pipelines/nf-atacseq/tests/data/chr_test.fa +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/chr_test.fa \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa b/pipelines/nf-atacseq/tests/data/chr_test.fa new file mode 100644 index 0000000..182b3f7 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/chr_test.fa @@ -0,0 +1,335 @@ +>chr_test +GAAAGGCATAATAAGTAGCACGTACTAACGCGTCTTCGCTGAAAATAGTTAACGGAGATC +GTGCGAATAACCTGTCTAATAGCTACTAAAGCTATCTCCAGGTAGATTCCATACCTGGAG +TGTATACCCTACCATAGGATTACTATGATCGTTAATGAAAGACCAAGAACTTGCAATTTG +GCATTCAATTAACTCTACCCCATATATCAGTTCCTGATCTTGAGTCACAAGGAACAGGTG +TCAGATGTTGATCCAAACCCTACGGCGACTGCAAATAGGAGATCCATAAGGAGTTAACCT +CGAATCCCCAAAGCTGACCCCAGTCCCCAGACCACTTCAAATCCAGTCTCACACAATGTG +TTTAGACTGGGTAGTTCGTTTTATCGCGTTAATTGTTATCCAATGTCGGAAAATCATGAG +TAGAGGATACTAACTCGCGCCGGTCTCGTAAGGTGAAAATTAAGGATTTATCGGCGTATG +CCTGTGAATATGTATAGATTAGATATATGTGCAAATCTGGGGCAAAAGTAGGAGGACCAA +TGCTGAGGAGCGACGTTTTCCACGCGTGCACTTTGACCACATGTACAACTCGAACAGTGG +GTCAAGTGTTTGTGAAAAGGAATGCTAAAATTACTGACTCTTTAACTCTAGAATTCAGGC +ATTTCCTGGGCAAGAAAATGTAGGTGCGGGCTTGCCAATGTAAGGCTTAATTAACCTCCG 
+AAGTGCAGGTATTGCTGACCTTTTCTTCGTTATGGGATCTGACGAATTACCTACTGTACC +ATTCTCCACTCTCATGCTATTTTAAGTAGAGGCTGCCTATGCCTTTGTGATCTGGCCCTT +GGCAAGCCGTAGCTGCACTTATTCAACGACATAAACCGATTGGTACATTATTCTCGATGG +AGTCACGTGGGGCGCGTTTGATGAATCTCCACTCGTACACCGCCCTCATTGGGCCAAACT +CAACCTTACTTACATGGCTGATATTCATTCCAGTCTTAACTGGGAGAATAGAACTACACA +AAAGAAGATAAGTGTGTATCAGCTTCATTGTCAAGAAGTTCTTGAGCGGGATATTTATGT +ACACAAGCTGTTATGGCGCGTTAGAACTGTCCCCGGACCAAGTTACTTAGAGATTTGGTA +AAGGAGTTAGATAGTGATGATAAATAGATGTCCACAACCTTGTAATCGCCACAGTTTTAT +ATCTGCCAAAGGGAGTGGTCTGGCGAATTAATTTACACCGTTTCCTCGTTAAACTGTAAT +TTATATTGGGAAGAGGCCTGATCGTGCTTCCGCGGTGTTTAACTAAACAGCCATGATACG +CATTGATAGTTTCTCTCCTATTCCCAAGCTACCAGACATCATTAATACTACCGCAACGAG +TAAACTGTAATATCTACGATAATGATGACATTCTTTGCAAGTGGGGTATCAGTGGCAGTT +AAACTCTAGATGCTATCGCTCTTCTCGAGCTTAGTGTGTACTCACCAGTCGCAGGAAGTT +TGGCTGTTTGAAGTTTAATCACTACTCTAGCTTATCCGCGCTAAACATTCTGATCGTGCA +CGTGTCGGACTCAAAATGTCCCAGTATTTACAGGGCTCAAGTGGTGTGACTCGTAATTAG +TGGCCATTTTAAATTGACATTTGTTTTACTCATATCGTTCTCGGTTTATATGACGACTCT +CGATTAATTTGTTGACGTTCTGTCTGCGCGGATCGGTGGAGGCAGACAATAGTGCCGAAA +TGTTACTTGGGGAATACTAAGTTCCAAGTCCCCTAGTTATATCGAGGAGTGATGAGATCT +CCTACTGCATTGCCACACCTTCCCCATACACTTCCTAAATAAGCTGACCCTAGAATAAAG +CTGAGGAATTTCGTACTGAAAGGTTTTGAAGCATGATATTTATTAAGATCTTTATCGTCG +TATACCACATGGCGTCTCCTGGTGTATTGAAATGTTCATACGACTGCAAAAGGAGTAACA +TACGTGGTTAGATACCCGTTCCGGTTATGTCTGCCTCTAAAGCCAGAAGGCAGGTTCTCA +CCACTAGACTGTTTATTACTCCTTTAAACTTATTCTGGACCGTACAGTCTGAACCGGTCA +GATTGGGTTATATACACGCCAAAATCATTTTCAGCGCGATTAAATTGTCATAACCTAACC +TACTCGGGTAAACTCTGACGTCATCTGCTGAACTTCTGGAGCGAAGGGTAATTAAATTTA +TAGTTTTACCCTATATTATTTAAAGGAATCTGCTTCCCATCATCCTGTTATCTATGTGTC +TGTTGCCTTGAGGGACTTTCGTCTCTGAGGTGACGTGCTAATTGTTTGGTTAATCACATT +ATTTGTTCACGGACAAATCATAGTAGAGTGAGCAACATTACTGGGGTCGCGTGAAATAGT +TATAGGGCTTATTATAACCTTGTCTAAGTATATGGTAAGCTCAGTCACGTCTTCTCGACG +TGGAAAATATTGAACCGACGCCCACAGCGGTTATTGCATACTCTAGGGTGTATATAACTT +TTGAAGTACTACAGAGACAGATCATTGAGGATAAGAGCCTAATGATCAGGACATAGTGGA +TGCAAGGTCTAAATGGGGCGTTTGTACCTATGTCCCACTTGGCGAAAACTGTTGATGATT 
+ACTTGCGAGGCAATTGTGGAGGACTGGAAGACGACAAGTATTTTAATGATACATTACCTC +GTTTGAATTCACCCATACTTAATTGTGTGACGAATATCCCAGCGATATACGACCTGTCAA +ACATTCAATCGGTAAAGGAATTTCATAAAGCGACTAATTGACATTGATCAACCACTGGGA +CAACTACCTATATCTAGAAAACAGATTTAAAACTGCCCGTTTCTTATACGACTGCCAGAC +CACACCTCCAGCGCAGCTTACCTTTAAATACAAGCCTAGCGCCCTCTATAACCCGACGCG +AGATGAGCCTCCAGCCATCAGACACAGGCTAAAATTGCCTTTATCGGAACTTCAATGTCA +GGTACACAAAAGGGAAAATCATTTGGAAATACTTTGATACTTATAAAGGATTCGTCCTTC +TCTACGTCCGGAGACCCATCTCGCACCATTTATCGGTTTAGGCCTAATTTTGAAAGGACT +AGCCACTATGACACTCATGAACGGCCTATTACCAACCATCGACTGAATGACGTACGGATA +TCCGGATAGGACGGAACTCGTTTATGCTATGCTGGTAACGCAGCTAGCCCGGGGCATTAG +TAGATGCGTCCCAAAACGAGTATGTGTATCTCGCACTCTTACAATTCTTGGTGAGAAGAG +TGAGGTCTAATATCAGGAGTATGACTTGGTCCTCTACCTAGAGGATGACATACGGAGTTT +TAGGTGGAGACAGAAAATTAGTATACTAGCCGAATGAAACTTAAATCTGAGACGATTGCA +CATCATCCGCAGACATGCGATTAGCCACATAATGGGTTCGTTGAGATGTCTCAGACCCAT +ACAAGTATCTCTATGATTAAGGTTAGCTAATTGTGGAGATCCTTGAAAGGAGACTTGGAT +CCGGTGCATTACCTTCATGATGCTTCCGACCTATGGTGCGCGAGTTGCGCTGTATTTGTG +CACCTAAGAGAAACGTGACACGCGTAGCAGCTCCTTAAGGCCCGGGTGGCTAGAATTTTA +GATGAATACGGTTTGTAAATTTAAATTAGTCCCAGTCGGCGTCCTTACCTCTACATCACT +AAGGCTATGCGGCGATTAACTTAATGTAGTGGGGACAGTAGTTGTTATCTCAGCCGTCTT +AAGTCTGCTTGTAACAACCCCTTTAAGTTAGAGCTTGTGTTTTAAAGTCAGCTTTTAGCC +ATACAAATAGTGCTTCTGTAGGTTTTGCCGATTACGCGTTATATAACTTTACTGTCCATA +GTGCTTCTTCTTGTAAAGAATGAACGTTAACAATAGATAAACGTAGGAATCCACGCCAGA +GTTGATAACTTAATGAGTATAGCCGGTTATACGTGGGGAATACACTAGGTAAGGTTAGAC +TTAGGTGTTTATTGGCGGTGAATTTGGACAAACTAAAATCGTGGCCGTAGCAAGTAAAAT +CGTTGTGAAACCTCAGACTATAATCCCCTGCTGGCTTGAAAGCGATCTACAAGCACTTCA +CGCTAGCAAAGAACGGGGTATGTCCCTCCAATACTTTTGACGTGAAGTGATATGTTAGTC +AAATAAAATTACACATCCTGGTTTTGACTGTTTTCAAACCATGAGTGTGCTAGAACTGTC +AAATTAGATCTGCTAAGGCGAAAACTATGAAAGCTAAGACAGCTTCTATCGAGGGTTGTT +TCTTATACCTTACCTATTAATTTTAGTTATAGCCGAGCTCAAGGAGAAATAAAGGAATTT +CCTCTCCAGATACCCAGAGTGATGTCTGTTGACTAGACCAAGTAAAGAAGTGTAAAGCCG +AGGCAACGGCTAGTACTTTGAATGACCTAATATAGTAACGAGGTTTTGTGATACACATAT +CGTGATGACATCACATCTTGCAAATCCAGTATAGAGTAGTTGCAATTACTTTCTTGTGGT 
+AGCACTTGCGTCTTACACGATTCAATATGACATCGGCACGTCGTGTAAGTCTCCAGGAGT +TATATAAGTTGTAATAATATATGAATTGAGGAAGTCAGTTTGATCGCTAACATGCAACCC +CAGATAATATATGAGAGGAAAGGAGATACGCACGATCATCTATTCAATTTATTGACTCGC +CCATAACGATCGGAAACCTTAATCCTGTACCACCTTCATCGGCTTTCCCAGAAGGATAAG +TGTTGGTCTAAAGAATGCGACCCTTTATAGTTGGGTCGTTCACTTGTTGATTTCTTGATA +CTGAGCGATTAGGATAGCCGAATTTTCTCTTGCTGACAGTTGTGAAAGATCTACAGTTAG +ATGTCAAGACGCTCATAGGGGATTCATTTATTTAGATTGGAGGCTGCCAGTTCTATTGTA +GGCAAGACCCTTTGAAACTTTAGTGGAATTGCCGTGCTTGTGCTGTTAGCCTCAACGCTT +GCGGTATTATCATAGGCTATTACGTGACCCGAGTGTACGGATATGTTTCTAATTAAAAGT +ATTAGAAAGTTATGAATAGGCGGTCGGTCGTACCTTGGTAACGCTGGGCTATTTAGGAAC +CTGCTTTGTCTTCGGTGTAGACTTGTTCACAACGTTGACCCGAAATTTAGTTCTCTCTAA +CTATTTAGCTCCAGTTTTGTATCCACGAAAGTTCAGTTGGTATTTTAGTCATTTTCTGAT +GAGCCGTACATGCAGCTATGTTTGTCCAACGGTATAACCGAATCAAACAAAGATCAGTCC +TAACATCGATGAGTGGAATTGGTTGTACACTGCGACGCTCCTAAGTGGGGATGATGCAAA +TAAAACGCCGGACAGCTCCGATCGCATCGTAAGTTACATTCGATAGAGCGAATATCAGCG +AGCTTCTTCGGTACCTTCTGTGCATCATGGAATAGCGTAGGAAGGTATTTCTCAAGAACG +TGCATCAAGTCAGAAATCTAGCATCACTCCGTCTACCGGTAATGTTCAACGGATAAAGCT +CGGAGTTCGAATCGGTAAATATGTAGGAACGCTAGAGATTCGAGCAGTACGGTAGTGTAG +CTATTCACTTAGGCAAGAACTATCGGGGACCACTCGCAGGATTCGATACATGATTCCTAT +AGCATGATTGCGATGCTGTTGCACTATACTCGACGACGCATGTATAGACAATCGCAGATA +GAATTTAGGTTGCCCCACTACACAAGTCTGTCTATTGTACACGTTGTGGCTTAGAATCGA +TTACGACCGGAAATAAATATTTTATCTTATTAGCTGTACCTATCTGGCATTTCTAAGGAC +AATTGATATGCCTACTTATCCAGTCCACCTCAGAATCCACGATCTTGGAATTACCTTTAA +ACCTGCTTGAAACAGGTCGTGATTCAATCAAATCTATCTGAAGTCCGTGGAGCATTTTCA +AAACGCTTTGATACCTTTCCGGTGACACAAAAGGAGGAACTAAAAGGGCACATACCCTAT +GATATAAAACTCAATGTGTCATTAAACAAAGGTATAAGTCTTTCAACTGACTATGAATGA +CCACTGCACGAGGAGGTTGTTAGAATGAAAAGCTGAGAAGGCAGTATCTCATCTTTTATC +TGTAGTAGGGTTCTTTCGTCTAACTGACTATTTGAGGCATTATTCTCAGGCTTTCAGTTG +TGTTTCGCTAACTAGACATACTACGTCTTATGTGAAGCTACGTCTGGTTGTTAAGTTTCA +ATCGAGTAAACTTTGAAAACGACCTACAGCCTTGACGAAGCTCCCACAACTGTGATAACT +AGTTCTTGCCCTGCACGCGCGGATTCTCACCTCTCAACAACCGCGTACCCTTCGCCCGTT +GCGTAAGGCATGTAATCCGCGCTTGAGCCATACCCACCGGCCAGATTAATCAGTCTGAGA 
+CGATACGCAGTTATAGCTGTAATGGGGAAATACCCCGGAAGTTTCTGATCCATTAAAACC +GCACGGATCTCGACGCAAAACTCCATGTTCCAACAATACGGCTTTAGGCAGGTGCCAACG +TCGACGCTGGCTAAGTAACTTACCACAGAGGATTCTGAGCTTCTTTGCGTTATTAGATGT +TTCTAACCTTAAAATAGTAAATAGAATACTGTGGACCAAGGCATAAATGCCGTGCTGGTT +AAAACCAGGTGCATTTAAAGCTCGATCAAGGCCGGTTTTGGGCTGTTTACTTTCTGAAAT +AACTGCGATGCCGGCCCGAGGAAGATCTAAACTACCAATGAAATTACAAGTGGCTTCAAG +GCCAAGCCATTTGAGTACTTGACTTATGTGAGTACTTTCCTAAACCATCAAGGGCAGGGT +TTGTTGCAATCGTATGGGCGTATATGGACAATTGAACGAGGCAATGTAGATGTCCCTCGT +GTAGGGGTATGCTAGCAACTTTTGTTATTTCTCCAAGAGCAATGCTCGTATAATCTTCAG +ACCACTATCTTTCGTGGGTTTTCTCGTATTCCGGCGTCGTATAGTATATCACAAGAGCTC +GTACATTCTAAAATATTAGTAATTTTCAAGGTGTAATTTTACACGATGTTAGACTCGTTC +TATCACACTGCTTGGTAGTTTAATATGCTGTAGTACTTGAGGATCGTCGGTGGAACGGTC +CTAGGATCTAAACTAGTGATTACGAACTCTTTGTGTAAAATATGAGCGTATTCGCACTCA +GTTGCAATTAAATAGCTAAATGATCGGTAAATATCCGGGGTAAATCAACTTGAGTTTAGA +GGATCCGTCGTTAAGAGATGATGTACATTCGTCGATTTAGGATCCTAACGTGGCGTTCGT +ATGAAAAGAGCTGAACTAAATAGGAAAACGTTAACCAGTGACTACGCCCCAACCATTGCA +AGATGTACCCCAATGATGGTTTTGGTATCGAAACTTCTCTTAATTGTGTTTCTTAAGTAC +TGGCAAAATTCGAGCCGGCATCGTTTGTTGATAGTTGGGTCTAGGATTTTACACCTTGTG +TTAGCACTGGGCCATTAATTCAATAGTAACAAGAATACTAATTACCAATGTGCGTGAAAA +TCTCCTTGACTGGTGCAACGTCATTCACAGTCGGATCTCAAGTTATTAGGTGCTAACTGT +ATACACCAAATTTAGGATAAGAGCCGGCTTAAGGCTAATCTAGACCCAATATTAATCAAT +ATTTTACGTAATGCATCCACGCGGCGTGCTCTTGGTGAGCAGCTGGGATTAAACGCGTAG +GTCGAACTATCGAGGGTTTACAAGAAAGCCAAGTGAAAATGAGACTATTGGCCATCGCGA +GATTTGAATAAATGTCCCTTGGTACTTATACGTTGGGCGAACGGGGATGAGCCAGGCTGC +TATCATCGTTTCGAGGTAGCTTCCAAGTGGATGAACTCAAAGACTGGCATTATGTGAAGA +GCATAGCGCTTTTCCCCGTATTATGGCAGCAGCTGGTTACCCATACTTGTGATCCCCGTA +ATTCTACTGTCATAGAAGGATGACCGAATCAATGAGCCGGGTGGTGTCCAAAAGCGATCC +TAATCCTTGCTGATTTACCTTGAGCGGTCACGTCTGTCTCAGCGACATTCGCCTTGCGTT +AGACTAGGCCGTAAGTAAGGAGTGCACTCCACAACGGCGTAATGCGTGCGGCGAGTAATG +TATTAGCATGTTAACCACATTCTTGGCAGCCAGATCAAAATCACTTTTCATCTGGTTGTC +TTAACAATCCGATAGAATCTAATGTAGCGATGCGTACTAGAAATAGTTACAATCTACAGT +CTTGCTGCACTTGCTGCTAATAATGAGCGAGGACCTATCCCTCCTTAAGCAAGTTCCTTG 
+TTCCGTGCGGGGAGCCCTGGCGCTAACTCTTTACATGATTAGTATCGCATGTTGTTACAT +ATATAATAGATTTACATCATTTCAAATGCAATGATTCGTGCTCCTAAAATGAGTCGTATG +AATAGCCACAGCGTACGGAAACCTGAATTGATTTGTAATTTAAAGATCAACTTAATCTGT +GTTGATCAGAGCGAGCATTGCAGAATACCCCTGCATCTAGGAATCGGTGCCAGTGTAAAA +GCCTGTTAGTAAAACCACGACTATGTAGTGTGTACCACACTCGGAGTGCGTCAAGCGAAG +TCAAACATGGAAATGAAACCATGCGTACGGAAAAGACCAGTGATTTATAAGGACATTCAC +ATAGACTCCAAAACTGACCCGATGGAGTCTACGCCGAACAGTTGGTATCAACATTTGTCT +CGATTTTCTGTTGGGAACATCCATCCCTACCCACAACGTACTGGACCATAATCAAGGGTT +TGGAACAGTACGCTCCTGTACTCAAGAAGTCCTTGCACGAAAGCAATAGGTTGAACTTCA +TCATATAGGCGATGACAGTGCTATCAGCCGGACTGGCTGTTCTCGTAGAAGTCACTCGAA +TCAATAAGATACGAATACTCCATCCTGTACGGGGACACTATATTATGCTAGCCGATTCTG +TAAATGTAGTCTTTACCGAGAATTGCTGACACTGATTTGAGTGTAGGAGGTCCGGTATAC +ACTTATCATCAACTTATTCCTACACTCGGTTTTCAATAGTTCGTAGCCCCAGGTTGCATG +AATATTATACCTCGGATAACACCTACTAATCCGTCCACAGCCTAGCACTTACTGGCGATC +AATGGAGCATGATGTACTTAGGGGACGGTATGAACATTCTTAACAGTTCCAAATGACCTG +TAGCAAATACAATAGCATCTTTGTTTAAGCATGGTCCTCTGCGGTTTGAAATGTCGCTAA +TCTAGTGATATTCCTTGTAAGCCACTGTTACTCTAATTTAGCCCACTCCAGAACGAGTTT +GTGTCCATGAAAATGTAACTCCCCAGACATGCAAATACGCCTTATTGCTGAATATCGGAA +CAAACAAAGTCGTTATCATCCTGAAATCGACGACAAGTACATATTAAAGGTTTGTTTGGC +AAAATAGGTAGCAAGTAGGATGTTCATAACAATTAAAGCGCGTAACTCCTAAATGTGCAT +TATGCGCCGAGGACCGATAGCTGACGCCGCTCTAGCTTCTATTGTTCCACTGTACGGTAC +AAAGATTGAATACGGAAACAGAATTCGTCAATTTGTTGAATTATGTTCTATTCGTTTTAT +CTGGTATATTTGTTACCTAACGTATTTAGGGAAAGTAGCTTCATGAAGAAATCTAATCCC +TCGCGTGACGAGTTTGCTGTGATTATTATGCGACCTGACTCTTGTAGTGTGGAGTTCGTT +GTCGTATCTGTACAAACTGCCGACACGTAGACAGGCCTGTCTAATAAACCAGGGACCTTT +AAGCGTCTTTGTAATTAAGTAAGTACCAGACCATCCTTAGATCAATATGATGCGCAACCG +GACCGGATCAAATGTTCCAAGCTCGGTAGGTTATCCTATAAGAGCCTCAGCAAAATGATG +TAAATTGTCAGCGTGTAGTACGGAAACAGATCACGGTATAATCAAGTCTAAATATTTAGC +CCCGGTCTTGGAATGGCCTTTTATGCAACCAATTTGTGGCGATTAATTTCTCAACAGTAA +GACAGAGAAAGCTAGAGAAGCTGGTATTATTCTGCATGTTGTCGAACCAGCTGTGTACAG +TCAACATTTTGCTATTTACTAAGTTGAAGCTTTCGGTTTCATGTGAAATATCTGGCCAAA +TCGAATGCACCCTTTGACCGGCAGTTTTCATAAGCCACGTGTTTGCATTTCTCTTTAACG 
+CATTGAAAATCACCGCGAACGACCTCACAACTGTCTAGCTTACCGATACGTTAGTGGTCT +CCTCGCAGAATCGAACGAACCCGAATAATATGGTGATATTCTTTAACGACTGATTAGGGT +CTTATTCGAGATTTTCAGTCTTTAAGCGTGAGCAGCGTGTTAATCACCTAGCAACATTAT +AGAAAGGAGAAAGGTACGAGCAGTTTAAAAGTTACTTCTAATTTTAACTATTGTCCAACT +AAGTGTAGATTATTTAGGCTTGTGTCCAAGTGAGATCATACTGTTTTCGTGTGATAGGTA +TCCGCATCATAACTAGTTATATTAGCACCGTGTATGAAGAAACGGTGGACCGTAGCACAA +CTCATTGTTATTTTGTCCCCTCTTGGTTTATTGGATCCTAGATTATATACGAATAGAGCC +CCTTTCGCAACAGCATCAGAATCAGACCTGCGCTCTCGACTGATAATAGCAATTTGTTAA +GAGCGGATAGACGCAGAAGAATAACATGATTTGTGCACTTAGTCCAGTCCAGATAAGAAG +TTGAGGCATTGACTTAACTTTTCATTGTCCGCTTGCTATCCCCACGATCCTGCTAAACTA +AAAGCTTTTGGCGCGGAAGAGCCGTTATGGAGGTTCGGCGAAATTGTATCACTAGCTAGA +CCATTTTCTGTAGGCTTTTAGCTTGATCGACGTAAATTCGATTCTATATGGTAGAAAGGT +ACGACCGTTATACGCTCACGTACAGCCTAAATTCACTTGTGGAGGCGATATAAGCTAATA +AGCGGTTCATTTTGAGGAACCGTTACTTTGAGATTCACTTACAGCAACTAAGGTTGTGTT +ACCGTTTCTTCTCAATTTACTGCTGGAGCGGCTATTATGCGTCCATCACCTTCATAGCCC +TAGTCATCAAGCCCATAGAGGTATGTTCGTGTGTAAACGAATTCCAAGACTAATTGGTGG +AAATTTCAGTTTGGATTGAATGAGGCTGATACTTCTATACACTTAAGGGTTCCCCGTAAG +TATATTGCCATAAGGGAGTAGTAACACTAAGGTTGTGAAAATATTGCACGACGTAGGTAT +TCTCAATTTCCTTCTAATTCTGTAGGATTTATGTAAGGCGACCGGGACTCTATTGTTTTG +TCTCCGAGAGTTTCTTAATCAATTGTCAGGCTAGTAGATCAAGTGTAATAAATGATTAGA +GGTCCTCATTTGGAGAATTTATCTATATCCTTGGTCGTCCACGCGGTATCGGAGTTGCTA +TACAATAAGTTGGTTCCAGAAAGCGTCTTAATTACATACTCTTGGTTTATCAACGAGATG +GTACCTAATACTCTCCTCTCAGTTCAGTAATAAGGACCGTTAACCGCACAATTGCATGTC +ACCATGTAACACATCCTAGGTTCAGTGGTGCAAACAAATCAAAGTCGTTCGATGTCACTA +AAACATTTTGCTTAGTAAGCTCACTTGGTTATGCAATATTCTTCACTTCCACAAGTGACT +CTACTTAAGGCGACGCACCTCCCTACAATTCGCATACGCCAGGTACACACAGCATGGAAT +AGTGTAGTACCTACTCATGCGCGAACGGTCGCCTGCAGAATTCCAACATGGAGGTCTTCT +GGCCTAGTGCTTGTGCTTCCGGGATACACCGCACTCATATCACAGTTTTCCCTGGCACAG +GTTATAGTCCGCTAGCGTGTTGAAGCTAGTTCACCCTTACTATGATCCAAGAAAAGCTTT +TCGGCCGGCCATCCTTCACCATACGTTTCGGGGTCTTAGTTCATTATCAGAGTCGGTGCC +ATTGTTCCATGTAGGTACGTGGAGGAAGTAACTCTTGATATGCTATACGTGTAGCATACT +ATACTCCAGAATCCGTCGCAACAATCCCTTTATCTGCCCCTTTATTTACATTCCCCGCAT 
+GTTTTGATTACTTAAATGTCGGGTACTGCTGGTATACACCGTATGCACCGAAAGACAGCA +ACCCCTCAAAGCTTCGACGAGTTACCTGGTGTGAGACTATCAGCTTATAACCCTTACTAA +CAGCAGTAGACGAATTCTCCTAGTATAAAGTCAATTACAGTTGACTAAATTCGAAGTAGC +CGAGTGGGTCTCATTAGACCCTACATGTATCTCTTGTTTTCAAAACGGCTGTGAAAGTCG +GAATATTATGTGAGTATGATTCACTCGGCGGAACACTCAAACTCGCTGAATCATTGATTC +GCCGATGATTAAGCCGACCCTCCCAATTACCGCTGCAGCACTACAATCTCAATTTAGGTA +TACGGATCTAGGTCCGTTCGTTACCAGTTACCAATACGCAACCGAGCTCGAAGAGAACAC +AAATTTACGAAGCAAAATTCGGAATCAGGGTATCGTGCAGAATGGCAGGAGAGCTGGAAC +TGTTGTCAGATTTCCCTCTAGTAATCGTACGAGAATATATTCTATGTCACACATTAACCT +ATAGGTAAAGCCTCATTATACTCCGTTTAATGCAGACTTATAGGATGCCATGCAACAAGT +CTAATCGTCGCGAGGACACTCAAAAGGATCAGTGGAAAGTAACACTTTGTGGTTCAATTC +AGAAAATCAGCTTGTTTGTACCTACAAGTACAAAACTTGGAGTGGTAGAGAGGTCAATCG +ATTAAGTTAAAAGGTTAACGCATGCGCCTAGTCATTAATTGGTTGCTGCGCAAAATAATG +CATGCGTAGTAAATCCCAGCCCCAAGTCGAATAGATTATTAACGCCGGAAGCAGCCATCT +GCGGAATCTTCGTTGTGTCGAGCGTCAAACGTTGCTCCATGGCTCCCTCCCTTTATCGGG +TTCTCTCATTGAGTCCAACTAAACATCTACAAAAGAACTTTGTTATGTGATATAGCTTAG +GTCTAATCTTAGGCTGACATGCATAACGCTTTGTCGAGGTCTATTAACATAGCCGAATGC +ATGCAAGCTTTGATGGATATTAACTTCCCAATGTCTAAGATTAAAGAAGAGGACACCCAT +TATGTCAATCATCTAGCTAAATCGAGCTGCGAGCCGGAGAGTAAACAGTTTCCTTTTCTT +CGGCGGTTATTTGAAAATTCCTTTCTTATGGCAGTGTTTCGAGCGAGCAGTATATTAGAC +CCAACCTCGATAATCGTTAATCACATAGCGACTATGATAGTATCATTACCAGCAGCATAC +ATAAAATTGTAAAGTGTGTTACTGTTTGCGTGGGTGATTATAGTACAGTCTTTTGCAAAT +CTACGGCCCTGACAGAACTTCACATTAAAGGCCATCCACAGAACAATGGACAACGTATAA +AACCTAAAAGGATATCGTTTTCCTGGGGTTTTCAGTTGTTTTAATGACCGGTAAATTTTC +TTACCCTATTGTGTTTCCTTACACAGAAATATCTGAATATTGAGGTACCTGTGAACATTA +TCATTCATACAACATATCCTATCGCCCATCCTGTGCGGCGACTACTCCAGCACTCACTAA +TTGTTAATCATCTCATACAACTCGTCAGAATTAACATTACCGCAAACTGCTTACTAGCGC +AATCAGGTCAAGAGGAGGACGGCTTTGTCACTTAAAAGAATAAGGTGTAGCTGCATAAAA +CAATGTGTATCTTCTGAGCTTCACAGCCGTGGGCTATCTATGGTTCCGGTCCTGTTGATT +GCTCCCGATGTTGAACAATACTTTCCACTTTCCGTGACAGAAACTTTAGAGCAAGAGGTC +AAACTTTACCCAAGCCCATAGGTAGAAGTTACGCGCGCATTGACGTTTGATCAAGGGACA +GCTGTGAATATCCGTCCCACGTAATCGTGACTTCTCATCAATATTATATTACTGCCGCTA 
+ATCAACAACTTCCTTGTTTCGACTGAAACGATTTTAGTCAAGTCGAAGACCTCATACGAT +AAGATTTGCAACATGTCTAAAAGAGAACGGGAACTGGCAAAAGGCTTGGTAGATCCGTCT +ATAGCGTAAAACTGATTAACCCATTAGGTCTGAATAACTTTACACAACCCTCCGCACTGT +TAAATGACGGGCTTTGCTCTGTTTTGACACATCAGCTAGAAACTCGCCACGAAGGCATAA +GGCTCCCATATAGCGTAGCTGACAAACATATGAGGTGGCTGCATAAACTAAATTGAGGCT +CGCGTTCGGATACTTGCCCATGTAGCAAGTCTTGGCAACCAACTATATAATCATCACGAA +TTGAGTGCTAAAGACATGCGAACAGTTGGGGCTGCTATATAGTATGACAGATATAGAAAT +TTTATAAAATGTCGTAGGAATCTGGAGGCCAAAATCATTAGACACTCTTGTAAAAGGTAT +GGTAATGTGTATGACCTCTTGGCATAGTGTCCAATTATTCTCGGTTTACTCTCAGAGACA +CAGTCATGTAAAAGTGGTGAGGAATTACCGCCGTGTTTTGCCAACCAAGAAGCATTGAAC +AGTAGATCAATAATGATATTCGGTAGCGTATTTACGCTTTGCGGTTTTCAGAAGAAACTA +TCACAATTGAAACTCTATTCTTCGCCTCATTCCGTACCGTTAGGAATGACTCGAATCGTA +CTGTCTGCCGCGGGGCATAGTGTATTGCTCCCCACCAGGTTCAGATAGTTCGAATCAGTG +CGCTGTACAATTGCCTTACGTGTAGATTTGCATCACCGCTTCACGTAGGCACCCAGAGTG +CTCACTAAAGCCACTAGAGAGATAGAGTTAGAAATTAAGTATCGGTTACGCCCCTCAGAC +GACATAACTCACTTCTACCGAATATCCTTTCTATCTTGGATACTACTAATGCTTCCGTTC +ACGCCGCAATCATGTGGATCCTCCAGTAAGCAGGGTGCTGTCATGACTATACAGTACGGA +TCCGTAAGCATTTTGAGGATGATAACATAGGGTCGGTTACTGTGGATTTCCGTTACTTAG +GAGAGCAGCTTTAGCTGACTTTGCTGAGGCTGCGCGTGTTAGACAGCAATTTACGAACGG +CGCACTCTATAGCAGGCACTCACAGTGGACCAGTAGTCCTATTGCAAGAGTTCATTATGG +AACATTTTAGTCCTCTATCACACGGACCATTGCAGTAGATAACTCTAATCCTATGTCTTT +ATTTGGTTGCCTGGAACCCCTTACCACTAGACACCCCAATAAGTAATCTTGCTTCCATGT +CGAATTGATACTCATCGAAAACATATAAAACTAATTATGCTTGTGTTCCTGTGGTCTGTT +ATATAGAGGCGCCCTATTGGCCGCGGGATAAGGATCATTTTGGCACACTAACGGGATCCT +AAAACTTTATCTTTCAACGACTCCTACATGCCTTTTAGGTTAGTACGCGAATCGCCTAAC +AAGCCAATGGGTATTGGAGAATTAGACAAAATGGTTGAGGAATAAAGTGGCGCAGGATTT +TGTCCGAGAAAGGGATAGCAAACGGTCGCAGGCAGGAGTAACAATTTTCAACCGACCTTA +ATAGAGCTCAAAAGCTACCGGAGAAAGCTTCGTCTATGCTTAATACATATGCTAACCTAT +GAATTTCGTAAGCGTAATATAAACTTATCAGATATTTTAAAAGCATCCTATTCAGTCGTA +CTTTTGGCAGGAAAGGTCAGGCGAAACAGAGTCTCCCTGCGGAGGCTTTTAAAATAAATA +GCGGGCCTAGCATCGATTCTAAAAGACGACCCCAGGTGCGTAACCGTGCCTCCCCAAGTC +TTCTTTTAACAATTACCTAGAGAACGGCGTCAGTCGCGAATGACCTTACGAACGTTTACG 
+CGGAGCCGAGTAAGATTAATAACTGCTTATTGATTTGCAATCGTTTGATACGGGTGGCCC +GAAGCTCAATATCAACATAAATAAAATTAGTCGGAATGGTCGCTTAAATCGCGCGCTGTC +ACTGTCTTCATATGAGGGAGTTGTGTAAGACTGCATTGATATATAGGTATGATTTCGGTT +TAGAACTTTGTCTGTTAGCAACTCCGCATGATTGAAGGAAATCCTCGTTGGTAAGATCTC +TTTAGCATTTGCACAGCTGACTCTAACAGCATAGTATGTGATCGTATTATGTCTGCAGTT +TGTAACACAGTGGGCGGCATGGATGGTACTTAATGGACGTAATGAGCAGTAGACCACCGG +TGTTACCTAACCATCATTAGAGTAGGCGAGATTGCGCTTGTACGACTTATATATAAGGGT +AACCGGAATACCGTTCCTCTTATCAACAACAGTTACTGGTCTTAATTCACATCGGATATT +GCGATCGCCAAGACTATCCCGTAAGTCGTAAGCTAACCAACTAGCGGTTAGGTTTATTGA +GGTTTTGATGGGAACTTCTCAGACACGTCGTCAACTACCTAATTTCTTGGATGGAGCTAG +GCTAACTGTCCCAGAACTTTCTGACACTCGAGATCCTCTAACTAATTGGAATCCAGGAAT +TCCCTTATTGCATCGCCACAAACGACCATAAATTACAGCATGTTTCATTGTCTAACGTGC +CTATCCACGAAATTGAATTCGGTTCACATTATATATCCCCTTCTACCGCTAATTTAATGT +TTAACGTTGATGGGGCAAAGCACATTCGAGAAGTACCGAAAAGTCTCAATCCAAAGACCG +GAGGAACTGGCTTCGGTAAGAATCGCGAGTATCCTTGGATGCCCTGCCTGATTATAACTT +GTTCCATGTAGATAGGCGTAGCTAATTCATAGCAATACAATAAACGAGTCAGAACTGTAG +TCTAACATAACAGCCTGCTCTCCAGGTAACAGCCCATTATTAGATATAGTATCACGATCG +TCGGTTGTATTAGTGGTGATAACTATCGATTCTGCCACTAATAGAATGTGCAGAAATAAA +GTATCTGAAAGAAAACGAAGTCACAGAGAATAAAGCTCACTTCATAAAAGTCGGTTGCAG +TAGACGCATATCAATTTTCCCTGCTGCATTTTAGAGTTCGGAATAGTTAAACATAATACT +GGAAGCGCTTCCGGCAATCAGGAATAACCCCATATAAACCAACCTTTGTTGCTATTGCCA +GCGCTATTCTCGTCAAAATTTCTCCCTATGGTCTTCACATCATGCATCACCGGACCCTTT +GATAGACGATGACCCAATTACAATCACTCCACGGATGAGCATCCCATTTTATACGAGGCC +CACTGGAAACAATTGCAATCGACGTGACCAAGTAGAGGAGCGTGCTCGAAAGGTGATGAT +TGCCGAATTCTAACAAGGATACTATAAGCCACGGAACGCTGACGTTGAACAGACCTGGTC +TCCTGGGCACTTCGCAGCACCTCAGTAGTAATTCCGGTAGATTAGGACTTAGCATTCCGT +TGATCTTACAGGATTTATAAATAAGGAGATCTGTCTTGTTTAATTAGGAGGACGCTTTTC +CCGCGTAAGTACGGGAAAACGTTCTTCTGATTTTGTTTGCCACTTGACATTGTAGCTGCT +AGGAGAAGGGATAATATCCGCGTTTTCTTTTACCGTAACGTCGGAGCATACCATGGTAAT +TGTCCGTGTCAAAACTAGATATCTAGGTTGCAAAATTCAGTCAGTAAGTCCTGAGGCCTT +CCGCATTATTAATTCTACAGACATATGAATTTGCTCCACCGGCTAGCACAGTCAACTCAA +CCCACGATAGGGGAACGAAATCACAAATAGGTTCACATGGTCAATACAAGGCAAACCATT 
+CCCCATAACTCACGCACTGACGGTAAGGCCATTTCAGGTCAAGCGGTGAATGCTGTGAAA +AGCAGCTCGACCACCTGCCGTGGATGGCAAACCGATAACAAAGGACTCCGATACTTCATT +TGTAAACGTTTGCAGTGCTGACGTAACTCATATCTACAGTCAAACCGAATGGTTTGATCG +GCATTATGTAAAGGAATCGACACACGTTGCGTCTTCTAGATTATTACACACCTGTCTGCG +ACGGATATAGGTAAATAAGTCAGCCTCCACTCTGCAGAAGATACTAGAAACGTATCAGTA +ATAGCTATCAGGATTTCGCCATCCTCGCACTGTGCCCGGATATCACAGCAAGATTCTAGG +ATGGCACTTGTGTGACTAGAGGTTTTACTCGTTGAGCCATTCTTACTATAGGCATGGGAT +TACAATGTGCATGTTTGTGATGTTATCCCATATCTTGCATGTATCAGCCTACCAATTAGA +CATATGACTAGATGTAGTCGATCAACGCAAGGGTGCGGACTTTGATTCCTTTTGAATTGA +AGTCAACTCAGATGCTCCTTAAGACGTTTTACAGTAGGTATTTTGTGGTACAAACCAGAA +CCAGTGCCAGTCGGTAGTTATTGTAGTGTGTTCTTAATACATATTTGGTATTGGAGTTTC +TAACATTTAAAAGGAGCCTATTACACTTACTTAATTTGCGTCTATATTTCTGTTACGATA +TGTCGTCTGTCGATTTTACGAGTTTCATACGTGCGGGTTCCCTGTTCGCAATGGGCCCCT +TGCTAATGTCCCGCATCTTTAGGATGCAAACTTACTCACGCCTCCTTTACCGAGACTTGG +TGGGAGAGAAGACTCCTGTAGAATCCCGATCTGAATGGTTTCAGTGTAAGGGTCCCTTCT +AGCCATATCATTGAATATTCTTGTACTTTAAGTAACTCGATCCTACCAGTACAATTCTAG +GTTTGCCTTATAGCCGGAATGAGTATCAGCGTCATTCACCCCGGCCGGATATTATTTGCA +ATGTCAGGGACACCCAAAATAGACCGGTTAGAAGGCATATGCGATGAGAGTTGGTGCCTA +AATTAAACGATACAATTGATATGACAAGGACTATACGATGAAATCCATGAGATAATTATC +GTAACTCGGCCAACCTAAAACCGTGCAAGATAGGAGCGGTCCTAGAAGTACTATCGACAC +CTTAAATACTCACTTGAGTTTTCCGATCCTATAGTGCCAATCATATGGCGCAGGAATATT +ACAAACTAAGAAAGTCAACAAAAGATGTAAATTGCAACACCTGGCATCGGTGGGGTTGTC +CCCTTAAACCCTGAAACCAACTGTTATGCTCAACATTATATCGAGGCTAAAACGCGTATC +GTGGCACATTAATAACGATCACATAAGCTTTGCGGCTAGCAATAATAATTTAGGACAGCT +TAGATTTTGACCCGTGCTAATCCTCAGTATGGAGTAATTTTACGGATCTCTCGTTGTAAC +CGTCCTCAGTCGTGTACATTTTAACCTTTGTAAACTAGTTTACGAACGAGTATTTAGAAG +GTCCGTACTCTCACCCAACTGACACATTGTACTAGCTCAAGATCGCAAACACTAAGGGTG +TGAGTCGCGGGATAGCGCTTAAATATGACTGCTAATGGTCAAGAGCACGCGCATAATATT +CCACTGGTTCTAGGTCACCACTACGGTCAGACGTTGACCTGCATGCCCTACATCCGGCAC +GGGCTACTAACGGCCTAATATTCTTTGAGCCATATCCATACTCGTCTATGCATATTCAGG +TATACGGCTATAGTGCGTTATTAACTTCGTCGTGATTAAATCCTTTAATTGTTCCATTAT +AAGTATACATGCTTAGATGCGTGAACTTGAGGGATATCGTTGCTCTAAAGTTGTCTTATA 
+GACTAAATCTAAACAAGCCGTGCAAGACTACTTAAATTACAAATCTTACAGACATCTCGC +CACTGCGCTAACACTAACAA diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa.fai b/pipelines/nf-atacseq/tests/data/chr_test.fa.fai deleted file mode 120000 index 8158c3c..0000000 --- a/pipelines/nf-atacseq/tests/data/chr_test.fa.fai +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/chr_test.fa.fai \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/chr_test.fa.fai b/pipelines/nf-atacseq/tests/data/chr_test.fa.fai new file mode 100644 index 0000000..4e99d5b --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/chr_test.fa.fai @@ -0,0 +1 @@ +chr_test 20000 10 60 61 diff --git a/pipelines/nf-atacseq/tests/data/generate_realistic_reference.py b/pipelines/nf-atacseq/tests/data/generate_realistic_reference.py new file mode 100644 index 0000000..cb9d937 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/generate_realistic_reference.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +""" +Generate a realistic ~20kb non-repetitive reference sequence for ATAC-seq testing. 
+ +Properties: + - ~42% GC content (human-like) + - No homopolymer runs > 5bp + - High k-mer uniqueness (>99% unique 20-mers) + - Deterministic (seeded RNG) + +Output: chr_test.fa with contig name 'chr_test' +""" + +import random +import sys + +SEED = 42 +LENGTH = 20000 +CONTIG = "chr_test" +LINE_WIDTH = 60 +MAX_HOMOPOLYMER = 5 + +# Target base frequencies for ~42% GC +# A=29%, T=29%, G=21%, C=21% +BASES = "ATGC" +WEIGHTS = [0.29, 0.29, 0.21, 0.21] + + +def generate_sequence(length, seed=SEED): + """Generate a non-repetitive sequence with controlled GC content.""" + rng = random.Random(seed) + + seq = [] + homopolymer_count = 0 + last_base = None + + for _ in range(length): + # Pick a base using weighted random + base = rng.choices(BASES, weights=WEIGHTS, k=1)[0] + + # Prevent long homopolymers + if base == last_base: + homopolymer_count += 1 + if homopolymer_count >= MAX_HOMOPOLYMER: + # Force a different base + alternatives = [b for b in BASES if b != base] + alt_weights = [WEIGHTS[BASES.index(b)] for b in alternatives] + total = sum(alt_weights) + alt_weights = [w / total for w in alt_weights] + base = rng.choices(alternatives, weights=alt_weights, k=1)[0] + homopolymer_count = 1 + else: + homopolymer_count = 1 + + seq.append(base) + last_base = base + + return "".join(seq) + + +def validate_sequence(seq): + """Validate sequence properties.""" + gc = sum(1 for b in seq if b in "GC") / len(seq) + + # Check k-mer uniqueness + kmers_20 = set() + for i in range(len(seq) - 19): + kmers_20.add(seq[i : i + 20]) + unique_20 = len(kmers_20) + total_20 = len(seq) - 19 + uniqueness = unique_20 / total_20 + + # Check max homopolymer + max_hp = 1 + current_hp = 1 + for i in range(1, len(seq)): + if seq[i] == seq[i - 1]: + current_hp += 1 + max_hp = max(max_hp, current_hp) + else: + current_hp = 1 + + return { + "length": len(seq), + "gc_content": gc, + "unique_20mers": unique_20, + "total_20mers": total_20, + "uniqueness_pct": uniqueness * 100, + "max_homopolymer": max_hp, + 
} + + +def write_fasta(seq, contig, filepath, line_width=LINE_WIDTH): + """Write sequence as FASTA.""" + with open(filepath, "w") as f: + f.write(f">{contig}\n") + for i in range(0, len(seq), line_width): + f.write(seq[i : i + line_width] + "\n") + + +def main(): + output = sys.argv[1] if len(sys.argv) > 1 else "chr_test.fa" + + print(f"Generating {LENGTH}bp non-repetitive reference sequence...") + seq = generate_sequence(LENGTH) + + stats = validate_sequence(seq) + print(f" Length: {stats['length']}bp") + print(f" GC content: {stats['gc_content']:.1%}") + print(f" Unique 20-mers: {stats['unique_20mers']}/{stats['total_20mers']} ({stats['uniqueness_pct']:.1f}%)") + print(f" Max homopolymer: {stats['max_homopolymer']}bp") + + # Validate + assert stats["gc_content"] > 0.38 and stats["gc_content"] < 0.46, f"GC content out of range: {stats['gc_content']}" + assert stats["uniqueness_pct"] > 99.0, f"Uniqueness too low: {stats['uniqueness_pct']}" + assert stats["max_homopolymer"] <= MAX_HOMOPOLYMER, f"Homopolymer too long: {stats['max_homopolymer']}" + + write_fasta(seq, CONTIG, output) + print(f" Wrote {output}") + + +if __name__ == "__main__": + main() diff --git a/pipelines/nf-atacseq/tests/data/generate_test_data.sh b/pipelines/nf-atacseq/tests/data/generate_test_data.sh index f5cb288..9cb2eb4 100755 --- a/pipelines/nf-atacseq/tests/data/generate_test_data.sh +++ b/pipelines/nf-atacseq/tests/data/generate_test_data.sh @@ -1,11 +1,18 @@ #!/bin/bash # ============================================================================= -# WASP2 nf-atacseq Test Data Generator +# WASP2 nf-atacseq Test Data Generator (v2 — realistic reference) # ============================================================================= -# Creates ATAC-seq-like test data by symlinking shared core data and generating -# pipeline-specific files (shorter fragment FASTQs, BWA index, samplesheet). 
+# Generates self-contained ATAC-seq test data with a non-repetitive reference +# so BWA alignment produces meaningful mapping rates (>80%). # -# Prerequisites: samtools, bgzip, tabix, wgsim, bwa (WASP2_dev2 conda env) +# Previous version used the shared chr_test.fa which is a repetitive ATGC +# pattern yielding ~0% mapping. This version generates its own reference. +# +# To produce non-zero allele counts, reads are simulated from BOTH haplotypes: +# half from the REF haplotype, half from an ALT haplotype with het SNPs applied. +# +# Prerequisites: python3, samtools, bgzip, tabix, wgsim, bwa +# (all available in WASP2_dev2 conda env or WASP2 micromamba env) # # Usage: # cd pipelines/nf-atacseq/tests/data @@ -17,106 +24,307 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" -SHARED_DATA="../../../../tests/shared_data" +# BWA may not be in PATH; check common conda/micromamba locations +if ! command -v bwa &>/dev/null; then + for candidate in \ + /usr/local/Cellar/micromamba/*/envs/WASP2/bin/bwa \ + /usr/local/Cellar/micromamba/*/envs/WASP2_dev2/bin/bwa \ + "${HOME}/miniforge3/envs/WASP2/bin/bwa" \ + "${HOME}/miniconda3/envs/WASP2/bin/bwa"; do + if [[ -x "$candidate" ]]; then + export PATH="$(dirname "$candidate"):$PATH" + break + fi + done +fi echo "===================================================================" -echo " WASP2 nf-atacseq Test Data Generator" +echo " WASP2 nf-atacseq Test Data Generator (v2)" echo "===================================================================" -# Validate shared core data exists -if [[ ! 
-f "$SHARED_DATA/chr_test.fa" ]]; then - echo "ERROR: Shared core data not found at $SHARED_DATA" - echo " Run: cd tests/shared_data && bash generate_core_data.sh" - exit 1 -fi - # ----------------------------------------------------------------------------- -# Symlink shared reference and variants +# Check prerequisites # ----------------------------------------------------------------------------- -echo "[1/4] Symlinking shared reference data..." +echo "[0/7] Checking prerequisites..." + +check_tool() { + if ! command -v "$1" &>/dev/null; then + echo "ERROR: $1 is required but not found in PATH" + echo " Try: conda activate WASP2_dev2" + exit 1 + fi + echo " OK: $1" +} + +check_tool python3 +check_tool samtools +check_tool bwa +check_tool wgsim +check_tool bgzip +check_tool tabix +echo "" +# ----------------------------------------------------------------------------- +# Clean stale symlinks and old data (one-time migration from v1) +# ----------------------------------------------------------------------------- +echo "[1/7] Cleaning stale data..." for f in chr_test.fa chr_test.fa.fai variants.vcf.gz variants.vcf.gz.tbi annotation.gtf regions.bed; do - if [[ ! -e "$f" ]]; then - ln -sf "$SHARED_DATA/$f" "$f" - echo " ✓ Linked $f" - else - echo " - $f already exists" + if [[ -L "$f" ]]; then + rm -f "$f" + echo " Removed symlink: $f" fi done +rm -rf bwa_index +rm -f sample1_R1.fq.gz sample1_R2.fq.gz +rm -f chr_test.fa chr_test.fa.fai variants.vcf variants.vcf.gz variants.vcf.gz.tbi regions.bed +echo " Cleaned previous outputs" +echo "" + +# ----------------------------------------------------------------------------- +# Generate realistic non-repetitive reference +# ----------------------------------------------------------------------------- +echo "[2/7] Generating realistic reference genome..." 
+python3 "${SCRIPT_DIR}/generate_realistic_reference.py" chr_test.fa +samtools faidx chr_test.fa +echo " Created chr_test.fa + .fai" +echo "" +# ----------------------------------------------------------------------------- +# Generate VCF with ~30 het SNPs + ALT haplotype reference +# ----------------------------------------------------------------------------- +echo "[3/7] Creating VCF with 30 het SNPs and ALT haplotype..." + +python3 - <<'PYEOF' +import random + +# Read reference +with open("chr_test.fa") as f: + lines = f.readlines() +seq = "".join(l.strip() for l in lines[1:]) + +# Deterministic SNP positions spread across the reference +rng = random.Random(99) +positions = sorted(rng.sample(range(200, 19800), 30)) + +# Transition mapping for plausible variants +transitions = {"A": "G", "G": "A", "T": "C", "C": "T"} + +# --- Write VCF --- +vcf_lines = [] +vcf_lines.append("##fileformat=VCFv4.2") +vcf_lines.append("##source=WASP2_nf_atacseq_test_data_v2") +vcf_lines.append("##reference=chr_test.fa") +vcf_lines.append("##contig=") +vcf_lines.append('##INFO=') +vcf_lines.append('##FORMAT=') +vcf_lines.append('##FORMAT=') +vcf_lines.append("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tsample1") + +snp_map = {} # pos -> (ref, alt) +for i, pos in enumerate(positions): + ref = seq[pos - 1] # 1-based + alt = transitions[ref] + snp_id = f"snp{i+1:03d}" + vcf_lines.append( + f"chr_test\t{pos}\t{snp_id}\t{ref}\t{alt}\t100\tPASS\tDP=50\tGT:DP\t0|1:50" + ) + snp_map[pos] = (ref, alt) + +with open("variants.vcf", "w") as f: + f.write("\n".join(vcf_lines) + "\n") + +print(f" Created variants.vcf with {len(positions)} het SNPs") + +# --- Write ALT haplotype reference --- +alt_seq = list(seq) +for pos, (ref, alt) in snp_map.items(): + assert alt_seq[pos - 1] == ref, f"Mismatch at {pos}: expected {ref}, got {alt_seq[pos-1]}" + alt_seq[pos - 1] = alt + +with open("chr_test_alt.fa", "w") as f: + f.write(">chr_test\n") + alt_str = "".join(alt_seq) + for i in range(0, 
len(alt_str), 60): + f.write(alt_str[i:i+60] + "\n") + +print(f" Created chr_test_alt.fa (ALT haplotype with {len(snp_map)} substitutions)") +PYEOF + +# Compress and index +rm -f variants.vcf.gz variants.vcf.gz.tbi +bgzip -c variants.vcf > variants.vcf.gz +tabix -p vcf variants.vcf.gz +echo " Created variants.vcf.gz + .tbi" echo "" # ----------------------------------------------------------------------------- -# Simulate ATAC-seq-like reads (shorter fragments, 150-250bp) +# Create regions BED covering all SNP positions # ----------------------------------------------------------------------------- -echo "[2/4] Simulating ATAC-seq reads..." +echo "[4/7] Creating regions BED..." + +python3 - <<'PYEOF' +import random + +rng = random.Random(99) +positions = sorted(rng.sample(range(200, 19800), 30)) -NUM_READS=500 +# Create ~500bp regions centered on each SNP, merge overlapping +regions = [] +for pos in positions: + start = max(0, pos - 250) + end = min(20000, pos + 250) + regions.append((start, end)) + +# Merge overlapping regions +merged = [regions[0]] +for start, end in regions[1:]: + if start <= merged[-1][1]: + merged[-1] = (merged[-1][0], max(merged[-1][1], end)) + else: + merged.append((start, end)) + +with open("regions.bed", "w") as f: + for i, (start, end) in enumerate(merged): + f.write(f"chr_test\t{start}\t{end}\tpeak_{i+1}\n") + +print(f" Created regions.bed with {len(merged)} peaks covering {len(positions)} SNPs") +PYEOF +echo "" + +# ----------------------------------------------------------------------------- +# Simulate ATAC-seq reads from BOTH haplotypes (REF + ALT) +# ----------------------------------------------------------------------------- +echo "[5/7] Simulating ATAC-seq paired-end reads (dual haplotype)..." 
+ +# 20kb genome, 75bp reads, ~20x total coverage +# Split: ~1350 pairs from REF, ~1350 pairs from ALT +NUM_READS_PER_HAP=1350 READ_LEN=75 FRAG_SIZE=180 FRAG_STD=30 ERROR_RATE=0.001 -SEED=100 - -if [[ -f "sample1_R1.fq.gz" && -f "sample1_R2.fq.gz" ]]; then - echo " FASTQs already exist, skipping" -else - wgsim -N $NUM_READS \ - -1 $READ_LEN \ - -2 $READ_LEN \ - -r 0 -R 0 -X 0 \ - -e $ERROR_RATE \ - -S $SEED \ - -d $FRAG_SIZE \ - -s $FRAG_STD \ - "$SHARED_DATA/chr_test.fa" \ - sample1_R1.fq \ - sample1_R2.fq \ - > /dev/null 2>&1 - - gzip -f sample1_R1.fq - gzip -f sample1_R2.fq - echo " ✓ Created sample1_R{1,2}.fq.gz (${NUM_READS} pairs, ${READ_LEN}bp, ${FRAG_SIZE}bp frags)" -fi +# Simulate from REF haplotype +wgsim -N $NUM_READS_PER_HAP \ + -1 $READ_LEN \ + -2 $READ_LEN \ + -r 0 -R 0 -X 0 \ + -e $ERROR_RATE \ + -S 100 \ + -d $FRAG_SIZE \ + -s $FRAG_STD \ + chr_test.fa \ + ref_R1.fq \ + ref_R2.fq \ + > /dev/null 2>&1 +echo " Simulated ${NUM_READS_PER_HAP} pairs from REF haplotype" + +# Simulate from ALT haplotype +wgsim -N $NUM_READS_PER_HAP \ + -1 $READ_LEN \ + -2 $READ_LEN \ + -r 0 -R 0 -X 0 \ + -e $ERROR_RATE \ + -S 200 \ + -d $FRAG_SIZE \ + -s $FRAG_STD \ + chr_test_alt.fa \ + alt_R1.fq \ + alt_R2.fq \ + > /dev/null 2>&1 +echo " Simulated ${NUM_READS_PER_HAP} pairs from ALT haplotype" + +# Combine and compress +cat ref_R1.fq alt_R1.fq | gzip -c > sample1_R1.fq.gz +cat ref_R2.fq alt_R2.fq | gzip -c > sample1_R2.fq.gz +echo " Combined into sample1_R{1,2}.fq.gz ($((NUM_READS_PER_HAP * 2)) total pairs)" + +# Clean up temporary files +rm -f ref_R1.fq ref_R2.fq alt_R1.fq alt_R2.fq chr_test_alt.fa echo "" # ----------------------------------------------------------------------------- -# Build BWA index (for local testing) +# Build BWA index # ----------------------------------------------------------------------------- -echo "[3/4] Building BWA index..." +echo "[6/7] Building BWA index..." 
BWA_INDEX_DIR="bwa_index" -if [[ -f "${BWA_INDEX_DIR}/chr_test.fa.bwt" ]]; then - echo " BWA index already exists, skipping" -else - mkdir -p "$BWA_INDEX_DIR" - cp "$SHARED_DATA/chr_test.fa" "$BWA_INDEX_DIR/" - bwa index "$BWA_INDEX_DIR/chr_test.fa" 2>&1 | tail -2 - echo " ✓ Created BWA index ($(du -sh $BWA_INDEX_DIR | cut -f1))" -fi - +mkdir -p "$BWA_INDEX_DIR" +cp chr_test.fa "$BWA_INDEX_DIR/" +bwa index "$BWA_INDEX_DIR/chr_test.fa" 2>&1 | tail -2 +echo " Created BWA index" echo "" # ----------------------------------------------------------------------------- -# Create test samplesheet +# Create samplesheets (both test and local variants) # ----------------------------------------------------------------------------- -echo "[4/4] Creating test samplesheet..." +echo "[7/7] Creating samplesheets..." -SAMPLESHEET="samplesheet_test.csv" -if [[ -f "$SAMPLESHEET" ]]; then - echo " $SAMPLESHEET already exists, skipping" -else - cat > "$SAMPLESHEET" << EOF +# test samplesheet uses absolute paths +cat > samplesheet_test.csv << EOF sample,fastq_1,fastq_2,sample_name -test_sample1,${SCRIPT_DIR}/sample1_R1.fq.gz,${SCRIPT_DIR}/sample1_R2.fq.gz,SAMPLE1 +test_sample1,${SCRIPT_DIR}/sample1_R1.fq.gz,${SCRIPT_DIR}/sample1_R2.fq.gz,sample1 EOF - echo " ✓ Created $SAMPLESHEET" -fi +echo " Created samplesheet_test.csv" + +# local samplesheet uses ${projectDir} relative paths (for nextflow) +cat > samplesheet_local.csv << 'EOF' +sample,fastq_1,fastq_2,sample_name +test_sample1,${projectDir}/tests/data/sample1_R1.fq.gz,${projectDir}/tests/data/sample1_R2.fq.gz,sample1 +EOF +echo " Created samplesheet_local.csv" + +# ----------------------------------------------------------------------------- +# Quick validation +# ----------------------------------------------------------------------------- +echo "" +echo "===================================================================" +echo " Validation" +echo "===================================================================" + +# Check BWA 
alignment quality +echo "" +echo "--- Quick alignment test (first 100 pairs) ---" +bwa mem -t 2 \ + -R "@RG\tID:sample1\tSM:sample1\tPL:ILLUMINA\tLB:lib1" \ + "$BWA_INDEX_DIR/chr_test.fa" \ + <(gunzip -c sample1_R1.fq.gz | head -400) \ + <(gunzip -c sample1_R2.fq.gz | head -400) \ + 2>/dev/null \ +| samtools flagstat - 2>/dev/null + +echo "" + +# Check VCF REF alleles match reference +echo "--- VCF REF allele validation ---" +python3 - <<'PYEOF' +seq_lines = open("chr_test.fa").readlines() +seq = "".join(l.strip() for l in seq_lines[1:]) + +errors = 0 +total = 0 +with open("variants.vcf") as f: + for line in f: + if line.startswith("#"): + continue + fields = line.strip().split("\t") + pos = int(fields[1]) + ref = fields[3] + actual = seq[pos - 1] + total += 1 + if ref != actual: + print(f" MISMATCH at pos {pos}: VCF REF={ref}, actual={actual}") + errors += 1 + +if errors == 0: + print(f" All {total} REF alleles match reference") +else: + print(f" {errors}/{total} mismatches found!") +PYEOF echo "" echo "===================================================================" -echo " SUCCESS! nf-atacseq test data generated." +echo " SUCCESS! nf-atacseq test data generated (v2)." echo "===================================================================" echo "Total: $(du -sh . 
| cut -f1)" echo "" diff --git a/pipelines/nf-atacseq/tests/data/real_counts.tsv b/pipelines/nf-atacseq/tests/data/real_counts.tsv new file mode 120000 index 0000000..08351b0 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/real_counts.tsv @@ -0,0 +1 @@ +../../../../tests/shared_data/expected_counts.tsv \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/real_test.bam b/pipelines/nf-atacseq/tests/data/real_test.bam new file mode 120000 index 0000000..21f7b54 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/real_test.bam @@ -0,0 +1 @@ +../../../../tests/shared_data/sample1.bam \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/real_test.bam.bai b/pipelines/nf-atacseq/tests/data/real_test.bam.bai new file mode 120000 index 0000000..0037730 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/real_test.bam.bai @@ -0,0 +1 @@ +../../../../tests/shared_data/sample1.bam.bai \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/real_wasp_data.json b/pipelines/nf-atacseq/tests/data/real_wasp_data.json new file mode 120000 index 0000000..bd05953 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/real_wasp_data.json @@ -0,0 +1 @@ +../../../../tests/shared_data/wasp_data.json \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/regions.bed b/pipelines/nf-atacseq/tests/data/regions.bed deleted file mode 120000 index da6c378..0000000 --- a/pipelines/nf-atacseq/tests/data/regions.bed +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/regions.bed \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/regions.bed b/pipelines/nf-atacseq/tests/data/regions.bed new file mode 100644 index 0000000..9b399eb --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/regions.bed @@ -0,0 +1,15 @@ +chr_test 2668 3393 peak_1 +chr_test 4316 4816 peak_2 +chr_test 4939 5439 peak_3 +chr_test 5808 6315 peak_4 +chr_test 6486 7995 peak_5 +chr_test 8090 8679 peak_6 +chr_test 11052 11552 peak_7 
+chr_test 12204 13167 peak_8 +chr_test 13187 13687 peak_9 +chr_test 13766 14266 peak_10 +chr_test 15071 15819 peak_11 +chr_test 15982 16482 peak_12 +chr_test 17274 18099 peak_13 +chr_test 18526 19026 peak_14 +chr_test 19230 19730 peak_15 diff --git a/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz b/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz index 2d8e601..88debd7 100644 Binary files a/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz and b/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz differ diff --git a/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz b/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz index 76535bc..8767790 100644 Binary files a/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz and b/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz differ diff --git a/pipelines/nf-atacseq/tests/data/samplesheet_test.csv b/pipelines/nf-atacseq/tests/data/samplesheet_test.csv index d50c362..cf1884a 100644 --- a/pipelines/nf-atacseq/tests/data/samplesheet_test.csv +++ b/pipelines/nf-atacseq/tests/data/samplesheet_test.csv @@ -1,2 +1,2 @@ sample,fastq_1,fastq_2,sample_name -test_sample1,${projectDir}/tests/data/sample1_R1.fq.gz,${projectDir}/tests/data/sample1_R2.fq.gz,sample1 +test_sample1,/Users/jeffjaureguy/Desktop/WASP2/pipelines/nf-atacseq/tests/data/sample1_R1.fq.gz,/Users/jeffjaureguy/Desktop/WASP2/pipelines/nf-atacseq/tests/data/sample1_R2.fq.gz,sample1 diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf b/pipelines/nf-atacseq/tests/data/variants.vcf new file mode 100644 index 0000000..e3e67f2 --- /dev/null +++ b/pipelines/nf-atacseq/tests/data/variants.vcf @@ -0,0 +1,38 @@ +##fileformat=VCFv4.2 +##source=WASP2_nf_atacseq_test_data_v2 +##reference=chr_test.fa +##contig= +##INFO= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample1 +chr_test 2918 snp001 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 3037 snp002 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 3077 snp003 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 3143 snp004 G A 100 
PASS DP=50 GT:DP 0|1:50 +chr_test 4566 snp005 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 5189 snp006 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 6058 snp007 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 6065 snp008 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 6736 snp009 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 6756 snp010 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 7166 snp011 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 7300 snp012 G A 100 PASS DP=50 GT:DP 0|1:50 +chr_test 7745 snp013 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 8340 snp014 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 8429 snp015 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 11302 snp016 G A 100 PASS DP=50 GT:DP 0|1:50 +chr_test 12454 snp017 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 12676 snp018 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 12752 snp019 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 12917 snp020 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 13437 snp021 A G 100 PASS DP=50 GT:DP 0|1:50 +chr_test 14016 snp022 G A 100 PASS DP=50 GT:DP 0|1:50 +chr_test 15321 snp023 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 15569 snp024 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 16232 snp025 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 17524 snp026 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 17593 snp027 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 17849 snp028 T C 100 PASS DP=50 GT:DP 0|1:50 +chr_test 18776 snp029 C T 100 PASS DP=50 GT:DP 0|1:50 +chr_test 19480 snp030 T C 100 PASS DP=50 GT:DP 0|1:50 diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz b/pipelines/nf-atacseq/tests/data/variants.vcf.gz deleted file mode 120000 index 380b7aa..0000000 --- a/pipelines/nf-atacseq/tests/data/variants.vcf.gz +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/variants.vcf.gz \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz b/pipelines/nf-atacseq/tests/data/variants.vcf.gz new file mode 100644 index 0000000..d7a7f83 Binary files /dev/null and b/pipelines/nf-atacseq/tests/data/variants.vcf.gz differ 
diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi b/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi deleted file mode 120000 index 7a95bbe..0000000 --- a/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi +++ /dev/null @@ -1 +0,0 @@ -../../../../tests/shared_data/variants.vcf.gz.tbi \ No newline at end of file diff --git a/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi b/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi new file mode 100644 index 0000000..98e77de Binary files /dev/null and b/pipelines/nf-atacseq/tests/data/variants.vcf.gz.tbi differ diff --git a/pipelines/nf-atacseq/tests/default.nf.test b/pipelines/nf-atacseq/tests/default.nf.test new file mode 100644 index 0000000..bcba730 --- /dev/null +++ b/pipelines/nf-atacseq/tests/default.nf.test @@ -0,0 +1,28 @@ +nextflow_pipeline { + + name "Test default pipeline run" + script "../main.nf" + + tag "pipeline" + tag "default" + + test("Should run default stub-run mode") { + + options "-stub" + + when { + params { + outdir = "$outputDir" + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/pipeline_info/versions.yml") + ).match() } + ) + } + } +} diff --git a/pipelines/nf-atacseq/tests/main.nf.test b/pipelines/nf-atacseq/tests/main.nf.test index 48607fd..d47aeb7 100644 --- a/pipelines/nf-atacseq/tests/main.nf.test +++ b/pipelines/nf-atacseq/tests/main.nf.test @@ -13,12 +13,17 @@ nextflow_pipeline { when { params { - outdir = "$outputDir/results" + outdir = "$outputDir" } } then { - assert workflow.success + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/pipeline_info/versions.yml") + ).match("stub_versions") } + ) } } diff --git a/pipelines/nf-atacseq/tests/modules/local/wasp2_filter_remapped.nf.test b/pipelines/nf-atacseq/tests/modules/local/wasp2_filter_remapped.nf.test index 886c80a..e916334 100644 --- a/pipelines/nf-atacseq/tests/modules/local/wasp2_filter_remapped.nf.test +++ 
b/pipelines/nf-atacseq/tests/modules/local/wasp2_filter_remapped.nf.test @@ -7,8 +7,72 @@ nextflow_process { tag "modules" tag "modules_local" tag "wasp2" + tag "wasp2_filter_remapped" - test("Should filter remapped reads and produce WASP-corrected BAM - stub") { + // ------------------------------------------------------------------------- + // Real test: requires WASP2 container with wasp2-map and samtools. + // Input files must be actual BAM/JSON from a prior make-reads + remap step. + // To generate these, run the pipeline test profile or use: + // cd tests/data && bash generate_test_data.sh + // then run make-reads + bwa remap manually on the outputs. + // + // Until real intermediate files are generated, this test is tagged + // 'wasp2_filter_remapped_real' so it can be selectively skipped. + // ------------------------------------------------------------------------- + + test("wasp2_filter_remapped - real") { + + tag "wasp2_filter_remapped_real" + + when { + process { + """ + // Uses shared test data symlinked into tests/data/ + // real_test.bam -> shared_data/sample1.bam (aligned reads) + // These serve as stand-ins; a full integration test requires + // actual make-reads output. The process will validate via + // wasp2-map filter-remapped + samtools index + samtools flagstat. 
+ input[0] = [ + [ id:'test_real', single_end:false ], + file("${projectDir}/tests/data/real_test.bam"), + file("${projectDir}/tests/data/real_test.bam.bai"), + file("${projectDir}/tests/data/real_test.bam"), + file("${projectDir}/tests/data/real_test.bam"), + file("${projectDir}/tests/data/real_wasp_data.json") + ] + """ + } + } + + then { + assertAll( + // Process completes successfully + { assert process.success }, + + // BAM output: filtered WASP-corrected BAM + index + { assert process.out.bam.size() == 1 }, + { assert path(process.out.bam[0][1]).exists() }, + { assert path(process.out.bam[0][1]).toFile().size() > 0 }, + { assert path(process.out.bam[0][2]).exists() }, + { assert path(process.out.bam[0][2]).toFile().size() > 0 }, + + // Stats output: samtools flagstat results + { assert process.out.stats.size() == 1 }, + { assert path(process.out.stats[0][1]).exists() }, + { assert path(process.out.stats[0][1]).text.contains("mapped") }, + + // versions.yml emitted with both tools + { assert process.out.versions.size() == 1 }, + { assert path(process.out.versions[0]).text.contains("wasp2") }, + { assert path(process.out.versions[0]).text.contains("samtools") }, + + // meta map preserved + { assert process.out.bam[0][0].id == 'test_real' } + ) + } + } + + test("wasp2_filter_remapped - stub") { options "-stub-run" @@ -28,10 +92,23 @@ nextflow_process { } then { - assert process.success - assert process.out.bam - assert process.out.stats - assert process.out.versions + assertAll( + { assert process.success }, + + // All output channels emitted + { assert process.out.bam.size() == 1 }, + { assert process.out.stats.size() == 1 }, + { assert process.out.versions.size() == 1 }, + + // Stub versions contain expected tools + { assert snapshot(process.out.versions).match("versions_stub") }, + + // Stats file exists with stub content + { assert path(process.out.stats[0][1]).exists() }, + + // meta map preserved through stub + { assert process.out.bam[0][0].id == 
'test_sample' } + ) } } } diff --git a/pipelines/nf-atacseq/tests/modules/local/wasp2_make_reads.nf.test b/pipelines/nf-atacseq/tests/modules/local/wasp2_make_reads.nf.test index 1ceadaa..79e7969 100644 --- a/pipelines/nf-atacseq/tests/modules/local/wasp2_make_reads.nf.test +++ b/pipelines/nf-atacseq/tests/modules/local/wasp2_make_reads.nf.test @@ -7,14 +7,15 @@ nextflow_process { tag "modules" tag "modules_local" tag "wasp2" + tag "wasp2_make_reads" - test("Should generate swapped-allele reads for remapping - real") { + test("wasp2_make_reads - paired_end - real") { when { process { """ input[0] = [ - [ id:'test_real', single_end:false, sample_name:'SAMPLE1' ], + [ id:'test_real', single_end:false, sample_name:'sample1' ], file("${projectDir}/tests/data/real_test.bam"), file("${projectDir}/tests/data/real_test.bam.bai") ] @@ -25,16 +26,42 @@ nextflow_process { then { assertAll( + // Process completes successfully { assert process.success }, + + // FASTQ outputs: paired-end swapped-allele reads + { assert process.out.fastq.size() == 1 }, + { assert path(process.out.fastq[0][1]).exists() }, + { assert path(process.out.fastq[0][2]).exists() }, + { assert path(process.out.fastq[0][1]).toFile().size() > 0 }, + { assert path(process.out.fastq[0][2]).toFile().size() > 0 }, + + // to_remap BAM: reads that overlap variants and need remapping { assert process.out.to_remap_bam.size() == 1 }, + { assert path(process.out.to_remap_bam[0][1]).exists() }, + { assert path(process.out.to_remap_bam[0][1]).toFile().size() > 0 }, + + // keep BAM: reads that don't overlap variants (pass through) { assert process.out.keep_bam.size() == 1 }, + { assert path(process.out.keep_bam[0][1]).exists() }, + + // JSON: WASP data tracking file for filter-remapped step { assert process.out.json.size() == 1 }, - { assert process.out.versions.size() == 1 } + { assert path(process.out.json[0][1]).exists() }, + { assert path(process.out.json[0][1]).text.length() > 2 }, + + // versions.yml emitted + { 
assert process.out.versions.size() == 1 }, + { assert path(process.out.versions[0]).text.contains("wasp2") }, + + // meta map preserved + { assert process.out.fastq[0][0].id == 'test_real' }, + { assert process.out.to_remap_bam[0][0].id == 'test_real' } ) } } - test("Should generate swapped-allele reads for remapping - stub") { + test("wasp2_make_reads - paired_end - stub") { options "-stub-run" @@ -42,7 +69,7 @@ nextflow_process { process { """ input[0] = [ - [ id:'test_sample', single_end:false, sample_name:'NA12878' ], + [ id:'test_sample', single_end:false, sample_name:'sample1' ], file("${projectDir}/tests/data/stub_test.bam"), file("${projectDir}/tests/data/stub_test.bam.bai") ] @@ -52,12 +79,22 @@ nextflow_process { } then { - assert process.success - assert process.out.fastq - assert process.out.to_remap_bam - assert process.out.keep_bam - assert process.out.json - assert process.out.versions + assertAll( + { assert process.success }, + + // All output channels emitted + { assert process.out.fastq.size() == 1 }, + { assert process.out.to_remap_bam.size() == 1 }, + { assert process.out.keep_bam.size() == 1 }, + { assert process.out.json.size() == 1 }, + { assert process.out.versions.size() == 1 }, + + // Stub versions contain expected tool + { assert snapshot(process.out.versions).match("versions_stub") }, + + // meta map preserved through stub + { assert process.out.fastq[0][0].id == 'test_sample' } + ) } } } diff --git a/pipelines/nf-atacseq/tests/nextflow.config b/pipelines/nf-atacseq/tests/nextflow.config index 979614b..a87dc0f 100644 --- a/pipelines/nf-atacseq/tests/nextflow.config +++ b/pipelines/nf-atacseq/tests/nextflow.config @@ -3,7 +3,14 @@ * Loaded via nf-test.config configFile directive */ params { - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' 
+} + +process { + resourceLimits = [ + cpus: 2, + memory: 6.GB, + time: 6.h + ] } diff --git a/pipelines/nf-atacseq/workflows/atacseq.nf b/pipelines/nf-atacseq/workflows/atacseq.nf index caeff59..2972171 100644 --- a/pipelines/nf-atacseq/workflows/atacseq.nf +++ b/pipelines/nf-atacseq/workflows/atacseq.nf @@ -53,6 +53,7 @@ workflow ATACSEQ { ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) ch_fasta = PREPARE_GENOME.out.fasta + ch_fasta_meta = ch_fasta.map { fasta -> [[id: 'genome'], fasta] } ch_vcf = params.vcf ? Channel.fromPath(params.vcf, checkIfExists: true).collect() : Channel.empty() // @@ -60,7 +61,6 @@ workflow ATACSEQ { // if (!params.skip_fastqc) { FASTQC ( ch_samplesheet ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect { it[1] }) } @@ -69,13 +69,12 @@ workflow ATACSEQ { // if (!params.skip_trimming) { FASTP ( - ch_samplesheet, - [], // adapter_fasta + ch_samplesheet.map { meta, reads -> [meta, reads, []] }, + false, // discard_trimmed_pass false, // save_trimmed_fail false // save_merged ) ch_reads = FASTP.out.reads - ch_versions = ch_versions.mix(FASTP.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.json.collect { it[1] }) } else { ch_reads = ch_samplesheet @@ -94,8 +93,9 @@ workflow ATACSEQ { if (params.aligner == 'bwa') { FASTQ_ALIGN_BWA ( ch_reads, - PREPARE_GENOME.out.bwa_index, - ch_fasta + PREPARE_GENOME.out.bwa_index.map { index -> [[id: 'genome'], index] }, + true, + ch_fasta_meta ) ch_aligned_bam = FASTQ_ALIGN_BWA.out.bam ch_aligned_bai = FASTQ_ALIGN_BWA.out.bai @@ -107,6 +107,8 @@ workflow ATACSEQ { FASTQ_ALIGN_BOWTIE2 ( ch_reads, PREPARE_GENOME.out.bowtie2_index, + false, + true, ch_fasta ) ch_aligned_bam = FASTQ_ALIGN_BOWTIE2.out.bam @@ -130,16 +132,16 @@ workflow ATACSEQ { // SUBWORKFLOW: Mark duplicates with Picard and run BAM stats (optional) // ch_fasta_fai = PREPARE_GENOME.out.fasta_fai + ch_fasta_fai_meta = ch_fasta_fai.map { 
fai -> [[id: 'genome'], fai] } if (!params.skip_dedup) { BAM_MARKDUPLICATES_PICARD ( ch_bam_indexed.map { meta, bam, bai -> [meta, bam] }, - ch_fasta, - ch_fasta_fai + ch_fasta_meta, + ch_fasta_fai_meta ) ch_bam_dedup = BAM_MARKDUPLICATES_PICARD.out.bam .join(BAM_MARKDUPLICATES_PICARD.out.bai, by: [0], failOnMismatch: true) - ch_versions = ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions) // Add deduplication stats to MultiQC ch_multiqc_files = ch_multiqc_files.mix(BAM_MARKDUPLICATES_PICARD.out.metrics.collect { it[1] }) @@ -155,7 +157,7 @@ workflow ATACSEQ { // if (!params.skip_peak_calling) { MACS2_CALLPEAK ( - ch_bam_dedup.map { meta, bam, bai -> [meta, bam] }, + ch_bam_dedup.map { meta, bam, bai -> [meta, bam, []] }, params.macs_gsize ) ch_peaks = MACS2_CALLPEAK.out.peak @@ -238,16 +240,14 @@ workflow ATACSEQ { // ch_multiqc_report = Channel.empty() if (!params.skip_multiqc) { - ch_multiqc_config = Channel.fromPath("${projectDir}/assets/multiqc_config.yml", checkIfExists: false).ifEmpty([]) + def multiqc_config_file = file("${projectDir}/assets/multiqc_config.yml") MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - [], // extra_multiqc_config - [] // multiqc_logo + ch_multiqc_files.collect().map { files -> + [ [id: 'multiqc'], files, multiqc_config_file.exists() ? 
[multiqc_config_file] : [], [], [], [] ] + } ) - ch_multiqc_report = MULTIQC.out.report - ch_versions = ch_versions.mix(MULTIQC.out.versions) + ch_multiqc_report = MULTIQC.out.report.map { meta, report -> report } } emit: diff --git a/pipelines/nf-outrider/conf/test_local.config b/pipelines/nf-outrider/conf/test_local.config index b988b75..7f22492 100644 --- a/pipelines/nf-outrider/conf/test_local.config +++ b/pipelines/nf-outrider/conf/test_local.config @@ -26,3 +26,12 @@ params { outrider_min_samples = 3 outrider_min_count = 1 // Low threshold for simulated test data (~5x coverage) } + +// Override base.config resourceLimits so local workstations don't OOM +process { + resourceLimits = [ + cpus: 2, + memory: 6.GB, + time: 1.h + ] +} diff --git a/pipelines/nf-outrider/nextflow.config b/pipelines/nf-outrider/nextflow.config index a70e9d3..03e66cd 100644 --- a/pipelines/nf-outrider/nextflow.config +++ b/pipelines/nf-outrider/nextflow.config @@ -6,10 +6,17 @@ ---------------------------------------------------------------------------------------- */ +// Plugin configuration +plugins { + id 'nf-validation@1.1.3' +} + // Pipeline metadata manifest { name = 'wasp2/nf-outrider' author = 'WASP2 Team' + homePage = 'https://github.com/mcvickerlab/WASP2' + doi = 'https://doi.org/10.1038/nmeth.3582' description = 'WASP2 + OUTRIDER for aberrant expression and mono-allelic expression detection' mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' @@ -65,11 +72,6 @@ params { // Processing options skip_multiqc = false - // Resource limits - max_cpus = 16 - max_memory = '128.GB' - max_time = '240.h' - // Institutional config support (nf-core compatible) custom_config_base = 'https://raw.githubusercontent.com/nf-core/configs/master' custom_config_version = 'master' @@ -77,6 +79,7 @@ params { // Generic options help = false version = false + validate_params = true tracedir = "${params.outdir}/pipeline_info" } @@ -84,11 +87,11 @@ params { includeConfig 'conf/base.config' 
includeConfig 'conf/modules.config' -// Load nf-core institutional configs +// Load nf-core custom profiles from https://github.com/nf-core/configs try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}") } // Execution profiles @@ -102,7 +105,6 @@ profiles { conda.enabled = true docker.enabled = false singularity.enabled = false - process.conda = "${projectDir}/../../environment.yml" } docker { docker.enabled = true @@ -158,15 +160,15 @@ profiles { def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.tracedir}/timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.tracedir}/report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.tracedir}/trace_${trace_timestamp}.txt" } dag { enabled = true @@ -196,32 +198,33 @@ process { process.shell = ['/bin/bash', '-euo', 'pipefail'] // Function to ensure resources don't exceed limits +// Resource capping is handled by process.resourceLimits in conf/base.config. +// This function is retained for backward compatibility with process label closures. 
def check_max(obj, type) { if (type == 'memory') { try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println "WARNING: Invalid max_memory '${params.max_memory}', using default" + def max = (params.max_memory as nextflow.util.MemoryUnit) ?: 128.GB + if (obj.compareTo(max) == 1) + return max + else return obj + } catch (Exception e) { + log.warn "Invalid memory config: ${e.message}. Using ${obj}" return obj } } else if (type == 'time') { try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println "WARNING: Invalid max_time '${params.max_time}', using default" + def max = (params.max_time as nextflow.util.Duration) ?: 240.h + if (obj.compareTo(max) == 1) + return max + else return obj + } catch (Exception e) { + log.warn "Invalid time config: ${e.message}. Using ${obj}" return obj } } else if (type == 'cpus') { - try { - return Math.min(obj, params.max_cpus as int) - } catch (all) { - println "WARNING: Invalid max_cpus '${params.max_cpus}', using default" + try { return Math.min(obj, (params.max_cpus ?: 16) as int) } + catch (Exception e) { + log.warn "Invalid CPU config: ${e.message}. 
Using ${obj}" return obj } } diff --git a/pipelines/nf-rnaseq/conf/test_local.config b/pipelines/nf-rnaseq/conf/test_local.config index 5eaae60..e8063d6 100644 --- a/pipelines/nf-rnaseq/conf/test_local.config +++ b/pipelines/nf-rnaseq/conf/test_local.config @@ -25,3 +25,12 @@ params { // Lower thresholds for small test dataset min_count = 1 } + +// Override base.config resourceLimits so local workstations don't OOM +process { + resourceLimits = [ + cpus: 2, + memory: 4.GB, + time: 2.h + ] +} diff --git a/pipelines/nf-rnaseq/nextflow.config b/pipelines/nf-rnaseq/nextflow.config index f26865d..5625145 100644 --- a/pipelines/nf-rnaseq/nextflow.config +++ b/pipelines/nf-rnaseq/nextflow.config @@ -4,6 +4,22 @@ ======================================================================================== */ +plugins { + id 'nf-validation@1.1.3' +} + +// Pipeline manifest +manifest { + name = 'wasp2/nf-rnaseq' + author = 'WASP2 Team' + homePage = 'https://github.com/mcvickerlab/WASP2' + doi = 'https://doi.org/10.1038/nmeth.3582' + description = 'RNA-seq Allele-Specific Expression (ASE) pipeline with WASP2' + mainScript = 'main.nf' + nextflowVersion = '!>=23.04.0' + version = '1.0.0' +} + // Global default params params { // Pipeline options @@ -39,10 +55,8 @@ params { // ML Output options output_format = null // ML output formats: zarr,parquet,anndata (comma-separated) - // Resource limits - max_cpus = 16 - max_memory = '128.GB' - max_time = '240.h' + // Validation + validate_params = true // Trace directory tracedir = "${params.outdir}/pipeline_info" @@ -56,11 +70,11 @@ params { includeConfig 'conf/base.config' includeConfig 'conf/modules.config' -// Load nf-core institutional configs +// Load nf-core custom profiles from https://github.com/nf-core/configs try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? 
"${params.custom_config_base}/nfcore_custom.config" : "/dev/null" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}") } profiles { @@ -74,7 +88,6 @@ profiles { conda.enabled = true docker.enabled = false singularity.enabled = false - process.conda = "${projectDir}/../../environment.yml" } docker { @@ -84,6 +97,13 @@ profiles { singularity.enabled = false } + arm { + // Apple Silicon / ARM64 compatibility — forces linux/amd64 containers + // via Rosetta 2 emulation. Combine with a container profile: + // nextflow run main.nf -profile docker,arm [options] + includeConfig 'conf/arm.config' + } + singularity { singularity.enabled = true singularity.autoMounts = true @@ -136,37 +156,22 @@ profiles { } } -// Container overrides -def wasp2_container = 'ghcr.io/mcvickerlab/wasp2:1.4.0' -def star_container = 'community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4' -process { - withName: 'WASP2_UNIFIED_MAKE_READS|WASP2_FILTER_REMAPPED|WASP2_COUNT_ALLELES|WASP2_ANALYZE_IMBALANCE|WASP2_ML_OUTPUT' { - container = wasp2_container - } - withName: 'STAR_ALIGN.*' { - container = star_container - } -} - -// Capture exit codes from upstream processes when piping -process.shell = ['/bin/bash', '-euo', 'pipefail'] - // Execution reports def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.tracedir}/timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.tracedir}/report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.tracedir}/trace_${trace_timestamp}.txt" } dag { @@ -181,44 +186,49 
@@ env { R_ENVIRON_USER = "/.Renviron" } -// Pipeline manifest -manifest { - name = 'wasp2/nf-rnaseq' - author = 'WASP2 Team' - homePage = 'https://github.com/mcvickerlab/WASP2' - description = 'RNA-seq Allele-Specific Expression (ASE) pipeline with WASP2' - mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' - version = '1.0.0' +// Container overrides +def wasp2_container = 'ghcr.io/mcvickerlab/wasp2:1.4.0' +def star_container = 'community.wave.seqera.io/library/htslib_samtools_star_gawk:ae438e9a604351a4' +process { + withName: 'WASP2_UNIFIED_MAKE_READS|WASP2_FILTER_REMAPPED|WASP2_COUNT_ALLELES|WASP2_ANALYZE_IMBALANCE|WASP2_ML_OUTPUT' { + container = wasp2_container + } + withName: 'STAR_ALIGN.*' { + container = star_container + } } -// Function to check max resource limits +// Capture exit codes from upstream processes when piping +process.shell = ['/bin/bash', '-euo', 'pipefail'] + +// Function to ensure resources don't exceed limits +// Resource capping is handled by process.resourceLimits in conf/base.config. +// This function is retained for backward compatibility with process label closures. def check_max(obj, type) { if (type == 'memory') { try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid!" + def max = (params.max_memory as nextflow.util.MemoryUnit) ?: 128.GB + if (obj.compareTo(max) == 1) + return max + else return obj + } catch (Exception e) { + log.warn "Invalid memory config: ${e.message}. Using ${obj}" return obj } } else if (type == 'time') { try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid!" 
+ def max = (params.max_time as nextflow.util.Duration) ?: 240.h + if (obj.compareTo(max) == 1) + return max + else return obj + } catch (Exception e) { + log.warn "Invalid time config: ${e.message}. Using ${obj}" return obj } } else if (type == 'cpus') { - try { - return Math.min(obj, params.max_cpus as int) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid!" + try { return Math.min(obj, (params.max_cpus ?: 16) as int) } + catch (Exception e) { + log.warn "Invalid CPU config: ${e.message}. Using ${obj}" return obj } } diff --git a/pipelines/nf-scatac/conf/test_local.config b/pipelines/nf-scatac/conf/test_local.config index e6bbcd9..a4f54cb 100644 --- a/pipelines/nf-scatac/conf/test_local.config +++ b/pipelines/nf-scatac/conf/test_local.config @@ -23,3 +23,12 @@ params { skip_anndata = false create_zarr = false } + +// Override base.config resourceLimits so local workstations don't OOM +process { + resourceLimits = [ + cpus: 2, + memory: 6.GB, + time: 1.h + ] +} diff --git a/pipelines/nf-scatac/nextflow.config b/pipelines/nf-scatac/nextflow.config index 679d953..0a5352e 100644 --- a/pipelines/nf-scatac/nextflow.config +++ b/pipelines/nf-scatac/nextflow.config @@ -4,9 +4,16 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +// Plugin configuration +plugins { + id 'nf-validation@1.1.3' +} + manifest { name = 'wasp2/nf-scatac' author = 'WASP2 Team' + homePage = 'https://github.com/mcvickerlab/WASP2' + doi = 'https://doi.org/10.1038/nmeth.3582' description = 'Single-Cell ATAC-seq Allelic Imbalance Pipeline' mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' @@ -36,11 +43,6 @@ params { // ML Output options output_format = null // ML output formats: zarr,parquet,anndata (comma-separated) - // Resource limits - max_cpus = 16 - max_memory = '128.GB' - max_time = '240.h' - // Institutional config support (nf-core compatible) custom_config_base = 
'https://raw.githubusercontent.com/nf-core/configs/master' custom_config_version = 'master' @@ -55,11 +57,11 @@ params { includeConfig 'conf/base.config' includeConfig 'conf/modules.config' -// Load nf-core institutional configs +// Load nf-core custom profiles from https://github.com/nf-core/configs try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/configs: ${params.custom_config_base}") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}") } // Container version - override all WASP2/SCATAC processes to use 1.4.0 @@ -77,6 +79,11 @@ profiles { process.beforeScript = 'echo $HOSTNAME' cleanup = false } + conda { + conda.enabled = true + docker.enabled = false + singularity.enabled = false + } docker { docker.enabled = true conda.enabled = false @@ -92,12 +99,6 @@ profiles { conda.enabled = false docker.enabled = false } - conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - process.conda = "${projectDir}/../../environment.yml" - } test { includeConfig 'conf/test.config' } @@ -145,32 +146,43 @@ dag { file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } +// Export these variables to prevent local Python/R libs from conflicting +env { + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" +} + process.shell = ['/bin/bash', '-euo', 'pipefail'] -// Resource limit checker with logging for configuration errors +// Function to ensure resources don't exceed limits +// Resource capping is handled by process.resourceLimits in conf/base.config. +// This function is retained for backward compatibility with process label closures.
def check_max(obj, type) { if (type == 'memory') { try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit + def max = (params.max_memory as nextflow.util.MemoryUnit) ?: 128.GB + if (obj.compareTo(max) == 1) + return max else return obj } catch (Exception e) { - log.warn "Invalid memory config (${obj}, max=${params.max_memory}): ${e.message}. Using ${obj}" + log.warn "Invalid memory config: ${e.message}. Using ${obj}" return obj } } else if (type == 'time') { try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration + def max = (params.max_time as nextflow.util.Duration) ?: 240.h + if (obj.compareTo(max) == 1) + return max else return obj } catch (Exception e) { - log.warn "Invalid time config (${obj}, max=${params.max_time}): ${e.message}. Using ${obj}" + log.warn "Invalid time config: ${e.message}. Using ${obj}" return obj } } else if (type == 'cpus') { - try { return Math.min(obj, params.max_cpus as int) } + try { return Math.min(obj, (params.max_cpus ?: 16) as int) } catch (Exception e) { - log.warn "Invalid CPU config (${obj}, max=${params.max_cpus}): ${e.message}. Using ${obj}" + log.warn "Invalid CPU config: ${e.message}. Using ${obj}" return obj } }