From ccae713de8c4190169072a4c070930b46941bcd3 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Sun, 15 Dec 2024 18:31:46 -0600 Subject: [PATCH 1/4] build: nf-core modules install samtools/convert --- modules.json | 323 ++++++++++++------ .../nf-core/samtools/convert/environment.yml | 8 + modules/nf-core/samtools/convert/main.nf | 60 ++++ modules/nf-core/samtools/convert/meta.yml | 103 ++++++ .../samtools/convert/tests/main.nf.test | 107 ++++++ .../samtools/convert/tests/main.nf.test.snap | 131 +++++++ .../nf-core/samtools/convert/tests/tags.yml | 2 + 7 files changed, 636 insertions(+), 98 deletions(-) create mode 100644 modules/nf-core/samtools/convert/environment.yml create mode 100644 modules/nf-core/samtools/convert/main.nf create mode 100644 modules/nf-core/samtools/convert/meta.yml create mode 100644 modules/nf-core/samtools/convert/tests/main.nf.test create mode 100644 modules/nf-core/samtools/convert/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/convert/tests/tags.yml diff --git a/modules.json b/modules.json index 42021a66..b0f90fb8 100644 --- a/modules.json +++ b/modules.json @@ -7,257 +7,361 @@ "nf-core": { "bbmap/pileup": { "branch": "master", - "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "bedtools/bamtobed": { "branch": "master", - "git_sha": "1d1cb7bfef6cf67fbc7faafa6992ad8bdc3045b3", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "bedtools/intersect": { "branch": "master", - "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", - "installed_by": ["modules"] + "git_sha": "749d72f162cb1493dca66b454064ed201de2cd95", + "installed_by": [ + "modules" + ] }, "bedtools/merge": { "branch": "master", - "git_sha": "a5377837fe9013bde89de8689829e83e84086536", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "bedtools/sort": { "branch": "master", - "git_sha": "571a5feac4c9ce0a8df0bc15b94230e7f3e8db47", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "bowtie2/align": { "branch": "master", - "git_sha": "e4bad511789f16d0df39ee306b2cd50418365048", - "installed_by": ["fastq_align_bowtie2"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "fastq_align_bowtie2" + ] }, "bowtie2/build": { "branch": "master", - "git_sha": "1fea64f5132a813ec97c1c6d3a74e0aee7142b6d", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "bwa/index": { "branch": "master", - "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", - "installed_by": ["modules"], - "patch": "modules/nf-core/bwa/index/bwa-index.diff" + "git_sha": "90aef30f432332bdf0ce9f4b9004aa5d5c4960bb", + "installed_by": [ + "modules" + ] }, "bwa/mem": { "branch": "master", - "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", - "installed_by": ["fastq_align_bwa"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "fastq_align_bwa" + ] }, "bwamem2/index": { "branch": "master", - "git_sha": "7081e04c18de9480948d34513a1c1e2d0fa9126d", - "installed_by": ["modules"], - "patch": "modules/nf-core/bwamem2/index/bwamem2-index.diff" + "git_sha": "90aef30f432332bdf0ce9f4b9004aa5d5c4960bb", + "installed_by": [ + "modules" + ] }, "bwamem2/mem": { "branch": "master", - "git_sha": "3afb95b2e15fc4a2347470255a7ef654f650c8ec", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", - "git_sha": "c60c14b285b89bdd0607e371417dadb80385ad6e", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "cat/fastq": { "branch": "master", - "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "custom/getchromsizes": { "branch": "master", - "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "deeptools/bamcoverage": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "dragmap/align": { "branch": "master", - "git_sha": "dd2757cc22c5de8943fa38ba7cd6f8cc1eb65ac1", - "installed_by": ["modules"], - "patch": "modules/nf-core/dragmap/align/dragmap-align.diff" + "git_sha": "4f5274c3de0c9521f5033893ff61057a74c45ba9", + "installed_by": [ + "modules" + ] }, "dragmap/hashtable": { "branch": "master", - "git_sha": "ae9e01cb5e77faada314047e78423b22b4f5bbc5", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "fastp": { "branch": "master", - "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gffread": { "branch": "master", - "git_sha": "6c996d7fbe0816dcbb68ce587ad5f873313682a1", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "hisat2/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["fastq_align_hisat2"] + "installed_by": [ + "fastq_align_hisat2" + ] }, "homer/findpeaks": { "branch": "master", "git_sha": "cac282aeb4099300e04b60167a3a12d8c96c4978", - "installed_by": ["homer_groseq"] + "installed_by": [ + "homer_groseq" + ] }, "homer/maketagdirectory": { "branch": "master", "git_sha": "b108745a1d59778e21577c217a963e17b8ba9bc2", - "installed_by": ["homer_groseq"] + "installed_by": [ + "homer_groseq" + ] }, "homer/makeucscfile": { "branch": "master", "git_sha": "b108745a1d59778e21577c217a963e17b8ba9bc2", - "installed_by": ["homer_groseq"] + "installed_by": [ + "homer_groseq" + ] }, "homer/pos2bed": { "branch": "master", "git_sha": "b108745a1d59778e21577c217a963e17b8ba9bc2", - "installed_by": ["homer_groseq"] + "installed_by": [ + "homer_groseq" + ] }, "multiqc": { "branch": "master", "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pints/caller": { "branch": "master", - "git_sha": "ac55541b663c35e9ff50f79d32049ce7492a6ea5", - "installed_by": ["modules"], - "patch": "modules/nf-core/pints/caller/pints-caller.diff" + "git_sha": "8d5737116be7d55f5642ea96cc6d9396389a1811", + "installed_by": [ + "modules" + ] }, "preseq/ccurve": { "branch": "master", "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "preseq/lcextrap": { "branch": "master", - "git_sha": "2c6b1144ed58b6184ad58fc4e6b6a90219b4bf4f", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "rseqc/bamstat": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "bam_rseqc" + ] }, "rseqc/inferexperiment": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc", "modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "bam_rseqc" + ] }, "rseqc/innerdistance": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "bam_rseqc" + ] }, "rseqc/junctionannotation": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "bam_rseqc" + ] }, "rseqc/junctionsaturation": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "bam_rseqc" + ] }, "rseqc/readdistribution": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc", "modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "bam_rseqc" + ] }, "rseqc/readduplication": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc", "modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "bam_rseqc" + ] }, "rseqc/tin": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "bam_rseqc" + ] + }, + "samtools/convert": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", - "installed_by": ["bam_stats_samtools"] + "installed_by": [ + "bam_stats_samtools" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", - "installed_by": ["bam_stats_samtools"] + "installed_by": [ + "bam_stats_samtools" + ] }, "samtools/index": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["bam_dedup_stats_samtools_umitools", "bam_sort_stats_samtools"] + "installed_by": [ + "bam_dedup_stats_samtools_umitools", + "bam_sort_stats_samtools" + ] }, "samtools/merge": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", - "installed_by": ["bam_sort_stats_samtools"] + "installed_by": [ + "bam_sort_stats_samtools" + ] }, "samtools/stats": { "branch": "master", "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", - "installed_by": ["bam_stats_samtools"] + "installed_by": [ + "bam_stats_samtools" + ] }, "star/align": { "branch": "master", "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", - "installed_by": ["fastq_align_star", "modules"] + "installed_by": [ + "fastq_align_star" + ] }, "star/genomegenerate": { "branch": "master", - "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "subread/featurecounts": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": [ + "modules" + ] }, "umitools/dedup": { "branch": "master", "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", - "installed_by": ["bam_dedup_stats_samtools_umitools"] + "installed_by": [ + "bam_dedup_stats_samtools_umitools" + ] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "unzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["homer_groseq"] + "installed_by": [ + "homer_groseq" + ] } } }, @@ -266,12 +370,16 @@ "bam_dedup_stats_samtools_umitools": { "branch": "master", "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "bam_rseqc": { "branch": "master", "git_sha": "0eacd714effe5aac1c1de26593873960b3346cab", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "bam_sort_stats_samtools": { "branch": "master", @@ -286,47 +394,66 @@ "bam_stats_samtools": { "branch": "master", "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", - "installed_by": ["bam_dedup_stats_samtools_umitools", "bam_sort_stats_samtools"] + "installed_by": [ + "bam_dedup_stats_samtools_umitools", + "bam_sort_stats_samtools" + ] }, "fastq_align_bowtie2": { "branch": "master", - "git_sha": "0eacd714effe5aac1c1de26593873960b3346cab", - "installed_by": ["subworkflows"] + "git_sha": "0fa40bbf85955171ec197334ac70a1197fba600d", + "installed_by": [ + "subworkflows" + ] }, "fastq_align_bwa": { "branch": "master", - "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", - "installed_by": ["subworkflows"] + "git_sha": "0fa40bbf85955171ec197334ac70a1197fba600d", + "installed_by": [ + "subworkflows" + ] }, "fastq_align_hisat2": { "branch": "master", "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "fastq_align_star": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["subworkflows"] + "git_sha": "6a367612c2c27aa659293c656afa764361223db4", + "installed_by": [ + "subworkflows" + ] }, "homer_groseq": { "branch": "master", "git_sha": "cac282aeb4099300e04b60167a3a12d8c96c4978", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } diff --git a/modules/nf-core/samtools/convert/environment.yml b/modules/nf-core/samtools/convert/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/convert/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/convert/main.nf b/modules/nf-core/samtools/convert/main.nf new file mode 100644 index 00000000..cf9253d1 --- /dev/null +++ b/modules/nf-core/samtools/convert/main.nf @@ -0,0 +1,60 @@ +process SAMTOOLS_CONVERT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.bam") , emit: bam , optional: true + tuple val(meta), path("*.cram") , emit: cram, optional: true + tuple val(meta), path("*.bai") , emit: bai , optional: true + tuple val(meta), path("*.crai") , emit: crai, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? "cram" : "bam" + + """ + samtools view \\ + --threads ${task.cpus} \\ + --reference ${fasta} \\ + $args \\ + $input \\ + -o ${prefix}.${output_extension} + + samtools index -@${task.cpus} ${prefix}.${output_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? "cram" : "bam" + def index_extension = output_extension == "bam" ? "bai" : "crai" + + """ + touch ${prefix}.${output_extension} + touch ${prefix}.${index_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/convert/meta.yml b/modules/nf-core/samtools/convert/meta.yml new file mode 100644 index 00000000..d5bfa161 --- /dev/null +++ b/modules/nf-core/samtools/convert/meta.yml @@ -0,0 +1,103 @@ +name: samtools_convert +description: convert and then index CRAM -> BAM or BAM -> CRAM file +keywords: + - view + - index + - bam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Reference file to create the CRAM file + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: Reference index file to create the CRAM file + pattern: "*.{fai}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: filtered/converted BAM file + pattern: "*{.bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: filtered/converted CRAM file + pattern: "*{cram}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: filtered/converted BAM index + pattern: "*{.bai}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: filtered/converted CRAM index + pattern: "*{.crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@FriederikeHanssen" + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/samtools/convert/tests/main.nf.test b/modules/nf-core/samtools/convert/tests/main.nf.test new file mode 100644 index 00000000..91a0c69e --- /dev/null +++ b/modules/nf-core/samtools/convert/tests/main.nf.test @@ -0,0 +1,107 @@ +nextflow_process { + + name "Test Process SAMTOOLS_CONVERT" + script "../main.nf" + process "SAMTOOLS_CONVERT" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/convert" + + test("sarscov2 - [bam, bai], fasta, fai") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("bam_to_cram_alignment") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("bam_to_cram_index") }, + { assert snapshot(process.out.versions).match("bam_to_cram_versions") } + ) + } + } + + test("homo_sapiens - [cram, crai], fasta, fai") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_alignment") }, + { assert snapshot(file(process.out.bai[0][1]).name).match("cram_to_bam_alignment_index") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("sarscov2 - [bam, bai], fasta, fai - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("stub") } + ) + } + } +} diff --git a/modules/nf-core/samtools/convert/tests/main.nf.test.snap b/modules/nf-core/samtools/convert/tests/main.nf.test.snap new file mode 100644 index 00000000..a021254e --- /dev/null +++ b/modules/nf-core/samtools/convert/tests/main.nf.test.snap @@ -0,0 +1,131 @@ +{ + "cram_to_bam_alignment": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:51.300147176" + }, + "bam_to_cram_alignment": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:36.625470184" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:52:35.516411351" + }, + "bam_to_cram_versions": { + "content": [ + [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:52:24.694454205" + }, + "stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ], + "bai": [ + + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:52:45.799885099" + }, + "bam_to_cram_index": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:36.640009334" + }, + "cram_to_bam_alignment_index": { + "content": [ + "test.bam.bai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:51.304477426" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/convert/tests/tags.yml b/modules/nf-core/samtools/convert/tests/tags.yml new file mode 100644 index 00000000..030d5eb5 --- /dev/null +++ b/modules/nf-core/samtools/convert/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/convert: + - "modules/nf-core/samtools/convert/**" From 6f60c17eb790f666a063665efa1a4add71a5212f Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Sun, 15 Dec 2024 18:32:08 -0600 Subject: [PATCH 2/4] feat: Publish CRAM - Add --bam flag https://nf-co.re/docs/guidelines/pipelines/recommendations/file_formats --- CHANGELOG.md | 8 + conf/modules.config | 36 ++- docs/usage.md | 25 ++ modules.json | 250 +++++------------- nextflow.config | 1 + nextflow_schema.json | 6 + workflows/nascent.nf | 13 +- workflows/tests/aligner/bwa.nf.test.snap | 74 ++++-- workflows/tests/aligner/star.nf.test.snap | 13 +- workflows/tests/bam_publishing.nf.test | 102 +++++++ workflows/tests/bam_publishing.nf.test.snap | 55 ++++ workflows/tests/cram_publishing.nf.test | 70 +++++ workflows/tests/cram_publishing.nf.test.snap | 37 +++ .../tests/inputs/uniqmap/main.nf.test.snap | 13 +- .../grohmm/tuning/main.nf.test.snap | 22 +- 15 files changed, 489 insertions(+), 236 deletions(-) create mode 100644 workflows/tests/bam_publishing.nf.test create mode 100644 workflows/tests/bam_publishing.nf.test.snap create mode 100644 workflows/tests/cram_publishing.nf.test create mode 100644 workflows/tests/cram_publishing.nf.test.snap diff --git a/CHANGELOG.md b/CHANGELOG.md index cb3646b7..63d7a8e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## dev - [date] +### Added + +- [#201](https://github.com/nf-core/nascent/pull/201) - Add CRAM output format support with `--bam` parameter to control output format + +### Changed + +- [#201](https://github.com/nf-core/nascent/pull/201) - Default output format changed from BAM to CRAM for space efficiency + ### Fixed TODO Remove bedtools bedgraph diff --git a/conf/modules.config b/conf/modules.config index 7c2e892f..58595124 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -185,11 +185,23 @@ process { ] } - withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { + withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.bam, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: "*.ba*", + enabled: params.bam, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -243,6 +255,26 @@ process { } } + withName: SAMTOOLS_CONVERT { + ext.prefix = { "${meta.id}.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: "*.{cram,crai}", + enabled: !params.bam + ] + } + + withName: SAMTOOLS_MERGE { + publishDir = [ + path: { "${params.outdir}/samtools" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.bam, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + ///////////////////// // Quality Control // ///////////////////// @@ -333,7 +365,7 @@ process { ext.prefix = { "${meta.id}.minus" } } - withName:DREG_PREP { + withName: DREG_PREP { ext.prefix = { "${meta.id}.dreg" } } diff --git a/docs/usage.md b/docs/usage.md index f43e47e7..e99666b7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -60,6 +60,31 @@ The sample column is essentially a concatenation of the group and replicate colu By default, the pipeline uses [BWA](https://bio-bwa.sourceforge.net/) (i.e. `--aligner bwa`) to map the raw FastQ reads to the reference genome. Research as to which aligner works best with Nascent Transcript and Transcription Start Site assays is pending. +## Output Format Options + +### CRAM vs BAM Files + +By default, the pipeline outputs alignment files in CRAM format to save storage space. CRAM files are typically 30-50% smaller than equivalent BAM files while maintaining full data integrity. + +**Default behavior (CRAM output):** + +```bash +nextflow run nf-core/nascent --input samplesheet.csv --outdir results +``` + +**To output BAM files instead:** + +```bash +nextflow run nf-core/nascent --input samplesheet.csv --outdir results --bam +``` + +**Key considerations:** + +- **CRAM files**: Space-efficient, require reference genome for some downstream tools +- **BAM files**: Larger file size, more widely compatible with downstream tools +- Internal processing always uses BAM format for compatibility +- Only final output format is affected by this parameter + ## Reference genome files The minimum reference genome requirements are a FASTA and GTF file, all other files required to run the pipeline can be generated from these files. However, it is more storage and compute friendly if you are able to re-use reference genome files as efficiently as possible. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build new indices (e.g. those unavailable on [AWS iGenomes](https://nf-co.re/usage/reference_genomes)) so that you can save them somewhere locally. The index building step can be quite a time-consuming process and it permits their reuse for future runs of the pipeline to save disk space. You can then either provide the appropriate reference genome files on the command-line via the appropriate parameters (e.g. `--star_index '/path/to/BWA/index/'`) or via a custom config file. diff --git a/modules.json b/modules.json index b0f90fb8..54057116 100644 --- a/modules.json +++ b/modules.json @@ -8,360 +8,257 @@ "bbmap/pileup": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/bamtobed": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/intersect": { "branch": "master", "git_sha": "749d72f162cb1493dca66b454064ed201de2cd95", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/merge": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/sort": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bowtie2/align": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "fastq_align_bowtie2" - ] + "installed_by": ["fastq_align_bowtie2"] }, "bowtie2/build": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwa/index": { "branch": "master", "git_sha": "90aef30f432332bdf0ce9f4b9004aa5d5c4960bb", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "fastq_align_bwa" - ] + "installed_by": ["fastq_align_bwa"] }, "bwamem2/index": { "branch": "master", "git_sha": "90aef30f432332bdf0ce9f4b9004aa5d5c4960bb", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/cat": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/getchromsizes": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "deeptools/bamcoverage": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "dragmap/align": { "branch": "master", "git_sha": "4f5274c3de0c9521f5033893ff61057a74c45ba9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "dragmap/hashtable": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastp": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gffread": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "hisat2/align": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "fastq_align_hisat2" - ] + "installed_by": ["fastq_align_hisat2"] }, "homer/findpeaks": { "branch": "master", "git_sha": "cac282aeb4099300e04b60167a3a12d8c96c4978", - "installed_by": [ - "homer_groseq" - ] + "installed_by": ["homer_groseq"] }, "homer/maketagdirectory": { "branch": "master", "git_sha": "b108745a1d59778e21577c217a963e17b8ba9bc2", - "installed_by": [ - "homer_groseq" - ] + "installed_by": ["homer_groseq"] }, "homer/makeucscfile": { "branch": "master", "git_sha": "b108745a1d59778e21577c217a963e17b8ba9bc2", - "installed_by": [ - "homer_groseq" - ] + "installed_by": ["homer_groseq"] }, "homer/pos2bed": { "branch": "master", "git_sha": "b108745a1d59778e21577c217a963e17b8ba9bc2", - "installed_by": [ - "homer_groseq" - ] + "installed_by": ["homer_groseq"] }, "multiqc": { "branch": "master", "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pints/caller": { "branch": "master", "git_sha": "8d5737116be7d55f5642ea96cc6d9396389a1811", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "preseq/ccurve": { "branch": "master", "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "preseq/lcextrap": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "rseqc/bamstat": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "bam_rseqc" - ] + "installed_by": ["bam_rseqc"] }, "rseqc/inferexperiment": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "bam_rseqc" - ] + "installed_by": ["bam_rseqc"] }, "rseqc/innerdistance": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "bam_rseqc" - ] + "installed_by": ["bam_rseqc"] }, "rseqc/junctionannotation": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "bam_rseqc" - ] + "installed_by": ["bam_rseqc"] }, "rseqc/junctionsaturation": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "bam_rseqc" - ] + "installed_by": ["bam_rseqc"] }, "rseqc/readdistribution": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "bam_rseqc" - ] + "installed_by": ["bam_rseqc"] }, "rseqc/readduplication": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "bam_rseqc" - ] + "installed_by": ["bam_rseqc"] }, "rseqc/tin": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "bam_rseqc" - ] + "installed_by": ["bam_rseqc"] }, "samtools/convert": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", - "installed_by": [ - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools"] }, "samtools/idxstats": { "branch": "master", "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", - "installed_by": [ - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools"] }, "samtools/index": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "bam_dedup_stats_samtools_umitools", - "bam_sort_stats_samtools" - ] + "installed_by": ["bam_dedup_stats_samtools_umitools", "bam_sort_stats_samtools"] }, "samtools/merge": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", - "installed_by": [ - "bam_sort_stats_samtools" - ] + "installed_by": ["bam_sort_stats_samtools"] }, "samtools/stats": { "branch": "master", "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", - "installed_by": [ - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools"] }, "star/align": { "branch": "master", "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", - "installed_by": [ - "fastq_align_star" - ] + "installed_by": ["fastq_align_star"] }, "star/genomegenerate": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "subread/featurecounts": { "branch": "master", "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "umitools/dedup": { "branch": "master", "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", - "installed_by": [ - "bam_dedup_stats_samtools_umitools" - ] + "installed_by": ["bam_dedup_stats_samtools_umitools"] }, "untar": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "unzip": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "homer_groseq" - ] + "installed_by": ["homer_groseq"] } } }, @@ -370,16 +267,12 @@ "bam_dedup_stats_samtools_umitools": { "branch": "master", "git_sha": "0b27602842d3d79fd0e8db79f4afa764967fc3d1", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "bam_rseqc": { "branch": "master", "git_sha": "0eacd714effe5aac1c1de26593873960b3346cab", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "bam_sort_stats_samtools": { "branch": "master", @@ -394,66 +287,47 @@ "bam_stats_samtools": { "branch": "master", "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", - "installed_by": [ - "bam_dedup_stats_samtools_umitools", - "bam_sort_stats_samtools" - ] + "installed_by": ["bam_dedup_stats_samtools_umitools", "bam_sort_stats_samtools"] }, "fastq_align_bowtie2": { "branch": "master", "git_sha": "0fa40bbf85955171ec197334ac70a1197fba600d", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "fastq_align_bwa": { "branch": "master", "git_sha": "0fa40bbf85955171ec197334ac70a1197fba600d", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "fastq_align_hisat2": { "branch": "master", "git_sha": "763d4b5c05ffda3ac1ac969dc67f7458cfb2eb1d", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "fastq_align_star": { "branch": "master", "git_sha": "6a367612c2c27aa659293c656afa764361223db4", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "homer_groseq": { "branch": "master", "git_sha": "cac282aeb4099300e04b60167a3a12d8c96c4978", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } diff --git a/nextflow.config b/nextflow.config index affc0fed..29d1c8e8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,6 +21,7 @@ params { // Alignment aligner = 'bwa' skip_alignment = false + bam = false // Transcript identification method assay_type = null diff --git a/nextflow_schema.json b/nextflow_schema.json index d2b1e488..5cb47c04 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -66,6 +66,12 @@ "description": "Skip the adapter trimming step.", "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.", "fa_icon": "fas fa-fast-forward" + }, + "bam": { + "type": "boolean", + "fa_icon": "fas fa-file-archive", + "description": "Output BAM alignment files instead of CRAM.", + "help_text": "By default, the pipeline outputs CRAM alignment files which are smaller and more efficient. Use this parameter if you need BAM files for downstream analysis that doesn't support CRAM format." } } }, diff --git a/workflows/nascent.nf b/workflows/nascent.nf index f377ee8b..41089df5 100644 --- a/workflows/nascent.nf +++ b/workflows/nascent.nf @@ -19,7 +19,7 @@ include { UNTAR as UNTAR_STAR_INDEX } from '../mo include { STAR_GENOMEGENERATE } from '../modules/nf-core/star/genomegenerate/main' include { SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_GENE } from '../modules/nf-core/subread/featurecounts/main' include { SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_PREDICTED } from '../modules/nf-core/subread/featurecounts/main' - +include { SAMTOOLS_CONVERT } from '../modules/nf-core/samtools/convert/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' @@ -256,6 +256,17 @@ workflow NASCENT { ch_genome_bam_bai = ch_genome_bam.join(ch_genome_bai, by: [0], remainder: true) + // Publish CRAM files by default, BAM files if --bam is specified + // https://nf-co.re/docs/guidelines/pipelines/recommendations/file_formats + if (!params.bam) { + SAMTOOLS_CONVERT( + ch_genome_bam_bai, + ch_fasta, + PREPARE_GENOME.out.fai.map { [[:], it] }, + ) + ch_versions = ch_versions.mix(SAMTOOLS_CONVERT.out.versions.first()) + } + QUALITY_CONTROL( ch_genome_bam_bai, PREPARE_GENOME.out.gene_bed diff --git a/workflows/tests/aligner/bwa.nf.test.snap b/workflows/tests/aligner/bwa.nf.test.snap index 446d1da3..8b56a5c8 100644 --- a/workflows/tests/aligner/bwa.nf.test.snap +++ b/workflows/tests/aligner/bwa.nf.test.snap @@ -1,27 +1,27 @@ { "Should work with gzipped references": { "content": [ - 52 + 55 ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2025-05-11T15:15:07.710999" + "timestamp": "2025-06-29T19:45:51.976463" }, "Should work with BWA Index": { "content": [ - 56 + 59 ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2025-05-11T15:14:14.361165" + "timestamp": "2025-06-29T19:44:37.964822" }, "Should run with defaults": { "content": [ - 137, + 143, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -95,6 +95,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -110,18 +113,18 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/cd4_REP3.sorted.bam", - "bwa/cd4_REP3.sorted.bam.bai", - "bwa/cd4_REP4.sorted.bam", - "bwa/cd4_REP4.sorted.bam.bai", - "bwa/jurkat_REP1.sorted.bam", - "bwa/jurkat_REP1.sorted.bam.bai", - "bwa/jurkat_REP2.sorted.bam", - "bwa/jurkat_REP2.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/cd4_REP3.cram", + "bwa/cd4_REP3.cram.crai", + "bwa/cd4_REP4.cram", + "bwa/cd4_REP4.cram.crai", + "bwa/jurkat_REP1.cram", + "bwa/jurkat_REP1.cram.crai", + "bwa/jurkat_REP2.cram", + "bwa/jurkat_REP2.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -323,9 +326,6 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -387,7 +387,7 @@ "jurkat_REP2.sorted.bam.flagstat:md5,86ed47bd41a745ab59de473082c7742d", "jurkat_REP2.sorted.bam.idxstats:md5,3db8f88c1f836eb5d10f37ca1df81ae9", "jurkat_REP2.sorted.bam.stats:md5,141032fd40420e3aa80d37c7bf7ed2b1", - "cd4.bed:md5,864532d867843d8ad1545b90d5dcd762", + "cd4.bed:md5,76ee3b56d3e518f88a34b42039ec719c", "jurkat.bed:md5,862a5e81119acc691845f3b426847401", "cd4_REP1.minus.bigWig:md5,5280319275c98dcce023779fa389884d", "cd4_REP1.plus.bigWig:md5,72ccab3173f2018a22a4b36841247ba2", @@ -414,14 +414,40 @@ "versions.yml:md5,7c0dcd0a18b3c753def73b96cb825792", "cd4_merged.bed:md5,4d8f9dc54f886f379b95609908a08662", "jurkat_merged.bed:md5,cae11a1bfb707ea2df5fe612ae7268c8", - "cd4_chr21_1_unidirectional_peaks.bed:md5,864532d867843d8ad1545b90d5dcd762", + "cd4_chr21_1_unidirectional_peaks.bed:md5,76ee3b56d3e518f88a34b42039ec719c", "jurkat_chr21_1_unidirectional_peaks.bed:md5,862a5e81119acc691845f3b426847401" + ], + [ + [ + "cd4_REP1.cram", + "6d0e2703ce223d704904349160c348d7" + ], + [ + "cd4_REP2.cram", + "8fcf09eb7bd84405b7c3b5b30e517ad9" + ], + [ + "cd4_REP3.cram", + "8c61a09cc0baefcb3fe7d9c960862979" + ], + [ + "cd4_REP4.cram", + "f3ed6ed58e1cdf617762102702bd93ae" + ], + [ + "jurkat_REP1.cram", + "b6d4bf9f7335daee4614d0dc89525e5e" + ], + [ + "jurkat_REP2.cram", + "db611c9dc35283581edfa6f2d3854cf" + ] ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.2" + "nextflow": "24.10.3" }, - "timestamp": "2025-05-21T04:07:46.607615194" + "timestamp": "2025-06-29T19:43:06.80498" } } \ No newline at end of file diff --git a/workflows/tests/aligner/star.nf.test.snap b/workflows/tests/aligner/star.nf.test.snap index de9b0b04..ce117380 100644 --- a/workflows/tests/aligner/star.nf.test.snap +++ b/workflows/tests/aligner/star.nf.test.snap @@ -1,13 +1,13 @@ { "Should run with gzipped gtf": { "content": [ - 87 + 90 ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + "nextflow": "24.10.3" }, - "timestamp": "2025-05-20T22:05:06.864437" + "timestamp": "2025-06-29T19:53:50.826998" }, "Should run with defaults": { "content": [ @@ -433,6 +433,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -449,8 +452,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + "nextflow": "24.10.3" }, - "timestamp": "2025-05-20T22:05:05.079173" + "timestamp": "2025-06-29T19:53:50.72646" } } \ No newline at end of file diff --git a/workflows/tests/bam_publishing.nf.test b/workflows/tests/bam_publishing.nf.test new file mode 100644 index 00000000..f179bf94 --- /dev/null +++ b/workflows/tests/bam_publishing.nf.test @@ -0,0 +1,102 @@ +nextflow_pipeline { + + name "BAM Publishing with --bam parameter" + script "../../main.nf" + tag "bam" + tag "publishing" + + test("Should publish BAM files when --bam parameter is used") { + + when { + params { + outdir = "$outputDir" + aligner = "bowtie2" + bam = true + } + } + + then { + // Get all BAM files + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // Get all CRAM files (should be none when --bam is used) + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + // Get BAM index files + def bam_index_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam.bai']) + + assertAll( + { assert workflow.success }, + { assert bam_files.size() > 0 : "No BAM files found in output when --bam parameter was used" }, + { assert cram_files.size() == 0 : "CRAM files found when --bam parameter should produce BAM files" }, + { assert bam_index_files.size() > 0 : "No BAM index files found" }, + { assert snapshot( + // Number of tasks + workflow.trace.succeeded().size(), + // BAM files (names only, content validation removed for consistency) + bam_files.collect{ file -> file.getName() }, + // BAM index files + bam_index_files.collect{ file -> file.getName() } + ).match() } + ) + } + } + + test("Should publish BAM files with different aligner when --bam is used") { + + when { + params { + outdir = "$outputDir" + aligner = "bwa" + bam = true + } + } + + then { + // Get all BAM files + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // Get all CRAM files (should be none when --bam is used) + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + + assertAll( + { assert workflow.success }, + { assert bam_files.size() > 0 : "No BAM files found in output when --bam parameter was used" }, + { assert cram_files.size() == 0 : "CRAM files found when --bam parameter should produce BAM files" }, + { assert snapshot( + // Number of tasks + workflow.trace.succeeded().size(), + // BAM files (names only, content validation removed for consistency) + bam_files.collect{ file -> file.getName() } + ).match() } + ) + } + } + + test("Should handle --bam parameter with STAR aligner") { + + when { + params { + outdir = "$outputDir" + aligner = "star" + bam = true + } + } + + then { + // Get all BAM files + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // Get all CRAM files (should be none when --bam is used) + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + + assertAll( + { assert workflow.success }, + { assert bam_files.size() > 0 : "No BAM files found in output when --bam parameter was used with STAR" }, + { assert cram_files.size() == 0 : "CRAM files found when --bam parameter should produce BAM files with STAR" }, + { assert snapshot( + // Number of tasks + workflow.trace.succeeded().size(), + // BAM files (names only, content validation removed for consistency) + bam_files.collect{ file -> file.getName() } + ).match() } + ) + } + } +} diff --git a/workflows/tests/bam_publishing.nf.test.snap b/workflows/tests/bam_publishing.nf.test.snap new file mode 100644 index 00000000..11bc2574 --- /dev/null +++ b/workflows/tests/bam_publishing.nf.test.snap @@ -0,0 +1,55 @@ +{ + "Should handle --bam parameter with STAR aligner": { + "content": [ + 86, + [ + "cd4.bam", + "jurkat.bam" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-28T23:35:22.265415" + }, + "Should publish BAM files when --bam parameter is used": { + "content": [ + 71, + [ + "cd4_REP1.sorted.bam", + "cd4_REP2.sorted.bam", + "jurkat.sorted.bam", + "cd4.bam", + "jurkat.bam" + ], + [ + "cd4_REP1.sorted.bam.bai", + "cd4_REP2.sorted.bam.bai", + "jurkat.sorted.bam.bai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-29T14:47:13.244785" + }, + "Should publish BAM files with different aligner when --bam is used": { + "content": [ + 86, + [ + "cd4_REP1.sorted.bam", + "cd4_REP2.sorted.bam", + "jurkat.sorted.bam", + "cd4.bam", + "jurkat.bam" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-28T23:31:10.551067" + } +} \ No newline at end of file diff --git a/workflows/tests/cram_publishing.nf.test b/workflows/tests/cram_publishing.nf.test new file mode 100644 index 00000000..95bc3c62 --- /dev/null +++ b/workflows/tests/cram_publishing.nf.test @@ -0,0 +1,70 @@ +nextflow_pipeline { + + name "CRAM Publishing Default" + script "../../main.nf" + tag "cram" + tag "publishing" + + test("Should publish CRAM files by default") { + + when { + params { + outdir = "$outputDir" + aligner = "bowtie2" + } + } + + then { + // Get all CRAM files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + // Get all BAM files (should be none with default settings) + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // Get CRAM index files + def cram_index_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram.crai']) + + assertAll( + { assert workflow.success }, + { assert cram_files.size() > 0 : "No CRAM files found in output" }, + { assert bam_files.size() == 0 : "BAM files found when CRAM should be default" }, + { assert cram_index_files.size() > 0 : "No CRAM index files found" }, + { assert snapshot( + // Number of tasks + workflow.trace.succeeded().size(), + // CRAM files (names only, content validation removed due to reference mismatch) + cram_files.collect{ file -> file.getName() }, + // CRAM index files + cram_index_files.collect{ file -> file.getName() } + ).match() } + ) + } + } + + test("Should publish CRAM files with different aligner") { + + when { + params { + outdir = "$outputDir" + aligner = "bwa" + } + } + + then { + // Get all CRAM files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + // Get all BAM files (should be none with default settings) + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + + assertAll( + { assert workflow.success }, + { assert cram_files.size() > 0 : "No CRAM files found in output" }, + { assert bam_files.size() == 0 : "BAM files found when CRAM should be default" }, + { assert snapshot( + // Number of tasks + workflow.trace.succeeded().size(), + // CRAM files (names only, content validation removed due to reference mismatch) + cram_files.collect{ file -> file.getName() } + ).match() } + ) + } + } +} diff --git a/workflows/tests/cram_publishing.nf.test.snap b/workflows/tests/cram_publishing.nf.test.snap new file mode 100644 index 00000000..1ced2a8c --- /dev/null +++ b/workflows/tests/cram_publishing.nf.test.snap @@ -0,0 +1,37 @@ +{ + "Should publish CRAM files by default": { + "content": [ + 74, + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" + ], + [ + "cd4_REP1.cram.crai", + "cd4_REP2.cram.crai", + "jurkat.cram.crai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-28T23:06:40.237907" + }, + "Should publish CRAM files with different aligner": { + "content": [ + 89, + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-28T23:08:55.886843" + } +} \ No newline at end of file diff --git a/workflows/tests/inputs/uniqmap/main.nf.test.snap b/workflows/tests/inputs/uniqmap/main.nf.test.snap index fc5bce3c..4fc67673 100644 --- a/workflows/tests/inputs/uniqmap/main.nf.test.snap +++ b/workflows/tests/inputs/uniqmap/main.nf.test.snap @@ -1,7 +1,7 @@ { "output_files": { "content": [ - 72, + 75, "# uniqMapDirectory = uniqmap.GRCh38_chr21.50nt", "# cmd = findPeaks cd4_tagdir -style groseq -o cd4-uniqmap.GRCh38_chr21.peaks.txt -uniqmap uniqmap.GRCh38_chr21.50nt", "# uniqMapDirectory = uniqmap.GRCh38_chr21.50nt", @@ -22,9 +22,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + "nextflow": "24.10.3" }, - "timestamp": "2025-05-20T22:12:52.684402" + "timestamp": "2025-06-29T20:29:53.268154" }, "software_versions": { "content": [ @@ -84,6 +84,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -100,8 +103,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + "nextflow": "24.10.3" }, - "timestamp": "2025-05-20T21:41:42.367523" + "timestamp": "2025-06-29T20:29:53.145336" } } \ No newline at end of file diff --git a/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test.snap b/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test.snap index a2c2bb2b..5dee1387 100644 --- a/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test.snap +++ b/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with defaults": { "content": [ - 106, + 109, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -79,6 +79,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -94,12 +97,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -242,9 +245,6 @@ "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_jurkat_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_jurkat_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -346,6 +346,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2025-04-24T13:00:22.623091" + "timestamp": "2025-06-29T20:52:06.293766" } } \ No newline at end of file From 784e66f94d0d34871e97ce40be1f048e60f068b6 Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Sun, 15 Dec 2024 19:57:24 -0600 Subject: [PATCH 3/4] test: bam => cram bump snaps --- conf/modules.config | 1 - tests/.nftignore | 2 +- workflows/tests/aligner/bowtie2.nf.test | 9 +++-- workflows/tests/aligner/bowtie2.nf.test.snap | 27 ++++++++----- workflows/tests/aligner/bwa.nf.test | 9 +++-- workflows/tests/aligner/bwa.nf.test.snap | 40 +++++-------------- workflows/tests/aligner/bwamem2.nf.test | 9 +++-- workflows/tests/aligner/bwamem2.nf.test.snap | 29 ++++++++------ workflows/tests/aligner/hisat2.nf.test | 9 +++-- workflows/tests/aligner/hisat2.nf.test.snap | 29 ++++++++------ workflows/tests/aligner/star.nf.test | 9 +++-- workflows/tests/aligner/star.nf.test.snap | 23 ++++++++--- workflows/tests/inputs/gff/main.nf.test | 10 +++-- workflows/tests/inputs/gff/main.nf.test.snap | 29 ++++++++------ .../tests/inputs/gzipped_gff/main.nf.test | 10 +++-- .../inputs/gzipped_gff/main.nf.test.snap | 27 ++++++++----- workflows/tests/inputs/only_gff/main.nf.test | 10 +++-- .../tests/inputs/only_gff/main.nf.test.snap | 27 ++++++++----- .../grohmm/only_gff/main.nf.test | 9 +++-- .../grohmm/only_gff/main.nf.test.snap | 25 ++++++------ .../grohmm/tuning/main.nf.test | 6 +-- 21 files changed, 192 insertions(+), 157 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 58595124..bc890a56 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -256,7 +256,6 @@ process { } withName: SAMTOOLS_CONVERT { - ext.prefix = { "${meta.id}.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode, diff --git a/tests/.nftignore b/tests/.nftignore index 60cbdcea..54ba86a7 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -23,7 +23,7 @@ hisat2/log/*.hisat2.summary.log **/*.command.log **/*.bedGraph.gz **/tagInfo.txt -**/*.{bam,bai} +**/*.{cram,crai} **/*.featureCounts.txt **/*.featureCounts.txt.summary **/*.pdf diff --git a/workflows/tests/aligner/bowtie2.nf.test b/workflows/tests/aligner/bowtie2.nf.test index 4f91e553..154dec64 100644 --- a/workflows/tests/aligner/bowtie2.nf.test +++ b/workflows/tests/aligner/bowtie2.nf.test @@ -19,8 +19,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -32,8 +32,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/aligner/bowtie2.nf.test.snap b/workflows/tests/aligner/bowtie2.nf.test.snap index b2f1b071..20c2f906 100644 --- a/workflows/tests/aligner/bowtie2.nf.test.snap +++ b/workflows/tests/aligner/bowtie2.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with defaults": { "content": [ - 71, + 74, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -63,6 +63,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_FLAGSTAT": { "samtools": 1.21 }, @@ -90,12 +93,12 @@ }, [ "bowtie2", - "bowtie2/cd4_REP1.sorted.bam", - "bowtie2/cd4_REP1.sorted.bam.bai", - "bowtie2/cd4_REP2.sorted.bam", - "bowtie2/cd4_REP2.sorted.bam.bai", - "bowtie2/jurkat.sorted.bam", - "bowtie2/jurkat.sorted.bam.bai", + "bowtie2/cd4_REP1.cram", + "bowtie2/cd4_REP1.cram.crai", + "bowtie2/cd4_REP2.cram", + "bowtie2/cd4_REP2.cram.crai", + "bowtie2/jurkat.cram", + "bowtie2/jurkat.cram.crai", "bowtie2/log", "bowtie2/log/cd4_REP1.bowtie2.log", "bowtie2/log/cd4_REP2.bowtie2.log", @@ -230,9 +233,6 @@ "quantification/gene/cd4.featureCounts.txt.summary", "quantification/gene/jurkat.featureCounts.txt", "quantification/gene/jurkat.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -296,12 +296,17 @@ "jurkat.bed:md5,5e170e72c4e2b27a7bb0a6de7b735c1c", "jurkat.peaks.txt:md5,100cb761b6b7abad3901775e499a6aa1", "versions.yml:md5,7c0dcd0a18b3c753def73b96cb825792" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "25.03.1" }, - "timestamp": "2025-05-20T21:52:41.120419" + "timestamp": "2025-07-06T17:57:23.93854" } } \ No newline at end of file diff --git a/workflows/tests/aligner/bwa.nf.test b/workflows/tests/aligner/bwa.nf.test index ed8e201a..06d2a004 100644 --- a/workflows/tests/aligner/bwa.nf.test +++ b/workflows/tests/aligner/bwa.nf.test @@ -20,8 +20,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -33,8 +33,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getHeaderMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/aligner/bwa.nf.test.snap b/workflows/tests/aligner/bwa.nf.test.snap index 8b56a5c8..605b4c48 100644 --- a/workflows/tests/aligner/bwa.nf.test.snap +++ b/workflows/tests/aligner/bwa.nf.test.snap @@ -387,7 +387,7 @@ "jurkat_REP2.sorted.bam.flagstat:md5,86ed47bd41a745ab59de473082c7742d", "jurkat_REP2.sorted.bam.idxstats:md5,3db8f88c1f836eb5d10f37ca1df81ae9", "jurkat_REP2.sorted.bam.stats:md5,141032fd40420e3aa80d37c7bf7ed2b1", - "cd4.bed:md5,76ee3b56d3e518f88a34b42039ec719c", + "cd4.bed:md5,864532d867843d8ad1545b90d5dcd762", "jurkat.bed:md5,862a5e81119acc691845f3b426847401", "cd4_REP1.minus.bigWig:md5,5280319275c98dcce023779fa389884d", "cd4_REP1.plus.bigWig:md5,72ccab3173f2018a22a4b36841247ba2", @@ -414,40 +414,22 @@ "versions.yml:md5,7c0dcd0a18b3c753def73b96cb825792", "cd4_merged.bed:md5,4d8f9dc54f886f379b95609908a08662", "jurkat_merged.bed:md5,cae11a1bfb707ea2df5fe612ae7268c8", - "cd4_chr21_1_unidirectional_peaks.bed:md5,76ee3b56d3e518f88a34b42039ec719c", + "cd4_chr21_1_unidirectional_peaks.bed:md5,864532d867843d8ad1545b90d5dcd762", "jurkat_chr21_1_unidirectional_peaks.bed:md5,862a5e81119acc691845f3b426847401" ], [ - [ - "cd4_REP1.cram", - "6d0e2703ce223d704904349160c348d7" - ], - [ - "cd4_REP2.cram", - "8fcf09eb7bd84405b7c3b5b30e517ad9" - ], - [ - "cd4_REP3.cram", - "8c61a09cc0baefcb3fe7d9c960862979" - ], - [ - "cd4_REP4.cram", - "f3ed6ed58e1cdf617762102702bd93ae" - ], - [ - "jurkat_REP1.cram", - "b6d4bf9f7335daee4614d0dc89525e5e" - ], - [ - "jurkat_REP2.cram", - "db611c9dc35283581edfa6f2d3854cf" - ] + "cd4_REP1.cram", + "cd4_REP2.cram", + "cd4_REP3.cram", + "cd4_REP4.cram", + "jurkat_REP1.cram", + "jurkat_REP2.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.6" }, - "timestamp": "2025-06-29T19:43:06.80498" + "timestamp": "2025-07-06T23:42:11.465363652" } -} \ No newline at end of file +} diff --git a/workflows/tests/aligner/bwamem2.nf.test b/workflows/tests/aligner/bwamem2.nf.test index 2bebf8ff..e529d294 100644 --- a/workflows/tests/aligner/bwamem2.nf.test +++ b/workflows/tests/aligner/bwamem2.nf.test @@ -17,8 +17,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -30,8 +30,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/aligner/bwamem2.nf.test.snap b/workflows/tests/aligner/bwamem2.nf.test.snap index 8799e53e..c98777e6 100644 --- a/workflows/tests/aligner/bwamem2.nf.test.snap +++ b/workflows/tests/aligner/bwamem2.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with defaults": { "content": [ - 86, + 89, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -75,6 +75,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_FLAGSTAT": { "samtools": 1.21 }, @@ -105,12 +108,12 @@ }, [ "bwamem2", - "bwamem2/cd4_REP1.sorted.bam", - "bwamem2/cd4_REP1.sorted.bam.bai", - "bwamem2/cd4_REP2.sorted.bam", - "bwamem2/cd4_REP2.sorted.bam.bai", - "bwamem2/jurkat.sorted.bam", - "bwamem2/jurkat.sorted.bam.bai", + "bwamem2/cd4_REP1.cram", + "bwamem2/cd4_REP1.cram.crai", + "bwamem2/cd4_REP2.cram", + "bwamem2/cd4_REP2.cram.crai", + "bwamem2/jurkat.cram", + "bwamem2/jurkat.cram.crai", "bwamem2/samtools_stats", "bwamem2/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwamem2/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -249,9 +252,6 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -324,12 +324,17 @@ "jurkat_merged.bed:md5,f1dde43c4ad9dec972ff9fa38cc6f2fe", "cd4_chr21_1_unidirectional_peaks.bed:md5,0193e58943726af89bfd00e9da2536d8", "jurkat_chr21_1_unidirectional_peaks.bed:md5,cb6932229eea2e09f61d48d7dd397ae1" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-11T12:54:16.867564" + "timestamp": "2025-07-06T23:47:48.402842458" } } \ No newline at end of file diff --git a/workflows/tests/aligner/hisat2.nf.test b/workflows/tests/aligner/hisat2.nf.test index 8deb6f2b..b41d4640 100644 --- a/workflows/tests/aligner/hisat2.nf.test +++ b/workflows/tests/aligner/hisat2.nf.test @@ -20,8 +20,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -33,8 +33,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/aligner/hisat2.nf.test.snap b/workflows/tests/aligner/hisat2.nf.test.snap index cf17c8dd..81be575e 100644 --- a/workflows/tests/aligner/hisat2.nf.test.snap +++ b/workflows/tests/aligner/hisat2.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with defaults": { "content": [ - 83, + 86, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -72,6 +72,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_FLAGSTAT": { "samtools": 1.21 }, @@ -112,12 +115,12 @@ "coverage_graphs/jurkat.minus.bigWig", "coverage_graphs/jurkat.plus.bigWig", "hisat2", - "hisat2/cd4_REP1.sorted.bam", - "hisat2/cd4_REP1.sorted.bam.bai", - "hisat2/cd4_REP2.sorted.bam", - "hisat2/cd4_REP2.sorted.bam.bai", - "hisat2/jurkat.sorted.bam", - "hisat2/jurkat.sorted.bam.bai", + "hisat2/cd4_REP1.cram", + "hisat2/cd4_REP1.cram.crai", + "hisat2/cd4_REP2.cram", + "hisat2/cd4_REP2.cram.crai", + "hisat2/jurkat.cram", + "hisat2/jurkat.cram.crai", "hisat2/log", "hisat2/log/cd4_REP1.hisat2.summary.log", "hisat2/log/cd4_REP2.hisat2.summary.log", @@ -245,9 +248,6 @@ "quantification/gene/cd4.featureCounts.txt.summary", "quantification/gene/jurkat.featureCounts.txt", "quantification/gene/jurkat.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -338,12 +338,17 @@ "GRCh38_chr21.6.ht2:md5,242e36d01cd1719b6bd05f157c644eed", "GRCh38_chr21.7.ht2:md5,24e7d0673a77e07fbe40400f9a6b3db6", "GRCh38_chr21.8.ht2:md5,5e0626bdb7f7a267990f72ae45c3e44a" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.2" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-21T04:25:30.229280071" + "timestamp": "2025-07-06T23:52:35.006090205" } } \ No newline at end of file diff --git a/workflows/tests/aligner/star.nf.test b/workflows/tests/aligner/star.nf.test index 1fbcd996..b521aa96 100644 --- a/workflows/tests/aligner/star.nf.test +++ b/workflows/tests/aligner/star.nf.test @@ -19,8 +19,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -32,8 +32,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/aligner/star.nf.test.snap b/workflows/tests/aligner/star.nf.test.snap index ce117380..7507b9a0 100644 --- a/workflows/tests/aligner/star.nf.test.snap +++ b/workflows/tests/aligner/star.nf.test.snap @@ -11,7 +11,7 @@ }, "Should run with defaults": { "content": [ - 86, + 89, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -78,6 +78,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -220,10 +223,13 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "star", + "star/cd4_REP1.cram", + "star/cd4_REP1.cram.crai", + "star/cd4_REP2.cram", + "star/cd4_REP2.cram.crai", + "star/jurkat.cram", + "star/jurkat.cram.crai", "star/log", "star/log/cd4_REP1.Log.final.out", "star/log/cd4_REP1.Log.out", @@ -354,13 +360,18 @@ "jurkat_merged.bed:md5,95a5279ec2387dfa0b4c2e7820083527", "cd4_chr21_1_unidirectional_peaks.bed:md5,26765aa153cb1d6bb668f5786da5763e", "jurkat_chr21_1_unidirectional_peaks.bed:md5,15a3bec7a3ffb53c1e621665b3f45873" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-20T22:02:00.825316" + "timestamp": "2025-07-06T23:56:14.617739031" }, "gzip_software_versions": { "content": [ diff --git a/workflows/tests/inputs/gff/main.nf.test b/workflows/tests/inputs/gff/main.nf.test index 258a30bf..52b01cd9 100644 --- a/workflows/tests/inputs/gff/main.nf.test +++ b/workflows/tests/inputs/gff/main.nf.test @@ -11,6 +11,7 @@ nextflow_pipeline { params { outdir = "$outputDir" gff = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/genes_chr21.gff' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21.fa' } } @@ -19,8 +20,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -32,8 +33,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/inputs/gff/main.nf.test.snap b/workflows/tests/inputs/gff/main.nf.test.snap index fee78d85..b2c67297 100644 --- a/workflows/tests/inputs/gff/main.nf.test.snap +++ b/workflows/tests/inputs/gff/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with an unzipped GFF file": { "content": [ - 86, + 89, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -75,6 +75,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -90,12 +93,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -234,9 +237,6 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -309,12 +309,17 @@ "jurkat_merged.bed:md5,f1dde43c4ad9dec972ff9fa38cc6f2fe", "cd4_chr21_1_unidirectional_peaks.bed:md5,0193e58943726af89bfd00e9da2536d8", "jurkat_chr21_1_unidirectional_peaks.bed:md5,cb6932229eea2e09f61d48d7dd397ae1" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.03.1" }, - "timestamp": "2025-05-11T19:48:55.653909" + "timestamp": "2025-07-06T21:31:16.708314" } } \ No newline at end of file diff --git a/workflows/tests/inputs/gzipped_gff/main.nf.test b/workflows/tests/inputs/gzipped_gff/main.nf.test index 33bc949f..7319402c 100644 --- a/workflows/tests/inputs/gzipped_gff/main.nf.test +++ b/workflows/tests/inputs/gzipped_gff/main.nf.test @@ -11,6 +11,7 @@ nextflow_pipeline { params { outdir = "$outputDir" gff = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/genes_chr21.gff.gz' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21.fa' } } @@ -19,8 +20,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -32,8 +33,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/inputs/gzipped_gff/main.nf.test.snap b/workflows/tests/inputs/gzipped_gff/main.nf.test.snap index 9f1ef491..a1443cfc 100644 --- a/workflows/tests/inputs/gzipped_gff/main.nf.test.snap +++ b/workflows/tests/inputs/gzipped_gff/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with a gzipped GFF file": { "content": [ - 86, + 89, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -75,6 +75,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -90,12 +93,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -234,9 +237,6 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -309,12 +309,17 @@ "jurkat_merged.bed:md5,f1dde43c4ad9dec972ff9fa38cc6f2fe", "cd4_chr21_1_unidirectional_peaks.bed:md5,0193e58943726af89bfd00e9da2536d8", "jurkat_chr21_1_unidirectional_peaks.bed:md5,cb6932229eea2e09f61d48d7dd397ae1" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "25.03.1" }, - "timestamp": "2025-05-20T21:38:08.846055" + "timestamp": "2025-07-06T21:27:10.363164" } } \ No newline at end of file diff --git a/workflows/tests/inputs/only_gff/main.nf.test b/workflows/tests/inputs/only_gff/main.nf.test index b9fd1389..c05a9154 100644 --- a/workflows/tests/inputs/only_gff/main.nf.test +++ b/workflows/tests/inputs/only_gff/main.nf.test @@ -11,6 +11,7 @@ nextflow_pipeline { params { outdir = "$outputDir" gff = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/genes_chr21.gff' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21.fa' gtf = null bed = null } @@ -21,8 +22,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -34,8 +35,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/inputs/only_gff/main.nf.test.snap b/workflows/tests/inputs/only_gff/main.nf.test.snap index 198d7127..de608638 100644 --- a/workflows/tests/inputs/only_gff/main.nf.test.snap +++ b/workflows/tests/inputs/only_gff/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with only a GFF file": { "content": [ - 87, + 90, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -78,6 +78,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -93,12 +96,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -237,9 +240,6 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -312,12 +312,17 @@ "jurkat_merged.bed:md5,f1dde43c4ad9dec972ff9fa38cc6f2fe", "cd4_chr21_1_unidirectional_peaks.bed:md5,0193e58943726af89bfd00e9da2536d8", "jurkat_chr21_1_unidirectional_peaks.bed:md5,cb6932229eea2e09f61d48d7dd397ae1" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "25.03.1" }, - "timestamp": "2025-05-20T22:11:14.562542" + "timestamp": "2025-07-06T21:29:11.669792" } } \ No newline at end of file diff --git a/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test b/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test index 638a7ba0..4639de63 100644 --- a/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test +++ b/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test @@ -25,8 +25,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -38,8 +38,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test.snap b/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test.snap index b52aae88..2039c9e0 100644 --- a/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test.snap +++ b/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run groHMM with only a GFF file": { "content": [ - 106, + 109, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -88,6 +88,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -103,12 +106,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -251,9 +254,6 @@ "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_jurkat_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_jurkat_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -349,7 +349,8 @@ "jurkat_merged.bed:md5,a94e1ec47f0564122ae1eb0f73be4ebd", "cd4_chr21_1_unidirectional_peaks.bed:md5,60e0da7e5691e55d86eb9df9f6ea0c46", "jurkat_chr21_1_unidirectional_peaks.bed:md5,3584ff1a08cdecc92b6fcf6b2db8dc90" - ] + ], + ["cd4_REP1.cram", "cd4_REP2.cram", "jurkat.cram"] ], "meta": { "nf-test": "0.9.2", @@ -357,4 +358,4 @@ }, "timestamp": "2025-05-11T20:01:27.91741" } -} \ No newline at end of file +} diff --git a/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test b/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test index 847b907c..ea9b2858 100644 --- a/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test +++ b/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test @@ -17,8 +17,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -30,7 +30,7 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files + // All cram files // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } ).match() } ) From fabe14783af979ca0ee4f11651db383adbdb2acf Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Mon, 23 Dec 2024 17:58:57 -0600 Subject: [PATCH 4/4] style: Run lsp format --- workflows/nascent.nf | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/workflows/nascent.nf b/workflows/nascent.nf index 41089df5..1348730e 100644 --- a/workflows/nascent.nf +++ b/workflows/nascent.nf @@ -81,7 +81,7 @@ workflow NASCENT { ch_bwamem2_index, ch_dragmap, ch_bowtie2_index, - ch_hisat2_index + ch_hisat2_index, ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) ch_fasta = PREPARE_GENOME.out.fasta.map { fasta -> [[id: fasta.baseName], fasta] } @@ -122,7 +122,7 @@ workflow NASCENT { ch_reads, PREPARE_GENOME.out.bwa_index, false, - ch_fasta + ch_fasta, ) ch_genome_bam = FASTQ_ALIGN_BWA.out.bam ch_genome_bai = FASTQ_ALIGN_BWA.out.bai @@ -137,7 +137,7 @@ workflow NASCENT { ch_reads, PREPARE_GENOME.out.bwa_index, false, - ch_fasta + ch_fasta, ) ch_genome_bam = ALIGN_BWAMEM2.out.bam ch_genome_bai = ALIGN_BWAMEM2.out.bai @@ -152,7 +152,7 @@ workflow NASCENT { ch_reads, PREPARE_GENOME.out.dragmap, false, - ch_fasta + ch_fasta, ) ch_genome_bam = ALIGN_DRAGMAP.out.bam ch_genome_bai = ALIGN_DRAGMAP.out.bai @@ -168,7 +168,7 @@ workflow NASCENT { PREPARE_GENOME.out.bowtie2_index, false, false, - ch_fasta + ch_fasta, ) ch_genome_bam = FASTQ_ALIGN_BOWTIE2.out.bam ch_genome_bai = FASTQ_ALIGN_BOWTIE2.out.bai @@ -193,7 +193,7 @@ workflow NASCENT { ch_reads, ch_hisat2_index, [[:], []], - ch_fasta + ch_fasta, ) ch_genome_bam = FASTQ_ALIGN_HISAT2.out.bam ch_genome_bai = FASTQ_ALIGN_HISAT2.out.bai @@ -208,7 +208,7 @@ workflow NASCENT { if (!ch_star_index) { ch_star_index = STAR_GENOMEGENERATE( ch_fasta, - PREPARE_GENOME.out.gtf.map { [[:], it] } + PREPARE_GENOME.out.gtf.map { [[:], it] }, ).index } else if (ch_star_index.endsWith('.tar.gz')) { @@ -228,7 +228,7 @@ workflow NASCENT { '', '', ch_fasta, - Channel.of([[:], []]) + Channel.of([[:], []]), ) ch_genome_bam = FASTQ_ALIGN_STAR.out.bam ch_genome_bai = FASTQ_ALIGN_STAR.out.bai @@ -243,7 +243,7 @@ workflow NASCENT { if (params.with_umi) { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS( ch_genome_bam.join(ch_genome_bai, by: [0]), - params.umitools_dedup_stats + params.umitools_dedup_stats, ) ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bam ch_genome_bai = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bai @@ -269,7 +269,7 @@ workflow NASCENT { QUALITY_CONTROL( ch_genome_bam_bai, - PREPARE_GENOME.out.gene_bed + PREPARE_GENOME.out.gene_bed, ) ch_versions = ch_versions.mix(QUALITY_CONTROL.out.versions) @@ -277,7 +277,7 @@ workflow NASCENT { ch_genome_bam_bai, PREPARE_GENOME.out.chrom_sizes, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.fai + PREPARE_GENOME.out.fai, ) ch_versions = ch_versions.mix(COVERAGE_GRAPHS.out.versions) @@ -315,7 +315,7 @@ workflow NASCENT { ch_gxf, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.chrom_sizes, - ch_uniqmap + ch_uniqmap, ) ch_grohmm_multiqc = TRANSCRIPT_INDENTIFICATION.out.grohmm_td_plot.collect() ch_homer_multiqc = TRANSCRIPT_INDENTIFICATION.out.homer_peaks @@ -344,7 +344,7 @@ workflow NASCENT { storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_' + 'nascent_software_' + 'mqc_' + 'versions.yml', sort: true, - newLine: true + newLine: true, ) .set { ch_collated_versions } @@ -382,7 +382,7 @@ workflow NASCENT { ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', - sort: true + sort: true, ) ) @@ -408,7 +408,7 @@ workflow NASCENT { ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [] + [], ) emit: