diff --git a/CHANGELOG.md b/CHANGELOG.md index cb3646b7..63d7a8e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## dev - [date] +### Added + +- [#201](https://github.com/nf-core/nascent/pull/201) - Add CRAM output format support with `--bam` parameter to control output format + +### Changed + +- [#201](https://github.com/nf-core/nascent/pull/201) - Default output format changed from BAM to CRAM for space efficiency + ### Fixed TODO Remove bedtools bedgraph diff --git a/conf/modules.config b/conf/modules.config index 7c2e892f..bc890a56 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -185,11 +185,23 @@ process { ] } - withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { + withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/${params.aligner}" }, mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.bam, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: "*.ba*", + enabled: params.bam, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } @@ -243,6 +255,25 @@ process { } } + withName: SAMTOOLS_CONVERT { + publishDir = [ + path: { "${params.outdir}/${params.aligner}" }, + mode: params.publish_dir_mode, + pattern: "*.{cram,crai}", + enabled: !params.bam + ] + } + + withName: SAMTOOLS_MERGE { + publishDir = [ + path: { "${params.outdir}/samtools" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.bam, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + ///////////////////// // Quality Control // ///////////////////// @@ -333,7 +364,7 @@ process { ext.prefix = { "${meta.id}.minus" } } - withName:DREG_PREP { + withName: DREG_PREP { ext.prefix = { "${meta.id}.dreg" } } diff --git a/docs/usage.md b/docs/usage.md index f43e47e7..e99666b7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -60,6 +60,31 @@ The sample column is essentially a concatenation of the group and replicate colu By default, the pipeline uses [BWA](https://bio-bwa.sourceforge.net/) (i.e. `--aligner bwa`) to map the raw FastQ reads to the reference genome. Research as to which aligner works best with Nascent Transcript and Transcription Start Site assays is pending. +## Output Format Options + +### CRAM vs BAM Files + +By default, the pipeline outputs alignment files in CRAM format to save storage space. CRAM files are typically 30-50% smaller than equivalent BAM files while maintaining full data integrity. + +**Default behavior (CRAM output):** + +```bash +nextflow run nf-core/nascent --input samplesheet.csv --outdir results +``` + +**To output BAM files instead:** + +```bash +nextflow run nf-core/nascent --input samplesheet.csv --outdir results --bam +``` + +**Key considerations:** + +- **CRAM files**: Space-efficient, require reference genome for some downstream tools +- **BAM files**: Larger file size, more widely compatible with downstream tools +- Internal processing always uses BAM format for compatibility +- Only final output format is affected by this parameter + ## Reference genome files The minimum reference genome requirements are a FASTA and GTF file, all other files required to run the pipeline can be generated from these files. However, it is more storage and compute friendly if you are able to re-use reference genome files as efficiently as possible. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build new indices (e.g. 
those unavailable on [AWS iGenomes](https://nf-co.re/usage/reference_genomes)) so that you can save them somewhere locally. The index building step can be quite a time-consuming process and it permits their reuse for future runs of the pipeline to save disk space. You can then either provide the appropriate reference genome files on the command-line via the appropriate parameters (e.g. `--star_index '/path/to/BWA/index/'`) or via a custom config file. diff --git a/modules.json b/modules.json index 42021a66..54057116 100644 --- a/modules.json +++ b/modules.json @@ -7,74 +7,72 @@ "nf-core": { "bbmap/pileup": { "branch": "master", - "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "bedtools/bamtobed": { "branch": "master", - "git_sha": "1d1cb7bfef6cf67fbc7faafa6992ad8bdc3045b3", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "bedtools/intersect": { "branch": "master", - "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", + "git_sha": "749d72f162cb1493dca66b454064ed201de2cd95", "installed_by": ["modules"] }, "bedtools/merge": { "branch": "master", - "git_sha": "a5377837fe9013bde89de8689829e83e84086536", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "bedtools/sort": { "branch": "master", - "git_sha": "571a5feac4c9ce0a8df0bc15b94230e7f3e8db47", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "bowtie2/align": { "branch": "master", - "git_sha": "e4bad511789f16d0df39ee306b2cd50418365048", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["fastq_align_bowtie2"] }, "bowtie2/build": { "branch": "master", - "git_sha": "1fea64f5132a813ec97c1c6d3a74e0aee7142b6d", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "bwa/index": { "branch": "master", - "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", 
- "installed_by": ["modules"], - "patch": "modules/nf-core/bwa/index/bwa-index.diff" + "git_sha": "90aef30f432332bdf0ce9f4b9004aa5d5c4960bb", + "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", - "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["fastq_align_bwa"] }, "bwamem2/index": { "branch": "master", - "git_sha": "7081e04c18de9480948d34513a1c1e2d0fa9126d", - "installed_by": ["modules"], - "patch": "modules/nf-core/bwamem2/index/bwamem2-index.diff" + "git_sha": "90aef30f432332bdf0ce9f4b9004aa5d5c4960bb", + "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", - "git_sha": "3afb95b2e15fc4a2347470255a7ef654f650c8ec", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "cat/cat": { "branch": "master", - "git_sha": "c60c14b285b89bdd0607e371417dadb80385ad6e", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", - "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "custom/getchromsizes": { "branch": "master", - "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "deeptools/bamcoverage": { @@ -84,18 +82,17 @@ }, "dragmap/align": { "branch": "master", - "git_sha": "dd2757cc22c5de8943fa38ba7cd6f8cc1eb65ac1", - "installed_by": ["modules"], - "patch": "modules/nf-core/dragmap/align/dragmap-align.diff" + "git_sha": "4f5274c3de0c9521f5033893ff61057a74c45ba9", + "installed_by": ["modules"] }, "dragmap/hashtable": { "branch": "master", - "git_sha": "ae9e01cb5e77faada314047e78423b22b4f5bbc5", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "fastp": { "branch": "master", - "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", + "git_sha": 
"06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "fastqc": { @@ -105,7 +102,7 @@ }, "gffread": { "branch": "master", - "git_sha": "6c996d7fbe0816dcbb68ce587ad5f873313682a1", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "gunzip": { @@ -145,9 +142,8 @@ }, "pints/caller": { "branch": "master", - "git_sha": "ac55541b663c35e9ff50f79d32049ce7492a6ea5", - "installed_by": ["modules"], - "patch": "modules/nf-core/pints/caller/pints-caller.diff" + "git_sha": "8d5737116be7d55f5642ea96cc6d9396389a1811", + "installed_by": ["modules"] }, "preseq/ccurve": { "branch": "master", @@ -156,49 +152,54 @@ }, "preseq/lcextrap": { "branch": "master", - "git_sha": "2c6b1144ed58b6184ad58fc4e6b6a90219b4bf4f", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "rseqc/bamstat": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["bam_rseqc"] }, "rseqc/inferexperiment": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc", "modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["bam_rseqc"] }, "rseqc/innerdistance": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["bam_rseqc"] }, "rseqc/junctionannotation": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["bam_rseqc"] }, "rseqc/junctionsaturation": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["bam_rseqc"] }, "rseqc/readdistribution": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc", 
"modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["bam_rseqc"] }, "rseqc/readduplication": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", - "installed_by": ["bam_rseqc", "modules"] + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["bam_rseqc"] }, "rseqc/tin": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["bam_rseqc"] }, + "samtools/convert": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, "samtools/flagstat": { "branch": "master", "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", @@ -232,16 +233,16 @@ "star/align": { "branch": "master", "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", - "installed_by": ["fastq_align_star", "modules"] + "installed_by": ["fastq_align_star"] }, "star/genomegenerate": { "branch": "master", - "git_sha": "a21faa6a3481af92a343a10926f59c189a2c16c9", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "subread/featurecounts": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "umitools/dedup": { @@ -290,12 +291,12 @@ }, "fastq_align_bowtie2": { "branch": "master", - "git_sha": "0eacd714effe5aac1c1de26593873960b3346cab", + "git_sha": "0fa40bbf85955171ec197334ac70a1197fba600d", "installed_by": ["subworkflows"] }, "fastq_align_bwa": { "branch": "master", - "git_sha": "e0ff65e1fb313677de09f5f477ae3da30ce19b7b", + "git_sha": "0fa40bbf85955171ec197334ac70a1197fba600d", "installed_by": ["subworkflows"] }, "fastq_align_hisat2": { @@ -305,7 +306,7 @@ }, "fastq_align_star": { "branch": "master", - "git_sha": "b4919e9a2b4d8b71061e601633db4600a3858fa1", + "git_sha": "6a367612c2c27aa659293c656afa764361223db4", "installed_by": 
["subworkflows"] }, "homer_groseq": { diff --git a/modules/nf-core/samtools/convert/environment.yml b/modules/nf-core/samtools/convert/environment.yml new file mode 100644 index 00000000..62054fc9 --- /dev/null +++ b/modules/nf-core/samtools/convert/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/convert/main.nf b/modules/nf-core/samtools/convert/main.nf new file mode 100644 index 00000000..cf9253d1 --- /dev/null +++ b/modules/nf-core/samtools/convert/main.nf @@ -0,0 +1,60 @@ +process SAMTOOLS_CONVERT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.bam") , emit: bam , optional: true + tuple val(meta), path("*.cram") , emit: cram, optional: true + tuple val(meta), path("*.bai") , emit: bai , optional: true + tuple val(meta), path("*.crai") , emit: crai, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? 
"cram" : "bam" + + """ + samtools view \\ + --threads ${task.cpus} \\ + --reference ${fasta} \\ + $args \\ + $input \\ + -o ${prefix}.${output_extension} + + samtools index -@${task.cpus} ${prefix}.${output_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? "cram" : "bam" + def index_extension = output_extension == "bam" ? "bai" : "crai" + + """ + touch ${prefix}.${output_extension} + touch ${prefix}.${index_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/convert/meta.yml b/modules/nf-core/samtools/convert/meta.yml new file mode 100644 index 00000000..d5bfa161 --- /dev/null +++ b/modules/nf-core/samtools/convert/meta.yml @@ -0,0 +1,103 @@ +name: samtools_convert +description: convert and then index CRAM -> BAM or BAM -> CRAM file +keywords: + - view + - index + - bam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Reference file to create the CRAM file + pattern: "*.{fasta,fa}" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: Reference index file to create the CRAM file + pattern: "*.{fai}" +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: filtered/converted BAM file + pattern: "*{.bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: filtered/converted CRAM file + pattern: "*{cram}" + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: filtered/converted BAM index + pattern: "*{.bai}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: filtered/converted CRAM index + pattern: "*{.crai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@FriederikeHanssen" + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/samtools/convert/tests/main.nf.test b/modules/nf-core/samtools/convert/tests/main.nf.test new file mode 100644 index 00000000..91a0c69e --- /dev/null +++ b/modules/nf-core/samtools/convert/tests/main.nf.test @@ -0,0 +1,107 @@ +nextflow_process { + + name "Test Process SAMTOOLS_CONVERT" + script "../main.nf" + process "SAMTOOLS_CONVERT" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/convert" + + test("sarscov2 - [bam, bai], fasta, fai") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("bam_to_cram_alignment") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("bam_to_cram_index") }, + { assert snapshot(process.out.versions).match("bam_to_cram_versions") } + ) + } + } + + test("homo_sapiens - [cram, crai], fasta, fai") { + + when { + process { + """ + input[0] = 
Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_alignment") }, + { assert snapshot(file(process.out.bai[0][1]).name).match("cram_to_bam_alignment_index") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("sarscov2 - [bam, bai], fasta, fai - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'fai' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("stub") } + ) + } + } +} diff --git a/modules/nf-core/samtools/convert/tests/main.nf.test.snap 
b/modules/nf-core/samtools/convert/tests/main.nf.test.snap new file mode 100644 index 00000000..a021254e --- /dev/null +++ b/modules/nf-core/samtools/convert/tests/main.nf.test.snap @@ -0,0 +1,131 @@ +{ + "cram_to_bam_alignment": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:51.300147176" + }, + "bam_to_cram_alignment": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:36.625470184" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:52:35.516411351" + }, + "bam_to_cram_versions": { + "content": [ + [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:52:24.694454205" + }, + "stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ], + "bai": [ + + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5bc6eb42ab2a1ea6661f8ee998467ad6" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T07:52:45.799885099" + }, + "bam_to_cram_index": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + 
"timestamp": "2024-03-06T11:14:36.640009334" + }, + "cram_to_bam_alignment_index": { + "content": [ + "test.bam.bai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T11:14:51.304477426" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/convert/tests/tags.yml b/modules/nf-core/samtools/convert/tests/tags.yml new file mode 100644 index 00000000..030d5eb5 --- /dev/null +++ b/modules/nf-core/samtools/convert/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/convert: + - "modules/nf-core/samtools/convert/**" diff --git a/nextflow.config b/nextflow.config index affc0fed..29d1c8e8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,6 +21,7 @@ params { // Alignment aligner = 'bwa' skip_alignment = false + bam = false // Transcript identification method assay_type = null diff --git a/nextflow_schema.json b/nextflow_schema.json index d2b1e488..5cb47c04 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -66,6 +66,12 @@ "description": "Skip the adapter trimming step.", "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.", "fa_icon": "fas fa-fast-forward" + }, + "bam": { + "type": "boolean", + "fa_icon": "fas fa-file-archive", + "description": "Output BAM alignment files instead of CRAM.", + "help_text": "By default, the pipeline outputs CRAM alignment files which are smaller and more efficient. Use this parameter if you need BAM files for downstream analysis that doesn't support CRAM format." 
} } }, diff --git a/tests/.nftignore b/tests/.nftignore index 60cbdcea..54ba86a7 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -23,7 +23,7 @@ hisat2/log/*.hisat2.summary.log **/*.command.log **/*.bedGraph.gz **/tagInfo.txt -**/*.{bam,bai} +**/*.{cram,crai} **/*.featureCounts.txt **/*.featureCounts.txt.summary **/*.pdf diff --git a/workflows/nascent.nf b/workflows/nascent.nf index f377ee8b..1348730e 100644 --- a/workflows/nascent.nf +++ b/workflows/nascent.nf @@ -19,7 +19,7 @@ include { UNTAR as UNTAR_STAR_INDEX } from '../mo include { STAR_GENOMEGENERATE } from '../modules/nf-core/star/genomegenerate/main' include { SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_GENE } from '../modules/nf-core/subread/featurecounts/main' include { SUBREAD_FEATURECOUNTS as SUBREAD_FEATURECOUNTS_PREDICTED } from '../modules/nf-core/subread/featurecounts/main' - +include { SAMTOOLS_CONVERT } from '../modules/nf-core/samtools/convert/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' @@ -81,7 +81,7 @@ workflow NASCENT { ch_bwamem2_index, ch_dragmap, ch_bowtie2_index, - ch_hisat2_index + ch_hisat2_index, ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) ch_fasta = PREPARE_GENOME.out.fasta.map { fasta -> [[id: fasta.baseName], fasta] } @@ -122,7 +122,7 @@ workflow NASCENT { ch_reads, PREPARE_GENOME.out.bwa_index, false, - ch_fasta + ch_fasta, ) ch_genome_bam = FASTQ_ALIGN_BWA.out.bam ch_genome_bai = FASTQ_ALIGN_BWA.out.bai @@ -137,7 +137,7 @@ workflow NASCENT { ch_reads, PREPARE_GENOME.out.bwa_index, false, - ch_fasta + ch_fasta, ) ch_genome_bam = ALIGN_BWAMEM2.out.bam ch_genome_bai = ALIGN_BWAMEM2.out.bai @@ -152,7 +152,7 @@ workflow NASCENT { ch_reads, PREPARE_GENOME.out.dragmap, false, - ch_fasta + ch_fasta, ) ch_genome_bam = ALIGN_DRAGMAP.out.bam ch_genome_bai = ALIGN_DRAGMAP.out.bai @@ -168,7 +168,7 @@ workflow NASCENT { PREPARE_GENOME.out.bowtie2_index, false, false, - ch_fasta + ch_fasta, ) 
ch_genome_bam = FASTQ_ALIGN_BOWTIE2.out.bam ch_genome_bai = FASTQ_ALIGN_BOWTIE2.out.bai @@ -193,7 +193,7 @@ workflow NASCENT { ch_reads, ch_hisat2_index, [[:], []], - ch_fasta + ch_fasta, ) ch_genome_bam = FASTQ_ALIGN_HISAT2.out.bam ch_genome_bai = FASTQ_ALIGN_HISAT2.out.bai @@ -208,7 +208,7 @@ workflow NASCENT { if (!ch_star_index) { ch_star_index = STAR_GENOMEGENERATE( ch_fasta, - PREPARE_GENOME.out.gtf.map { [[:], it] } + PREPARE_GENOME.out.gtf.map { [[:], it] }, ).index } else if (ch_star_index.endsWith('.tar.gz')) { @@ -228,7 +228,7 @@ workflow NASCENT { '', '', ch_fasta, - Channel.of([[:], []]) + Channel.of([[:], []]), ) ch_genome_bam = FASTQ_ALIGN_STAR.out.bam ch_genome_bai = FASTQ_ALIGN_STAR.out.bai @@ -243,7 +243,7 @@ workflow NASCENT { if (params.with_umi) { BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS( ch_genome_bam.join(ch_genome_bai, by: [0]), - params.umitools_dedup_stats + params.umitools_dedup_stats, ) ch_genome_bam = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bam ch_genome_bai = BAM_DEDUP_STATS_SAMTOOLS_UMITOOLS.out.bai @@ -256,9 +256,20 @@ workflow NASCENT { ch_genome_bam_bai = ch_genome_bam.join(ch_genome_bai, by: [0], remainder: true) + // Publish CRAM files by default, BAM files if --bam is specified + // https://nf-co.re/docs/guidelines/pipelines/recommendations/file_formats + if (!params.bam) { + SAMTOOLS_CONVERT( + ch_genome_bam_bai, + ch_fasta, + PREPARE_GENOME.out.fai.map { [[:], it] }, + ) + ch_versions = ch_versions.mix(SAMTOOLS_CONVERT.out.versions.first()) + } + QUALITY_CONTROL( ch_genome_bam_bai, - PREPARE_GENOME.out.gene_bed + PREPARE_GENOME.out.gene_bed, ) ch_versions = ch_versions.mix(QUALITY_CONTROL.out.versions) @@ -266,7 +277,7 @@ workflow NASCENT { ch_genome_bam_bai, PREPARE_GENOME.out.chrom_sizes, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.fai + PREPARE_GENOME.out.fai, ) ch_versions = ch_versions.mix(COVERAGE_GRAPHS.out.versions) @@ -304,7 +315,7 @@ workflow NASCENT { ch_gxf, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.chrom_sizes, - 
ch_uniqmap + ch_uniqmap, ) ch_grohmm_multiqc = TRANSCRIPT_INDENTIFICATION.out.grohmm_td_plot.collect() ch_homer_multiqc = TRANSCRIPT_INDENTIFICATION.out.homer_peaks @@ -333,7 +344,7 @@ workflow NASCENT { storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_' + 'nascent_software_' + 'mqc_' + 'versions.yml', sort: true, - newLine: true + newLine: true, ) .set { ch_collated_versions } @@ -371,7 +382,7 @@ workflow NASCENT { ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', - sort: true + sort: true, ) ) @@ -397,7 +408,7 @@ workflow NASCENT { ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList(), [], - [] + [], ) emit: diff --git a/workflows/tests/aligner/bowtie2.nf.test b/workflows/tests/aligner/bowtie2.nf.test index 4f91e553..154dec64 100644 --- a/workflows/tests/aligner/bowtie2.nf.test +++ b/workflows/tests/aligner/bowtie2.nf.test @@ -19,8 +19,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -32,8 +32,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git 
a/workflows/tests/aligner/bowtie2.nf.test.snap b/workflows/tests/aligner/bowtie2.nf.test.snap index b2f1b071..20c2f906 100644 --- a/workflows/tests/aligner/bowtie2.nf.test.snap +++ b/workflows/tests/aligner/bowtie2.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with defaults": { "content": [ - 71, + 74, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -63,6 +63,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_FLAGSTAT": { "samtools": 1.21 }, @@ -90,12 +93,12 @@ }, [ "bowtie2", - "bowtie2/cd4_REP1.sorted.bam", - "bowtie2/cd4_REP1.sorted.bam.bai", - "bowtie2/cd4_REP2.sorted.bam", - "bowtie2/cd4_REP2.sorted.bam.bai", - "bowtie2/jurkat.sorted.bam", - "bowtie2/jurkat.sorted.bam.bai", + "bowtie2/cd4_REP1.cram", + "bowtie2/cd4_REP1.cram.crai", + "bowtie2/cd4_REP2.cram", + "bowtie2/cd4_REP2.cram.crai", + "bowtie2/jurkat.cram", + "bowtie2/jurkat.cram.crai", "bowtie2/log", "bowtie2/log/cd4_REP1.bowtie2.log", "bowtie2/log/cd4_REP2.bowtie2.log", @@ -230,9 +233,6 @@ "quantification/gene/cd4.featureCounts.txt.summary", "quantification/gene/jurkat.featureCounts.txt", "quantification/gene/jurkat.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -296,12 +296,17 @@ "jurkat.bed:md5,5e170e72c4e2b27a7bb0a6de7b735c1c", "jurkat.peaks.txt:md5,100cb761b6b7abad3901775e499a6aa1", "versions.yml:md5,7c0dcd0a18b3c753def73b96cb825792" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "25.03.1" }, - "timestamp": "2025-05-20T21:52:41.120419" + "timestamp": "2025-07-06T17:57:23.93854" } } \ No newline at end of file diff --git a/workflows/tests/aligner/bwa.nf.test b/workflows/tests/aligner/bwa.nf.test index ed8e201a..06d2a004 100644 --- a/workflows/tests/aligner/bwa.nf.test +++ b/workflows/tests/aligner/bwa.nf.test @@ -20,8 +20,8 @@ 
nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -33,8 +33,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getHeaderMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/aligner/bwa.nf.test.snap b/workflows/tests/aligner/bwa.nf.test.snap index 446d1da3..605b4c48 100644 --- a/workflows/tests/aligner/bwa.nf.test.snap +++ b/workflows/tests/aligner/bwa.nf.test.snap @@ -1,27 +1,27 @@ { "Should work with gzipped references": { "content": [ - 52 + 55 ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2025-05-11T15:15:07.710999" + "timestamp": "2025-06-29T19:45:51.976463" }, "Should work with BWA Index": { "content": [ - 56 + 59 ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2025-05-11T15:14:14.361165" + "timestamp": "2025-06-29T19:44:37.964822" }, "Should run with defaults": { "content": [ - 137, + 143, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -95,6 +95,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -110,18 +113,18 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - 
"bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/cd4_REP3.sorted.bam", - "bwa/cd4_REP3.sorted.bam.bai", - "bwa/cd4_REP4.sorted.bam", - "bwa/cd4_REP4.sorted.bam.bai", - "bwa/jurkat_REP1.sorted.bam", - "bwa/jurkat_REP1.sorted.bam.bai", - "bwa/jurkat_REP2.sorted.bam", - "bwa/jurkat_REP2.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/cd4_REP3.cram", + "bwa/cd4_REP3.cram.crai", + "bwa/cd4_REP4.cram", + "bwa/cd4_REP4.cram.crai", + "bwa/jurkat_REP1.cram", + "bwa/jurkat_REP1.cram.crai", + "bwa/jurkat_REP2.cram", + "bwa/jurkat_REP2.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -323,9 +326,6 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -416,12 +416,20 @@ "jurkat_merged.bed:md5,cae11a1bfb707ea2df5fe612ae7268c8", "cd4_chr21_1_unidirectional_peaks.bed:md5,864532d867843d8ad1545b90d5dcd762", "jurkat_chr21_1_unidirectional_peaks.bed:md5,862a5e81119acc691845f3b426847401" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "cd4_REP3.cram", + "cd4_REP4.cram", + "jurkat_REP1.cram", + "jurkat_REP2.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.2" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-21T04:07:46.607615194" + "timestamp": "2025-07-06T23:42:11.465363652" } -} \ No newline at end of file +} diff --git a/workflows/tests/aligner/bwamem2.nf.test b/workflows/tests/aligner/bwamem2.nf.test index 2bebf8ff..e529d294 100644 --- 
a/workflows/tests/aligner/bwamem2.nf.test +++ b/workflows/tests/aligner/bwamem2.nf.test @@ -17,8 +17,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -30,8 +30,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/aligner/bwamem2.nf.test.snap b/workflows/tests/aligner/bwamem2.nf.test.snap index 8799e53e..c98777e6 100644 --- a/workflows/tests/aligner/bwamem2.nf.test.snap +++ b/workflows/tests/aligner/bwamem2.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with defaults": { "content": [ - 86, + 89, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -75,6 +75,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_FLAGSTAT": { "samtools": 1.21 }, @@ -105,12 +108,12 @@ }, [ "bwamem2", - "bwamem2/cd4_REP1.sorted.bam", - "bwamem2/cd4_REP1.sorted.bam.bai", - "bwamem2/cd4_REP2.sorted.bam", - "bwamem2/cd4_REP2.sorted.bam.bai", - "bwamem2/jurkat.sorted.bam", - "bwamem2/jurkat.sorted.bam.bai", + "bwamem2/cd4_REP1.cram", + "bwamem2/cd4_REP1.cram.crai", + "bwamem2/cd4_REP2.cram", + "bwamem2/cd4_REP2.cram.crai", + 
"bwamem2/jurkat.cram", + "bwamem2/jurkat.cram.crai", "bwamem2/samtools_stats", "bwamem2/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwamem2/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -249,9 +252,6 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -324,12 +324,17 @@ "jurkat_merged.bed:md5,f1dde43c4ad9dec972ff9fa38cc6f2fe", "cd4_chr21_1_unidirectional_peaks.bed:md5,0193e58943726af89bfd00e9da2536d8", "jurkat_chr21_1_unidirectional_peaks.bed:md5,cb6932229eea2e09f61d48d7dd397ae1" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-11T12:54:16.867564" + "timestamp": "2025-07-06T23:47:48.402842458" } } \ No newline at end of file diff --git a/workflows/tests/aligner/hisat2.nf.test b/workflows/tests/aligner/hisat2.nf.test index 8deb6f2b..b41d4640 100644 --- a/workflows/tests/aligner/hisat2.nf.test +++ b/workflows/tests/aligner/hisat2.nf.test @@ -20,8 +20,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert 
workflow.success }, { assert snapshot( @@ -33,8 +33,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/aligner/hisat2.nf.test.snap b/workflows/tests/aligner/hisat2.nf.test.snap index cf17c8dd..81be575e 100644 --- a/workflows/tests/aligner/hisat2.nf.test.snap +++ b/workflows/tests/aligner/hisat2.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with defaults": { "content": [ - 83, + 86, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -72,6 +72,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_FLAGSTAT": { "samtools": 1.21 }, @@ -112,12 +115,12 @@ "coverage_graphs/jurkat.minus.bigWig", "coverage_graphs/jurkat.plus.bigWig", "hisat2", - "hisat2/cd4_REP1.sorted.bam", - "hisat2/cd4_REP1.sorted.bam.bai", - "hisat2/cd4_REP2.sorted.bam", - "hisat2/cd4_REP2.sorted.bam.bai", - "hisat2/jurkat.sorted.bam", - "hisat2/jurkat.sorted.bam.bai", + "hisat2/cd4_REP1.cram", + "hisat2/cd4_REP1.cram.crai", + "hisat2/cd4_REP2.cram", + "hisat2/cd4_REP2.cram.crai", + "hisat2/jurkat.cram", + "hisat2/jurkat.cram.crai", "hisat2/log", "hisat2/log/cd4_REP1.hisat2.summary.log", "hisat2/log/cd4_REP2.hisat2.summary.log", @@ -245,9 +248,6 @@ "quantification/gene/cd4.featureCounts.txt.summary", "quantification/gene/jurkat.featureCounts.txt", "quantification/gene/jurkat.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -338,12 +338,17 @@ "GRCh38_chr21.6.ht2:md5,242e36d01cd1719b6bd05f157c644eed", 
"GRCh38_chr21.7.ht2:md5,24e7d0673a77e07fbe40400f9a6b3db6", "GRCh38_chr21.8.ht2:md5,5e0626bdb7f7a267990f72ae45c3e44a" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.04.2" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-21T04:25:30.229280071" + "timestamp": "2025-07-06T23:52:35.006090205" } } \ No newline at end of file diff --git a/workflows/tests/aligner/star.nf.test b/workflows/tests/aligner/star.nf.test index 1fbcd996..b521aa96 100644 --- a/workflows/tests/aligner/star.nf.test +++ b/workflows/tests/aligner/star.nf.test @@ -19,8 +19,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -32,8 +32,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/aligner/star.nf.test.snap b/workflows/tests/aligner/star.nf.test.snap index de9b0b04..7507b9a0 100644 --- a/workflows/tests/aligner/star.nf.test.snap +++ b/workflows/tests/aligner/star.nf.test.snap @@ -1,17 +1,17 @@ { "Should run with gzipped gtf": { "content": [ - 87 + 90 ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + 
"nextflow": "24.10.3" }, - "timestamp": "2025-05-20T22:05:06.864437" + "timestamp": "2025-06-29T19:53:50.826998" }, "Should run with defaults": { "content": [ - 86, + 89, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -78,6 +78,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -220,10 +223,13 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "star", + "star/cd4_REP1.cram", + "star/cd4_REP1.cram.crai", + "star/cd4_REP2.cram", + "star/cd4_REP2.cram.crai", + "star/jurkat.cram", + "star/jurkat.cram.crai", "star/log", "star/log/cd4_REP1.Log.final.out", "star/log/cd4_REP1.Log.out", @@ -354,13 +360,18 @@ "jurkat_merged.bed:md5,95a5279ec2387dfa0b4c2e7820083527", "cd4_chr21_1_unidirectional_peaks.bed:md5,26765aa153cb1d6bb668f5786da5763e", "jurkat_chr21_1_unidirectional_peaks.bed:md5,15a3bec7a3ffb53c1e621665b3f45873" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + "nextflow": "25.04.6" }, - "timestamp": "2025-05-20T22:02:00.825316" + "timestamp": "2025-07-06T23:56:14.617739031" }, "gzip_software_versions": { "content": [ @@ -433,6 +444,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -449,8 +463,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + "nextflow": "24.10.3" }, - "timestamp": "2025-05-20T22:05:05.079173" + "timestamp": "2025-06-29T19:53:50.72646" } } \ No newline at end of file diff --git a/workflows/tests/bam_publishing.nf.test b/workflows/tests/bam_publishing.nf.test new file mode 100644 index 00000000..f179bf94 --- /dev/null +++ 
b/workflows/tests/bam_publishing.nf.test @@ -0,0 +1,102 @@ +nextflow_pipeline { + + name "BAM Publishing with --bam parameter" + script "../../main.nf" + tag "bam" + tag "publishing" + + test("Should publish BAM files when --bam parameter is used") { + + when { + params { + outdir = "$outputDir" + aligner = "bowtie2" + bam = true + } + } + + then { + // Get all BAM files + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // Get all CRAM files (should be none when --bam is used) + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + // Get BAM index files + def bam_index_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam.bai']) + + assertAll( + { assert workflow.success }, + { assert bam_files.size() > 0 : "No BAM files found in output when --bam parameter was used" }, + { assert cram_files.size() == 0 : "CRAM files found when --bam parameter should produce BAM files" }, + { assert bam_index_files.size() > 0 : "No BAM index files found" }, + { assert snapshot( + // Number of tasks + workflow.trace.succeeded().size(), + // BAM files (names only, content validation removed for consistency) + bam_files.collect{ file -> file.getName() }, + // BAM index files + bam_index_files.collect{ file -> file.getName() } + ).match() } + ) + } + } + + test("Should publish BAM files with different aligner when --bam is used") { + + when { + params { + outdir = "$outputDir" + aligner = "bwa" + bam = true + } + } + + then { + // Get all BAM files + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // Get all CRAM files (should be none when --bam is used) + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + + assertAll( + { assert workflow.success }, + { assert bam_files.size() > 0 : "No BAM files found in output when --bam parameter was used" }, + { assert cram_files.size() == 0 : "CRAM files found when --bam parameter should produce BAM files" }, + { assert snapshot( + // 
Number of tasks + workflow.trace.succeeded().size(), + // BAM files (names only, content validation removed for consistency) + bam_files.collect{ file -> file.getName() } + ).match() } + ) + } + } + + test("Should handle --bam parameter with STAR aligner") { + + when { + params { + outdir = "$outputDir" + aligner = "star" + bam = true + } + } + + then { + // Get all BAM files + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // Get all CRAM files (should be none when --bam is used) + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + + assertAll( + { assert workflow.success }, + { assert bam_files.size() > 0 : "No BAM files found in output when --bam parameter was used with STAR" }, + { assert cram_files.size() == 0 : "CRAM files found when --bam parameter should produce BAM files with STAR" }, + { assert snapshot( + // Number of tasks + workflow.trace.succeeded().size(), + // BAM files (names only, content validation removed for consistency) + bam_files.collect{ file -> file.getName() } + ).match() } + ) + } + } +} diff --git a/workflows/tests/bam_publishing.nf.test.snap b/workflows/tests/bam_publishing.nf.test.snap new file mode 100644 index 00000000..11bc2574 --- /dev/null +++ b/workflows/tests/bam_publishing.nf.test.snap @@ -0,0 +1,55 @@ +{ + "Should handle --bam parameter with STAR aligner": { + "content": [ + 86, + [ + "cd4.bam", + "jurkat.bam" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-28T23:35:22.265415" + }, + "Should publish BAM files when --bam parameter is used": { + "content": [ + 71, + [ + "cd4_REP1.sorted.bam", + "cd4_REP2.sorted.bam", + "jurkat.sorted.bam", + "cd4.bam", + "jurkat.bam" + ], + [ + "cd4_REP1.sorted.bam.bai", + "cd4_REP2.sorted.bam.bai", + "jurkat.sorted.bam.bai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-29T14:47:13.244785" + }, + "Should publish BAM files with different aligner 
when --bam is used": { + "content": [ + 86, + [ + "cd4_REP1.sorted.bam", + "cd4_REP2.sorted.bam", + "jurkat.sorted.bam", + "cd4.bam", + "jurkat.bam" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-28T23:31:10.551067" + } +} \ No newline at end of file diff --git a/workflows/tests/cram_publishing.nf.test b/workflows/tests/cram_publishing.nf.test new file mode 100644 index 00000000..95bc3c62 --- /dev/null +++ b/workflows/tests/cram_publishing.nf.test @@ -0,0 +1,70 @@ +nextflow_pipeline { + + name "CRAM Publishing Default" + script "../../main.nf" + tag "cram" + tag "publishing" + + test("Should publish CRAM files by default") { + + when { + params { + outdir = "$outputDir" + aligner = "bowtie2" + } + } + + then { + // Get all CRAM files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + // Get all BAM files (should be none with default settings) + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // Get CRAM index files + def cram_index_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram.crai']) + + assertAll( + { assert workflow.success }, + { assert cram_files.size() > 0 : "No CRAM files found in output" }, + { assert bam_files.size() == 0 : "BAM files found when CRAM should be default" }, + { assert cram_index_files.size() > 0 : "No CRAM index files found" }, + { assert snapshot( + // Number of tasks + workflow.trace.succeeded().size(), + // CRAM files (names only, content validation removed due to reference mismatch) + cram_files.collect{ file -> file.getName() }, + // CRAM index files + cram_index_files.collect{ file -> file.getName() } + ).match() } + ) + } + } + + test("Should publish CRAM files with different aligner") { + + when { + params { + outdir = "$outputDir" + aligner = "bwa" + } + } + + then { + // Get all CRAM files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) + // Get all BAM files (should be none with 
default settings) + def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + + assertAll( + { assert workflow.success }, + { assert cram_files.size() > 0 : "No CRAM files found in output" }, + { assert bam_files.size() == 0 : "BAM files found when CRAM should be default" }, + { assert snapshot( + // Number of tasks + workflow.trace.succeeded().size(), + // CRAM files (names only, content validation removed due to reference mismatch) + cram_files.collect{ file -> file.getName() } + ).match() } + ) + } + } +} diff --git a/workflows/tests/cram_publishing.nf.test.snap b/workflows/tests/cram_publishing.nf.test.snap new file mode 100644 index 00000000..1ced2a8c --- /dev/null +++ b/workflows/tests/cram_publishing.nf.test.snap @@ -0,0 +1,37 @@ +{ + "Should publish CRAM files by default": { + "content": [ + 74, + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" + ], + [ + "cd4_REP1.cram.crai", + "cd4_REP2.cram.crai", + "jurkat.cram.crai" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-28T23:06:40.237907" + }, + "Should publish CRAM files with different aligner": { + "content": [ + 89, + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-06-28T23:08:55.886843" + } +} \ No newline at end of file diff --git a/workflows/tests/inputs/gff/main.nf.test b/workflows/tests/inputs/gff/main.nf.test index 258a30bf..52b01cd9 100644 --- a/workflows/tests/inputs/gff/main.nf.test +++ b/workflows/tests/inputs/gff/main.nf.test @@ -11,6 +11,7 @@ nextflow_pipeline { params { outdir = "$outputDir" gff = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/genes_chr21.gff' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21.fa' } } @@ -19,8 +20,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: 
['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -32,8 +33,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/inputs/gff/main.nf.test.snap b/workflows/tests/inputs/gff/main.nf.test.snap index fee78d85..b2c67297 100644 --- a/workflows/tests/inputs/gff/main.nf.test.snap +++ b/workflows/tests/inputs/gff/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with an unzipped GFF file": { "content": [ - 86, + 89, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -75,6 +75,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -90,12 +93,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -234,9 +237,6 @@ 
"quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -309,12 +309,17 @@ "jurkat_merged.bed:md5,f1dde43c4ad9dec972ff9fa38cc6f2fe", "cd4_chr21_1_unidirectional_peaks.bed:md5,0193e58943726af89bfd00e9da2536d8", "jurkat_chr21_1_unidirectional_peaks.bed:md5,cb6932229eea2e09f61d48d7dd397ae1" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.3" + "nextflow": "25.03.1" }, - "timestamp": "2025-05-11T19:48:55.653909" + "timestamp": "2025-07-06T21:31:16.708314" } } \ No newline at end of file diff --git a/workflows/tests/inputs/gzipped_gff/main.nf.test b/workflows/tests/inputs/gzipped_gff/main.nf.test index 33bc949f..7319402c 100644 --- a/workflows/tests/inputs/gzipped_gff/main.nf.test +++ b/workflows/tests/inputs/gzipped_gff/main.nf.test @@ -11,6 +11,7 @@ nextflow_pipeline { params { outdir = "$outputDir" gff = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/genes_chr21.gff.gz' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21.fa' } } @@ -19,8 +20,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def 
cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -32,8 +33,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/inputs/gzipped_gff/main.nf.test.snap b/workflows/tests/inputs/gzipped_gff/main.nf.test.snap index 9f1ef491..a1443cfc 100644 --- a/workflows/tests/inputs/gzipped_gff/main.nf.test.snap +++ b/workflows/tests/inputs/gzipped_gff/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with a gzipped GFF file": { "content": [ - 86, + 89, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -75,6 +75,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -90,12 +93,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -234,9 +237,6 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", 
"transcript_identification/filtered/cd4_filtered.bed", @@ -309,12 +309,17 @@ "jurkat_merged.bed:md5,f1dde43c4ad9dec972ff9fa38cc6f2fe", "cd4_chr21_1_unidirectional_peaks.bed:md5,0193e58943726af89bfd00e9da2536d8", "jurkat_chr21_1_unidirectional_peaks.bed:md5,cb6932229eea2e09f61d48d7dd397ae1" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "25.03.1" }, - "timestamp": "2025-05-20T21:38:08.846055" + "timestamp": "2025-07-06T21:27:10.363164" } } \ No newline at end of file diff --git a/workflows/tests/inputs/only_gff/main.nf.test b/workflows/tests/inputs/only_gff/main.nf.test index b9fd1389..c05a9154 100644 --- a/workflows/tests/inputs/only_gff/main.nf.test +++ b/workflows/tests/inputs/only_gff/main.nf.test @@ -11,6 +11,7 @@ nextflow_pipeline { params { outdir = "$outputDir" gff = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/genes_chr21.gff' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nascent/reference/GRCh38_chr21.fa' gtf = null bed = null } @@ -21,8 +22,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -34,8 +35,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 
- Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/inputs/only_gff/main.nf.test.snap b/workflows/tests/inputs/only_gff/main.nf.test.snap index 198d7127..de608638 100644 --- a/workflows/tests/inputs/only_gff/main.nf.test.snap +++ b/workflows/tests/inputs/only_gff/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with only a GFF file": { "content": [ - 87, + 90, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -78,6 +78,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -93,12 +96,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -237,9 +240,6 @@ "quantification/nascent/cd4-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -312,12 +312,17 @@ "jurkat_merged.bed:md5,f1dde43c4ad9dec972ff9fa38cc6f2fe", "cd4_chr21_1_unidirectional_peaks.bed:md5,0193e58943726af89bfd00e9da2536d8", "jurkat_chr21_1_unidirectional_peaks.bed:md5,cb6932229eea2e09f61d48d7dd397ae1" + ], + [ + "cd4_REP1.cram", + "cd4_REP2.cram", + "jurkat.cram" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "25.03.1" }, - "timestamp": "2025-05-20T22:11:14.562542" + "timestamp": 
"2025-07-06T21:29:11.669792" } } \ No newline at end of file diff --git a/workflows/tests/inputs/uniqmap/main.nf.test.snap b/workflows/tests/inputs/uniqmap/main.nf.test.snap index fc5bce3c..4fc67673 100644 --- a/workflows/tests/inputs/uniqmap/main.nf.test.snap +++ b/workflows/tests/inputs/uniqmap/main.nf.test.snap @@ -1,7 +1,7 @@ { "output_files": { "content": [ - 72, + 75, "# uniqMapDirectory = uniqmap.GRCh38_chr21.50nt", "# cmd = findPeaks cd4_tagdir -style groseq -o cd4-uniqmap.GRCh38_chr21.peaks.txt -uniqmap uniqmap.GRCh38_chr21.50nt", "# uniqMapDirectory = uniqmap.GRCh38_chr21.50nt", @@ -22,9 +22,9 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + "nextflow": "24.10.3" }, - "timestamp": "2025-05-20T22:12:52.684402" + "timestamp": "2025-06-29T20:29:53.268154" }, "software_versions": { "content": [ @@ -84,6 +84,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -100,8 +103,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.03.1" + "nextflow": "24.10.3" }, - "timestamp": "2025-05-20T21:41:42.367523" + "timestamp": "2025-06-29T20:29:53.145336" } } \ No newline at end of file diff --git a/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test b/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test index 638a7ba0..4639de63 100644 --- a/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test +++ b/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test @@ -25,8 +25,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = 
getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -38,8 +38,9 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // TODO https://github.com/nvnieuwk/nft-bam/issues/22 - Add MD5 validation when bug is fixed + cram_files.collect{ file -> file.getName() } ).match() } ) } diff --git a/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test.snap b/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test.snap index b52aae88..2039c9e0 100644 --- a/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test.snap +++ b/workflows/tests/transcript_indentification/grohmm/only_gff/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run groHMM with only a GFF file": { "content": [ - 106, + 109, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -88,6 +88,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -103,12 +106,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -251,9 +254,6 @@ "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_jurkat_intersect-transcripts.featureCounts.txt", 
"quantification/nascent/jurkat-group_jurkat_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -349,7 +349,8 @@ "jurkat_merged.bed:md5,a94e1ec47f0564122ae1eb0f73be4ebd", "cd4_chr21_1_unidirectional_peaks.bed:md5,60e0da7e5691e55d86eb9df9f6ea0c46", "jurkat_chr21_1_unidirectional_peaks.bed:md5,3584ff1a08cdecc92b6fcf6b2db8dc90" - ] + ], + ["cd4_REP1.cram", "cd4_REP2.cram", "jurkat.cram"] ], "meta": { "nf-test": "0.9.2", @@ -357,4 +358,4 @@ }, "timestamp": "2025-05-11T20:01:27.91741" } -} \ No newline at end of file +} diff --git a/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test b/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test index 847b907c..ea9b2858 100644 --- a/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test +++ b/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test @@ -17,8 +17,8 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}', 'multiqc/multiqc_plots', 'multiqc/multiqc_plots/**']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') - // bam_files: All bam files - def bam_files = getAllFilesFromDir(params.outdir, include: ['**/*.bam']) + // cram_files: All cram files + def cram_files = getAllFilesFromDir(params.outdir, include: ['**/*.cram']) assertAll( { assert workflow.success }, { assert snapshot( @@ -30,7 +30,7 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path, - // All bam files - // FIXME bam_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } + // All cram files + // FIXME cram_files.collect{ file -> [ file.getName(), bam(file.toString()).getReadsMD5() ] } ).match() } ) diff --git 
a/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test.snap b/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test.snap index a2c2bb2b..5dee1387 100644 --- a/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test.snap +++ b/workflows/tests/transcript_indentification/grohmm/tuning/main.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with defaults": { "content": [ - 106, + 109, { "BBMAP_PILEUP": { "bbmap": 39.01, @@ -79,6 +79,9 @@ "RSEQC_READDUPLICATION": { "rseqc": "5.0.2" }, + "SAMTOOLS_CONVERT": { + "samtools": 1.21 + }, "SAMTOOLS_MERGE": { "samtools": 1.21 }, @@ -94,12 +97,12 @@ }, [ "bwa", - "bwa/cd4_REP1.sorted.bam", - "bwa/cd4_REP1.sorted.bam.bai", - "bwa/cd4_REP2.sorted.bam", - "bwa/cd4_REP2.sorted.bam.bai", - "bwa/jurkat.sorted.bam", - "bwa/jurkat.sorted.bam.bai", + "bwa/cd4_REP1.cram", + "bwa/cd4_REP1.cram.crai", + "bwa/cd4_REP2.cram", + "bwa/cd4_REP2.cram.crai", + "bwa/jurkat.cram", + "bwa/jurkat.cram.crai", "bwa/samtools_stats", "bwa/samtools_stats/cd4_REP1.sorted.bam.flagstat", "bwa/samtools_stats/cd4_REP1.sorted.bam.idxstats", @@ -242,9 +245,6 @@ "quantification/nascent/jurkat-group_cd4_intersect-transcripts.featureCounts.txt.summary", "quantification/nascent/jurkat-group_jurkat_intersect-transcripts.featureCounts.txt", "quantification/nascent/jurkat-group_jurkat_intersect-transcripts.featureCounts.txt.summary", - "samtools", - "samtools/cd4.bam", - "samtools/jurkat.bam", "transcript_identification", "transcript_identification/filtered", "transcript_identification/filtered/cd4_filtered.bed", @@ -346,6 +346,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.3" }, - "timestamp": "2025-04-24T13:00:22.623091" + "timestamp": "2025-06-29T20:52:06.293766" } } \ No newline at end of file