diff --git a/install_data.sh b/install_data.sh
new file mode 100644
index 000000000000..aed5c33bd36c
--- /dev/null
+++ b/install_data.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+#
+# Download and unpack the test data used by the alignment subworkflow test.
+#
+# Test data is hosted on Google Drive at:
+# https://drive.google.com/file/d/1GtT8jsBGwRoQC-5wHh06r8RFkiFBuirp/view?usp=sharing
+
+# Abort on the first failing command so a broken download is never left behind.
+set -euo pipefail
+
+fileid=1GtT8jsBGwRoQC-5wHh06r8RFkiFBuirp
+
+filename=test_nucleo.tar.gz
+foldername=test_nucleo
+cookie=./cookie
+
+# Skip if the test data has already been unpacked. A leftover tarball alone is
+# not enough: it means an earlier run was interrupted before extraction.
+[[ -d "$foldername" ]] && exit 0
+
+# Always remove the temporary cookie jar and tarball, even on failure.
+trap 'rm -f "$cookie" "$filename"' EXIT
+
+# The first request only stores Google Drive's download-warning cookie.
+curl -c "$cookie" -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
+
+# The confirmation token is the last field of the cookie's "download" line.
+confirm=$(awk '/download/ {print $NF}' "$cookie")
+curl -L -b "$cookie" "https://drive.google.com/uc?export=download&confirm=${confirm}&id=${fileid}" -o "$filename"
+
+# Suppress GNU tar warnings for tar.gz files created on macOS; every other
+# platform (e.g. macOS/BSD tar) extracts with the plain invocation.
+if [[ "$OSTYPE" == "linux-gnu"* ]]; then
+    tar --warning=no-unknown-keyword -xzvf "$filename"
+else
+    tar -xzvf "$filename"
+fi
diff --git a/subworkflows/nf-core/alignment/main.nf b/subworkflows/nf-core/alignment/main.nf
new file mode 100644
index 000000000000..ef645b9120ab
--- /dev/null
+++ b/subworkflows/nf-core/alignment/main.nf
@@ -0,0 +1,55 @@
+//
+// Align reads with BWA MEM or BWA-MEM2 and add read groups with Picard
+//
+
+include { BWA_MEM                       } from '../../../modules/nf-core/bwa/mem/main'
+include { BWA_INDEX                     } from '../../../modules/nf-core/bwa/index/main'
+include { BWAMEM2_MEM                   } from '../../../modules/nf-core/bwamem2/mem/main'
+include { BWAMEM2_INDEX                 } from '../../../modules/nf-core/bwamem2/index/main'
+include { PICARD_ADDORREPLACEREADGROUPS } from '../../../modules/nf-core/picard/addorreplacereadgroups/main'
+
+workflow ALIGNMENT {
+
+    take:
+    fastqs    // channel: [ val(meta), [ fastq ] ]
+    reference // [ val(meta), fasta ]
+    bwa       // integer: 1 selects BWA MEM, 2 selects BWA-MEM2
+
+    main:
+
+    versions = Channel.empty()
+
+    // Run the aligner selected by the bwa argument
+    switch (bwa) {
+        case 1:
+            // INDEX
+            BWA_INDEX ( reference )
+            versions = versions.mix(BWA_INDEX.out.versions.first())
+            // MEM; the incoming meta is replaced with a fixed id
+            BWA_MEM ( fastqs, BWA_INDEX.out.index, true ).bam
+                .map { meta, bam -> [ [ id: 'aligned_bam' ], bam ] }
+                .set { aligned_bam }
+            versions = versions.mix(BWA_MEM.out.versions.first())
+            break
+        case 2:
+            // INDEX
+            BWAMEM2_INDEX ( reference )
+            versions = versions.mix(BWAMEM2_INDEX.out.versions.first())
+            // BWA MEM2; the incoming meta is replaced with a fixed id
+            BWAMEM2_MEM ( fastqs, BWAMEM2_INDEX.out.index, true ).bam
+                .map { meta, bam -> [ [ id: 'aligned_bam' ], bam ] }
+                .set { aligned_bam }
+            versions = versions.mix(BWAMEM2_MEM.out.versions.first())
+            break
+        default:
+            throw new Exception("The argument bwa must be either 1 or 2, not ${bwa}.")
+    }
+
+    // Add or replace read groups on the aligned BAM
+    PICARD_ADDORREPLACEREADGROUPS ( aligned_bam )
+    versions = versions.mix(PICARD_ADDORREPLACEREADGROUPS.out.versions.first())
+
+    emit:
+    bam      = PICARD_ADDORREPLACEREADGROUPS.out.bam // channel: [ val(meta), [ bam ] ]
+    versions = versions                              // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/alignment/meta.yml b/subworkflows/nf-core/alignment/meta.yml
new file mode 100644
index 000000000000..5ba552123313
--- /dev/null
+++ b/subworkflows/nf-core/alignment/meta.yml
@@ -0,0 +1,47 @@
+name: "alignment"
+description: Align reads to a reference with BWA MEM or BWA-MEM2 and add read groups with Picard
+keywords:
+  - alignment
+  - bwa
+  - bwamem2
+  - picard
+  - fastq
+  - bam
+modules:
+  - bwa/index
+  - bwa/mem
+  - bwamem2/index
+  - bwamem2/mem
+  - picard/addorreplacereadgroups
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - fastqs:
+      type: file
+      description: FASTQ file(s) to align
+  - reference:
+      type: file
+      description: Reference FASTA file, passed together with its own meta map
+      pattern: "*.{fa,fasta}"
+  - bwa:
+      type: integer
+      description: Aligner to use, 1 for BWA MEM or 2 for BWA-MEM2
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'aligned_bam' ]
+  - bam:
+      type: file
+      description: Aligned BAM file with read groups added
+      pattern: "*.bam"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@buehlere"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 0144bc892ef7..f03b9e78e84f 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -3696,6 +3696,10 @@ subworkflows/fasta_newick_epang_gappa:
   - subworkflows/nf-core/fasta_newick_epang_gappa/**
   - tests/subworkflows/nf-core/fasta_newick_epang_gappa/**
 
+subworkflows/alignment:
+  - subworkflows/nf-core/alignment/**
+  - tests/subworkflows/nf-core/alignment/**
+
 subworkflows/fastq_align_bowtie2:
   - subworkflows/nf-core/fastq_align_bowtie2/**
   - tests/subworkflows/nf-core/fastq_align_bowtie2/**
diff --git a/tests/subworkflows/nf-core/alignment/main.nf b/tests/subworkflows/nf-core/alignment/main.nf
new file mode 100644
index 000000000000..9c0852f72166
--- /dev/null
+++ b/tests/subworkflows/nf-core/alignment/main.nf
@@ -0,0 +1,34 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { ALIGNMENT } from '../../../../subworkflows/nf-core/alignment/main.nf'
+
+workflow test_alignment {
+    // Download the test data; install_data.sh is looked up in the current working directory
+    def bashScriptFile = new File('install_data.sh')
+
+    def processBuilder = new ProcessBuilder('bash', bashScriptFile.toString())
+    processBuilder.redirectOutput(ProcessBuilder.Redirect.INHERIT)
+    processBuilder.redirectError(ProcessBuilder.Redirect.INHERIT)
+
+    // Stop here when the download fails instead of aligning against missing test data
+    def exitCode = processBuilder.start().waitFor()
+    if (exitCode != 0) {
+        throw new Exception("install_data.sh failed with exit code ${exitCode}.")
+    }
+
+    // channels enable parallelism: https://www.nextflow.io/docs/latest/faq.html?highlight=parallel
+    // test data
+    def fastqs = [
+        [[id:'gene', single_end:false], [params.test_data_msk['uncollapsed_bam_generation']['merged_fastq']['merged_1'], params.test_data_msk['uncollapsed_bam_generation']['merged_fastq']['merged_2']]]
+    ]
+    def reference = [
+        [id:'reference'],
+        file('test_nucleo/reference/chr14_chr16.fasta')
+    ]
+    def ch_fastqs = Channel.fromList(fastqs)
+
+    // workflow
+    ALIGNMENT ( ch_fastqs, reference, 1 )
+}
diff --git a/tests/subworkflows/nf-core/alignment/nextflow.config b/tests/subworkflows/nf-core/alignment/nextflow.config
new file mode 100644
index 000000000000..c7bb699f65a6
--- /dev/null
+++ b/tests/subworkflows/nf-core/alignment/nextflow.config
@@ -0,0 +1,9 @@
+executor.cpus = 6
+executor.memory = 15.GB
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+    withName:PICARD_ADDORREPLACEREADGROUPS{
+        ext.args = "--RGID 4 --RGLB 'lib1' --RGPL 'ILLUMINA' --RGPU 'unit1' --RGSM 20"
+    }
+}
diff --git a/tests/subworkflows/nf-core/alignment/test.yml b/tests/subworkflows/nf-core/alignment/test.yml
new file mode 100644
index 000000000000..0d281a4a9c37
--- /dev/null
+++ b/tests/subworkflows/nf-core/alignment/test.yml
@@ -0,0 +1,28 @@
+- name: alignment test_alignment
+  command: nextflow run ./tests/subworkflows/nf-core/alignment -entry test_alignment -c ./tests/config/nextflow.config
+  tags:
+    - bwa
+    - bwa/index
+    - bwa/mem
+    - bwamem2
+    - bwamem2/index
+    - bwamem2/mem
+    - picard
+    - picard/addorreplacereadgroups
+    - subworkflows
+    - subworkflows/alignment
+  files:
+    - path: output/bwa/bwa/chr14_chr16.amb
+      md5sum: 00fb74627e074db6238dcd9bc08dc48a
+    - path: output/bwa/bwa/chr14_chr16.ann
+      md5sum: d8825e2fcb3cd372cd61ededfe283025
+    - path: output/bwa/bwa/chr14_chr16.bwt
+      md5sum: 45637ec2c011d0f73cac6c470c5b5d2b
+    - path: output/bwa/bwa/chr14_chr16.pac
+      md5sum: 46f856371d59e859295497c967478d31
+    - path: output/bwa/bwa/chr14_chr16.sa
+      md5sum: 466dbbbce2fb9528e760477ccdc2ea5b
+    - path: output/bwa/gene.bam
+      md5sum: d7c5943b79704d8ed7f432786738f25d
+    - path: output/picard/aligned_bam.bam
+      md5sum: 89acecb9fcb99f9182a417215489ea50