diff --git a/.github/actions/nf-test-action/action.yml b/.github/actions/nf-test-action/action.yml
--- a/.github/actions/nf-test-action/action.yml
+++ b/.github/actions/nf-test-action/action.yml
@@ -13,6 +13,14 @@ inputs:
   paths:
     description: "Test paths"
     required: true
+  jfrog_username:
+    description: "JFrog registry username"
+    required: false
+    default: ""
+  jfrog_password:
+    description: "JFrog registry password or token"
+    required: false
+    default: ""

 runs:
   using: "composite"
@@ -72,6 +80,14 @@ runs:

         nextflow secrets set ONCOKB_TOKEN $ONCOKB_TOKEN

+    - name: Login to JFrog Container Registry
+      if: ${{ inputs.profile == 'docker' && inputs.jfrog_username != '' && inputs.jfrog_password != '' }}
+      uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3
+      with:
+        registry: mskcc.jfrog.io
+        username: ${{ inputs.jfrog_username }}
+        password: ${{ inputs.jfrog_password }}
+
     # TODO Skip failing conda tests and document their failures
     # https://github.com/nf-core/modules/issues/7017
     - name: Run nf-test
diff --git a/.github/skip_nf_test.json b/.github/skip_nf_test.json
index 58ef7e41..85634e98 100644
--- a/.github/skip_nf_test.json
+++ b/.github/skip_nf_test.json
@@ -36,7 +36,9 @@
     "subworkflows/msk/phylowgs",
     "subworkflows/msk/generate_mutated_peptides",
     "subworkflows/msk/neoantigen_editing",
-    "subworkflows/msk/traceback"
+    "subworkflows/msk/traceback",
+    "modules/msk/hlahd",
+    "subworkflows/msk/hlahd_from_bam"
   ],
   "docker": [],
   "singularity": []
diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml
index 30223b81..e41e3532 100644
--- a/.github/workflows/nf-test.yml
+++ b/.github/workflows/nf-test.yml
@@ -135,6 +135,8 @@ jobs:
           shard: ${{ matrix.shard }}
           total_shards: ${{ env.TOTAL_SHARDS }}
           paths: "${{ join(fromJson(steps.filter.outputs.filtered_paths), ' ') }}"
+          jfrog_username: ${{ secrets.JFROG_USERNAME }}
+          jfrog_password: ${{ secrets.JFROG_PASSWORD }}

   confirm-pass:
     runs-on: ubuntu-latest
diff --git a/modules/msk/hlahd/main.nf b/modules/msk/hlahd/main.nf
new file mode 100644
--- /dev/null
+++ b/modules/msk/hlahd/main.nf
@@ -0,0 +1,75 @@
+process HLAHD {
+    tag "$meta.id"
+    label 'process_high'
+
+    // No Conda environment is provided for this module; HLA-HD runs only from the
+    // container image below (see the Conda guard at the top of script: and stub:).
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'docker://mskcc.jfrog.io/omicswf-docker-dev-local/mskcc-omics-workflows/hlahd:1.7.1':
+        'mskcc.jfrog.io/omicswf-docker-dev-local/mskcc-omics-workflows/hlahd:1.7.1' }"
+
+    input:
+    tuple val(meta), path(fastq_1), path(fastq_2)
+
+    output:
+    tuple val(meta), path("${prefix}/result/${prefix}_final.result.txt"), emit: result
+    tuple val(meta), path("${prefix}/result/${prefix}_*.est.txt"), emit: result_per_locus
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "HLAHD module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    def args = task.ext.args ?: ''
+    // ext.args2 carries the value passed to hlahd.sh -m (defaults to 100)
+    def min_read = task.ext.args2 ?: '100'
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // Path under which hlahd.sh, freq_data, HLA_gene.split.txt and the dictionary are looked up
+    def install_dir = '/opt/hlahd/current'
+    """
+    if [[ \$(ulimit -n) -lt 1024 ]]; then ulimit -n 1024; fi
+
+    ln -sf /usr/bin/python3 ./python
+    export PATH=\$PWD:\$PATH
+
+    mkdir -p ${prefix}
+
+    bash ${install_dir}/bin/hlahd.sh \\
+        -t ${task.cpus} \\
+        -m ${min_read} \\
+        -f ${install_dir}/freq_data \\
+        ${args} \\
+        ${fastq_1} \\
+        ${fastq_2} \\
+        ${install_dir}/HLA_gene.split.txt \\
+        ${install_dir}/dictionary \\
+        ${prefix} \\
+        .
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hlahd: \$(bash ${install_dir}/bin/hlahd.sh 2>&1 | grep -oP 'HLA-HD version \\K[0-9.]+' | head -1)
+    END_VERSIONS
+    """
+
+    stub:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "HLAHD module does not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    mkdir -p ${prefix}/result
+    touch ${prefix}/result/${prefix}_final.result.txt
+    touch ${prefix}/result/${prefix}_A.est.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        hlahd: \$(bash /opt/hlahd/current/bin/hlahd.sh 2>&1 | grep -oP 'HLA-HD version \\K[0-9.]+' | head -1)
+    END_VERSIONS
+    """
+}
diff --git a/modules/msk/hlahd/meta.yml b/modules/msk/hlahd/meta.yml
new file mode 100644
index 00000000..b268a910
--- /dev/null
+++ b/modules/msk/hlahd/meta.yml
@@ -0,0 +1,67 @@
+name: "hlahd"
+description: HLA typing from paired-end FASTQ reads using HLA-HD
+keywords:
+  - HLA
+  - immunology
+  - typing
+  - genomics
+tools:
+  - "hlahd":
+      description:
+        "HLA-HD (HLA typing from High-quality Dictionary) performs HLA typing
+        from paired-end FASTQ reads using bowtie2 alignment against HLA allele dictionaries."
+      homepage: "https://w3.genome.med.kyoto-u.ac.jp/HLA-HD/"
+      documentation: "https://w3.genome.med.kyoto-u.ac.jp/HLA-HD/"
+      licence:
+        - "ACADEMIC SOFTWARE LICENSE"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - fastq_1:
+        type: file
+        description: First read of paired-end FASTQ input
+        pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
+        ontologies:
+          - edam: http://edamontology.org/format_1930
+    - fastq_2:
+        type: file
+        description: Second read of paired-end FASTQ input
+        pattern: "*.{fastq,fastq.gz,fq,fq.gz}"
+        ontologies:
+          - edam: http://edamontology.org/format_1930
+output:
+  result:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - ${prefix}/result/${prefix}_final.result.txt:
+          type: file
+          description: Final HLA typing result file containing best-call alleles for all loci
+          pattern: "**/result/*_final.result.txt"
+  result_per_locus:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - ${prefix}/result/${prefix}_*.est.txt:
+          type: file
+          description: Per-locus HLA estimation files (one file per HLA gene)
+          pattern: "**/result/*_*.est.txt"
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+        ontologies:
+          - edam: http://edamontology.org/format_3750
+authors:
+  - "@johnoooh"
+maintainers:
+  - "@johnoooh"
diff --git a/modules/msk/hlahd/tests/main.nf.test b/modules/msk/hlahd/tests/main.nf.test
new file mode 100644
index 00000000..836ec40c
--- /dev/null
+++ b/modules/msk/hlahd/tests/main.nf.test
@@ -0,0 +1,64 @@
+nextflow_process {
+
+    name "Test Process HLAHD"
+    script "../main.nf"
+    process "HLAHD"
+
+    tag "modules"
+    tag "modules_msk"
+    tag "hlahd"
+
+    test("hlahd - fastq pair - result txt") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test_sample', single_end:false ], // meta map
+                    file(params.test_data_mskcc['hlahd']['fastq_1'], checkIfExists: true),
+                    file(params.test_data_mskcc['hlahd']['fastq_2'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.result,
+                    process.out.versions
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("hlahd - fastq pair - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test_sample', single_end:false ], // meta map
+                    file(params.test_data_mskcc['hlahd']['fastq_1'], checkIfExists: true),
+                    file(params.test_data_mskcc['hlahd']['fastq_2'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.result.get(0).get(1)).exists() },
+                { assert snapshot(process.out.versions).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/msk/hlahd/tests/main.nf.test.snap b/modules/msk/hlahd/tests/main.nf.test.snap
new file mode 100644
index 00000000..0184980d
--- /dev/null
+++ b/modules/msk/hlahd/tests/main.nf.test.snap
@@ -0,0 +1,35 @@
+{
+    "hlahd - fastq pair - stub": {
+        "content": [
+            [
+                "versions.yml:md5,f196d451477cda61837f7cfb2ed3c9b4"
+            ]
+        ],
+        "timestamp": "2026-03-05T16:10:44.004384",
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "25.10.4"
+        }
+    },
+    "hlahd - fastq pair - result txt": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test_sample",
+                        "single_end": false
+                    },
+                    "test_sample_final.result.txt:md5,6f83fc8ac5bd3b9f56853b583595e2a0"
+                ]
+            ],
+            [
+                "versions.yml:md5,f196d451477cda61837f7cfb2ed3c9b4"
+            ]
+        ],
+        "timestamp": "2026-03-09T11:03:42.014639",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/msk/hlahd/tests/tags.yml b/modules/msk/hlahd/tests/tags.yml
new file mode 100644
index 00000000..51631aae
--- /dev/null
+++ b/modules/msk/hlahd/tests/tags.yml
@@ -0,0 +1,2 @@
+hlahd:
+  - modules/msk/hlahd/**
diff --git a/subworkflows/msk/hlahd_from_bam/main.nf b/subworkflows/msk/hlahd_from_bam/main.nf
new file mode 100644
--- /dev/null
+++ b/subworkflows/msk/hlahd_from_bam/main.nf
@@ -0,0 +1,78 @@
+include { SAMTOOLS_VIEW   } from '../../../modules/nf-core/samtools/view/main'
+include { GATK4_REVERTSAM } from '../../../modules/nf-core/gatk4/revertsam/main'
+include { SAMTOOLS_FASTQ  } from '../../../modules/nf-core/samtools/fastq/main'
+include { HLAHD           } from '../../../modules/msk/hlahd/main'
+
+workflow HLAHD_FROM_BAM {
+
+    take:
+    ch_bam          // channel: [ val(meta), path(bam), path(bai) ]
+    skip_revert_sam // val: Boolean
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    //
+    // MODULE: Extract HLA region from BAM using samtools view.
+    // Regions are positional and must follow the input BAM, so the caller sets them
+    // via ext.args2 in modules.config (nf-core samtools/view appends ext.args2 after
+    // the input file), e.g. ext.args2 = 'chr6:28000000-34000000'
+    //
+    SAMTOOLS_VIEW(
+        ch_bam,
+        [[],[]],
+        [],
+        []
+    )
+    ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first())
+
+    //
+    // Optional: Revert base quality score recalibration with GATK4 RevertSam.
+    // Set skip_revert_sam = true when the BAM has no BQSR applied (e.g. already
+    // in OQ-restored state, or produced by a tool that does not perform BQSR).
+    //
+    if (!skip_revert_sam) {
+
+        GATK4_REVERTSAM(
+            SAMTOOLS_VIEW.out.bam
+        )
+        ch_versions = ch_versions.mix(GATK4_REVERTSAM.out.versions.first())
+        ch_for_fastq = GATK4_REVERTSAM.out.bam
+
+    } else {
+
+        ch_for_fastq = SAMTOOLS_VIEW.out.bam
+
+    }
+
+    //
+    // MODULE: Convert BAM to paired FASTQ files.
+    // SAMTOOLS_FASTQ emits .out.fastq as [ meta, [fq1, fq2] ]; unpack into
+    // separate paths so HLAHD receives the three-element tuple it expects.
+    //
+    SAMTOOLS_FASTQ(
+        ch_for_fastq,
+        false
+    )
+    ch_versions = ch_versions.mix(SAMTOOLS_FASTQ.out.versions.first())
+
+    ch_fastq_for_hlahd = SAMTOOLS_FASTQ.out.fastq
+        .map { meta, fastqs ->
+            def (fq1, fq2) = fastqs
+            [meta, fq1, fq2]
+        }
+
+    //
+    // MODULE: Run HLA-HD to call HLA alleles from paired FASTQ files.
+    //
+    HLAHD(
+        ch_fastq_for_hlahd
+    )
+    ch_versions = ch_versions.mix(HLAHD.out.versions.first())
+
+    emit:
+    result           = HLAHD.out.result           // channel: [ val(meta), path(result/*_final.result.txt) ]
+    result_per_locus = HLAHD.out.result_per_locus // channel: [ val(meta), path(result/*_*.est.txt) ]
+    versions         = ch_versions                // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/msk/hlahd_from_bam/meta.yml b/subworkflows/msk/hlahd_from_bam/meta.yml
new file mode 100644
index 00000000..8dfd5366
--- /dev/null
+++ b/subworkflows/msk/hlahd_from_bam/meta.yml
@@ -0,0 +1,68 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "hlahd_from_bam"
+description: |
+  Extract the HLA region from a coordinate-sorted BAM file, optionally revert
+  base quality score recalibration with GATK4 RevertSam, convert to paired
+  FASTQ files with samtools fastq, and run HLA-HD for high-resolution HLA
+  typing.
+keywords:
+  - HLA
+  - typing
+  - BAM
+  - immunology
+  - samtools
+  - gatk4
+components:
+  - samtools/view
+  - gatk4/revertsam
+  - samtools/fastq
+  - hlahd
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information.
+          e.g. [ id:'sample_01', single_end:false ]
+    - bam:
+        type: file
+        description: |
+          Input BAM file. Must be coordinate-sorted and indexed.
+        pattern: "*.bam"
+    - bai:
+        type: file
+        description: BAM index file.
+        pattern: "*.bai"
+  - skip_revert_sam:
+      type: boolean
+      description: |
+        When true, skip the GATK4 RevertSam step. Set to true if the BAM was
+        not processed with base quality score recalibration (BQSR), or if
+        original base qualities have already been restored.
+output:
+  - result:
+      - meta:
+          type: map
+          description: Groovy Map containing sample information.
+      - "**/result/*_final.result.txt":
+          type: file
+          description: |
+            Tab-separated HLA allele calls for all typed loci produced by
+            HLA-HD. One file per sample.
+          pattern: "**/result/*_final.result.txt"
+  - result_per_locus:
+      - meta:
+          type: map
+          description: Groovy Map containing sample information.
+      - "**/result/*_*.est.txt":
+          type: file
+          description: Per-locus HLA estimation files produced by HLA-HD.
+          pattern: "**/result/*_*.est.txt"
+  - versions:
+      - "versions.yml":
+          type: file
+          description: File containing software versions for all tools used.
+          pattern: "versions.yml"
+authors:
+  - "@johnoooh"
+maintainers:
+  - "@johnoooh"
diff --git a/subworkflows/msk/hlahd_from_bam/tests/main.nf.test b/subworkflows/msk/hlahd_from_bam/tests/main.nf.test
new file mode 100644
index 00000000..3e817f9a
--- /dev/null
+++ b/subworkflows/msk/hlahd_from_bam/tests/main.nf.test
@@ -0,0 +1,106 @@
+nextflow_workflow {
+
+    name "Test Subworkflow HLAHD_FROM_BAM"
+    script "../main.nf"
+    workflow "HLAHD_FROM_BAM"
+
+    tag "subworkflows"
+    tag "subworkflows_msk"
+    tag "subworkflows/hlahd_from_bam"
+    tag "hlahd_from_bam"
+    tag "hlahd"
+    tag "samtools"
+    tag "gatk4"
+
+    test("hlahd_from_bam - bam - with revert sam - result") {
+
+        config "./nextflow.config"
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.value([
+                    [ id:'test_sample', single_end:false ], // meta map
+                    file(params.test_data_mskcc['hlahd']['bam'], checkIfExists: true),
+                    file(params.test_data_mskcc['hlahd']['bai'], checkIfExists: true)
+                ])
+
+                input[1] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.result,
+                    workflow.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("hlahd_from_bam - bam - skip revert sam - result") {
+
+        config "./nextflow.config"
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.value([
+                    [ id:'test_sample', single_end:false ], // meta map
+                    file(params.test_data_mskcc['hlahd']['bam'], checkIfExists: true),
+                    file(params.test_data_mskcc['hlahd']['bai'], checkIfExists: true)
+                ])
+
+                input[1] = true
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.result,
+                    workflow.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+
+    test("hlahd_from_bam - bam - stub") {
+
+        config "./nextflow.config"
+        options "-stub"
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.value([
+                    [ id:'test_sample', single_end:false ], // meta map
+                    file('test_hla_region.bam', checkIfExists: false),
+                    file('test_hla_region.bam.bai', checkIfExists: false)
+                ])
+
+                input[1] = false
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.result[0][0],
+                    file(workflow.out.result[0][1]).name,
+                    workflow.out.versions
+                    ).match()
+                }
+            )
+        }
+    }
+}
diff --git a/subworkflows/msk/hlahd_from_bam/tests/main.nf.test.snap b/subworkflows/msk/hlahd_from_bam/tests/main.nf.test.snap
new file mode 100644
index 00000000..ac039f62
--- /dev/null
+++ b/subworkflows/msk/hlahd_from_bam/tests/main.nf.test.snap
@@ -0,0 +1,69 @@
+{
+    "hlahd_from_bam - bam - stub": {
+        "content": [
+            {
+                "id": "test_sample",
+                "single_end": false
+            },
+            "test_sample_final.result.txt",
+            [
+                "versions.yml:md5,181590c9fdbd5c2815c3ce5208703884",
+                "versions.yml:md5,3536f6f188f43e5bb192831b7809a671",
+                "versions.yml:md5,475acfad0fe45163f0020ba941a658c1",
+                "versions.yml:md5,e0264ab44efbd0fd97853a90160075a1"
+            ]
+        ],
+        "timestamp": "2026-03-09T12:50:14.479909",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "hlahd_from_bam - bam - skip revert sam - result": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test_sample",
+                        "single_end": false
+                    },
+                    "test_sample_final.result.txt:md5,6f83fc8ac5bd3b9f56853b583595e2a0"
+                ]
+            ],
+            [
+                "versions.yml:md5,181590c9fdbd5c2815c3ce5208703884",
+                "versions.yml:md5,475acfad0fe45163f0020ba941a658c1",
+                "versions.yml:md5,e0264ab44efbd0fd97853a90160075a1"
+            ]
+        ],
+        "timestamp": "2026-03-09T12:49:52.217786",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "hlahd_from_bam - bam - with revert sam - result": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test_sample",
+                        "single_end": false
+                    },
+                    "test_sample_final.result.txt:md5,6f83fc8ac5bd3b9f56853b583595e2a0"
+                ]
+            ],
+            [
+                "versions.yml:md5,181590c9fdbd5c2815c3ce5208703884",
+                "versions.yml:md5,3536f6f188f43e5bb192831b7809a671",
+                "versions.yml:md5,475acfad0fe45163f0020ba941a658c1",
+                "versions.yml:md5,e0264ab44efbd0fd97853a90160075a1"
+            ]
+        ],
+        "timestamp": "2026-03-09T12:33:58.060705",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/msk/hlahd_from_bam/tests/nextflow.config b/subworkflows/msk/hlahd_from_bam/tests/nextflow.config
new file mode 100644
index 00000000..cca601fd
--- /dev/null
+++ b/subworkflows/msk/hlahd_from_bam/tests/nextflow.config
@@ -0,0 +1,11 @@
+process {
+    withName: 'HLAHD_FROM_BAM:SAMTOOLS_VIEW' {
+        ext.prefix = { "${meta.id}.hla_region" }
+    }
+    withName: 'HLAHD_FROM_BAM:GATK4_REVERTSAM' {
+        ext.prefix = { "${meta.id}.reverted" }
+    }
+    withName: 'HLAHD_FROM_BAM:SAMTOOLS_FASTQ' {
+        ext.prefix = { "${meta.id}" }
+    }
+}
diff --git a/subworkflows/msk/hlahd_from_bam/tests/tags.yml b/subworkflows/msk/hlahd_from_bam/tests/tags.yml
new file mode 100644
index 00000000..4da44ddc
--- /dev/null
+++ b/subworkflows/msk/hlahd_from_bam/tests/tags.yml
@@ -0,0 +1,3 @@
+subworkflows/hlahd_from_bam:
+  - subworkflows/msk/hlahd_from_bam/**
+  - modules/msk/hlahd/**
diff --git a/tests/config/test_data.config b/tests/config/test_data.config
index bdfd151f..d1da5b64 100644
--- a/tests/config/test_data.config
+++ b/tests/config/test_data.config
@@ -786,6 +786,12 @@ params {
                 iedb_alignments = "${params.test_data_base_msk}/neoantigen/neoantigen/neoantigenEditing/data/IEDB_alignments/iedb_alignments_3-OLTS.txt"
                 test_annotated = "${params.test_data_base_msk}/neoantigen/neoantigen/test_patient_test_annotated.json"
             }
+            'hlahd' {
+                fastq_1 = "${params.test_data_base_msk}/hlahd/hlahd/test_R1.fastq.gz"
+                fastq_2 = "${params.test_data_base_msk}/hlahd/hlahd/test_R2.fastq.gz"
+                bam = "${params.test_data_base_msk}/hlahd/hlahd/test_hla_region.bam"
+                bai = "${params.test_data_base_msk}/hlahd/hlahd/test_hla_region.bam.bai"
+            }
             'genome_nexus' {
                 test_maf = "${params.test_data_base_msk}/feature/genome_nexus_subworkflow/mafs/test.maf"
                 sample2_sample1_annotated_maf = "${params.test_data_base_msk}/feature/genome_nexus_subworkflow/mafs/sample2_sample1_annotated.maf"