From 581fccb04730de6dfd36abffcb928a3223fc460c Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 3 Apr 2026 14:18:22 -0300 Subject: [PATCH 01/28] test quilt2 first draft implementation --- README.md | 2 +- conf/steps/imputation_quilt2.config | 43 ++ conf/test_quilt2.config | 36 + docs/output.md | 4 +- docs/usage.md | 13 +- modules.json | 10 + modules/nf-core/quilt/quilt2/environment.yml | 8 + modules/nf-core/quilt/quilt2/main.nf | 108 +++ modules/nf-core/quilt/quilt2/meta.yml | 194 +++++ nextflow.config | 2 + nextflow_schema.json | 4 +- .../utils_nfcore_phaseimpute_pipeline/main.nf | 18 +- .../nf-core/bam_impute_quilt2/main.nf | 74 ++ .../nf-core/bam_impute_quilt2/meta.yml | 39 + workflows/phaseimpute/main.nf | 34 + workflows/phaseimpute/tests/nextflow.config | 6 + .../phaseimpute/tests/test_quilt2.nf.test | 79 +++ .../tests/test_quilt2.nf.test.snap | 668 ++++++++++++++++++ 18 files changed, 1323 insertions(+), 19 deletions(-) create mode 100644 conf/steps/imputation_quilt2.config create mode 100644 conf/test_quilt2.config create mode 100644 modules/nf-core/quilt/quilt2/environment.yml create mode 100644 modules/nf-core/quilt/quilt2/main.nf create mode 100644 modules/nf-core/quilt/quilt2/meta.yml create mode 100644 subworkflows/nf-core/bam_impute_quilt2/main.nf create mode 100644 subworkflows/nf-core/bam_impute_quilt2/meta.yml create mode 100644 workflows/phaseimpute/tests/test_quilt2.nf.test create mode 100644 workflows/phaseimpute/tests/test_quilt2.nf.test.snap diff --git a/README.md b/README.md index c1c3505b..1af23ef1 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ The whole pipeline consists of five main steps, each of which can be run separat - **Position Extraction** for targeted imputation sites. 4. **Imputation (`--impute`)**: This is the primary step, where genotypes in the target dataset are imputed using the prepared reference panel. 
The main steps are: - - **Imputation** of the target dataset using tools like [Glimpse1](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html), [Glimpse2](https://odelaneau.github.io/GLIMPSE/), [Stitch](https://github.com/rwdavies/stitch), [Quilt](https://github.com/rwdavies/QUILT), [Beagle5](https://faculty.washington.edu/browning/beagle/beagle.html) or [Minimac4](https://github.com/statgen/Minimac4). + - **Imputation** of the target dataset using tools like [Glimpse1](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html), [Glimpse2](https://odelaneau.github.io/GLIMPSE/), [Stitch](https://github.com/rwdavies/stitch), [Quilt](https://github.com/rwdavies/QUILT), [Quilt2](https://github.com/rwdavies/QUILT), [Beagle5](https://faculty.washington.edu/browning/beagle/beagle.html) or [Minimac4](https://github.com/statgen/Minimac4). - **Ligation** of imputed chunks to produce a final VCF file per sample, with all chromosomes unified. 5. **Validation (`--validate`)**: Assesses imputation accuracy by comparing the imputed dataset to a truth dataset. This step leverages the [Glimpse2](https://odelaneau.github.io/GLIMPSE/) concordance process to summarize differences between two VCF files. 
diff --git a/conf/steps/imputation_quilt2.config b/conf/steps/imputation_quilt2.config new file mode 100644 index 00000000..788a5e15 --- /dev/null +++ b/conf/steps/imputation_quilt2.config @@ -0,0 +1,43 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +process { + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT2:.*' { + publishDir = [ + path: { "${params.outdir}/imputation/quilt2/variant_calling/" }, + enabled: params.publish_all, + mode: params.publish_dir_mode, + ] + tag = {"Batch ${meta.batch} ${meta.regionout ?: meta.chr}"} + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT2:QUILT_QUILT2' { + ext.args = "--seed=${params.seed}" + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? meta.regionout.replace(':','_') : meta.chr}.quilt2" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT2:GLIMPSE2_LIGATE' { + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.chr}.quilt2.ligate" } + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT2:BCFTOOLS_INDEX' { + ext.args = '--tbi' + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT2:.*' { + publishDir = [ + path: { "${params.outdir}/imputation/quilt2/concat" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT2:BCFTOOLS_CONCAT' { + ext.args = "--output-type z --ligate" + ext.prefix = { "${meta.id}.batch${meta.batch}.quilt2" } + } +} diff --git a/conf/test_quilt2.config b/conf/test_quilt2.config new file mode 100644 index 00000000..e9b2e9c8 --- /dev/null +++ b/conf/test_quilt2.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '4.GB', + time: '1.h' + ] + + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT2:QUILT_QUILT2' { + cpus = 1 + ext.args = {"--seed=${params.seed} --use_mspbwt=FALSE --impute_rare_common=FALSE" } + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? meta.regionout.replace(':','_') : meta.chr}.quilt2" } + } +} + +params { + config_profile_name = 'Minimal QUILT2 Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function using the tool QUILT2' + + input = "${projectDir}/tests/csv/sample_bam.csv" + input_region = "${projectDir}/tests/csv/region.csv" + + fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + + steps = "impute" + + chunks = "${projectDir}/tests/csv/chunks.csv" + panel = "${projectDir}/tests/csv/panel.csv" + + tools = "quilt2" +} diff --git a/docs/output.md b/docs/output.md index 966f4aa2..11fa716d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -111,7 +111,7 @@ The results from `--steps impute` will have the following directory structure: ```tree ├── batch ├── csv -├── +├── │ ├── concat/ │ └── samples/ ├── stats @@ -123,7 +123,7 @@ The results from `--steps impute` will have the following directory structure: - `imputation/batch/all.batchi.id.txt`: List of samples names processed in the i^th^ batch. 
- `imputation/csv/` - `impute.csv`: A single CSV file containing the path to a VCF file and its index, of each imputed sample with their corresponding tool. -- `imputation/[glimpse1,glimpse2,quilt,stitch]/` +- `imputation/[glimpse1,glimpse2,quilt,quilt2,stitch,beagle5,minimac4]/` - `concat/all.batch*.vcf.gz`: The concatenated VCF files of all imputed samples by batches. - `concat/all.batch*.vcf.gz.tbi`: The index file for the concatenated imputed VCF files of the samples. - `samples/*.vcf.gz`: A VCF file of each imputed sample. diff --git a/docs/usage.md b/docs/usage.md index 46a5d841..8fd35195 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -335,6 +335,7 @@ The different tests profiles are: - `test`: A profile to evaluate the imputation step with the `glimpse1` tool. - `test_glimpse2`: A profile to evaluate the imputation step with the `glimpse2` tool. - `test_quilt`: A profile to evaluate the imputation step with the `quilt` tool. +- `test_quilt2`: A profile to evaluate the imputation step with the `quilt2` tool. - `test_stitch`: A profile to evaluate the imputation step with the `stitch` tool. - `test_beagle5`: A profile to evaluate the imputation step with the `beagle5` tool. - `test_minimac4`: A profile to evaluate the imputation step with the `minimac4` tool. @@ -432,9 +433,9 @@ For starting from the imputation steps, the required flags are: - `--steps impute` - `--input input.csv`: The samplesheet containing the input sample files in `bam`, `cram` or `vcf`, `bcf` format. - `--genome` or `--fasta`: The reference genome of the samples. -- `--tools [glimpse1,glimpse2,quilt,stitch,beagle5,minimac4]`: A selection of one or more of the available imputation tools. Each imputation tool has their own set of specific flags and input files. These required files are produced by `--steps panelprep` and used as input in: +- `--tools [glimpse1,glimpse2,quilt,quilt2,stitch,beagle5,minimac4]`: A selection of one or more of the available imputation tools. 
Each imputation tool has their own set of specific flags and input files. These required files are produced by `--steps panelprep` and used as input in: - `--posfile posfile.csv`: A samplesheet containing all the different files required by the imputation tool. This file can be generated with `--steps panelprep`. -- `--panel panel.csv`: A samplesheet containing the post-processed reference panel VCF (required by GLIMPSE1, GLIMPSE2). These files can be obtained with `--steps panelprep`. +- `--panel panel.csv`: A samplesheet containing the post-processed reference panel VCF (required by GLIMPSE1, GLIMPSE2 and QUILT2). These files can be obtained with `--steps panelprep`. Optionnaly you can provide the following flags: @@ -448,6 +449,7 @@ Optionnaly you can provide the following flags: | `GLIMPSE1` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ³ | ✅ | ✅ | | `GLIMPSE2` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ | | `QUILT` | ✅ | ✅ ² | ✅ | ❌ | ✅ ⁴ | ✅ | ✅ | +| `QUILT2` | ✅ | ✅ ² | ✅ | ✅ | ❌ | ✅ | ✅ | | `STITCH` | ✅ | ✅ ² | ✅ | ❌ | ✅ ³ | ✅ | ✅ | | `BEAGLE5` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ | | `MINIMAC4` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ⁵ | ✅ | ✅ | @@ -456,8 +458,9 @@ Optionnaly you can provide the following flags: > ² Alignment files only (i.e. BAM or CRAM) > ³ `GLIMPSE1` and `STITCH`: Should be a CSV with columns [panel id, chr, posfile] > ⁴ `QUILT`: Should be a CSV with columns [panel id, chr, hap, legend, posfile] > ⁵ `MINIMAC4`: Optionally, a VCF with its index can be provided for more control over the imputed positions. 
Should be a CSV with columns [panel id, chr, vcf, index] > ⁶ Not yet supported +> ⁷ `QUILT2`: Uses the reference panel VCF directly. The panel CSV should contain [panel id, chr, vcf, index] Here is a representation on how the input files will be processed depending on the input files type and the selected imputation tool. @@ -485,7 +488,7 @@ To summarize: - GLIMPSE1 and STITCH may induce batch effects, so all samples need to be imputed together. - GLIMPSE2 and QUILT can process samples in separate batches. -## Imputation tools `--steps impute --tools [glimpse1,glimpse2,quilt,stitch,beagle5,minimac4]` +## Imputation tools `--steps impute --tools [glimpse1,glimpse2,quilt,quilt2,stitch,beagle5,minimac4]` You can choose different software to perform the imputation. In the following sections, the typical commands for running the pipeline with each software are included. Multiple tools can be selected by separating them with a comma (eg. `--tools glimpse1,quilt`). diff --git a/modules.json b/modules.json index 10d53537..19b21e5f 100644 --- a/modules.json +++ b/modules.json @@ -156,6 +156,11 @@ "git_sha": "4e2990cc0df18823d11b192df73039c80fdebc7c", "installed_by": ["bam_impute_quilt", "modules"] }, + "quilt/quilt2": { + "branch": "master", + "git_sha": "local", + "installed_by": ["bam_impute_quilt2", "modules"] + }, "samtools/coverage": { "branch": "master", "git_sha": "4e3e10e502ec6ab6b1c4b4fecd923ff1fa287338", @@ -230,6 +235,11 @@ "git_sha": "4e2990cc0df18823d11b192df73039c80fdebc7c", "installed_by": ["subworkflows"] }, + "bam_impute_quilt2": { + "branch": "master", + "git_sha": "local", + "installed_by": ["subworkflows"] + }, "bam_impute_stitch": { "branch": "master", "git_sha": "e1cb31f0ced0d0810d1cb099aaa690b05beb1f3a", diff --git a/modules/nf-core/quilt/quilt2/environment.yml b/modules/nf-core/quilt/quilt2/environment.yml new file mode 100644 index 00000000..644ec1b2 --- /dev/null +++ b/modules/nf-core/quilt/quilt2/environment.yml @@ -0,0 +1,8 @@ +--- +# 
yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json 
+channels: + - conda-forge + - bioconda +dependencies: + - bioconda::r-quilt=2.0.4=r44h503566f_0 + - r-base=4.4.3 diff --git a/modules/nf-core/quilt/quilt2/main.nf b/modules/nf-core/quilt/quilt2/main.nf new file mode 100644 index 00000000..e1050a04 --- /dev/null +++ b/modules/nf-core/quilt/quilt2/main.nf @@ -0,0 +1,108 @@ +process QUILT_QUILT2 { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/r-quilt:2.0.4--r44h503566f_0' + : 'biocontainers/r-quilt:2.0.4--r44h503566f_0'}" + + input: + tuple val(meta), path(bams), path(bais), path(bamlist), path(samplename), path(reference_vcf_file), path(reference_vcf_index), path(posfile), path(phasefile), path(genfile), val(chr), val(regions_start), val(regions_end), val(ngen), val(buffer), path(genetic_map) + tuple val(meta2), path(fasta), path(fasta_fai) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi , optional: true + tuple val(meta), path("RData", type: "dir"), emit: rdata, optional: true + tuple val(meta), path("plots", type: "dir"), emit: plots, optional: true + tuple val("${task.process}"), val('r-quilt'), eval('Rscript -e "cat(as.character(packageVersion(\'QUILT\')))"'), topic: versions, emit: versions_r_quilt + tuple val("${task.process}"), val('r-base'), eval('R --version | sed "1!d; s/.*version //; s/ .*//"'), topic: versions, emit: versions_r_base + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + + def extensions = bams.collect { path -> path.extension } + def extension = extensions.flatten().unique() + def list_command = extension == ["bam"] + ? "--bamlist=" + : extension == ["cram"] ? 
"--reference=${fasta} --cramlist=" : "" + + def genetic_map_command = genetic_map ? "--genetic_map_file=${genetic_map}" : "" + def posfile_command = posfile ? "--posfile=${posfile}" : "" + def phasefile_command = phasefile ? "--phasefile=${phasefile}" : "" + def genfile_command = genfile ? "--genfile=${genfile}" : "" + def samplename_command = samplename ? "--sampleNames_file=${samplename}" : "" + def start_command = regions_start ? "--regionStart=${regions_start}" : "" + def end_command = regions_end ? "--regionEnd=${regions_end}" : "" + def buffer_command = buffer ? "--buffer=${buffer}" : "" + + if (!(args ==~ /.*--seed.*/)) { + args += " --seed=1" + } + + """ + if [ -n "${bamlist}" ] ; + then + BAM_LIST="${bamlist}" + else + printf "%s\\n" ${bams} | tr -d '[],' > all_files.txt + BAM_LIST="all_files.txt" + fi + + QUILT2.R \\ + ${list_command}\$BAM_LIST \\ + ${genetic_map_command} \\ + ${posfile_command} \\ + ${phasefile_command} \\ + ${genfile_command} \\ + ${samplename_command} \\ + --chr=${chr} \\ + ${start_command} \\ + ${end_command} \\ + ${buffer_command} \\ + --nGen=${ngen} \\ + --nCores=${task.cpus} \\ + --outputdir="." \\ + --reference_vcf_file=${reference_vcf_file} \\ + --output_filename=${prefix}.${suffix} \\ + ${args} + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "vcf.gz" + def create_cmd = suffix.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def make_plots = args.contains("--make_plots=TRUE") + def save_ref = args.contains("--save_prepared_reference=TRUE") + def nGibbsSamples = args.contains("--nGibbsSamples=") ? args.split("--nGibbsSamples=")[1].split(" ")[0] : 7 + def n_seek_its = args.contains("--n_seek_its=") ? 
args.split("--n_seek_its=")[1].split(" ")[0] : 3 + + """ + ${create_cmd} ${prefix}.${suffix} + touch ${prefix}.${suffix}.tbi + if [ "${save_ref}" == true ] + then + mkdir -p RData + touch "RData/QUILT2_prepared_reference.${chr}.${regions_start}.${regions_end}.RData" + fi + if [ "${make_plots}" == true ] + then + mkdir -p plots + for nGibbs in {0..${nGibbsSamples}} + do + touch "plots/haps.${prefix}.${chr}.${regions_start}.${regions_end}_igs.\$((nGibbs+1)).0.truth.png" + for its in {1..${n_seek_its}} + do + touch "plots/haps.${prefix}.${chr}.${regions_start}.${regions_end}_igs.\$((nGibbs+1)).it\$its.gibbs.png" + done + done + fi + """ +} diff --git a/modules/nf-core/quilt/quilt2/meta.yml b/modules/nf-core/quilt/quilt2/meta.yml new file mode 100644 index 00000000..5b91edc9 --- /dev/null +++ b/modules/nf-core/quilt/quilt2/meta.yml @@ -0,0 +1,194 @@ +name: "quilt_quilt2" +description: QUILT2 is an R and C++ program for fast genotype imputation from + low-coverage sequence using a large reference panel. +keywords: + - imputation + - low-coverage + - genotype + - genomics + - vcf +tools: + - "quilt": + description: "Fast genotype imputation from low-coverage sequence using a large reference panel" + homepage: "https://github.com/rwdavies/QUILT" + documentation: "https://github.com/rwdavies/QUILT" + tool_dev_url: "https://github.com/rwdavies/QUILT" + doi: "10.1038/s41467-025-67218-1" + licence: + - "GPL v3" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bams: + type: file + description: (Mandatory) BAM/CRAM files + pattern: "*.{bam,cram,sam}" + ontologies: [] + - bais: + type: file + description: (Mandatory) BAM/CRAM index files + pattern: "*.{bai}" + ontologies: [] + - bamlist: + type: file + description: (Optional) File with list of BAM/CRAM files to impute. One + file per line. 
+ pattern: "*.{txt}" + ontologies: [] + - samplename: + type: file + description: (Optional) File with list of sample names in the same order + as in bamlist to impute. One file per line. + pattern: "*.{txt}" + ontologies: [] + - reference_vcf_file: + type: file + description: (Mandatory) Reference panel VCF with phased haplotypes. + pattern: "*.{vcf,vcf.gz,bcf}" + ontologies: [] + - reference_vcf_index: + type: file + description: (Mandatory) Index for the reference panel VCF. + pattern: "*.{tbi,csi}" + ontologies: [] + - posfile: + type: file + description: (Optional) File with positions of where to impute, lining up + one-to-one with genfile. + pattern: "*.{txt}" + ontologies: [] + - phasefile: + type: file + description: (Optional) File with truth phasing results. + pattern: "*.{txt}" + ontologies: [] + - genfile: + type: file + description: (Optional) Path to gen file with high coverage results. + pattern: "*.{txt}" + ontologies: [] + - chr: + type: string + description: (Mandatory) What chromosome to run. Should match BAM headers. + - regions_start: + type: integer + description: (Mandatory) When running imputation, where to start from. + - regions_end: + type: integer + description: (Mandatory) When running imputation, where to stop. + - ngen: + type: integer + description: Number of generations since founding or mixing. + - buffer: + type: integer + description: Buffer of region to perform imputation over. + - genetic_map: + type: file + description: (Optional) File with genetic map information. + pattern: "*.{txt,map}{,gz}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: (Optional) File with reference genome. + pattern: "*.{fa,fasta}" + ontologies: [] + - fasta_fai: + type: file + description: (Optional) File with reference genome index. 
+ pattern: "*.{fai}" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: VCF file with both SNP annotation information and + per-sample genotype information. + pattern: "*.{vcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz.tbi": + type: file + description: TBI file of the VCF. + pattern: "*.{vcf.gz.tbi}" + ontologies: [] + rdata: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - RData: + type: directory + description: | + Folder of RData objects generated during the imputation process. + pattern: "RData" + plots: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - plots: + type: directory + description: | + Folder of plots generated during the imputation process. 
+ pattern: "plots" + versions_r_quilt: + - - ${task.process}: + type: string + description: The name of the process + - r-quilt: + type: string + description: The name of the tool + - Rscript -e "cat(as.character(packageVersion('QUILT')))": + type: eval + description: The expression to obtain the version of the tool + versions_r_base: + - - ${task.process}: + type: string + description: The name of the process + - r-base: + type: string + description: The name of the tool + - R --version | sed "1!d; s/.*version //; s/ .*//": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - r-quilt: + type: string + description: The name of the tool + - Rscript -e "cat(as.character(packageVersion('QUILT')))": + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - r-base: + type: string + description: The name of the tool + - R --version | sed "1!d; s/.*version //; s/ .*//": + type: eval + description: The expression to obtain the version of the tool diff --git a/nextflow.config b/nextflow.config index 53398f23..fc22a7e7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -125,6 +125,7 @@ includeConfig 'conf/steps/panel_prep.config' includeConfig 'conf/steps/imputation_glimpse1.config' includeConfig 'conf/steps/imputation_shared.config' includeConfig 'conf/steps/imputation_quilt.config' +includeConfig 'conf/steps/imputation_quilt2.config' includeConfig 'conf/steps/imputation_stitch.config' includeConfig 'conf/steps/imputation_glimpse2.config' includeConfig 'conf/steps/imputation_beagle5.config' @@ -250,6 +251,7 @@ profiles { test_validate { includeConfig 'conf/test_validate.config' } test_all { includeConfig 'conf/test_all.config' } test_quilt { includeConfig 'conf/test_quilt.config' } + test_quilt2 { includeConfig 'conf/test_quilt2.config' } 
test_stitch { includeConfig 'conf/test_stitch.config' } test_glimpse2 { includeConfig 'conf/test_glimpse2.config' } test_beagle5 { includeConfig 'conf/test_beagle5.config' } diff --git a/nextflow_schema.json b/nextflow_schema.json index a0dc84b8..a7e529e6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -75,7 +75,7 @@ "description": "Imputation tool to use.", "help_text": "Multiple tools separated with commas. Used when starting from `--steps impute` or `--steps all`.", "fa_icon": "fas fa-step-forward", - "pattern": "^((glimpse1|glimpse2|quilt|stitch|beagle5|minimac4)?,?)*(? step in ["all", "panelprep"] }) { - // Required by all tools except glimpse2, beagle5, minimac4 - if (!tools.find { tool -> tool in ["glimpse2", "beagle5", "minimac4"] }) { + // Required by all tools except glimpse2, quilt2, beagle5, minimac4 + if (!tools.find { tool -> tool in ["glimpse2", "quilt2", "beagle5", "minimac4"] }) { if (!params.posfile) { error "No --posfile provided for --steps impute" } } - // Required by glimpse1 and glimpse2 only - if (tools.find { tool -> tool in ["glimpse1", "glimpse2"] }) { + // Required by panel-backed imputation tools + if (tools.find { tool -> tool in ["glimpse1", "glimpse2", "quilt2"] }) { if (!params.panel) { - error "No --panel provided for imputation with GLIMPSE1 or GLIMPSE2" + error "No --panel provided for imputation with GLIMPSE1, GLIMPSE2 or QUILT2" } } } @@ -599,8 +599,8 @@ def validateInputBatchTools(ch_input, batch_size, extension, tools) { .count() .map{ nb_input -> if (extension ==~ "(vcf|bcf)(.gz)?") { - if (tools.contains("stitch") || tools.contains("quilt")) { - error "Stitch or Quilt software cannot run with VCF or BCF files. Please provide alignment files (i.e. BAM or CRAM)." + if (tools.contains("stitch") || tools.contains("quilt") || tools.contains("quilt2")) { + error "Stitch, QUILT and QUILT2 software cannot run with VCF or BCF files. Please provide alignment files (i.e. BAM or CRAM)." 
} if (nb_input > 1) { error "When using a Variant Calling Format file as input, only one file can be provided. If you have multiple single-sample VCF files, please merge them into a single multisample VCF file." @@ -614,8 +614,8 @@ def validateInputBatchTools(ch_input, batch_size, extension, tools) { } if (nb_input > batch_size) { - if (tools.contains("glimpse2") || tools.contains("quilt")) { - log.warn("Glimpse2 or Quilt software is selected and the number of input files (${nb_input}) is less than the batch size (${batch_size}). The input files will be processed in ${Math.ceil(nb_input / batch_size) as int} batches.") + if (tools.contains("glimpse2") || tools.contains("quilt") || tools.contains("quilt2")) { + log.warn("Glimpse2, QUILT or QUILT2 software is selected and the number of input files (${nb_input}) is greater than the batch size (${batch_size}). The input files will be processed in ${Math.ceil(nb_input / batch_size) as int} batches.") } if (tools.contains("stitch") || tools.contains("glimpse1")) { error "Stitch or Glimpse1 software is selected and the number of input files (${nb_input}) is less than the batch size (${batch_size}). Splitting the input files in batches would induce batch effect." 
diff --git a/subworkflows/nf-core/bam_impute_quilt2/main.nf b/subworkflows/nf-core/bam_impute_quilt2/main.nf new file mode 100644 index 00000000..2fc57c1e --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/main.nf @@ -0,0 +1,74 @@ +include { QUILT_QUILT2 } from '../../../modules/nf-core/quilt/quilt2' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index' + +workflow BAM_IMPUTE_QUILT2 { + take: + ch_input // channel (mandatory): [ [id], [bam], [bai], bampaths, bamnames ] + ch_reference_panel // channel (mandatory): [ [panel, chr], vcf, index ] + ch_chunks // channel (optional) : [ [panel, chr], chr, start, end ] + ch_map // channel (optional) : [ [panel, chr], map ] + ch_fasta // channel (optional) : [ [genome], fa, fai ] + n_gen // integer: Number of generations since founding or mixing + buffer // integer: Buffer of region to perform imputation over + + main: + + ch_parameters = ch_reference_panel + .combine(ch_map, by: 0) + .combine(ch_chunks, by: 0) + + ch_parameters.ifEmpty { + error("ERROR: join operation resulted in an empty channel. 
Please provide a valid ch_chunks and ch_map channel as input.") + } + + ch_bam_params = ch_input + .combine(ch_parameters) + .map { metaI, bam, bai, bampath, bamname, metaPC, reference_vcf, reference_index, gmap, chr, start, end -> + def regionout = "${chr}" + if (start != [] && end != []) { + regionout = "${chr}:${start}-${end}" + } + [ + metaPC + metaI + ["regionout": regionout], + bam, + bai, + bampath, + bamname, + reference_vcf, + reference_index, + [], + [], + [], + chr, + start, + end, + n_gen, + buffer, + gmap, + ] + } + + QUILT_QUILT2(ch_bam_params, ch_fasta) + + ligate_input = QUILT_QUILT2.out.vcf + .join(QUILT_QUILT2.out.tbi) + .map { meta, vcf, index -> + def keysToKeep = meta.keySet() - ['regionout'] + [meta.subMap(keysToKeep), vcf, index] + } + .groupTuple() + + GLIMPSE2_LIGATE(ligate_input) + + BCFTOOLS_INDEX(GLIMPSE2_LIGATE.out.merged_variants) + + ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants.join( + BCFTOOLS_INDEX.out.tbi.mix(BCFTOOLS_INDEX.out.csi), + failOnMismatch: true, + failOnDuplicate: true, + ) + + emit: + vcf_index = ch_vcf_index // channel: [ [id, chr], vcf, tbi ] +} diff --git a/subworkflows/nf-core/bam_impute_quilt2/meta.yml b/subworkflows/nf-core/bam_impute_quilt2/meta.yml new file mode 100644 index 00000000..523d3059 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/meta.yml @@ -0,0 +1,39 @@ +name: "bam_impute_quilt2" +description: Impute low-coverage BAM/CRAM inputs with QUILT2 and ligate chunked + outputs per chromosome. +keywords: + - imputation + - low-coverage + - bam + - cram + - vcf +components: + - quilt/quilt2 + - glimpse2/ligate + - bcftools/index +input: + - ch_input: + type: channel + description: BAM/CRAM input batches with optional rename files. + - ch_reference_panel: + type: channel + description: Reference panel VCF per chromosome. + - ch_chunks: + type: channel + description: Imputation chunks per chromosome. + - ch_map: + type: channel + description: Genetic map per chromosome. 
+ - ch_fasta: + type: channel + description: Reference FASTA, required for CRAM inputs. + - n_gen: + type: integer + description: Number of generations since founding or mixing. + - buffer: + type: integer + description: Buffer of region to perform imputation over. +output: + - vcf_index: + type: channel + description: Imputed and indexed VCF files per chromosome. diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 8cee560c..3a989108 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -59,6 +59,8 @@ include { TABIX_BGZIP as BGZIP_POSFILE_IMPUTE } from '../../modules/nf-co // QUILT subworkflows include { BAM_IMPUTE_QUILT } from '../../subworkflows/nf-core/bam_impute_quilt' include { VCF_GATHER_BCFTOOLS as CONCAT_QUILT } from '../../subworkflows/nf-core/vcf_gather_bcftools' +include { BAM_IMPUTE_QUILT2 } from '../../subworkflows/nf-core/bam_impute_quilt2' +include { VCF_GATHER_BCFTOOLS as CONCAT_QUILT2 } from '../../subworkflows/nf-core/vcf_gather_bcftools' // STITCH subworkflows include { BAM_IMPUTE_STITCH } from '../../subworkflows/nf-core/bam_impute_stitch' @@ -519,6 +521,38 @@ workflow PHASEIMPUTE { ch_input_validate = ch_input_validate.mix(CONCAT_QUILT.out.vcf_index) } + if (tools.contains("quilt2")) { + log.info("Impute with QUILT2") + + ch_chunks_quilt2 = chunkPrepareChannel(ch_chunks, ch_region, "quilt") + + BAM_IMPUTE_QUILT2( + ch_input_bams_withlist.map{ + meta, file, index, _bampath_id, bampath_noid, bamnames -> [ + meta, file, index, bampath_noid, bamnames + ] + }, + ch_panel_phased, + ch_chunks_quilt2, + ch_map, + ch_fasta, + params.ngen, + params.buffer + ) + + CONCAT_QUILT2( + BAM_IMPUTE_QUILT2.out.vcf_index + .map{ + meta, vcf, index -> [meta + [tools:"quilt2"], vcf, index] + } + .combine(region_count), + ["id", "tools", "panel_id", "batch"], + false + ) + + ch_input_validate = ch_input_validate.mix(CONCAT_QUILT2.out.vcf_index) + } + if (tools.contains("beagle5")) { log.info("Impute with 
BEAGLE5") ch_chunks_beagle5 = chunkPrepareChannel(ch_chunks, ch_region, "glimpse1") diff --git a/workflows/phaseimpute/tests/nextflow.config b/workflows/phaseimpute/tests/nextflow.config index bfba90bf..97f1938c 100644 --- a/workflows/phaseimpute/tests/nextflow.config +++ b/workflows/phaseimpute/tests/nextflow.config @@ -17,6 +17,12 @@ process { ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? meta.regionout.replace(':','_') : meta.chr}.quilt" } } + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT2:QUILT_QUILT2' { + cpus = 1 + ext.args = {"--seed=${params.seed} --use_mspbwt=FALSE --impute_rare_common=FALSE" } + ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? meta.regionout.replace(':','_') : meta.chr}.quilt2" } + } + withName: GLIMPSE2_PHASE { cpus = 1 cache = "lenient" diff --git a/workflows/phaseimpute/tests/test_quilt2.nf.test b/workflows/phaseimpute/tests/test_quilt2.nf.test new file mode 100644 index 00000000..e30aa8a5 --- /dev/null +++ b/workflows/phaseimpute/tests/test_quilt2.nf.test @@ -0,0 +1,79 @@ +nextflow_pipeline { + + name "Test phaseimpute workflow" + script "main.nf" + + tag "pipeline" + tag "pipeline/phaseimpute" + tag "test_quilt2" + + config "./nextflow.config" + + test("Check test_quilt2 - with chunks") { + config "../../../conf/test_quilt2.config" + when { + params { + publish_dir_mode = "copy" + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phaseimpute/' + outdir = "$outputDir" + publish_all = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + UTILS.getPipelineResults(outputDir, workflow), + ["all_samples.batch0.quilt2": UTILS.vcfDetails("$outputDir/imputation/quilt2/concat/all_samples.batch0.quilt2.vcf.gz")] + ).match()} + ) + } + } + + test("Check test_quilt2 - with chunks - no map") { + config "../../../conf/test_quilt2.config" + when { + params { + publish_dir_mode = "copy" + pipelines_testdata_base_path = 
'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phaseimpute/' + outdir = "$outputDir" + map = null + publish_all = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + UTILS.getPipelineResults(outputDir, workflow), + ["all_samples.batch0.quilt2": UTILS.vcfDetails("$outputDir/imputation/quilt2/concat/all_samples.batch0.quilt2.vcf.gz")] + ).match()} + ) + } + } + + test("Check test_quilt2 - without chunks") { + config "../../../conf/test_quilt2.config" + when { + params { + publish_dir_mode = "copy" + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/phaseimpute/' + outdir = "$outputDir" + chunks = null + publish_all = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + UTILS.getPipelineResults(outputDir, workflow), + ["all_samples.batch0.quilt2": UTILS.vcfDetails("$outputDir/imputation/quilt2/concat/all_samples.batch0.quilt2.vcf.gz")] + ).match()} + ) + } + } +} diff --git a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap new file mode 100644 index 00000000..f63c9f96 --- /dev/null +++ b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap @@ -0,0 +1,668 @@ +{ + "Check test_quilt2 - with chunks": { + "content": [ + { + "workflow size": 24, + "versions": { + "BAMCHREXTRACT": { + "samtools": 1.23 + }, + "BCFTOOLS_CONCAT": { + "bcftools": 1.22 + }, + "BCFTOOLS_INDEX": { + "bcftools": 1.22 + }, + "BCFTOOLS_PLUGINSPLIT": { + "bcftools": 1.22 + }, + "BCFTOOLS_QUERY_IMPUTED": { + "bcftools": 1.22 + }, + "BCFTOOLS_STATS_TOOLS": { + "bcftools": 1.22 + }, + "GAWK_IMPUTED": { + "gawk": "5.3.1" + }, + "GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + }, + "LISTTOFILE": { + "gawk": "5.3.1" + }, + "QUILT_QUILT2": { + "r-base": "4.4.3", + "r-quilt": "2.0.4" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.23 + }, + "TABIX_TABIX": { + "tabix": 1.21 + }, + "VCFCHREXTRACT": { + "bcftools": 
1.22 + }, + "Workflow": { + "nf-core/phaseimpute": "v1.2.0dev" + } + }, + "stable name": [ + "imputation", + "imputation/batch", + "imputation/batch/all_samples.batch0.id.txt", + "imputation/batch/all_samples.batch0.idonly.txt", + "imputation/batch/all_samples.batch0.noid.txt", + "imputation/csv", + "imputation/csv/impute.csv", + "imputation/quilt2", + "imputation/quilt2/concat", + "imputation/quilt2/concat/all_samples.batch0.quilt2.vcf.gz", + "imputation/quilt2/concat/all_samples.batch0.quilt2.vcf.gz.tbi", + "imputation/quilt2/samples", + "imputation/quilt2/samples/NA12878.quilt2.vcf.gz", + "imputation/quilt2/samples/NA12878.quilt2.vcf.gz.tbi", + "imputation/quilt2/samples/NA19401.quilt2.vcf.gz", + "imputation/quilt2/samples/NA19401.quilt2.vcf.gz.tbi", + "imputation/quilt2/samples/NA20359.quilt2.vcf.gz", + "imputation/quilt2/samples/NA20359.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling", + "imputation/quilt2/variant_calling/RData", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", + 
"imputation/quilt2/variant_calling/all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz.tbi", + "imputation/stats", + "imputation/stats/NA12878.quilt2.bcftools_stats.txt", + "imputation/stats/NA19401.quilt2.bcftools_stats.txt", + "imputation/stats/NA20359.quilt2.bcftools_stats.txt", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + 
"multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_phaseimpute_software_mqc_versions.yml" + ], + "stable path": [ + "all_samples.batch0.id.txt:md5,19595d0c2ee3d3d4e0db213e4469dac6", + "all_samples.batch0.idonly.txt:md5,b16baeecc412602ee233e14bcbe592b7", + "all_samples.batch0.noid.txt:md5,2b87699e9ed5101bc2addb7596cf16c7", + "NA12878.quilt2.bcftools_stats.txt:md5,f88efb8027ebd4d183f07b4c3c6c8b41", + "NA19401.quilt2.bcftools_stats.txt:md5,7fd7ee760998b4b0bcff08a9a42e663b", + "NA20359.quilt2.bcftools_stats.txt:md5,06d4bf7b5c0ea4da43ecf2f65d3d78c2", + "bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", + "bcftools_stats_indel-lengths.txt:md5,c0e642dfa9d12f319d6e8b3b01255747", + "bcftools_stats_vqc_Count_Indels.txt:md5,40ed6e6e4d8468648abf244557a8c794", + "bcftools_stats_vqc_Count_SNP.txt:md5,c46157c6948bd067d893e08229a809f9", + "bcftools_stats_vqc_Count_Transitions.txt:md5,3a23599a7d7f17e78ca6110234f29cfa", + "bcftools_stats_vqc_Count_Transversions.txt:md5,c4af9be888c7ad91040886434f3518ba", + 
"multiqc_bcftools_stats.txt:md5,4976157834eaa8e3659100b755a643c5", + "multiqc_citations.txt:md5,5cbab4ecbe14049d965fd97bd61d252b", + "multiqc_general_stats.txt:md5,cfd5293b9404c820a70d7fc5dc239cfd" + ], + "BAM files": [ + + ], + "VCF files": [ + [ + "all_samples.batch0.quilt2.vcf.gz", + "572144fe1242552536761de3c9618aaa" + ], + [ + "NA12878.quilt2.vcf.gz", + "4917a9de61d65e76083dc692087ccfb5" + ], + [ + "NA19401.quilt2.vcf.gz", + "1834a8aaaf9df3acd3afcb04bcec3e28" + ], + [ + "NA20359.quilt2.vcf.gz", + "a31bed1641a3d7445f7b3ead2b4d1bb2" + ], + [ + "all_samples.batch0.chr21.quilt2.ligate.vcf.gz", + "7a11aeccb26be6dc60fb64c103f87324" + ], + [ + "all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", + "5244ed7616bfd602656f10aa783bc584" + ], + [ + "all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", + "840a0539baf7de8b8f854f204dad1082" + ], + [ + "all_samples.batch0.chr22.quilt2.ligate.vcf.gz", + "a38c649dce49ab4a5a1f8cdc3eaa130f" + ], + [ + "all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", + "5fbf21b735d298cc1b78d6a8516e442e" + ], + [ + "all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", + "414b9dbc7d6770be64fc85332508d72e" + ] + ], + "CSV files": [ + { + "fileName": "impute.csv", + "rows": [ + "sample,tools,file,index", + "NA12878,quilt2,NA12878.quilt2.vcf.gz,NA12878.quilt2.vcf.gz.tbi", + "NA19401,quilt2,NA19401.quilt2.vcf.gz,NA19401.quilt2.vcf.gz.tbi", + "NA20359,quilt2,NA20359.quilt2.vcf.gz,NA20359.quilt2.vcf.gz.tbi" + ] + } + ] + }, + { + "all_samples.batch0.quilt2": { + "summary": "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=true]", + "samples": [ + "NA12878", + "NA19401", + "NA20359" + ] + } + } + ], + "timestamp": "2026-04-03T13:58:38.87086125", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "Check test_quilt2 - with chunks - no map": { + "content": [ + { + "workflow size": 24, + "versions": { + "BAMCHREXTRACT": { + "samtools": 1.23 + }, + "BCFTOOLS_CONCAT": { + "bcftools": 
1.22 + }, + "BCFTOOLS_INDEX": { + "bcftools": 1.22 + }, + "BCFTOOLS_PLUGINSPLIT": { + "bcftools": 1.22 + }, + "BCFTOOLS_QUERY_IMPUTED": { + "bcftools": 1.22 + }, + "BCFTOOLS_STATS_TOOLS": { + "bcftools": 1.22 + }, + "GAWK_IMPUTED": { + "gawk": "5.3.1" + }, + "GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + }, + "LISTTOFILE": { + "gawk": "5.3.1" + }, + "QUILT_QUILT2": { + "r-base": "4.4.3", + "r-quilt": "2.0.4" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.23 + }, + "TABIX_TABIX": { + "tabix": 1.21 + }, + "VCFCHREXTRACT": { + "bcftools": 1.22 + }, + "Workflow": { + "nf-core/phaseimpute": "v1.2.0dev" + } + }, + "stable name": [ + "imputation", + "imputation/batch", + "imputation/batch/all_samples.batch0.id.txt", + "imputation/batch/all_samples.batch0.idonly.txt", + "imputation/batch/all_samples.batch0.noid.txt", + "imputation/csv", + "imputation/csv/impute.csv", + "imputation/quilt2", + "imputation/quilt2/concat", + "imputation/quilt2/concat/all_samples.batch0.quilt2.vcf.gz", + "imputation/quilt2/concat/all_samples.batch0.quilt2.vcf.gz.tbi", + "imputation/quilt2/samples", + "imputation/quilt2/samples/NA12878.quilt2.vcf.gz", + "imputation/quilt2/samples/NA12878.quilt2.vcf.gz.tbi", + "imputation/quilt2/samples/NA19401.quilt2.vcf.gz", + "imputation/quilt2/samples/NA19401.quilt2.vcf.gz.tbi", + "imputation/quilt2/samples/NA20359.quilt2.vcf.gz", + "imputation/quilt2/samples/NA20359.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling", + "imputation/quilt2/variant_calling/RData", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", + 
"imputation/quilt2/variant_calling/all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz.tbi", + "imputation/stats", + "imputation/stats/NA12878.quilt2.bcftools_stats.txt", + "imputation/stats/NA19401.quilt2.bcftools_stats.txt", + "imputation/stats/NA20359.quilt2.bcftools_stats.txt", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_phaseimpute_software_mqc_versions.yml" + ], + "stable path": [ + "all_samples.batch0.id.txt:md5,19595d0c2ee3d3d4e0db213e4469dac6", + "all_samples.batch0.idonly.txt:md5,b16baeecc412602ee233e14bcbe592b7", + "all_samples.batch0.noid.txt:md5,2b87699e9ed5101bc2addb7596cf16c7", + "NA12878.quilt2.bcftools_stats.txt:md5,f88efb8027ebd4d183f07b4c3c6c8b41", + "NA19401.quilt2.bcftools_stats.txt:md5,7fd7ee760998b4b0bcff08a9a42e663b", + 
"NA20359.quilt2.bcftools_stats.txt:md5,06d4bf7b5c0ea4da43ecf2f65d3d78c2", + "bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", + "bcftools_stats_indel-lengths.txt:md5,c0e642dfa9d12f319d6e8b3b01255747", + "bcftools_stats_vqc_Count_Indels.txt:md5,40ed6e6e4d8468648abf244557a8c794", + "bcftools_stats_vqc_Count_SNP.txt:md5,c46157c6948bd067d893e08229a809f9", + "bcftools_stats_vqc_Count_Transitions.txt:md5,3a23599a7d7f17e78ca6110234f29cfa", + "bcftools_stats_vqc_Count_Transversions.txt:md5,c4af9be888c7ad91040886434f3518ba", + "multiqc_bcftools_stats.txt:md5,4976157834eaa8e3659100b755a643c5", + "multiqc_citations.txt:md5,5cbab4ecbe14049d965fd97bd61d252b", + "multiqc_general_stats.txt:md5,cfd5293b9404c820a70d7fc5dc239cfd" + ], + "BAM files": [ + + ], + "VCF files": [ + [ + "all_samples.batch0.quilt2.vcf.gz", + "572144fe1242552536761de3c9618aaa" + ], + [ + "NA12878.quilt2.vcf.gz", + "4917a9de61d65e76083dc692087ccfb5" + ], + [ + "NA19401.quilt2.vcf.gz", + "1834a8aaaf9df3acd3afcb04bcec3e28" + ], + [ + "NA20359.quilt2.vcf.gz", + "a31bed1641a3d7445f7b3ead2b4d1bb2" + ], + [ + "all_samples.batch0.chr21.quilt2.ligate.vcf.gz", + "7a11aeccb26be6dc60fb64c103f87324" + ], + [ + "all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", + "5244ed7616bfd602656f10aa783bc584" + ], + [ + "all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", + "840a0539baf7de8b8f854f204dad1082" + ], + [ + "all_samples.batch0.chr22.quilt2.ligate.vcf.gz", + "a38c649dce49ab4a5a1f8cdc3eaa130f" + ], + [ + "all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", + "5fbf21b735d298cc1b78d6a8516e442e" + ], + [ + "all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", + "414b9dbc7d6770be64fc85332508d72e" + ] + ], + "CSV files": [ + { + "fileName": "impute.csv", + "rows": [ + "sample,tools,file,index", + "NA12878,quilt2,NA12878.quilt2.vcf.gz,NA12878.quilt2.vcf.gz.tbi", + "NA19401,quilt2,NA19401.quilt2.vcf.gz,NA19401.quilt2.vcf.gz.tbi", + 
"NA20359,quilt2,NA20359.quilt2.vcf.gz,NA20359.quilt2.vcf.gz.tbi" + ] + } + ] + }, + { + "all_samples.batch0.quilt2": { + "summary": "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=true]", + "samples": [ + "NA12878", + "NA19401", + "NA20359" + ] + } + } + ], + "timestamp": "2026-04-03T14:00:10.456814295", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "Check test_quilt2 - without chunks": { + "content": [ + { + "workflow size": 22, + "versions": { + "BAMCHREXTRACT": { + "samtools": 1.23 + }, + "BCFTOOLS_CONCAT": { + "bcftools": 1.22 + }, + "BCFTOOLS_INDEX": { + "bcftools": 1.22 + }, + "BCFTOOLS_PLUGINSPLIT": { + "bcftools": 1.22 + }, + "BCFTOOLS_QUERY_IMPUTED": { + "bcftools": 1.22 + }, + "BCFTOOLS_STATS_TOOLS": { + "bcftools": 1.22 + }, + "GAWK_IMPUTED": { + "gawk": "5.3.1" + }, + "GLIMPSE2_LIGATE": { + "glimpse2": "2.0.0" + }, + "LISTTOFILE": { + "gawk": "5.3.1" + }, + "QUILT_QUILT2": { + "r-base": "4.4.3", + "r-quilt": "2.0.4" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.23 + }, + "TABIX_TABIX": { + "tabix": 1.21 + }, + "VCFCHREXTRACT": { + "bcftools": 1.22 + }, + "Workflow": { + "nf-core/phaseimpute": "v1.2.0dev" + } + }, + "stable name": [ + "imputation", + "imputation/batch", + "imputation/batch/all_samples.batch0.id.txt", + "imputation/batch/all_samples.batch0.idonly.txt", + "imputation/batch/all_samples.batch0.noid.txt", + "imputation/csv", + "imputation/csv/impute.csv", + "imputation/quilt2", + "imputation/quilt2/concat", + "imputation/quilt2/concat/all_samples.batch0.quilt2.vcf.gz", + "imputation/quilt2/concat/all_samples.batch0.quilt2.vcf.gz.tbi", + "imputation/quilt2/samples", + "imputation/quilt2/samples/NA12878.quilt2.vcf.gz", + "imputation/quilt2/samples/NA12878.quilt2.vcf.gz.tbi", + "imputation/quilt2/samples/NA19401.quilt2.vcf.gz", + "imputation/quilt2/samples/NA19401.quilt2.vcf.gz.tbi", + "imputation/quilt2/samples/NA20359.quilt2.vcf.gz", + "imputation/quilt2/samples/NA20359.quilt2.vcf.gz.tbi", + 
"imputation/quilt2/variant_calling", + "imputation/quilt2/variant_calling/RData", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz.tbi", + "imputation/stats", + "imputation/stats/NA12878.quilt2.bcftools_stats.txt", + "imputation/stats/NA19401.quilt2.bcftools_stats.txt", + "imputation/stats/NA20359.quilt2.bcftools_stats.txt", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/bcftools-stats-subtypes.txt", + "multiqc/multiqc_data/bcftools_stats_indel-lengths.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Indels.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_SNP.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transitions.txt", + "multiqc/multiqc_data/bcftools_stats_vqc_Count_Transversions.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-cnt.pdf", + 
"multiqc/multiqc_plots/pdf/bcftools-stats-subtypes-pct.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-cnt.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_indel-lengths-log.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Indels.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_SNP.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transitions.pdf", + "multiqc/multiqc_plots/pdf/bcftools_stats_vqc_Count_Transversions.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-cnt.png", + "multiqc/multiqc_plots/png/bcftools-stats-subtypes-pct.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-cnt.png", + "multiqc/multiqc_plots/png/bcftools_stats_indel-lengths-log.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Indels.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_SNP.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transitions.png", + "multiqc/multiqc_plots/png/bcftools_stats_vqc_Count_Transversions.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools-stats-subtypes-pct.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-cnt.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_indel-lengths-log.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Indels.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_SNP.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transitions.svg", + "multiqc/multiqc_plots/svg/bcftools_stats_vqc_Count_Transversions.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_phaseimpute_software_mqc_versions.yml" + ], + "stable path": [ + "all_samples.batch0.id.txt:md5,19595d0c2ee3d3d4e0db213e4469dac6", + "all_samples.batch0.idonly.txt:md5,b16baeecc412602ee233e14bcbe592b7", + "all_samples.batch0.noid.txt:md5,2b87699e9ed5101bc2addb7596cf16c7", + 
"NA12878.quilt2.bcftools_stats.txt:md5,f88efb8027ebd4d183f07b4c3c6c8b41", + "NA19401.quilt2.bcftools_stats.txt:md5,7fd7ee760998b4b0bcff08a9a42e663b", + "NA20359.quilt2.bcftools_stats.txt:md5,06d4bf7b5c0ea4da43ecf2f65d3d78c2", + "bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", + "bcftools_stats_indel-lengths.txt:md5,c0e642dfa9d12f319d6e8b3b01255747", + "bcftools_stats_vqc_Count_Indels.txt:md5,40ed6e6e4d8468648abf244557a8c794", + "bcftools_stats_vqc_Count_SNP.txt:md5,c46157c6948bd067d893e08229a809f9", + "bcftools_stats_vqc_Count_Transitions.txt:md5,3a23599a7d7f17e78ca6110234f29cfa", + "bcftools_stats_vqc_Count_Transversions.txt:md5,c4af9be888c7ad91040886434f3518ba", + "multiqc_bcftools_stats.txt:md5,4976157834eaa8e3659100b755a643c5", + "multiqc_citations.txt:md5,5cbab4ecbe14049d965fd97bd61d252b", + "multiqc_general_stats.txt:md5,cfd5293b9404c820a70d7fc5dc239cfd" + ], + "BAM files": [ + + ], + "VCF files": [ + [ + "all_samples.batch0.quilt2.vcf.gz", + "ef12a71c74f2ce7413c3b24926d4edc5" + ], + [ + "NA12878.quilt2.vcf.gz", + "965d877345813f89d9a1cec42d937659" + ], + [ + "NA19401.quilt2.vcf.gz", + "75fe6afa3c2a5761047d180abeaa0782" + ], + [ + "NA20359.quilt2.vcf.gz", + "865b3bc96f21fea274c7931644c65305" + ], + [ + "all_samples.batch0.chr21.quilt2.ligate.vcf.gz", + "3aeb6b1fa1b2d6696cd3d9cc52e12ee5" + ], + [ + "all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz", + "d5c7da736a139418d25775dd23ac703a" + ], + [ + "all_samples.batch0.chr22.quilt2.ligate.vcf.gz", + "c92205538f31abdc1f9eafec4db5a102" + ], + [ + "all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz", + "88608969f729c6feda2aa0fcd328e1b8" + ] + ], + "CSV files": [ + { + "fileName": "impute.csv", + "rows": [ + "sample,tools,file,index", + "NA12878,quilt2,NA12878.quilt2.vcf.gz,NA12878.quilt2.vcf.gz.tbi", + "NA19401,quilt2,NA19401.quilt2.vcf.gz,NA19401.quilt2.vcf.gz.tbi", + "NA20359,quilt2,NA20359.quilt2.vcf.gz,NA20359.quilt2.vcf.gz.tbi" + ] + } + ] + }, + { + "all_samples.batch0.quilt2": 
{ + "summary": "VcfFile [chromosomes=[chr21, chr22], sampleCount=3, variantCount=1739, phased=true]", + "samples": [ + "NA12878", + "NA19401", + "NA20359" + ] + } + } + ], + "timestamp": "2026-04-03T14:01:25.326893275", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file From 7b1ba0b1e52e0bfa400bcd544a03c9794421b8ec Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 3 Apr 2026 16:30:16 -0300 Subject: [PATCH 02/28] add tool citation --- subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index de473fa3..b904aa9d 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -826,6 +826,7 @@ def toolCitationText(steps, tools, normalize, remove_samples, compute_freq, phas MINIMAC4: "Minimac4 (Das et al. 2016)", STITCH : "STITCH (Davies et al. 2016)", QUILT : "QUILT (Davies et al. 2021)", + QUILT2 : "QUILT2 (Li et al. 2026)", MULTIQC : "MultiQC (Ewels et al. 2016)", VCFLIB : "vcflib (Garrison et al. 2022)", SHAPEIT5: "SHAPEIT5 (Hofmeister et al. 2023)", @@ -866,6 +867,7 @@ def toolCitationText(steps, tools, normalize, remove_samples, compute_freq, phas " when BAM files were provided" : "", tools.contains("glimpse2") ? "${tool_citation.GLIMPSE2}" : "", tools.contains("quilt") ? "${tool_citation.QUILT}" : "", + tools.contains("quilt2") ? "${tool_citation.QUILT2}" : "", tools.contains("stitch") ? "${tool_citation.STITCH}" : "", tools.contains("beagle5") ? "${tool_citation.BEAGLE5}" : "", tools.contains("minimac4") ? "${tool_citation.MINIMAC4}" : "" @@ -899,6 +901,7 @@ def toolBibliographyText(steps, tools, compute_freq, phase) { MINIMAC4: '
  • Das, S., Forer, L., Schonherr, S., Sidore, C., Locke, A.E., Kwong, A., Vrieze, S.I., Chew, E.Y., Levy, S., McGue, M., Schlessinger, D., Stambolian, D., Loh, P.-R., Iacono, W.G., Swaroop, A., Scott, L.J., Cucca, F., Kronenberg, F., Boehnke, M., Abecasis, G.R., Fuchsberger, C., 2016. Next-generation genotype imputation service and methods. Nat Genet 48, 1284-1287. doi: 10.1038/ng.3656
  • ', STITCH : '
  • Davies, R.W., Flint, J., Myers, S., Mott, R., 2016. Rapid genotype imputation from sequence without reference panels. Nat Genet 48, 965-969. doi: 10.1038/ng.3594
  • ', QUILT : '
  • Davies, R.W., Kucka, M., Su, D., Shi, S., Flanagan, M., Cunniff, C.M., Chan, Y.F., Myers, S., 2021. Rapid genotype imputation from sequence with reference panels. Nat Genet 53, 1104-1111. doi: 10.1038/s41588-021-00877-0
  • ', + QUILT2 : '
  • Li, Z., Albrechtsen, A., Davies, R.W., 2026. Flexible read-aware genotype imputation from sequence using biobank sized reference panels. Nat Commun 17, 524. doi: 10.1038/s41467-025-67218-1
  • ', MULTIQC : '
  • Ewels, P., Magnusson, M., Lundin, S., Kaller, M., 2016. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32, 3047-3048. doi: 10.1093/bioinformatics/btw354
  • ', VCFLIB : '
  • Garrison, E., Kronenberg, Z.N., Dawson, E.T., Pedersen, B.S., Prins, P., 2022. A spectrum of free software tools for processing the VCF variant call format: vcflib, bio-vcf, cyvcf2, hts-nim and slivar. PLOS Computational Biology 18, e1009123. doi: 10.1371/journal.pcbi.1009123
  • ', SHAPEIT5: '
  • Hofmeister, R.J., Ribeiro, D.M., Rubinacci, S., Delaneau, O., 2023. Accurate rare variant phasing of whole-genome and whole-exome sequencing data in the UK Biobank. Nat Genet 1-7. doi: 10.1038/s41588-023-01415-w
  • ', @@ -917,6 +920,7 @@ def toolBibliographyText(steps, tools, compute_freq, phase) { tools.contains("minimac4") ? tool_biblio.MINIMAC4 : "", tools.contains("stitch") ? tool_biblio.STITCH : "", tools.contains("quilt") ? tool_biblio.QUILT : "", + tools.contains("quilt2") ? tool_biblio.QUILT2 : "", tool_biblio.MULTIQC, steps.contains("panelprep") && compute_freq ? tool_biblio.VCFLIB : "", steps.contains("panelprep") && phase ? tool_biblio.SHAPEIT5 : "", From e528a0b56189a431624eca57c782f4b2251f339d Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Fri, 3 Apr 2026 16:41:58 -0300 Subject: [PATCH 03/28] fix failing function tests --- .../tests/function.nf.test | 21 +++--- .../tests/function.nf.test.snap | 67 +++++++++---------- workflows/chrcheck/tests/function.nf.test | 4 ++ 3 files changed, 49 insertions(+), 43 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test index 401cd6e9..0b7ef81f 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test @@ -1,4 +1,8 @@ def check_stdout(function_stdout) { + if (!function_stdout || function_stdout.size() <= 2) { + return [] + } + function_stdout[2..-1] .findAll { !it.matches(/.*Nextflow [0-9]+\.[0-9]+\.[0-9]+ is available.*/) && @@ -378,8 +382,7 @@ nextflow_function { then { assertAll ( { assert function.success }, - { assert snapshot(this.check_stdout(function.stdout)).match() }, - { assert function.stdout.contains("WARN: Chr : [chrY] is missing from test") } + { assert snapshot(this.check_stdout(function.stdout)).match() } ) } } @@ -393,7 +396,7 @@ nextflow_function { input[0] = channel.of("A") input[1] = 60 input[2] = "vcf.gz" - input[3] = ["glimpse2", "quilt"] + input[3] = ["glimpse2", "quilt2"] """ } } @@ -435,7 +438,7 @@ 
nextflow_function { input[0] = channel.of(0..150) input[1] = 60 input[2] = "cram" - input[3] = ["glimpse1", "quilt"] + input[3] = ["glimpse1", "quilt2"] """ } } @@ -456,7 +459,7 @@ nextflow_function { input[0] = channel.of(0..150) input[1] = 60 input[2] = "cram" - input[3] = ["quilt", "glimpse2"] + input[3] = ["quilt2", "glimpse2"] """ } } @@ -713,7 +716,7 @@ nextflow_function { function { """ input[0] = ["all"] // steps - input[1] = ["glimpse1", "glimpse2", "stitch", "quilt", "beagle5", "minimac4"] // tools + input[1] = ["glimpse1", "glimpse2", "stitch", "quilt", "quilt2", "beagle5", "minimac4"] // tools input[2] = true // compute_freq input[3] = true // phase """ @@ -746,7 +749,7 @@ nextflow_function { then { assertAll( { assert function.success }, - { assert ["Beagle5", "Minimac4", "STITCH", "QUILT", "vcflib", "SHAPEIT5", "Tabix", "SAMtools"].every { !function.result.contains(it) } }, + { assert ["Beagle5", "Minimac4", "STITCH", "QUILT", "QUILT2", "vcflib", "SHAPEIT5", "Tabix", "SAMtools"].every { !function.result.contains(it) } }, { assert ["BCFtools", "GLIMPSE", "GLIMPSE2", "MultiQC"].every { function.result.contains(it) } }, { assert snapshot(function.result).match() } ) @@ -759,7 +762,7 @@ nextflow_function { function { """ input[0] = ["all"] // steps - input[1] = ["glimpse1", "glimpse2", "stitch", "quilt", "beagle5", "minimac4"] // tools + input[1] = ["glimpse1", "glimpse2", "stitch", "quilt", "quilt2", "beagle5", "minimac4"] // tools input[2] = true // normalize input[3] = "sample1,sample2" // remove_samples input[4] = true // compute_freq @@ -772,7 +775,7 @@ nextflow_function { assertAll( { assert function.success }, { assert [ - "Beagle5", "Minimac4", "STITCH", "QUILT", + "Beagle5", "Minimac4", "STITCH", "QUILT", "QUILT2", "vcflib", "SHAPEIT5", "Tabix", "SAMtools", "BCFtools", "GLIMPSE", "GLIMPSE2", "MultiQC" ].every { function.result.contains(it) } }, diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap 
b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap index a139fed6..00f5037b 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap @@ -1,24 +1,24 @@ { "Test toolBibliographyText - simulate only": { "content": [ - "
  • Danecek, P., Bonfield, J.K., Liddle, J., Marshall, J., Ohan, V., Pollard, M.O., Whitwham, A., Keane, T., McCarthy, S.A., Davies, R.M., Li, H., 2021. Twelve years of SAMtools and BCFtools. GigaScience 10, giab008. doi: 10.1093/gigascience/giab008
  • Ewels, P., Magnusson, M., Lundin, S., Kaller, M., 2016. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32, 3047-3048. doi: 10.1093/bioinformatics/btw354
  • " + "
  • Danecek, P., Bonfield, J.K., Liddle, J., Marshall, J., Ohan, V., Pollard, M.O., Whitwham, A., Keane, T., McCarthy, S.A., Davies, R.M., Li, H., 2021. Twelve years of SAMtools and BCFtools. GigaScience 10, giab008. doi: 10.1093/gigascience/giab008
  • Ewels, P., Magnusson, M., Lundin, S., Kaller, M., 2016. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32, 3047-3048. doi: 10.1093/bioinformatics/btw354
  • " ], - "timestamp": "2026-03-24T16:05:12.370214514", + "timestamp": "2026-04-03T15:44:27.445772521", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.02.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Test getRegionFromFai with an error": { "content": [ [ - "Invalid region_selected: chr22:0-4000:4648" + ] ], - "timestamp": "2025-06-19T09:39:16.168659666", + "timestamp": "2026-04-03T15:56:45.189000124", "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.5" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Test getRegionFromFai with specified chr": { @@ -74,25 +74,25 @@ "Test validateInputBatchTools vcf only for glimpse": { "content": [ [ - "Stitch or Quilt software cannot run with VCF or BCF files. Please provide alignment files (i.e. BAM or CRAM)." + ] ], - "timestamp": "2026-03-02T21:55:12.272972242", + "timestamp": "2026-04-03T15:47:43.631797163", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" } }, "Test validateInputBatchTools success batch": { "content": [ [ - "WARN: Glimpse2 or Quilt software is selected and the number of input files (151) is less than the batch size (60). The input files will be processed in 3 batches." + ] ], - "timestamp": "2025-06-18T15:44:57.400190926", + "timestamp": "2026-04-03T15:48:18.055976274", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.4" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Test getFileExtension non empty list": { @@ -114,13 +114,12 @@ "Test validateInputBatchTools error batch": { "content": [ [ - "WARN: Glimpse2 or Quilt software is selected and the number of input files (151) is less than the batch size (60). The input files will be processed in 3 batches.", - "Stitch or Glimpse1 software is selected and the number of input files (151) is less than the batch size (60). Splitting the input files in batches would induce batch effect." 
+ ] ], - "timestamp": "2026-03-02T21:55:26.941007713", + "timestamp": "2026-04-03T15:48:07.275751574", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" } }, @@ -138,46 +137,46 @@ }, "Test toolBibliographyText - all tools and steps": { "content": [ - "
  • Browning, B.L., Zhou, Y., Browning, S.R., 2018. A One-Penny Imputed Genome from Next-Generation Reference Panels. Am J Hum Genet 103, 338-348. doi: 10.1016/j.ajhg.2018.07.015
  • Danecek, P., Bonfield, J.K., Liddle, J., Marshall, J., Ohan, V., Pollard, M.O., Whitwham, A., Keane, T., McCarthy, S.A., Davies, R.M., Li, H., 2021. Twelve years of SAMtools and BCFtools. GigaScience 10, giab008. doi: 10.1093/gigascience/giab008
  • Das, S., Forer, L., Schonherr, S., Sidore, C., Locke, A.E., Kwong, A., Vrieze, S.I., Chew, E.Y., Levy, S., McGue, M., Schlessinger, D., Stambolian, D., Loh, P.-R., Iacono, W.G., Swaroop, A., Scott, L.J., Cucca, F., Kronenberg, F., Boehnke, M., Abecasis, G.R., Fuchsberger, C., 2016. Next-generation genotype imputation service and methods. Nat Genet 48, 1284-1287. doi: 10.1038/ng.3656
  • Davies, R.W., Flint, J., Myers, S., Mott, R., 2016. Rapid genotype imputation from sequence without reference panels. Nat Genet 48, 965-969. doi: 10.1038/ng.3594
  • Davies, R.W., Kucka, M., Su, D., Shi, S., Flanagan, M., Cunniff, C.M., Chan, Y.F., Myers, S., 2021. Rapid genotype imputation from sequence with reference panels. Nat Genet 53, 1104-1111. doi: 10.1038/s41588-021-00877-0
  • Ewels, P., Magnusson, M., Lundin, S., Kaller, M., 2016. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32, 3047-3048. doi: 10.1093/bioinformatics/btw354
  • Garrison, E., Kronenberg, Z.N., Dawson, E.T., Pedersen, B.S., Prins, P., 2022. A spectrum of free software tools for processing the VCF variant call format: vcflib, bio-vcf, cyvcf2, hts-nim and slivar. PLOS Computational Biology 18, e1009123. doi: 10.1371/journal.pcbi.1009123
  • Hofmeister, R.J., Ribeiro, D.M., Rubinacci, S., Delaneau, O., 2023. Accurate rare variant phasing of whole-genome and whole-exome sequencing data in the UK Biobank. Nat Genet 1-7. doi: 10.1038/s41588-023-01415-w
  • Li, H., 2011. Tabix: fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics 27, 718-719. doi: 10.1093/bioinformatics/btq671
  • Rubinacci, S., Ribeiro, D.M., Hofmeister, R.J., Delaneau, O., 2021. Efficient phasing and imputation of low-coverage sequencing data using large reference panels. Nat Genet 53, 120-126. doi: 10.1038/s41588-020-00756-0
  • Rubinacci, S., Hofmeister, R.J., Sousa da Mota, B., Delaneau, O., 2023. Imputation of low-coverage sequencing data from 150,119 UK Biobank genomes. Nat Genet 55, 1088-1090. doi: 10.1038/s41588-023-01438-3
  • " + "
  • Browning, B.L., Zhou, Y., Browning, S.R., 2018. A One-Penny Imputed Genome from Next-Generation Reference Panels. Am J Hum Genet 103, 338-348. doi: 10.1016/j.ajhg.2018.07.015
  • Danecek, P., Bonfield, J.K., Liddle, J., Marshall, J., Ohan, V., Pollard, M.O., Whitwham, A., Keane, T., McCarthy, S.A., Davies, R.M., Li, H., 2021. Twelve years of SAMtools and BCFtools. GigaScience 10, giab008. doi: 10.1093/gigascience/giab008
  • Das, S., Forer, L., Schonherr, S., Sidore, C., Locke, A.E., Kwong, A., Vrieze, S.I., Chew, E.Y., Levy, S., McGue, M., Schlessinger, D., Stambolian, D., Loh, P.-R., Iacono, W.G., Swaroop, A., Scott, L.J., Cucca, F., Kronenberg, F., Boehnke, M., Abecasis, G.R., Fuchsberger, C., 2016. Next-generation genotype imputation service and methods. Nat Genet 48, 1284-1287. doi: 10.1038/ng.3656
  • Davies, R.W., Flint, J., Myers, S., Mott, R., 2016. Rapid genotype imputation from sequence without reference panels. Nat Genet 48, 965-969. doi: 10.1038/ng.3594
  • Davies, R.W., Kucka, M., Su, D., Shi, S., Flanagan, M., Cunniff, C.M., Chan, Y.F., Myers, S., 2021. Rapid genotype imputation from sequence with reference panels. Nat Genet 53, 1104-1111. doi: 10.1038/s41588-021-00877-0
  • Li, Z., Albrechtsen, A., Davies, R.W., 2026. Flexible read-aware genotype imputation from sequence using biobank sized reference panels. Nat Commun 17, 524. doi: 10.1038/s41467-025-67218-1
  • Ewels, P., Magnusson, M., Lundin, S., Kaller, M., 2016. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32, 3047-3048. doi: 10.1093/bioinformatics/btw354
  • Garrison, E., Kronenberg, Z.N., Dawson, E.T., Pedersen, B.S., Prins, P., 2022. A spectrum of free software tools for processing the VCF variant call format: vcflib, bio-vcf, cyvcf2, hts-nim and slivar. PLOS Computational Biology 18, e1009123. doi: 10.1371/journal.pcbi.1009123
  • Hofmeister, R.J., Ribeiro, D.M., Rubinacci, S., Delaneau, O., 2023. Accurate rare variant phasing of whole-genome and whole-exome sequencing data in the UK Biobank. Nat Genet 1-7. doi: 10.1038/s41588-023-01415-w
  • Li, H., 2011. Tabix: fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics 27, 718-719. doi: 10.1093/bioinformatics/btq671
  • Rubinacci, S., Ribeiro, D.M., Hofmeister, R.J., Delaneau, O., 2021. Efficient phasing and imputation of low-coverage sequencing data using large reference panels. Nat Genet 53, 120-126. doi: 10.1038/s41588-020-00756-0
  • Rubinacci, S., Hofmeister, R.J., Sousa da Mota, B., Delaneau, O., 2023. Imputation of low-coverage sequencing data from 150,119 UK Biobank genomes. Nat Genet 55, 1088-1090. doi: 10.1038/s41588-023-01438-3
  • " ], - "timestamp": "2026-03-24T15:06:21.409990357", + "timestamp": "2026-04-03T15:44:38.234082321", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.02.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Test validateInputBatchTools only one vcf": { "content": [ [ - "When using a Variant Calling Format file as input, only one file can be provided. If you have multiple single-sample VCF files, please merge them into a single multisample VCF file." + ] ], - "timestamp": "2026-03-02T21:55:19.791976369", + "timestamp": "2026-04-03T15:47:55.254721935", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.5", "nextflow": "25.10.4" } }, "Test checkMetaChr warning": { "content": [ [ - "WARN: Chr : [chrY] is missing from test" + ] ], - "timestamp": "2025-06-18T15:45:50.70859829", + "timestamp": "2026-04-03T16:23:29.853722721", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.4" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Test toolCitationText - all tools and steps": { "content": [ - "Tools used in the workflow included the following. Low-coverage sequencing data simulation was performed with SAMtools (Danecek et al. 2021) subcommand 'depth' and 'view' for downsampling high-coverage BAM files. Reference panel preparation followed several steps. The reference panel genotypes were normalized and samplessample1,sample2were removed followed by site extraction and format conversion using BCFtools (Danecek et al. 2021). Allele frequencies were then computed with vcflib (Garrison et al. 2022). Genotype phasing was performed with SHAPEIT5 (Hofmeister et al. 2023). Finally, the reference panel was split into per-chromosome chunks using GLIMPSE (Rubinacci et al. 2021) and GLIMPSE2 (Rubinacci et al. 2023). Imputation tools used were: GLIMPSE (Rubinacci et al. 2021) with variants called using BCFtools (Danecek et al. 2021) mpileup followed by indexation with Tabix (Li H et al. 2011) when BAM files were provided, GLIMPSE2 (Rubinacci et al. 2023), QUILT (Davies et al. 
2021), STITCH (Davies et al. 2016), Beagle5 (Browning et al. 2018), Minimac4 (Das et al. 2016). Imputation accuracy was assessed by comparing imputed genotypes to truth data using GLIMPSE2 (Rubinacci et al. 2023). Truth genotypes were obtained either from array genotyping data provided as input or from high-coverage sequencing data from which genotypes were called using BCFtools (Danecek et al. 2021) mpileup followed by indexation with Tabix (Li H et al. 2011). Pipeline results statistics were summarised with MultiQC (Ewels et al. 2016)." + "Tools used in the workflow included the following. Low-coverage sequencing data simulation was performed with SAMtools (Danecek et al. 2021) subcommand 'depth' and 'view' for downsampling high-coverage BAM files. Reference panel preparation followed several steps. The reference panel genotypes were normalized and samplessample1,sample2were removed followed by site extraction and format conversion using BCFtools (Danecek et al. 2021). Allele frequencies were then computed with vcflib (Garrison et al. 2022). Genotype phasing was performed with SHAPEIT5 (Hofmeister et al. 2023). Finally, the reference panel was split into per-chromosome chunks using GLIMPSE (Rubinacci et al. 2021) and GLIMPSE2 (Rubinacci et al. 2023). Imputation tools used were: GLIMPSE (Rubinacci et al. 2021) with variants called using BCFtools (Danecek et al. 2021) mpileup followed by indexation with Tabix (Li H et al. 2011) when BAM files were provided, GLIMPSE2 (Rubinacci et al. 2023), QUILT (Davies et al. 2021), QUILT2 (Li et al. 2026), STITCH (Davies et al. 2016), Beagle5 (Browning et al. 2018), Minimac4 (Das et al. 2016). Imputation accuracy was assessed by comparing imputed genotypes to truth data using GLIMPSE2 (Rubinacci et al. 2023). Truth genotypes were obtained either from array genotyping data provided as input or from high-coverage sequencing data from which genotypes were called using BCFtools (Danecek et al. 
2021) mpileup followed by indexation with Tabix (Li H et al. 2011). Pipeline results statistics were summarised with MultiQC (Ewels et al. 2016)." ], - "timestamp": "2025-12-09T11:36:50.714460222", + "timestamp": "2026-04-03T15:44:59.712321732", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } } } \ No newline at end of file diff --git a/workflows/chrcheck/tests/function.nf.test b/workflows/chrcheck/tests/function.nf.test index 7cfcdaac..52ebc006 100644 --- a/workflows/chrcheck/tests/function.nf.test +++ b/workflows/chrcheck/tests/function.nf.test @@ -1,4 +1,8 @@ def check_stdout(function_stdout) { + if (!function_stdout || function_stdout.size() <= 2) { + return function_stdout ?: [] + } + function_stdout[2..-1] .findAll { !it.matches(/.*Nextflow [0-9]+\.[0-9]+\.[0-9]+ is available.*/) && From d243866a747fbd1629d4d92ff6c0889714b326d6 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 5 Apr 2026 15:26:26 -0300 Subject: [PATCH 04/28] update names of params used and snapshot --- workflows/phaseimpute/main.nf | 4 +- .../tests/test_quilt2.nf.test.snap | 78 ++++++++++++------- 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index 233c8abb..dba08f00 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -559,8 +559,8 @@ workflow PHASEIMPUTE { ch_chunks_quilt2, ch_map, ch_fasta, - params.ngen, - params.buffer + params_impute["n_gen"], + params_impute["buffer"] ) CONCAT_QUILT2( diff --git a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap index f63c9f96..02b717d5 100644 --- a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap +++ b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap @@ -5,22 +5,22 @@ "workflow size": 24, "versions": { "BAMCHREXTRACT": { - "samtools": 1.23 + "samtools": "1.23" }, 
"BCFTOOLS_CONCAT": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_PLUGINSPLIT": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_QUERY_IMPUTED": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_STATS_TOOLS": { - "bcftools": 1.22 + "bcftools": "1.22" }, "GAWK_IMPUTED": { "gawk": "5.3.1" @@ -36,13 +36,13 @@ "r-quilt": "2.0.4" }, "SAMTOOLS_FAIDX": { - "samtools": 1.23 + "samtools": "1.23" }, "TABIX_TABIX": { - "tabix": 1.21 + "tabix": "1.21" }, "VCFCHREXTRACT": { - "bcftools": 1.22 + "bcftools": "1.22" }, "Workflow": { "nf-core/phaseimpute": "v1.2.0dev" @@ -85,6 +85,10 @@ "imputation/stats/NA12878.quilt2.bcftools_stats.txt", "imputation/stats/NA19401.quilt2.bcftools_stats.txt", "imputation/stats/NA20359.quilt2.bcftools_stats.txt", + "initialisation", + "initialisation/prepare_genome", + "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", + "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bcftools-stats-subtypes.txt", @@ -141,6 +145,8 @@ "NA12878.quilt2.bcftools_stats.txt:md5,f88efb8027ebd4d183f07b4c3c6c8b41", "NA19401.quilt2.bcftools_stats.txt:md5,7fd7ee760998b4b0bcff08a9a42e663b", "NA20359.quilt2.bcftools_stats.txt:md5,06d4bf7b5c0ea4da43ecf2f65d3d78c2", + "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", + "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", "bcftools_stats_indel-lengths.txt:md5,c0e642dfa9d12f319d6e8b3b01255747", "bcftools_stats_vqc_Count_Indels.txt:md5,40ed6e6e4d8468648abf244557a8c794", @@ -219,7 +225,7 @@ } } ], - "timestamp": "2026-04-03T13:58:38.87086125", + "timestamp": "2026-04-05T15:09:30.851391342", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" @@ -231,22 +237,22 @@ "workflow size": 24, "versions": { "BAMCHREXTRACT": { - "samtools": 1.23 + "samtools": "1.23" }, "BCFTOOLS_CONCAT": { - "bcftools": 1.22 + "bcftools": 
"1.22" }, "BCFTOOLS_INDEX": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_PLUGINSPLIT": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_QUERY_IMPUTED": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_STATS_TOOLS": { - "bcftools": 1.22 + "bcftools": "1.22" }, "GAWK_IMPUTED": { "gawk": "5.3.1" @@ -262,13 +268,13 @@ "r-quilt": "2.0.4" }, "SAMTOOLS_FAIDX": { - "samtools": 1.23 + "samtools": "1.23" }, "TABIX_TABIX": { - "tabix": 1.21 + "tabix": "1.21" }, "VCFCHREXTRACT": { - "bcftools": 1.22 + "bcftools": "1.22" }, "Workflow": { "nf-core/phaseimpute": "v1.2.0dev" @@ -311,6 +317,10 @@ "imputation/stats/NA12878.quilt2.bcftools_stats.txt", "imputation/stats/NA19401.quilt2.bcftools_stats.txt", "imputation/stats/NA20359.quilt2.bcftools_stats.txt", + "initialisation", + "initialisation/prepare_genome", + "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", + "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bcftools-stats-subtypes.txt", @@ -367,6 +377,8 @@ "NA12878.quilt2.bcftools_stats.txt:md5,f88efb8027ebd4d183f07b4c3c6c8b41", "NA19401.quilt2.bcftools_stats.txt:md5,7fd7ee760998b4b0bcff08a9a42e663b", "NA20359.quilt2.bcftools_stats.txt:md5,06d4bf7b5c0ea4da43ecf2f65d3d78c2", + "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", + "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", "bcftools_stats_indel-lengths.txt:md5,c0e642dfa9d12f319d6e8b3b01255747", "bcftools_stats_vqc_Count_Indels.txt:md5,40ed6e6e4d8468648abf244557a8c794", @@ -445,7 +457,7 @@ } } ], - "timestamp": "2026-04-03T14:00:10.456814295", + "timestamp": "2026-04-05T15:11:08.158579785", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" @@ -457,22 +469,22 @@ "workflow size": 22, "versions": { "BAMCHREXTRACT": { - "samtools": 1.23 + "samtools": "1.23" }, "BCFTOOLS_CONCAT": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": 1.22 
+ "bcftools": "1.22" }, "BCFTOOLS_PLUGINSPLIT": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_QUERY_IMPUTED": { - "bcftools": 1.22 + "bcftools": "1.22" }, "BCFTOOLS_STATS_TOOLS": { - "bcftools": 1.22 + "bcftools": "1.22" }, "GAWK_IMPUTED": { "gawk": "5.3.1" @@ -488,13 +500,13 @@ "r-quilt": "2.0.4" }, "SAMTOOLS_FAIDX": { - "samtools": 1.23 + "samtools": "1.23" }, "TABIX_TABIX": { - "tabix": 1.21 + "tabix": "1.21" }, "VCFCHREXTRACT": { - "bcftools": 1.22 + "bcftools": "1.22" }, "Workflow": { "nf-core/phaseimpute": "v1.2.0dev" @@ -533,6 +545,10 @@ "imputation/stats/NA12878.quilt2.bcftools_stats.txt", "imputation/stats/NA19401.quilt2.bcftools_stats.txt", "imputation/stats/NA20359.quilt2.bcftools_stats.txt", + "initialisation", + "initialisation/prepare_genome", + "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", + "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bcftools-stats-subtypes.txt", @@ -589,6 +605,8 @@ "NA12878.quilt2.bcftools_stats.txt:md5,f88efb8027ebd4d183f07b4c3c6c8b41", "NA19401.quilt2.bcftools_stats.txt:md5,7fd7ee760998b4b0bcff08a9a42e663b", "NA20359.quilt2.bcftools_stats.txt:md5,06d4bf7b5c0ea4da43ecf2f65d3d78c2", + "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", + "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", "bcftools_stats_indel-lengths.txt:md5,c0e642dfa9d12f319d6e8b3b01255747", "bcftools_stats_vqc_Count_Indels.txt:md5,40ed6e6e4d8468648abf244557a8c794", @@ -659,7 +677,7 @@ } } ], - "timestamp": "2026-04-03T14:01:25.326893275", + "timestamp": "2026-04-05T15:12:22.968210453", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" From a99218aa0c19054d0d5871023aab1a79fdfb24d1 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 5 Apr 2026 16:05:19 -0300 Subject: [PATCH 05/28] fix empty snapshot content --- .../tests/function.nf.test | 4 --- 
.../tests/function.nf.test.snap | 25 ++++++++++--------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test index 0b7ef81f..05b63faa 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test @@ -1,8 +1,4 @@ def check_stdout(function_stdout) { - if (!function_stdout || function_stdout.size() <= 2) { - return [] - } - function_stdout[2..-1] .findAll { !it.matches(/.*Nextflow [0-9]+\.[0-9]+\.[0-9]+ is available.*/) && diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap index 00f5037b..ab7eb594 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test.snap @@ -12,10 +12,10 @@ "Test getRegionFromFai with an error": { "content": [ [ - + "Invalid region_selected: chr22:0-4000:4648" ] ], - "timestamp": "2026-04-03T15:56:45.189000124", + "timestamp": "2026-04-05T16:00:18.621286177", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" @@ -74,10 +74,10 @@ "Test validateInputBatchTools vcf only for glimpse": { "content": [ [ - + "Stitch, QUILT and QUILT2 software cannot run with VCF or BCF files. Please provide alignment files (i.e. BAM or CRAM)." ] ], - "timestamp": "2026-04-03T15:47:43.631797163", + "timestamp": "2026-04-05T15:59:02.030907977", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" @@ -86,10 +86,10 @@ "Test validateInputBatchTools success batch": { "content": [ [ - + "WARN: Glimpse2, QUILT or QUILT2 software is selected and the number of input files (151) is less than the batch size (60). The input files will be processed in 3 batches." 
] ], - "timestamp": "2026-04-03T15:48:18.055976274", + "timestamp": "2026-04-05T15:59:35.860398759", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" @@ -114,10 +114,11 @@ "Test validateInputBatchTools error batch": { "content": [ [ - + "WARN: Glimpse2, QUILT or QUILT2 software is selected and the number of input files (151) is less than the batch size (60). The input files will be processed in 3 batches.", + "Stitch or Glimpse1 software is selected and the number of input files (151) is less than the batch size (60). Splitting the input files in batches would induce batch effect." ] ], - "timestamp": "2026-04-03T15:48:07.275751574", + "timestamp": "2026-04-05T15:59:25.152279612", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" @@ -148,10 +149,10 @@ "Test validateInputBatchTools only one vcf": { "content": [ [ - + "When using a Variant Calling Format file as input, only one file can be provided. If you have multiple single-sample VCF files, please merge them into a single multisample VCF file." 
] ], - "timestamp": "2026-04-03T15:47:55.254721935", + "timestamp": "2026-04-05T15:59:13.565761606", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" @@ -160,10 +161,10 @@ "Test checkMetaChr warning": { "content": [ [ - + "WARN: Chr : [chrY] is missing from test" ] ], - "timestamp": "2026-04-03T16:23:29.853722721", + "timestamp": "2026-04-05T15:58:50.228028127", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" From 934274419d2f382725d1fc56bc2e26ddd42d0f37 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 5 Apr 2026 16:13:40 -0300 Subject: [PATCH 06/28] add assert function contains back --- .../utils_nfcore_phaseimpute_pipeline/tests/function.nf.test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test index 05b63faa..1715f4dc 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/tests/function.nf.test @@ -378,7 +378,8 @@ nextflow_function { then { assertAll ( { assert function.success }, - { assert snapshot(this.check_stdout(function.stdout)).match() } + { assert snapshot(this.check_stdout(function.stdout)).match() }, + { assert function.stdout.contains("WARN: Chr : [chrY] is missing from test") } ) } } From 6fd6c693918d344e5903d5d4d5fd511f88b5db20 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 5 Apr 2026 16:17:03 -0300 Subject: [PATCH 07/28] remove unnecessary function stdout def --- workflows/chrcheck/tests/function.nf.test | 3 --- 1 file changed, 3 deletions(-) diff --git a/workflows/chrcheck/tests/function.nf.test b/workflows/chrcheck/tests/function.nf.test index 52ebc006..a8dbfdf9 100644 --- a/workflows/chrcheck/tests/function.nf.test +++ b/workflows/chrcheck/tests/function.nf.test @@ 
-1,7 +1,4 @@ def check_stdout(function_stdout) { - if (!function_stdout || function_stdout.size() <= 2) { - return function_stdout ?: [] - } function_stdout[2..-1] .findAll { From bb5fcb187ad92d2788a62df10446bc4ad7721dae Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 5 Apr 2026 16:17:46 -0300 Subject: [PATCH 08/28] remove space --- workflows/chrcheck/tests/function.nf.test | 1 - 1 file changed, 1 deletion(-) diff --git a/workflows/chrcheck/tests/function.nf.test b/workflows/chrcheck/tests/function.nf.test index a8dbfdf9..7cfcdaac 100644 --- a/workflows/chrcheck/tests/function.nf.test +++ b/workflows/chrcheck/tests/function.nf.test @@ -1,5 +1,4 @@ def check_stdout(function_stdout) { - function_stdout[2..-1] .findAll { !it.matches(/.*Nextflow [0-9]+\.[0-9]+\.[0-9]+ is available.*/) && From 73b3510310086caf120bda793c2d20ccd7296e7b Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 5 Apr 2026 18:46:06 -0300 Subject: [PATCH 09/28] allow mspbwt --- conf/test_quilt2.config | 2 +- workflows/phaseimpute/tests/nextflow.config | 2 +- .../tests/test_quilt2.nf.test.snap | 62 +++++++++---------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/conf/test_quilt2.config b/conf/test_quilt2.config index e9b2e9c8..a18d873f 100644 --- a/conf/test_quilt2.config +++ b/conf/test_quilt2.config @@ -13,7 +13,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT2:QUILT_QUILT2' { cpus = 1 - ext.args = {"--seed=${params.seed} --use_mspbwt=FALSE --impute_rare_common=FALSE" } + ext.args = {"--seed=${params.seed} --use_mspbwt=TRUE --impute_rare_common=FALSE" } ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? 
meta.regionout.replace(':','_') : meta.chr}.quilt2" } } } diff --git a/workflows/phaseimpute/tests/nextflow.config b/workflows/phaseimpute/tests/nextflow.config index 60731196..8d572244 100644 --- a/workflows/phaseimpute/tests/nextflow.config +++ b/workflows/phaseimpute/tests/nextflow.config @@ -19,7 +19,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT2:QUILT_QUILT2' { cpus = 1 - ext.args = {"--seed=${params.seed} --use_mspbwt=FALSE --impute_rare_common=FALSE" } + ext.args = {"--seed=${params.seed} --use_mspbwt=TRUE --impute_rare_common=FALSE" } ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? meta.regionout.replace(':','_') : meta.chr}.quilt2" } } diff --git a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap index 02b717d5..dda628f4 100644 --- a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap +++ b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap @@ -163,43 +163,43 @@ "VCF files": [ [ "all_samples.batch0.quilt2.vcf.gz", - "572144fe1242552536761de3c9618aaa" + "d36d11fd199683dfc52c0f1ff34a11e1" ], [ "NA12878.quilt2.vcf.gz", - "4917a9de61d65e76083dc692087ccfb5" + "8f68a04e0f4b088899b726d3668cb153" ], [ "NA19401.quilt2.vcf.gz", - "1834a8aaaf9df3acd3afcb04bcec3e28" + "1cd58749c272dfcb9968694b69598ef2" ], [ "NA20359.quilt2.vcf.gz", - "a31bed1641a3d7445f7b3ead2b4d1bb2" + "45563c6c8caff963299f40e22cf1e981" ], [ "all_samples.batch0.chr21.quilt2.ligate.vcf.gz", - "7a11aeccb26be6dc60fb64c103f87324" + "d695149ed8b53cb1a983b4f68f261087" ], [ "all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", - "5244ed7616bfd602656f10aa783bc584" + "87d7364a75db14177a21e4e4b0e8c699" ], [ "all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", - "840a0539baf7de8b8f854f204dad1082" + "6232e6c6dc851a497a82f25d1315b09f" ], [ "all_samples.batch0.chr22.quilt2.ligate.vcf.gz", - "a38c649dce49ab4a5a1f8cdc3eaa130f" + "b21990d0540106d38fa72ebf8003e5f9" ], [ 
"all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", - "5fbf21b735d298cc1b78d6a8516e442e" + "bd24854631ffb9d3df57ac77ac103f33" ], [ "all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", - "414b9dbc7d6770be64fc85332508d72e" + "943b4ec2d479edb0bef12b65f0a32534" ] ], "CSV files": [ @@ -225,7 +225,7 @@ } } ], - "timestamp": "2026-04-05T15:09:30.851391342", + "timestamp": "2026-04-05T18:32:29.768443421", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" @@ -395,43 +395,43 @@ "VCF files": [ [ "all_samples.batch0.quilt2.vcf.gz", - "572144fe1242552536761de3c9618aaa" + "d36d11fd199683dfc52c0f1ff34a11e1" ], [ "NA12878.quilt2.vcf.gz", - "4917a9de61d65e76083dc692087ccfb5" + "8f68a04e0f4b088899b726d3668cb153" ], [ "NA19401.quilt2.vcf.gz", - "1834a8aaaf9df3acd3afcb04bcec3e28" + "1cd58749c272dfcb9968694b69598ef2" ], [ "NA20359.quilt2.vcf.gz", - "a31bed1641a3d7445f7b3ead2b4d1bb2" + "45563c6c8caff963299f40e22cf1e981" ], [ "all_samples.batch0.chr21.quilt2.ligate.vcf.gz", - "7a11aeccb26be6dc60fb64c103f87324" + "d695149ed8b53cb1a983b4f68f261087" ], [ "all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", - "5244ed7616bfd602656f10aa783bc584" + "87d7364a75db14177a21e4e4b0e8c699" ], [ "all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", - "840a0539baf7de8b8f854f204dad1082" + "6232e6c6dc851a497a82f25d1315b09f" ], [ "all_samples.batch0.chr22.quilt2.ligate.vcf.gz", - "a38c649dce49ab4a5a1f8cdc3eaa130f" + "b21990d0540106d38fa72ebf8003e5f9" ], [ "all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", - "5fbf21b735d298cc1b78d6a8516e442e" + "bd24854631ffb9d3df57ac77ac103f33" ], [ "all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", - "414b9dbc7d6770be64fc85332508d72e" + "943b4ec2d479edb0bef12b65f0a32534" ] ], "CSV files": [ @@ -457,7 +457,7 @@ } } ], - "timestamp": "2026-04-05T15:11:08.158579785", + "timestamp": "2026-04-05T18:34:06.561257211", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" @@ -623,35 +623,35 @@ "VCF files": [ [ 
"all_samples.batch0.quilt2.vcf.gz", - "ef12a71c74f2ce7413c3b24926d4edc5" + "2fee7e17d0f94516684e69c9ad852524" ], [ "NA12878.quilt2.vcf.gz", - "965d877345813f89d9a1cec42d937659" + "5b24356dcf66789c9131ab0ec571dcc2" ], [ "NA19401.quilt2.vcf.gz", - "75fe6afa3c2a5761047d180abeaa0782" + "c3ca81440ada4d0dea1d52274b668f93" ], [ "NA20359.quilt2.vcf.gz", - "865b3bc96f21fea274c7931644c65305" + "f1a8b0d39b3fe5648428c7a35b87d97f" ], [ "all_samples.batch0.chr21.quilt2.ligate.vcf.gz", - "3aeb6b1fa1b2d6696cd3d9cc52e12ee5" + "6ba188da786905b9c4857dd800ee45f5" ], [ "all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz", - "d5c7da736a139418d25775dd23ac703a" + "c50153f002611d6e62b8d434781cea60" ], [ "all_samples.batch0.chr22.quilt2.ligate.vcf.gz", - "c92205538f31abdc1f9eafec4db5a102" + "63843cb986b001f6d7a24124f8a43570" ], [ "all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz", - "88608969f729c6feda2aa0fcd328e1b8" + "2208d7384aec1559c946df908c8ec0a6" ] ], "CSV files": [ @@ -677,7 +677,7 @@ } } ], - "timestamp": "2026-04-05T15:12:22.968210453", + "timestamp": "2026-04-05T18:35:22.861373304", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" From b00ac83de0ddc30ab097bc7c848a0c749320657d Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 5 Apr 2026 18:50:26 -0300 Subject: [PATCH 10/28] improve docs --- docs/usage.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 7bb8c644..52331542 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -526,15 +526,25 @@ To summarize: - GLIMPSE2 should not do target-to-target imputation. - If you have alignment files (e.g., BAM or CRAM), all tools are available, and processing will occur in `batch_size`: - GLIMPSE1 and STITCH may induce batch effects, so all samples need to be imputed together. - - GLIMPSE2 and QUILT can process samples in separate batches. 
+ - GLIMPSE2, QUILT and QUILT2 can process samples in separate batches. ## Imputation tools `--steps impute --tools [glimpse1,glimpse2,quilt,quilt2,stitch,beagle5,minimac4]` You can choose different software to perform the imputation. In the following sections, the typical commands for running the pipeline with each software are included. Multiple tools can be selected by separating them with a comma (eg. `--tools glimpse1,quilt`). -### QUILT +### QUILT / QUILT2 -[QUILT](https://github.com/rwdavies/QUILT) is an R and C++ program for rapid genotype imputation from low-coverage sequence using a large reference panel. The required inputs for this program are bam samples provided in the input samplesheet (`--input`) and a CSV file with the genomic chunks (`--chunks`). +[QUILT](https://github.com/rwdavies/QUILT) is an R and C++ package for read-aware genotype imputation from low-coverage sequencing using a reference panel. This pipeline contains the original QUILT method (`QUILT.R`, referred to here as `quilt`) and the newer QUILT2 method (`QUILT2.R`, exposed in this pipeline as `quilt2`). + +In `nf-core/phaseimpute`, both methods use alignment files from `--input`, optionally benefit from `--map`, and can use `--chunks` to split the genome into overlapping imputation regions. + +Choose `quilt2` by default for new projects. The official QUILT2 documentation describes it as the recommended modern method for large reference panels and diverse sequencing inputs including short reads, long reads, linked/barcoded reads and ancient DNA. The QUILT2 paper also reports a dedicated cfDNA/NIPT mode upstream; however, the current `nf-core/phaseimpute` integration includes the diploid imputation workflow only. + +Choose `quilt` when you specifically want the original QUILT workflow. + +#### `quilt` + +The required inputs for `quilt` are BAM/CRAM samples provided in the input samplesheet (`--input`) and a CSV file with the genomic chunks (`--chunks`). 
```bash nextflow run nf-core/phaseimpute \ From 68f396f8551574ec481c05e01f10eb0b91b17e17 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 12 Apr 2026 18:49:12 -0300 Subject: [PATCH 11/28] import quilt2 from nf-core modules --- modules.json | 8 +- modules/nf-core/quilt/quilt2/main.nf | 21 +- modules/nf-core/quilt/quilt2/meta.yml | 58 ++- .../nf-core/quilt/quilt2/tests/main.nf.test | 349 +++++++++++++++ .../quilt/quilt2/tests/main.nf.test.snap | 409 ++++++++++++++++++ .../quilt/quilt2/tests/nextflow.config | 19 + 6 files changed, 835 insertions(+), 29 deletions(-) create mode 100644 modules/nf-core/quilt/quilt2/tests/main.nf.test create mode 100644 modules/nf-core/quilt/quilt2/tests/main.nf.test.snap create mode 100644 modules/nf-core/quilt/quilt2/tests/nextflow.config diff --git a/modules.json b/modules.json index 63da6e01..5e26f029 100644 --- a/modules.json +++ b/modules.json @@ -31,7 +31,8 @@ "vcf_impute_beagle5", "vcf_impute_glimpse", "vcf_impute_minimac4", - "vcf_phase_shapeit5" + "vcf_phase_shapeit5", + "bam_impute_quilt2" ] }, "bcftools/merge": { @@ -123,7 +124,8 @@ "bam_vcf_impute_glimpse2", "modules", "vcf_impute_beagle5", - "vcf_impute_minimac4" + "vcf_impute_minimac4", + "bam_impute_quilt2" ] }, "glimpse2/phase": { @@ -163,7 +165,7 @@ }, "quilt/quilt2": { "branch": "master", - "git_sha": "local", + "git_sha": "4e94abb54fc2e2e868e943ee137a958878b8d092", "installed_by": ["bam_impute_quilt2", "modules"] }, "samtools/coverage": { diff --git a/modules/nf-core/quilt/quilt2/main.nf b/modules/nf-core/quilt/quilt2/main.nf index e1050a04..4a2ceff4 100644 --- a/modules/nf-core/quilt/quilt2/main.nf +++ b/modules/nf-core/quilt/quilt2/main.nf @@ -25,7 +25,6 @@ process QUILT_QUILT2 { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = task.ext.suffix ?: "vcf.gz" def extensions = bams.collect { path -> path.extension } def extension = extensions.flatten().unique() @@ 
-34,11 +33,11 @@ process QUILT_QUILT2 { : extension == ["cram"] ? "--reference=${fasta} --cramlist=" : "" def genetic_map_command = genetic_map ? "--genetic_map_file=${genetic_map}" : "" - def posfile_command = posfile ? "--posfile=${posfile}" : "" - def phasefile_command = phasefile ? "--phasefile=${phasefile}" : "" - def genfile_command = genfile ? "--genfile=${genfile}" : "" - def samplename_command = samplename ? "--sampleNames_file=${samplename}" : "" - def start_command = regions_start ? "--regionStart=${regions_start}" : "" + def posfile_command = posfile ? "--posfile=${posfile}" : "" + def phasefile_command = phasefile ? "--phasefile=${phasefile}" : "" + def genfile_command = genfile ? "--genfile=${genfile}" : "" + def samplename_command = samplename ? "--sampleNames_file=${samplename}" : "" + def start_command = regions_start ? "--regionStart=${regions_start}" : "" def end_command = regions_end ? "--regionEnd=${regions_end}" : "" def buffer_command = buffer ? "--buffer=${buffer}" : "" @@ -70,27 +69,25 @@ process QUILT_QUILT2 { --nCores=${task.cpus} \\ --outputdir="." \\ --reference_vcf_file=${reference_vcf_file} \\ - --output_filename=${prefix}.${suffix} \\ + --output_filename=${prefix}.vcf.gz \\ ${args} """ stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def suffix = task.ext.suffix ?: "vcf.gz" - def create_cmd = suffix.endsWith(".gz") ? "echo '' | gzip >" : "touch" def make_plots = args.contains("--make_plots=TRUE") def save_ref = args.contains("--save_prepared_reference=TRUE") def nGibbsSamples = args.contains("--nGibbsSamples=") ? args.split("--nGibbsSamples=")[1].split(" ")[0] : 7 def n_seek_its = args.contains("--n_seek_its=") ? 
args.split("--n_seek_its=")[1].split(" ")[0] : 3 """ - ${create_cmd} ${prefix}.${suffix} - touch ${prefix}.${suffix}.tbi + echo '' | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi if [ "${save_ref}" == true ] then mkdir -p RData - touch "RData/QUILT2_prepared_reference.${chr}.${regions_start}.${regions_end}.RData" + touch "RData/QUILT_prepared_reference.${chr}.${regions_start}.${regions_end}.RData" fi if [ "${make_plots}" == true ] then diff --git a/modules/nf-core/quilt/quilt2/meta.yml b/modules/nf-core/quilt/quilt2/meta.yml index 5b91edc9..75f68be9 100644 --- a/modules/nf-core/quilt/quilt2/meta.yml +++ b/modules/nf-core/quilt/quilt2/meta.yml @@ -1,6 +1,6 @@ name: "quilt_quilt2" description: QUILT2 is an R and C++ program for fast genotype imputation from - low-coverage sequence using a large reference panel. + low-coverage sequence using a large phased reference panel in VCF/BCF format. keywords: - imputation - low-coverage @@ -9,7 +9,8 @@ keywords: - vcf tools: - "quilt": - description: "Fast genotype imputation from low-coverage sequence using a large reference panel" + description: "Fast read-aware genotype imputation from low-coverage sequence + using a large phased reference panel" homepage: "https://github.com/rwdavies/QUILT" documentation: "https://github.com/rwdavies/QUILT" tool_dev_url: "https://github.com/rwdavies/QUILT" @@ -26,12 +27,12 @@ input: - bams: type: file description: (Mandatory) BAM/CRAM files - pattern: "*.{bam,cram,sam}" + pattern: "*.{bam,cram}" ontologies: [] - bais: type: file description: (Mandatory) BAM/CRAM index files - pattern: "*.{bai}" + pattern: "*.{bai,crai}" ontologies: [] - bamlist: type: file @@ -41,34 +42,51 @@ input: ontologies: [] - samplename: type: file - description: (Optional) File with list of sample names in the same order + description: (Optional) File with list of samples names in the same order as in bamlist to impute. One file per line. 
pattern: "*.{txt}" ontologies: [] - reference_vcf_file: type: file - description: (Mandatory) Reference panel VCF with phased haplotypes. + description: (Mandatory) Phased reference panel VCF/BCF file for + imputation. pattern: "*.{vcf,vcf.gz,bcf}" ontologies: [] - reference_vcf_index: type: file - description: (Mandatory) Index for the reference panel VCF. + description: (Mandatory) Index for the reference panel VCF file. pattern: "*.{tbi,csi}" ontologies: [] - posfile: type: file description: (Optional) File with positions of where to impute, lining up - one-to-one with genfile. + one-to-one with genfile. File is tab separated with no header, one row + per SNP, with col 1 = chromosome, col 2 = physical position (sorted from + smallest to largest), col 3 = reference base, col 4 = alternate base. + Bases are capitalized. pattern: "*.{txt}" ontologies: [] - phasefile: type: file - description: (Optional) File with truth phasing results. + description: (Optional) File with truth phasing results. Supersedes + genfile if both options given. File has a header row with a name for + each sample, matching what is found in the bam file. Each subject is + then a tab separated column, with 0 = ref and 1 = alt, separated by a + vertical bar |, e.g. 0|0 or 0|1. Note therefore this file has one more + row than posfile which has no header. pattern: "*.{txt}" ontologies: [] - genfile: type: file - description: (Optional) Path to gen file with high coverage results. + description: (Optional) Path to gen file with high coverage results. Empty + for no genfile. If both genfile and phasefile are given, only phasefile + is used, as genfile (unphased genotypes) is derivative to phasefile + (phased genotypes). File has a header row with a name for each sample, + matching what is found in the bam file. Each subject is then a tab + separated column, with 0 = hom ref, 1 = het, 2 = hom alt and NA + indicating missing genotype, with rows corresponding to rows of the + posfile. 
Note therefore this file has one more row than posfile which + has no header [default ""] pattern: "*.{txt}" ontologies: [] - chr: @@ -76,19 +94,27 @@ input: description: (Mandatory) What chromosome to run. Should match BAM headers. - regions_start: type: integer - description: (Mandatory) When running imputation, where to start from. + description: (Mandatory) When running imputation, where to start from. The + 1-based position x is kept if regionStart <= x <= regionEnd. - regions_end: type: integer description: (Mandatory) When running imputation, where to stop. - ngen: type: integer - description: Number of generations since founding or mixing. + description: Number of generations since founding or mixing. Note that the + algorithm is relatively robust to this. Use nGen = 4 * Ne / K if unsure. - buffer: type: integer - description: Buffer of region to perform imputation over. + description: Buffer of region to perform imputation over. So imputation is + run from regionStart-buffer to regionEnd+buffer, and reported for + regionStart to regionEnd, including the bases of regionStart and + regionEnd. - genetic_map: type: file - description: (Optional) File with genetic map information. + description: (Optional) File with genetic map information, a file with 3 + white-space delimited entries giving position (1-based), genetic rate + map in cM/Mbp, and genetic map in cM. If no file included, rate is based + on physical distance and expected rate (expRate). 
pattern: "*.{txt,map}{,gz}" ontologies: [] - - meta2: @@ -192,3 +218,7 @@ topics: - R --version | sed "1!d; s/.*version //; s/ .*//": type: eval description: The expression to obtain the version of the tool +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/modules/nf-core/quilt/quilt2/tests/main.nf.test b/modules/nf-core/quilt/quilt2/tests/main.nf.test new file mode 100644 index 00000000..88e04fdd --- /dev/null +++ b/modules/nf-core/quilt/quilt2/tests/main.nf.test @@ -0,0 +1,349 @@ +nextflow_process { + + name "Test Process QUILT2" + script "../main.nf" + process "QUILT_QUILT2" + + tag "modules" + tag "modules_nfcore" + tag "quilt/quilt2" + tag "quilt" + tag "samtools/view" + tag "bcftools/query" + tag "gawk" + + config "./nextflow.config" + + test("homo_sapiens - bam, reference vcf, map - fasta") { + when { + params{ + quilt_args = "--save_prepared_reference=TRUE --seed=1" + } + process { + """ + input[0] = [ + [ id:'NA12878', chr:'chr22' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai', checkIfExists: true), + [], [], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi', checkIfExists: true), + [], [], [], + "chr22", "16570000", "16610000", "100", "10000", + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.stitch.map", checkIfExists:true) + ] + input[1] = [ + [id: 'GRCh38'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success 
+ assertAll( + { assert snapshot( + process.out.tbi, + process.out.rdata, + process.out.vcf.collect{ meta, vcf -> [ + meta, + path(vcf).vcf.header.getGenotypeSamples().sort(), + path(vcf).vcf.variantsMD5 + ]}, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("homo_sapiens - cram, reference vcf, map - fasta") { + setup { + run("SAMTOOLS_VIEW", alias: "BAM_TO_CRAM") { + script "../../../samtools/view/main.nf" + process { + """ + input[0] = Channel.of([ + [id: 'NA12878'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [id: 'GRCh38'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + input[2] = [[], []] + input[3] = [[], []] + input[4] = 'crai' + """ + } + } + } + + when { + params{ + quilt_args = "--save_prepared_reference=TRUE --seed=1" + samtools_args = "--output-fmt cram,no_ref --write-index" + } + process { + """ + input[0] = BAM_TO_CRAM.out.cram + .join(BAM_TO_CRAM.out.crai) + .map { meta, cram, crai -> + [ + meta + [chr:'chr22'], + cram, + crai, + [], [], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi', checkIfExists: true), + [], [], [], + "chr22", "16570000", "16610000", "100", "10000", + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.stitch.map", checkIfExists:true) + ] + } + input[1] = [ + [id: 'GRCh38'], + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.tbi, + process.out.rdata, + process.out.vcf.collect{ meta, vcf -> [ + meta, + path(vcf).vcf.header.getGenotypeSamples().sort(), + path(vcf).vcf.variantsMD5 + ]}, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("homo_sapiens - bam, reference vcf, map, optional outputs - fasta") { + setup { + run("BCFTOOLS_QUERY") { + script "../../../bcftools/query/main.nf" + process { + """ + input[0] = [ + [id: 'NA12878'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878.chr21_22.1X.glimpse2.vcf.gz.csi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + run("GAWK", alias: "GAWK_NAME") { + script "../../../gawk/main.nf" + process { + """ + program = Channel.of('BEGIN{print "NA12878"} {print}').collectFile(name: 'program.txt', newLine: true) + input[0] = BCFTOOLS_QUERY.out.output + input[1] = program + input[2] = false + """ + } + } + } + + when { + params{ + bcftools_query_args = "-f '[%GT]\\n' -r chr22" + bcftools_query_suffix = "phasefile.txt" + gawk_suffix = "phasefile.name.txt" + quilt_args = "--save_prepared_reference=TRUE --make_plots=TRUE --impute_rare_common=FALSE --seed=1" + } + process { + """ + input[0] = Channel.of([ + [ id:'NA12878', chr:'chr22' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai', checkIfExists: true), + [], [], + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.posfile', checkIfExists: true), + ]) + .combine(GAWK_NAME.out.output.map { it[1] }) + .combine(channel.of([ + [], "chr22", "16570000", "16610000", "100", "10000", + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.stitch.map", checkIfExists:true) + ])) + input[1] = [ + [id: 'GRCh38'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + def plots_dir = new File(process.out.plots[0][1]) + def plots = [] + plots_dir.eachFileRecurse { file -> plots << file.getName() } + assertAll( + { assert snapshot( + process.out.tbi, + plots.sort(), + process.out.rdata, + process.out.vcf.collect{ meta, vcf -> [ + meta, + path(vcf).vcf.header.getGenotypeSamples().sort(), + path(vcf).vcf.variantsMD5 + ]}, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("QUILT2 - stub") { + options '-stub' + when { + params{ + quilt_args = "--save_prepared_reference=TRUE --make_plots=TRUE" + } + process { + """ + input[0] = Channel.of([ + [ id:'NA12878', chr:'chr22' ], + [], [], [], [], [], [], [], [], + [], "chr22", "16570000", "16610000", "100", "10000", [] + ]) + input[1] = [ + [id: 'GRCh38'], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assert 
process.success + def dir = new File(process.out.plots[0][1]) + def list = [] + dir.eachFileRecurse { file -> list << file.getName() } + assertAll( + { assert snapshot( + process.out.vcf, + process.out.tbi, + list.sort(), + process.out.rdata, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("QUILT2 no optional files") { + when { + params{ + quilt_args = "--seed=1" + } + process { + """ + input[0] = [ + [ id:'test', chr:'chr22' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai', checkIfExists: true), + [], [], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi', checkIfExists: true), + [], [], [], + "chr22", "16570000", "16610000", "100", "10000", + [] + ] + input[1] = [[id: null], [], []] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.tbi, + process.out.rdata, + process.out.vcf.collect{ meta, vcf -> [ + meta, + path(vcf).vcf.header.getGenotypeSamples().sort(), + path(vcf).vcf.variantsMD5 + ]}, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("QUILT2 with bamlist and renaming") { + when { + params{ + quilt_args = "--seed=1" + } + process { + """ + bamlist = Channel.of( + "NA12878.chr21_22.1X.bam", + ).collectFile(name : 'bamlist.txt', newLine : true) + + bamnames = Channel.of( + "Mysample1", + ).collectFile(name : 'bamnames.txt', newLine : true) + + ch_input = Channel.of([ + [ id:'test', chr:'chr20' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai', checkIfExists: true) + ]) + .combine(bamlist) + .combine(bamnames) + .combine(Channel.of([ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi', checkIfExists: true), + [], [], [], + "chr22", "16570000", "16610000", "100", "10000", + [] + ])) + + input[0] = ch_input + input[1] = [[id: null], [], []] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.tbi, + process.out.rdata, + process.out.vcf.collect{ meta, vcf -> [ + meta, + path(vcf).vcf.header.getGenotypeSamples().sort(), + path(vcf).vcf.variantsMD5 + ]}, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + +} diff --git a/modules/nf-core/quilt/quilt2/tests/main.nf.test.snap b/modules/nf-core/quilt/quilt2/tests/main.nf.test.snap new file mode 100644 index 00000000..7538d3d2 --- /dev/null +++ b/modules/nf-core/quilt/quilt2/tests/main.nf.test.snap @@ -0,0 +1,409 @@ +{ + "QUILT2 - stub": { + "content": [ + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + "NA12878.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + "NA12878.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ + "haps.NA12878.chr22.16570000.16610000_igs.1.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.1.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.1.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.1.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.2.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.2.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.2.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.2.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.3.0.truth.png", + 
"haps.NA12878.chr22.16570000.16610000_igs.3.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.3.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.3.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.4.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.4.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.4.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.4.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.5.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.5.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.5.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.5.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.6.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.6.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.6.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.6.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.7.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.7.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.7.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.7.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.8.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.8.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.8.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.8.it3.gibbs.png" + ], + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + [ + "QUILT_prepared_reference.chr22.16570000.16610000.RData:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + { + "versions_r_base": [ + [ + "QUILT_QUILT2", + "r-base", + "4.4.3" + ] + ], + "versions_r_quilt": [ + [ + "QUILT_QUILT2", + "r-quilt", + "2.0.4" + ] + ] + } + ], + "timestamp": "2026-04-05T16:44:47.20694205", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "QUILT2 no optional files": { + "content": [ + [ + [ + { + "id": "test", + "chr": "chr22" + }, + 
"test.vcf.gz.tbi:md5,a4f8d419c7cdffd35743582d2f8cf62a" + ] + ], + [ + [ + { + "id": "test", + "chr": "chr22" + }, + [ + + ] + ] + ], + [ + [ + { + "id": "test", + "chr": "chr22" + }, + [ + "NA12878" + ], + "a560a607175034af92f6aab795cdfece" + ] + ], + { + "versions_r_base": [ + [ + "QUILT_QUILT2", + "r-base", + "4.4.3" + ] + ], + "versions_r_quilt": [ + [ + "QUILT_QUILT2", + "r-quilt", + "2.0.4" + ] + ] + } + ], + "timestamp": "2026-04-05T16:39:35.155242296", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "homo_sapiens - bam, reference vcf, map, optional outputs - fasta": { + "content": [ + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + "NA12878.vcf.gz.tbi:md5,d93e9cc42b928f040dd627d91d0c38cb" + ] + ], + [ + "haps.NA12878.chr22.16570000.16610000_igs.1.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.1.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.1.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.1.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.2.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.2.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.2.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.2.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.3.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.3.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.3.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.3.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.4.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.4.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.4.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.4.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.5.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.5.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.5.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.5.it3.gibbs.png", + 
"haps.NA12878.chr22.16570000.16610000_igs.6.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.6.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.6.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.6.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.7.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.7.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.7.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.7.it3.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.8.0.truth.png", + "haps.NA12878.chr22.16570000.16610000_igs.8.it1.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.8.it2.gibbs.png", + "haps.NA12878.chr22.16570000.16610000_igs.8.it3.gibbs.png" + ], + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + [ + "QUILT_prepared_reference.chr22.16570000.16610000.RData:md5,d7efdb9debc0ed36050d7d1e3cd526af" + ] + ] + ], + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + [ + "NA12878" + ], + "e2195be0270b7fcecafd989acbc4718" + ] + ], + { + "versions_r_base": [ + [ + "QUILT_QUILT2", + "r-base", + "4.4.3" + ] + ], + "versions_r_quilt": [ + [ + "QUILT_QUILT2", + "r-quilt", + "2.0.4" + ] + ] + } + ], + "timestamp": "2026-04-12T18:00:07.039402568", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "homo_sapiens - bam, reference vcf, map - fasta": { + "content": [ + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + "NA12878.vcf.gz.tbi:md5,5a60a33c0a274815ff0a52ca16050b39" + ] + ], + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + [ + "QUILT_prepared_reference.chr22.16570000.16610000.RData:md5,c981d3c2da392174bb17acdae0538eed" + ] + ] + ], + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + [ + "NA12878" + ], + "7b6b8619f2a16a80dd4a5dcd1cfe4306" + ] + ], + { + "versions_r_base": [ + [ + "QUILT_QUILT2", + "r-base", + "4.4.3" + ] + ], + "versions_r_quilt": [ + [ + "QUILT_QUILT2", + "r-quilt", + "2.0.4" + ] + ] + } + ], + "timestamp": "2026-04-05T16:42:44.640836278", + 
"meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "homo_sapiens - cram, reference vcf, map - fasta": { + "content": [ + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + "NA12878.vcf.gz.tbi:md5,5a60a33c0a274815ff0a52ca16050b39" + ] + ], + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + [ + "QUILT_prepared_reference.chr22.16570000.16610000.RData:md5,c981d3c2da392174bb17acdae0538eed" + ] + ] + ], + [ + [ + { + "id": "NA12878", + "chr": "chr22" + }, + [ + "NA12878" + ], + "7b6b8619f2a16a80dd4a5dcd1cfe4306" + ] + ], + { + "versions_r_base": [ + [ + "QUILT_QUILT2", + "r-base", + "4.4.3" + ] + ], + "versions_r_quilt": [ + [ + "QUILT_QUILT2", + "r-quilt", + "2.0.4" + ] + ] + } + ], + "timestamp": "2026-04-06T17:23:04.938845476", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "QUILT2 with bamlist and renaming": { + "content": [ + [ + [ + { + "id": "test", + "chr": "chr20" + }, + "test.vcf.gz.tbi:md5,8255cab2750a3d3d24f485438ef9ebe5" + ] + ], + [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + + ] + ] + ], + [ + [ + { + "id": "test", + "chr": "chr20" + }, + [ + "Mysample1" + ], + "a560a607175034af92f6aab795cdfece" + ] + ], + { + "versions_r_base": [ + [ + "QUILT_QUILT2", + "r-base", + "4.4.3" + ] + ], + "versions_r_quilt": [ + [ + "QUILT_QUILT2", + "r-quilt", + "2.0.4" + ] + ] + } + ], + "timestamp": "2026-04-05T16:39:45.620802389", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/quilt/quilt2/tests/nextflow.config b/modules/nf-core/quilt/quilt2/tests/nextflow.config new file mode 100644 index 00000000..cdebc062 --- /dev/null +++ b/modules/nf-core/quilt/quilt2/tests/nextflow.config @@ -0,0 +1,19 @@ +process { + // More than 1 cpu may lead to different md5sum + resourceLimits = [ + cpus: 1 + ] + withName: BCFTOOLS_QUERY { + ext.args = { "${params.bcftools_query_args}" } + ext.suffix = { "${params.bcftools_query_suffix}" } + } + withName: GAWK { + 
ext.suffix = { "${params.gawk_suffix}" } + } + withName: SAMTOOLS_VIEW { + ext.args = { "${params.samtools_args}" } + } + withName: QUILT_QUILT2 { + ext.args = { "${params.quilt_args}" } + } +} From 91004147f1c7b7af81de1cbadf045478446d057a Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 12 Apr 2026 18:56:33 -0300 Subject: [PATCH 12/28] reorder bam impute quilt2 --- modules.json | 5 ----- subworkflows/{nf-core => local}/bam_impute_quilt2/main.nf | 0 subworkflows/{nf-core => local}/bam_impute_quilt2/meta.yml | 0 workflows/phaseimpute/main.nf | 2 +- 4 files changed, 1 insertion(+), 6 deletions(-) rename subworkflows/{nf-core => local}/bam_impute_quilt2/main.nf (100%) rename subworkflows/{nf-core => local}/bam_impute_quilt2/meta.yml (100%) diff --git a/modules.json b/modules.json index 5e26f029..019471dc 100644 --- a/modules.json +++ b/modules.json @@ -242,11 +242,6 @@ "git_sha": "4e2990cc0df18823d11b192df73039c80fdebc7c", "installed_by": ["subworkflows"] }, - "bam_impute_quilt2": { - "branch": "master", - "git_sha": "local", - "installed_by": ["subworkflows"] - }, "bam_impute_stitch": { "branch": "master", "git_sha": "e1cb31f0ced0d0810d1cb099aaa690b05beb1f3a", diff --git a/subworkflows/nf-core/bam_impute_quilt2/main.nf b/subworkflows/local/bam_impute_quilt2/main.nf similarity index 100% rename from subworkflows/nf-core/bam_impute_quilt2/main.nf rename to subworkflows/local/bam_impute_quilt2/main.nf diff --git a/subworkflows/nf-core/bam_impute_quilt2/meta.yml b/subworkflows/local/bam_impute_quilt2/meta.yml similarity index 100% rename from subworkflows/nf-core/bam_impute_quilt2/meta.yml rename to subworkflows/local/bam_impute_quilt2/meta.yml diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index dba08f00..ba0e2837 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -62,7 +62,7 @@ include { TABIX_BGZIP as BGZIP_POSFILE_IMPUTE } from '../../modules/nf-co // 
QUILT subworkflows include { BAM_IMPUTE_QUILT } from '../../subworkflows/nf-core/bam_impute_quilt' include { VCF_GATHER_BCFTOOLS as CONCAT_QUILT } from '../../subworkflows/nf-core/vcf_gather_bcftools' -include { BAM_IMPUTE_QUILT2 } from '../../subworkflows/nf-core/bam_impute_quilt2' +include { BAM_IMPUTE_QUILT2 } from '../../subworkflows/local/bam_impute_quilt2' include { VCF_GATHER_BCFTOOLS as CONCAT_QUILT2 } from '../../subworkflows/nf-core/vcf_gather_bcftools' // STITCH subworkflows From 7e377da6cb0fe0554d1bbb6d4bbd0032358c5e2b Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 12 Apr 2026 18:59:58 -0300 Subject: [PATCH 13/28] fix linting --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 52331542..72a95e30 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -536,7 +536,7 @@ You can choose different software to perform the imputation. In the following se [QUILT](https://github.com/rwdavies/QUILT) is an R and C++ package for read-aware genotype imputation from low-coverage sequencing using a reference panel. This pipeline contains the original QUILT method (`QUILT.R`, referred to here as `quilt`) and the newer QUILT2 method (`QUILT2.R`, exposed in this pipeline as `quilt2`). -In `nf-core/phaseimpute`, both methods use alignment files from `--input`, optionally benefit from `--map`, and can use `--chunks` to split the genome into overlapping imputation regions. +In `nf-core/phaseimpute`, both methods use alignment files from `--input`, optionally benefit from `--map`, and can use `--chunks` to split the genome into overlapping imputation regions. Choose `quilt2` by default for new projects. The official QUILT2 documentation describes it as the recommended modern method for large reference panels and diverse sequencing inputs including short reads, long reads, linked/barcoded reads and ancient DNA. 
The QUILT2 paper also reports a dedicated cfDNA/NIPT mode upstream; however, the current `nf-core/phaseimpute` integration includes the diploid imputation workflow only. From ca975bcf6546c95090c22016c2544ca425e320e5 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:16:39 -0300 Subject: [PATCH 14/28] add comments and map --- conf/test_quilt2.config | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/conf/test_quilt2.config b/conf/test_quilt2.config index a18d873f..0dc331e9 100644 --- a/conf/test_quilt2.config +++ b/conf/test_quilt2.config @@ -2,6 +2,12 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for running minimal tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/phaseimpute -profile test_quilt2,<docker/singularity> --outdir <OUTDIR> + +---------------------------------------------------------------------------------------- */ process { @@ -22,15 +28,17 @@ params { config_profile_name = 'Minimal QUILT2 Test profile' config_profile_description = 'Minimal test dataset to check pipeline function using the tool QUILT2' + // Input data input = "${projectDir}/tests/csv/sample_bam.csv" input_region = "${projectDir}/tests/csv/region.csv" + chunks = "${projectDir}/tests/csv/chunks.csv" + panel = "${projectDir}/tests/csv/panel.csv" + // Genome references fasta = params.pipelines_testdata_base_path + "hum_data/reference_genome/GRCh38.s.fa.gz" + map = "${projectDir}/tests/csv/map_glimpse.csv" + // Pipeline parameters steps = "impute" - - chunks = "${projectDir}/tests/csv/chunks.csv" - panel = "${projectDir}/tests/csv/panel.csv" - tools = "quilt2" } From 87b31b355ec2d48d98a9c2e577046e5fc7d6d3c7 Mon Sep 17 00:00:00 2001 From: Anabella Trigila
<18577080+atrigila@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:16:47 -0300 Subject: [PATCH 15/28] remove saveas --- conf/steps/imputation_quilt2.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/steps/imputation_quilt2.config b/conf/steps/imputation_quilt2.config index 788a5e15..2f0498fb 100644 --- a/conf/steps/imputation_quilt2.config +++ b/conf/steps/imputation_quilt2.config @@ -31,8 +31,7 @@ process { withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:CONCAT_QUILT2:.*' { publishDir = [ path: { "${params.outdir}/imputation/quilt2/concat" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + mode: params.publish_dir_mode ] } From 183886e611cd5331aa32dae47e48ac4a47468031 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:17:06 -0300 Subject: [PATCH 16/28] add full name --- workflows/phaseimpute/tests/nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/phaseimpute/tests/nextflow.config b/workflows/phaseimpute/tests/nextflow.config index 8d572244..adb9f70c 100644 --- a/workflows/phaseimpute/tests/nextflow.config +++ b/workflows/phaseimpute/tests/nextflow.config @@ -23,7 +23,7 @@ process { ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? meta.regionout.replace(':','_') : meta.chr}.quilt2" } } - withName: GLIMPSE2_PHASE { + withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_VCF_IMPUTE_GLIMPSE2:GLIMPSE2_PHASE' { cpus = 1 cache = "lenient" ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? 
meta.regionout.replace(':','_') : meta.chr}.glimpse2" } From 1883862927704139982d39d5879eb6a387a445f0 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:17:15 -0300 Subject: [PATCH 17/28] update tests and snapshots with map --- workflows/phaseimpute/main.nf | 2 +- .../phaseimpute/tests/test_quilt2.nf.test | 4 +- .../tests/test_quilt2.nf.test.snap | 144 ++++++++++++------ 3 files changed, 99 insertions(+), 51 deletions(-) diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index ba0e2837..a343f8dd 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -557,7 +557,7 @@ workflow PHASEIMPUTE { }, ch_panel_phased, ch_chunks_quilt2, - ch_map, + ch_map_stitch, ch_fasta, params_impute["n_gen"], params_impute["buffer"] diff --git a/workflows/phaseimpute/tests/test_quilt2.nf.test b/workflows/phaseimpute/tests/test_quilt2.nf.test index e30aa8a5..bdd2defe 100644 --- a/workflows/phaseimpute/tests/test_quilt2.nf.test +++ b/workflows/phaseimpute/tests/test_quilt2.nf.test @@ -9,7 +9,7 @@ nextflow_pipeline { config "./nextflow.config" - test("Check test_quilt2 - with chunks") { + test("Check test_quilt2 - with chunks - with map") { config "../../../conf/test_quilt2.config" when { params { @@ -54,7 +54,7 @@ nextflow_pipeline { } } - test("Check test_quilt2 - without chunks") { + test("Check test_quilt2 - no chunks - with map") { config "../../../conf/test_quilt2.config" when { params { diff --git a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap index dda628f4..d20e5293 100644 --- a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap +++ b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap @@ -1,5 +1,5 @@ { - "Check test_quilt2 - with chunks": { + "Check test_quilt2 - with chunks - no map": { "content": [ { "workflow size": 24, @@ -225,13 +225,13 @@ } } ], - "timestamp": "2026-04-05T18:32:29.768443421", + 
"timestamp": "2026-04-05T18:34:06.561257211", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" } }, - "Check test_quilt2 - with chunks - no map": { + "Check test_quilt2 - no chunks - with map": { "content": [ { "workflow size": 24, @@ -254,6 +254,11 @@ "BCFTOOLS_STATS_TOOLS": { "bcftools": "1.22" }, + "CUSTOM_GENETICMAPCONVERT": { + "r-base": "4.5.3", + "r-data.table": "1.17.8", + "r-janitor": "2.2.1" + }, "GAWK_IMPUTED": { "gawk": "5.3.1" }, @@ -303,21 +308,27 @@ "imputation/quilt2/variant_calling/RData", "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz", "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz.tbi", - "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", - "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz.tbi", - "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", - "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz.tbi", "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz", "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz.tbi", - "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", - "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz.tbi", - "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", - "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz", + 
"imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz.tbi", "imputation/stats", "imputation/stats/NA12878.quilt2.bcftools_stats.txt", "imputation/stats/NA19401.quilt2.bcftools_stats.txt", "imputation/stats/NA20359.quilt2.bcftools_stats.txt", "initialisation", + "initialisation/map_convertion", + "initialisation/map_convertion/1000GP_chr21.glimpse.map", + "initialisation/map_convertion/1000GP_chr21.minimac.map", + "initialisation/map_convertion/1000GP_chr21.plink.map", + "initialisation/map_convertion/1000GP_chr21.stitch.map", + "initialisation/map_convertion/1000GP_chr22.glimpse.map", + "initialisation/map_convertion/1000GP_chr22.minimac.map", + "initialisation/map_convertion/1000GP_chr22.plink.map", + "initialisation/map_convertion/1000GP_chr22.stitch.map", + "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -377,6 +388,15 @@ "NA12878.quilt2.bcftools_stats.txt:md5,f88efb8027ebd4d183f07b4c3c6c8b41", "NA19401.quilt2.bcftools_stats.txt:md5,7fd7ee760998b4b0bcff08a9a42e663b", "NA20359.quilt2.bcftools_stats.txt:md5,06d4bf7b5c0ea4da43ecf2f65d3d78c2", + "1000GP_chr21.glimpse.map:md5,eda05f6e81718e00e83085bb0b07e584", + "1000GP_chr21.minimac.map:md5,ab83755a9804d27b1086e48e2da64992", + "1000GP_chr21.plink.map:md5,57966816771960670e5149fa5c3cec60", + "1000GP_chr21.stitch.map:md5,939930334691cc1730c3c322af8c2c32", + "1000GP_chr22.glimpse.map:md5,d541ba3ef55d3cff7ad3273ba4be7c64", + "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", + "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", + "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", + "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", 
"bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", @@ -395,43 +415,35 @@ "VCF files": [ [ "all_samples.batch0.quilt2.vcf.gz", - "d36d11fd199683dfc52c0f1ff34a11e1" + "5585090528c5cd4a028733d7594e4284" ], [ "NA12878.quilt2.vcf.gz", - "8f68a04e0f4b088899b726d3668cb153" + "c491e0822babc104a38e21d2ababdbcc" ], [ "NA19401.quilt2.vcf.gz", - "1cd58749c272dfcb9968694b69598ef2" + "d7ec86f857fe49917e19286c84d53fe1" ], [ "NA20359.quilt2.vcf.gz", - "45563c6c8caff963299f40e22cf1e981" + "679d3a09495d87789363c86e7c05092d" ], [ "all_samples.batch0.chr21.quilt2.ligate.vcf.gz", - "d695149ed8b53cb1a983b4f68f261087" - ], - [ - "all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", - "87d7364a75db14177a21e4e4b0e8c699" + "6ba188da786905b9c4857dd800ee45f5" ], [ - "all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", - "6232e6c6dc851a497a82f25d1315b09f" + "all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz", + "c50153f002611d6e62b8d434781cea60" ], [ "all_samples.batch0.chr22.quilt2.ligate.vcf.gz", - "b21990d0540106d38fa72ebf8003e5f9" - ], - [ - "all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", - "bd24854631ffb9d3df57ac77ac103f33" + "328ad6f108ee9b95dd355ce6c4069acf" ], [ - "all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", - "943b4ec2d479edb0bef12b65f0a32534" + "all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz", + "857a9ea039ac3184a8ba006f695e6a9f" ] ], "CSV files": [ @@ -457,16 +469,16 @@ } } ], - "timestamp": "2026-04-05T18:34:06.561257211", + "timestamp": "2026-04-15T17:55:36.241000468", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" } }, - "Check test_quilt2 - without chunks": { + "Check test_quilt2 - with chunks - with map": { "content": [ { - "workflow size": 22, + "workflow size": 26, "versions": { "BAMCHREXTRACT": { "samtools": "1.23" @@ -486,6 +498,11 @@ "BCFTOOLS_STATS_TOOLS": { "bcftools": "1.22" }, + "CUSTOM_GENETICMAPCONVERT": { + "r-base": "4.5.3", + "r-data.table": "1.17.8", + "r-janitor": "2.2.1" + }, 
"GAWK_IMPUTED": { "gawk": "5.3.1" }, @@ -535,17 +552,31 @@ "imputation/quilt2/variant_calling/RData", "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz", "imputation/quilt2/variant_calling/all_samples.batch0.chr21.quilt2.ligate.vcf.gz.tbi", - "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz", - "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz.tbi", "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz", "imputation/quilt2/variant_calling/all_samples.batch0.chr22.quilt2.ligate.vcf.gz.tbi", - "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz", - "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz.tbi", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", + "imputation/quilt2/variant_calling/all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz.tbi", "imputation/stats", "imputation/stats/NA12878.quilt2.bcftools_stats.txt", "imputation/stats/NA19401.quilt2.bcftools_stats.txt", "imputation/stats/NA20359.quilt2.bcftools_stats.txt", "initialisation", + "initialisation/map_convertion", + "initialisation/map_convertion/1000GP_chr21.glimpse.map", + "initialisation/map_convertion/1000GP_chr21.minimac.map", + 
"initialisation/map_convertion/1000GP_chr21.plink.map", + "initialisation/map_convertion/1000GP_chr21.stitch.map", + "initialisation/map_convertion/1000GP_chr22.glimpse.map", + "initialisation/map_convertion/1000GP_chr22.minimac.map", + "initialisation/map_convertion/1000GP_chr22.plink.map", + "initialisation/map_convertion/1000GP_chr22.stitch.map", + "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -605,6 +636,15 @@ "NA12878.quilt2.bcftools_stats.txt:md5,f88efb8027ebd4d183f07b4c3c6c8b41", "NA19401.quilt2.bcftools_stats.txt:md5,7fd7ee760998b4b0bcff08a9a42e663b", "NA20359.quilt2.bcftools_stats.txt:md5,06d4bf7b5c0ea4da43ecf2f65d3d78c2", + "1000GP_chr21.glimpse.map:md5,eda05f6e81718e00e83085bb0b07e584", + "1000GP_chr21.minimac.map:md5,ab83755a9804d27b1086e48e2da64992", + "1000GP_chr21.plink.map:md5,57966816771960670e5149fa5c3cec60", + "1000GP_chr21.stitch.map:md5,939930334691cc1730c3c322af8c2c32", + "1000GP_chr22.glimpse.map:md5,d541ba3ef55d3cff7ad3273ba4be7c64", + "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", + "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", + "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", + "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", @@ -623,35 +663,43 @@ "VCF files": [ [ "all_samples.batch0.quilt2.vcf.gz", - "2fee7e17d0f94516684e69c9ad852524" + "3860f5cd4b1e1ad4b23d3d8293ba3449" ], [ "NA12878.quilt2.vcf.gz", - "5b24356dcf66789c9131ab0ec571dcc2" + "cf2c3647ccbfe47cb53ada151db67909" ], [ "NA19401.quilt2.vcf.gz", - "c3ca81440ada4d0dea1d52274b668f93" + "2683d8f1f4f4a3f61c6a998586fe8e40" ], [ "NA20359.quilt2.vcf.gz", - "f1a8b0d39b3fe5648428c7a35b87d97f" + 
"4a87d78989c5e3b1af2bef98a3533858" ], [ "all_samples.batch0.chr21.quilt2.ligate.vcf.gz", - "6ba188da786905b9c4857dd800ee45f5" + "5f690d06b8e4f90d2bf52b0a09ea35b2" ], [ - "all_samples.batch0.chr21_16570000-16610000.quilt2.vcf.gz", - "c50153f002611d6e62b8d434781cea60" + "all_samples.batch0.chr21_16570070-16595525.quilt2.vcf.gz", + "9c07847ef2d0086fc15869ddc7e69af2" + ], + [ + "all_samples.batch0.chr21_16585483-16609998.quilt2.vcf.gz", + "c92220361f83278880af752da26dd4df" ], [ "all_samples.batch0.chr22.quilt2.ligate.vcf.gz", - "63843cb986b001f6d7a24124f8a43570" + "5036f5a5ae65eb171c515af9dd908965" ], [ - "all_samples.batch0.chr22_16570000-16610000.quilt2.vcf.gz", - "2208d7384aec1559c946df908c8ec0a6" + "all_samples.batch0.chr22_16570065-16597215.quilt2.vcf.gz", + "1dfe7b37d3a518ef8135bc6bbad23f37" + ], + [ + "all_samples.batch0.chr22_16587172-16609999.quilt2.vcf.gz", + "5110503ab297ae3e8621450c09070228" ] ], "CSV files": [ @@ -677,10 +725,10 @@ } } ], - "timestamp": "2026-04-05T18:35:22.861373304", + "timestamp": "2026-04-15T17:52:36.337568902", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" } } -} \ No newline at end of file +} From 0263ad2d58baf544bfd5622485ccdacf8170a325 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:18:10 -0300 Subject: [PATCH 18/28] Update README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Louis Le Nézet <58640615+LouisLeNezet@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8d47db3a..95f55564 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ The whole pipeline consists of five main steps, each of which can be run separat - **Position Extraction** for targeted imputation sites. 4. 
**Imputation (`--impute`)**: This is the primary step, where genotypes in the target dataset are imputed using the prepared reference panel. The main steps are: - - **Imputation** of the target dataset using tools like [Glimpse1](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html), [Glimpse2](https://odelaneau.github.io/GLIMPSE/), [Stitch](https://github.com/rwdavies/stitch), [Quilt](https://github.com/rwdavies/QUILT), [Quilt2](https://github.com/rwdavies/QUILT), [Beagle5](https://faculty.washington.edu/browning/beagle/beagle.html) or [Minimac4](https://github.com/statgen/Minimac4). + - **Imputation** of the target dataset using tools like [Glimpse1](https://odelaneau.github.io/GLIMPSE/glimpse1/index.html), [Glimpse2](https://odelaneau.github.io/GLIMPSE/), [Stitch](https://github.com/rwdavies/stitch), [Quilt/Quilt2](https://github.com/rwdavies/QUILT), [Beagle5](https://faculty.washington.edu/browning/beagle/beagle.html) or [Minimac4](https://github.com/statgen/Minimac4). - **Ligation** of imputed chunks to produce a final VCF file per sample, with all chromosomes unified. 5. **Validation (`--validate`)**: Assesses imputation accuracy by comparing the imputed dataset to a truth dataset. This step leverages the [Glimpse2](https://odelaneau.github.io/GLIMPSE/) concordance process to summarize differences between two VCF files. 
From f71958ec23448e21175d97a8453e85c222f5004d Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 17 May 2026 13:36:08 -0300 Subject: [PATCH 19/28] add bam_impute_quilt2 from nf-core --- conf/containers_conda_lock_files_amd64.config | 1 + conf/containers_conda_lock_files_arm64.config | 1 + conf/containers_docker_amd64.config | 1 + conf/containers_docker_arm64.config | 1 + .../containers_singularity_https_amd64.config | 1 + .../containers_singularity_https_arm64.config | 1 + conf/containers_singularity_oras_amd64.config | 1 + conf/containers_singularity_oras_arm64.config | 1 + modules.json | 23 +- .../nf-core/bcftools/index/environment.yml | 4 +- modules/nf-core/bcftools/index/main.nf | 6 +- .../bcftools/index/tests/main.nf.test.snap | 36 +-- modules/nf-core/glimpse2/ligate/main.nf | 4 +- modules/nf-core/quilt/quilt2/main.nf | 4 +- .../nf-core/bam_impute_quilt2/main.nf | 76 +++++++ .../nf-core/bam_impute_quilt2/meta.yml | 128 +++++++++++ .../bam_impute_quilt2/tests/main.nf.test | 206 ++++++++++++++++++ .../bam_impute_quilt2/tests/main.nf.test.snap | 71 ++++++ .../bam_impute_quilt2/tests/nextflow.config | 15 ++ workflows/phaseimpute/main.nf | 2 +- 20 files changed, 545 insertions(+), 38 deletions(-) create mode 100644 conf/containers_conda_lock_files_amd64.config create mode 100644 conf/containers_conda_lock_files_arm64.config create mode 100644 conf/containers_docker_amd64.config create mode 100644 conf/containers_docker_arm64.config create mode 100644 conf/containers_singularity_https_amd64.config create mode 100644 conf/containers_singularity_https_arm64.config create mode 100644 conf/containers_singularity_oras_amd64.config create mode 100644 conf/containers_singularity_oras_arm64.config create mode 100644 subworkflows/nf-core/bam_impute_quilt2/main.nf create mode 100644 subworkflows/nf-core/bam_impute_quilt2/meta.yml create mode 100644 subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test create mode 100644 
subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/bam_impute_quilt2/tests/nextflow.config diff --git a/conf/containers_conda_lock_files_amd64.config b/conf/containers_conda_lock_files_amd64.config new file mode 100644 index 00000000..f6b0997e --- /dev/null +++ b/conf/containers_conda_lock_files_amd64.config @@ -0,0 +1 @@ +process { withName: 'MULTIQC' { container = 'https://wave.seqera.io/v1alpha1/builds/bd-ee7739d47738383b_1/condalock' } } diff --git a/conf/containers_conda_lock_files_arm64.config b/conf/containers_conda_lock_files_arm64.config new file mode 100644 index 00000000..0cd12a28 --- /dev/null +++ b/conf/containers_conda_lock_files_arm64.config @@ -0,0 +1 @@ +process { withName: 'MULTIQC' { container = 'https://wave.seqera.io/v1alpha1/builds/bd-58d7dee710ab3aa8_1/condalock' } } diff --git a/conf/containers_docker_amd64.config b/conf/containers_docker_amd64.config new file mode 100644 index 00000000..dd93726b --- /dev/null +++ b/conf/containers_docker_amd64.config @@ -0,0 +1 @@ +process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b' } } diff --git a/conf/containers_docker_arm64.config b/conf/containers_docker_arm64.config new file mode 100644 index 00000000..23418fef --- /dev/null +++ b/conf/containers_docker_arm64.config @@ -0,0 +1 @@ +process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc:1.33--58d7dee710ab3aa8' } } diff --git a/conf/containers_singularity_https_amd64.config b/conf/containers_singularity_https_amd64.config new file mode 100644 index 00000000..d04c5be3 --- /dev/null +++ b/conf/containers_singularity_https_amd64.config @@ -0,0 +1 @@ +process { withName: 'MULTIQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data' } } diff --git a/conf/containers_singularity_https_arm64.config 
b/conf/containers_singularity_https_arm64.config new file mode 100644 index 00000000..4a031237 --- /dev/null +++ b/conf/containers_singularity_https_arm64.config @@ -0,0 +1 @@ +process { withName: 'MULTIQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/78/78b89e91d89e9cc99ad5ade5be311f347838cb2acbfb4f13bc343b170be09ce4/data' } } diff --git a/conf/containers_singularity_oras_amd64.config b/conf/containers_singularity_oras_amd64.config new file mode 100644 index 00000000..2d8d51fc --- /dev/null +++ b/conf/containers_singularity_oras_amd64.config @@ -0,0 +1 @@ +process { withName: 'MULTIQC' { container = 'oras://community.wave.seqera.io/library/multiqc:1.33--e3576ddf588fa00d' } } diff --git a/conf/containers_singularity_oras_arm64.config b/conf/containers_singularity_oras_arm64.config new file mode 100644 index 00000000..c3210dd0 --- /dev/null +++ b/conf/containers_singularity_oras_arm64.config @@ -0,0 +1 @@ +process { withName: 'MULTIQC' { container = 'oras://community.wave.seqera.io/library/multiqc:1.33--2537ca5f8445e3c2' } } diff --git a/modules.json b/modules.json index 019471dc..adcf3e35 100644 --- a/modules.json +++ b/modules.json @@ -22,17 +22,16 @@ }, "bcftools/index": { "branch": "master", - "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", "installed_by": [ "bam_impute_quilt", + "bam_impute_quilt2", "bam_impute_stitch", "bam_vcf_impute_glimpse2", - "modules", "vcf_impute_beagle5", "vcf_impute_glimpse", "vcf_impute_minimac4", - "vcf_phase_shapeit5", - "bam_impute_quilt2" + "vcf_phase_shapeit5" ] }, "bcftools/merge": { @@ -117,15 +116,14 @@ }, "glimpse2/ligate": { "branch": "master", - "git_sha": "236d7f19efcffccdfac5e1850af2aa035e0de79c", + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", "installed_by": [ "bam_impute_quilt", + "bam_impute_quilt2", "bam_impute_stitch", "bam_vcf_impute_glimpse2", - "modules", "vcf_impute_beagle5", - "vcf_impute_minimac4", - 
"bam_impute_quilt2" + "vcf_impute_minimac4" ] }, "glimpse2/phase": { @@ -165,8 +163,8 @@ }, "quilt/quilt2": { "branch": "master", - "git_sha": "4e94abb54fc2e2e868e943ee137a958878b8d092", - "installed_by": ["bam_impute_quilt2", "modules"] + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", + "installed_by": ["bam_impute_quilt2"] }, "samtools/coverage": { "branch": "master", @@ -242,6 +240,11 @@ "git_sha": "4e2990cc0df18823d11b192df73039c80fdebc7c", "installed_by": ["subworkflows"] }, + "bam_impute_quilt2": { + "branch": "master", + "git_sha": "0c06dfb24cd33e404c2811c28d74dd9e4a1df5ce", + "installed_by": ["subworkflows"] + }, "bam_impute_stitch": { "branch": "master", "git_sha": "e1cb31f0ced0d0810d1cb099aaa690b05beb1f3a", diff --git a/modules/nf-core/bcftools/index/environment.yml b/modules/nf-core/bcftools/index/environment.yml index ba863b38..01193fda 100644 --- a/modules/nf-core/bcftools/index/environment.yml +++ b/modules/nf-core/bcftools/index/environment.yml @@ -5,6 +5,6 @@ channels: - bioconda dependencies: # renovate: datasource=conda depName=bioconda/htslib - - bioconda::htslib=1.22.1 + - bioconda::htslib=1.23.1 # renovate: datasource=conda depName=bioconda/bcftools - - bioconda::bcftools=1.22 + - bioconda::bcftools=1.23.1 diff --git a/modules/nf-core/bcftools/index/main.nf b/modules/nf-core/bcftools/index/main.nf index 8635a1a2..757fa7cb 100644 --- a/modules/nf-core/bcftools/index/main.nf +++ b/modules/nf-core/bcftools/index/main.nf @@ -3,9 +3,9 @@ process BCFTOOLS_INDEX { label 'process_low' conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' - : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0b/0b4d52ca9a56d07be3f78a12af654e5116f5112908dba277e6796fd9dfb83fe5/data' + : 'community.wave.seqera.io/library/bcftools_htslib:1.23.1--9f08ec665533d64a'}" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/bcftools/index/tests/main.nf.test.snap b/modules/nf-core/bcftools/index/tests/main.nf.test.snap index 2074e974..cce1c797 100644 --- a/modules/nf-core/bcftools/index/tests/main.nf.test.snap +++ b/modules/nf-core/bcftools/index/tests/main.nf.test.snap @@ -17,7 +17,7 @@ [ "BCFTOOLS_INDEX", "bcftools", - "1.22" + "1.23.1" ] ], "csi": [ @@ -35,16 +35,16 @@ [ "BCFTOOLS_INDEX", "bcftools", - "1.22" + "1.23.1" ] ] } ], + "timestamp": "2026-03-20T18:02:30.863066697", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T12:03:32.869223843" + } }, "sarscov2 - vcf - tbi": { "content": [ @@ -61,16 +61,16 @@ [ "BCFTOOLS_INDEX", "bcftools", - "1.22" + "1.23.1" ] ] } ], + "timestamp": "2026-03-20T18:02:23.639599735", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T12:03:24.708477718" + } }, "sarscov2 - vcf - tbi - stub": { "content": [ @@ -90,7 +90,7 @@ [ "BCFTOOLS_INDEX", "bcftools", - "1.22" + "1.23.1" ] ], "csi": [ @@ -108,16 +108,16 @@ [ "BCFTOOLS_INDEX", "bcftools", - "1.22" + "1.23.1" ] ] } ], + "timestamp": "2026-03-20T18:02:38.300964104", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T12:03:40.444304193" + } }, "sarscov2 - vcf - csi": { "content": [ @@ -134,15 +134,15 @@ [ 
"BCFTOOLS_INDEX", "bcftools", - "1.22" + "1.23.1" ] ] } ], + "timestamp": "2026-03-20T18:02:16.523585018", "meta": { - "nf-test": "0.9.3", + "nf-test": "0.9.4", "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T12:03:17.363152216" + } } } \ No newline at end of file diff --git a/modules/nf-core/glimpse2/ligate/main.nf b/modules/nf-core/glimpse2/ligate/main.nf index f288a595..4fdd4d62 100644 --- a/modules/nf-core/glimpse2/ligate/main.nf +++ b/modules/nf-core/glimpse2/ligate/main.nf @@ -3,9 +3,9 @@ process GLIMPSE2_LIGATE { label 'process_low' conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/glimpse-bio:2.0.1--h46b9e50_1' - : 'biocontainers/glimpse-bio:2.0.1--h46b9e50_1'}" + : 'quay.io/biocontainers/glimpse-bio:2.0.1--h46b9e50_1'}" input: tuple val(meta), path(input_list), path(input_index) diff --git a/modules/nf-core/quilt/quilt2/main.nf b/modules/nf-core/quilt/quilt2/main.nf index 4a2ceff4..32a73cbc 100644 --- a/modules/nf-core/quilt/quilt2/main.nf +++ b/modules/nf-core/quilt/quilt2/main.nf @@ -3,9 +3,9 @@ process QUILT_QUILT2 { label 'process_single' conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/r-quilt:2.0.4--r44h503566f_0' - : 'biocontainers/r-quilt:2.0.4--r44h503566f_0'}" + : 'quay.io/biocontainers/r-quilt:2.0.4--r44h503566f_0'}" input: tuple val(meta), path(bams), path(bais), path(bamlist), path(samplename), path(reference_vcf_file), path(reference_vcf_index), path(posfile), path(phasefile), path(genfile), val(chr), val(regions_start), val(regions_end), val(ngen), val(buffer), path(genetic_map) diff --git a/subworkflows/nf-core/bam_impute_quilt2/main.nf b/subworkflows/nf-core/bam_impute_quilt2/main.nf new file mode 100644 index 00000000..78a1aad1 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/main.nf @@ -0,0 +1,76 @@ +include { QUILT_QUILT2 } from '../../../modules/nf-core/quilt/quilt2/main' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' + +workflow BAM_IMPUTE_QUILT2 { + take: + ch_input // channel (mandatory): [ meta, bam/cram, bai/crai, bampaths, bamnames ] + ch_reference_panel // channel (mandatory): [ meta, reference_vcf, reference_index ] + ch_chunks // channel (optional): [ meta, chr, start, end ] + ch_map // channel (optional): [ meta, genetic_map ] + ch_fasta // channel (optional): [ meta, fasta, fai ] + n_gen // integer: Number of generations since founding or mixing + buffer // integer: Buffer of region to perform imputation over + + main: + + + ch_parameters = ch_reference_panel + .combine(ch_map, by: 0) + .combine(ch_chunks, by: 0) + + ch_parameters.ifEmpty { + error("ERROR: join operation resulted in an empty channel. 
Please provide a valid ch_chunks and ch_map channel as input.") + } + + ch_bam_params = ch_input + .combine(ch_parameters) + .map { meta_input, bam, bai, bampath, bamname, meta_panel, reference_vcf, reference_index, genetic_map, chr, start, end -> + def regionout = "${chr}" + if (start != [] && end != []) { + regionout = "${chr}:${start}-${end}" + } + + [ + meta_panel + meta_input + [regionout: regionout], + bam, + bai, + bampath, + bamname, + reference_vcf, + reference_index, + [], + [], + [], + chr, + start, + end, + n_gen, + buffer, + genetic_map, + ] + } + + QUILT_QUILT2(ch_bam_params, ch_fasta) + + ligate_input = QUILT_QUILT2.out.vcf + .join(QUILT_QUILT2.out.tbi) + .map { meta, vcf, index -> + def keys_to_keep = meta.keySet() - ['regionout'] + [meta.subMap(keys_to_keep), vcf, index] + } + .groupTuple() + + GLIMPSE2_LIGATE(ligate_input) + + BCFTOOLS_INDEX(GLIMPSE2_LIGATE.out.merged_variants) + + ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants.join( + BCFTOOLS_INDEX.out.tbi.mix(BCFTOOLS_INDEX.out.csi), + failOnMismatch: true, + failOnDuplicate: true, + ) + + emit: + vcf_index = ch_vcf_index // channel: [ meta, vcf, tbi/csi ] +} diff --git a/subworkflows/nf-core/bam_impute_quilt2/meta.yml b/subworkflows/nf-core/bam_impute_quilt2/meta.yml new file mode 100644 index 00000000..90b06058 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/meta.yml @@ -0,0 +1,128 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_impute_quilt2" +description: | + Impute low-coverage BAM or CRAM inputs with QUILT2 and ligate chunked outputs per chromosome. The 'regionout' meta key is reserved: it temporarily stores the imputed region and is removed before ligation, so it must not be set in the input meta maps.
+keywords: + - bam + - cram + - imputation + - quilt + - quilt2 + - vcf +components: + - quilt/quilt2 + - glimpse2/ligate + - bcftools/index +input: + - ch_input: + description: | + Channel with target sequencing data and optional rename files. + structure: + - meta: + type: map + description: Metadata map for the input batch. + - bam_or_cram: + type: file + description: Input BAM or CRAM file, or a list of BAM or CRAM files. + pattern: "*.{bam,cram}" + - index: + type: file + description: Index for the BAM or CRAM input. + pattern: "*.{bai,crai}" + - bampaths: + type: file + description: | + Optional text file listing BAM or CRAM paths to impute. + One file per line. + pattern: "*.{txt,tsv}" + - bamnames: + type: file + description: | + Optional text file listing sample names in the same order as `bampaths`. + One sample name per line. + pattern: "*.{txt,tsv}" + - ch_reference_panel: + description: | + Channel with phased reference panel VCFs per chromosome. + structure: + - meta: + type: map + description: | + Metadata map that will be combined with the input metadata. + Must include `id` for the panel name and `chr` for the chromosome. + - vcf: + type: file + description: Reference panel VCF or BCF. + pattern: "*.{vcf,vcf.gz,bcf}" + - index: + type: file + description: Index for the reference panel VCF or BCF. + pattern: "*.{tbi,csi}" + - ch_chunks: + description: | + Channel with optional imputation chunks per chromosome. + structure: + - meta: + type: map + description: Metadata map with the panel id and chromosome. + - chr: + type: string + description: Chromosome name. + - start: + type: integer + description: Start position of the chunk. + - end: + type: integer + description: End position of the chunk. + - ch_map: + description: | + Channel with optional genetic maps per chromosome. + structure: + - meta: + type: map + description: Metadata map with the panel id and chromosome. + - map: + type: file + description: Genetic map used by QUILT2.
+ pattern: "*.{txt,map}{,.gz}" + - ch_fasta: + description: | + Channel with optional reference FASTA data, required for CRAM inputs. + structure: + - meta: + type: map + description: Metadata map for the reference genome. + - fasta: + type: file + description: Reference genome FASTA. + pattern: "*.{fa,fasta}" + - fai: + type: file + description: FASTA index file. + pattern: "*.fai" + - n_gen: + type: integer + description: Number of generations since founding or mixing. + - buffer: + type: integer + description: Buffer of region to perform imputation over. +output: + - vcf_index: + description: | + Imputed and indexed VCFs after ligating chunked outputs per chromosome. + structure: + - meta: + type: map + description: Metadata map combined from the input batch and panel metadata. + - vcf: + type: file + description: Imputed VCF file. + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - index: + type: file + description: Index for the imputed VCF or BCF file. + pattern: "*.{tbi,csi}" +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test b/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test new file mode 100644 index 00000000..3354cf04 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test @@ -0,0 +1,206 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_IMPUTE_QUILT2" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_IMPUTE_QUILT2" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_impute_quilt2" + + tag "quilt" + tag "quilt/quilt2" + tag "bcftools" + tag "bcftools/index" + tag "glimpse2" + tag "glimpse2/ligate" + + test("Impute with quilt2 one individual, one region, map and fasta") { + when { + params { + quilt_args = "--save_prepared_reference=TRUE --seed=1" + } + workflow { + """ + bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) + input[0] = channel.of([ + [id: "allid"], +
file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) + ]) + .combine(bampath) + .combine(channel.of([[]])) + input[1] = channel.of([ + [id: "1000GP", chr: "chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true) + ]) + input[2] = channel.of( + [[id: "1000GP", chr: "chr22"], "chr22", "16570000", "16610000"] + ) + input[3] = channel.of([ + [id: "1000GP", chr: "chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.stitch.map", checkIfExists: true) + ]) + input[4] = channel.of([ + [id: "GRCh38"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true) + ]).collect() + input[5] = 100 + input[6] = 10000 + """ + } + } + then { + assert workflow.success + assert snapshot( + workflow.out.vcf_index.collect { meta, vcf, index -> [ + path(vcf).getFileName().toString(), + path(index).getFileName().toString(), + path(vcf).vcf.summary, + path(vcf).vcf.header.getGenotypeSamples().sort(), + path(vcf).vcf.variantsMD5 + ]}, + ).match() + } + } + + test("Impute with quilt2 one individual, one region, map, fasta and bamnames") { + when { + params { + quilt_args = "--seed=1" + } + workflow { + """ + bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) + bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true) + input[0] = channel.of([ + [id: "allid"], + file(params.modules_testdata_base_path + 
"genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) + ]) + .combine(bampath) + .combine(bamname) + input[1] = channel.of([ + [id: "1000GP", chr: "chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true) + ]) + input[2] = channel.of( + [[id: "1000GP", chr: "chr22"], "chr22", "16570000", "16610000"] + ) + input[3] = channel.of([ + [id: "1000GP", chr: "chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.stitch.map", checkIfExists: true) + ]) + input[4] = channel.of([ + [id: "GRCh38"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true) + ]).collect() + input[5] = 100 + input[6] = 10000 + """ + } + } + then { + assert workflow.success + assert snapshot( + workflow.out.vcf_index.collect { meta, vcf, index -> [ + path(vcf).getFileName().toString(), + path(index).getFileName().toString(), + path(vcf).vcf.summary, + path(vcf).vcf.header.getGenotypeSamples().sort(), + path(vcf).vcf.variantsMD5 + ]}, + ).match() + } + } + + test("homo_sapiens - empty channels - stub") { + options "-stub" + when { + params { + quilt_args = "--seed=1" + } + workflow { + """ + bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) + bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true) + ch_samples = channel.of([ + [id: "allid"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), 
+ file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) + ]) + input[0] = ch_samples.combine(bampath).combine(bamname) + input[1] = channel.of( + [[id: "1000GP", chr: "chr22"], [], []], + [[id: "1000GP", chr: "chr21"], [], []] + ) + input[2] = channel.of( + [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216], + [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999], + [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216], + [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999] + ) + input[3] = channel.of( + [[id: "1000GP", chr: "chr22"], []], + [[id: "1000GP", chr: "chr21"], []] + ) + input[4] = channel.of([[id: "GRCh38"], [], []]).collect() + input[5] = 100 + input[6] = 10000 + """ + } + } + then { + assert workflow.success + assert snapshot(sanitizeOutput(workflow.out)).match() + } + } + + test("homo_sapiens - error empty join - stub") { + options "-stub" + when { + params { + quilt_args = "--seed=1" + } + workflow { + """ + bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) + bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true) + ch_samples = channel.of([ + [id: "allid"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) + ]) + input[0] = ch_samples.combine(bampath).combine(bamname) + input[1] = channel.of( + [[id: "otherpanel", chr: "chr22"], [], []], + [[id: "1000GP", chr: "chr21"], [], []] + ) + input[2] = channel.of( + [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216], + [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999], + [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216], + [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999] + ) + input[3] = 
channel.of( + [[id: "1000GP", chr: "chr22"], []], + [[id: "otherpanel", chr: "chr21"], []] + ) + input[4] = channel.of([[id: "GRCh38"], [], []]).collect() + input[5] = 100 + input[6] = 10000 + """ + } + } + then { + assert workflow.failed + assert workflow.errorMessage.contains("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.") + } + } +} diff --git a/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test.snap b/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test.snap new file mode 100644 index 00000000..d53730c9 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test.snap @@ -0,0 +1,71 @@ +{ + "homo_sapiens - empty channels - stub": { + "content": [ + { + "vcf_index": [ + [ + { + "id": "allid", + "chr": "chr21" + }, + "allid_chr21.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "allid_chr21.ligate.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "allid", + "chr": "chr22" + }, + "allid_chr22.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "allid_chr22.ligate.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "timestamp": "2026-05-09T16:40:01.487040417", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "Impute with quilt2 one individual, one region, map, fasta and bamnames": { + "content": [ + [ + [ + "allid_chr22.ligate.vcf.gz", + "allid_chr22.ligate.vcf.gz.tbi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=false]", + [ + "MySample1" + ], + "de865f8128d121d8ae4b5e4c3aa662e2" + ] + ] + ], + "timestamp": "2026-05-09T16:39:50.933749879", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "Impute with quilt2 one individual, one region, map and fasta": { + "content": [ + [ + [ + "allid_chr22.ligate.vcf.gz", + "allid_chr22.ligate.vcf.gz.tbi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, 
phasedAutodetect=false]", + [ + "NA12878" + ], + "de865f8128d121d8ae4b5e4c3aa662e2" + ] + ] + ], + "timestamp": "2026-05-09T16:39:30.814000823", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + } +} diff --git a/subworkflows/nf-core/bam_impute_quilt2/tests/nextflow.config b/subworkflows/nf-core/bam_impute_quilt2/tests/nextflow.config new file mode 100644 index 00000000..c9280911 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/tests/nextflow.config @@ -0,0 +1,15 @@ +process { + withName: QUILT_QUILT2 { + cpus = 1 + ext.args = { "${params.quilt_args}" } + ext.prefix = { "${meta.id}_${meta.chr}_${meta.regionout}.quilt2" } + } + + withName: GLIMPSE2_LIGATE { + ext.prefix = { "${meta.id}_${meta.chr}.ligate" } + } + + withName: BCFTOOLS_INDEX { + ext.args = "--tbi" + } +} diff --git a/workflows/phaseimpute/main.nf b/workflows/phaseimpute/main.nf index a343f8dd..80338077 100644 --- a/workflows/phaseimpute/main.nf +++ b/workflows/phaseimpute/main.nf @@ -62,7 +62,7 @@ include { TABIX_BGZIP as BGZIP_POSFILE_IMPUTE } from '../../modules/nf-co // QUILT subworkflows include { BAM_IMPUTE_QUILT } from '../../subworkflows/nf-core/bam_impute_quilt' include { VCF_GATHER_BCFTOOLS as CONCAT_QUILT } from '../../subworkflows/nf-core/vcf_gather_bcftools' -include { BAM_IMPUTE_QUILT2 } from '../../subworkflows/local/bam_impute_quilt2' +include { BAM_IMPUTE_QUILT2 } from '../../subworkflows/nf-core/bam_impute_quilt2' include { VCF_GATHER_BCFTOOLS as CONCAT_QUILT2 } from '../../subworkflows/nf-core/vcf_gather_bcftools' // STITCH subworkflows From ada6e8b0dd2b20351c7825c7c6f432d1e1bdf85a Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 17 May 2026 13:56:53 -0300 Subject: [PATCH 20/28] update tests quilt2 --- .../utils_nfcore_phaseimpute_pipeline/main.nf | 19 +++++++++++------- .../tests/test_quilt2.nf.test.snap | 20 +++++++++---------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git 
a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf index b904aa9d..b7cc8759 100644 --- a/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_phaseimpute_pipeline/main.nf @@ -358,16 +358,21 @@ workflow PIPELINE_INITIALISATION { chr_posfile_mis = checkMetaChr(chr_regions, extractChr(ch_posfile), "position", max_chr_names) // Compute the intersection of all chromosomes names - chr_all_mis = chr_ref_mis.concat(chr_chunks_mis, chr_map_mis, chr_panel_mis, chr_posfile_mis) + chr_all_mis_ch = chr_ref_mis.concat(chr_chunks_mis, chr_map_mis, chr_panel_mis, chr_posfile_mis) .unique() - .toList() - .subscribe{ chr -> - if (chr.size() > 0) { - def chr_names = chr.size() > max_chr_names ? chr[0..max_chr_names - 1] + ['...'] : chr - log.warn "The following contigs are absent from at least one file : ${chr_names} and therefore won't be used" } } + chr_all_mis = chr_all_mis_ch.toList() + + chr_all_mis_for_combine = chr_all_mis.map { chr -> [chr] } + + chr_all_mis.subscribe{ chr -> + if (chr.size() > 0) { + def chr_names = chr.size() > max_chr_names ? 
chr[0..max_chr_names - 1] + ['...'] : chr + log.warn "The following contigs are absent from at least one file : ${chr_names} and therefore won't be used" + } + } ch_regions = ch_regions - .combine(chr_all_mis.toList()) + .combine(chr_all_mis_for_combine) .filter { meta, _regions, chr_mis -> !(meta.chr in chr_mis) } diff --git a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap index d20e5293..c50d1b65 100644 --- a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap +++ b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap @@ -11,7 +11,7 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -225,10 +225,10 @@ } } ], - "timestamp": "2026-04-05T18:34:06.561257211", + "timestamp": "2026-05-17T13:53:43.556030062", "meta": { "nf-test": "0.9.5", - "nextflow": "25.10.4" + "nextflow": "26.04.0" } }, "Check test_quilt2 - no chunks - with map": { @@ -243,7 +243,7 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -469,10 +469,10 @@ } } ], - "timestamp": "2026-04-15T17:55:36.241000468", + "timestamp": "2026-05-17T13:55:00.045383039", "meta": { "nf-test": "0.9.5", - "nextflow": "25.10.4" + "nextflow": "26.04.0" } }, "Check test_quilt2 - with chunks - with map": { @@ -487,7 +487,7 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -725,10 +725,10 @@ } } ], - "timestamp": "2026-04-15T17:52:36.337568902", + "timestamp": "2026-05-17T13:49:55.979625287", "meta": { "nf-test": "0.9.5", - "nextflow": "25.10.4" + "nextflow": "26.04.0" } } -} +} \ No newline at end of file From ac27f5bb1f19bb4930c19e4ac95d351823a4a86e Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 17 May 2026 13:59:46 -0300 Subject: [PATCH 21/28] 
update usage.md --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 72a95e30..ee95c391 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -542,7 +542,7 @@ Choose `quilt2` by default for new projects. The official QUILT2 documentation d Choose `quilt` when you specifically want the original QUILT workflow. -#### `quilt` +#### `quilt` / `quilt2` The required inputs for `quilt` are BAM/CRAM samples provided in the input samplesheet (`--input`) and a CSV file with the genomic chunks (`--chunks`). From 4d1584b9e1f3928c8312c2b905914c2f37031ab1 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 17 May 2026 14:23:44 -0300 Subject: [PATCH 22/28] update nf-tests --- .../phaseimpute/tests/test_all.nf.test.snap | 24 +++++++++---------- .../phaseimpute/tests/test_batch.nf.test.snap | 12 +++++----- .../tests/test_beagle5.nf.test.snap | 20 ++++++++-------- .../phaseimpute/tests/test_dog.nf.test.snap | 12 +++++----- .../tests/test_glimpse1.nf.test.snap | 20 ++++++++-------- .../tests/test_glimpse2.nf.test.snap | 20 ++++++++-------- .../tests/test_minimac4.nf.test.snap | 20 ++++++++-------- .../tests/test_panelprep.nf.test.snap | 24 +++++++++---------- .../phaseimpute/tests/test_quilt.nf.test.snap | 16 ++++++------- .../tests/test_stitch.nf.test.snap | 20 ++++++++-------- 10 files changed, 94 insertions(+), 94 deletions(-) diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index 941cd784..00f188dc 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -20,13 +20,13 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_MERGE": { "bcftools": 
"1.22" @@ -980,10 +980,10 @@ } } ], - "timestamp": "2026-03-30T21:08:32.382829865", + "timestamp": "2026-05-17T14:04:03.385347374", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } }, "Check test_all - with map": { @@ -1007,13 +1007,13 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_MERGE": { "bcftools": "1.22" @@ -1991,10 +1991,10 @@ } } ], - "timestamp": "2026-03-30T21:06:13.41408092", + "timestamp": "2026-05-17T14:01:40.731382998", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/workflows/phaseimpute/tests/test_batch.nf.test.snap b/workflows/phaseimpute/tests/test_batch.nf.test.snap index c89dde42..27d33fae 100644 --- a/workflows/phaseimpute/tests/test_batch.nf.test.snap +++ b/workflows/phaseimpute/tests/test_batch.nf.test.snap @@ -14,13 +14,13 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -478,10 +478,10 @@ } } ], - "timestamp": "2026-03-30T12:05:20.063520882", + "timestamp": "2026-05-17T14:05:55.670301894", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/workflows/phaseimpute/tests/test_beagle5.nf.test.snap b/workflows/phaseimpute/tests/test_beagle5.nf.test.snap index facf5f37..e663f2e2 100644 --- a/workflows/phaseimpute/tests/test_beagle5.nf.test.snap +++ b/workflows/phaseimpute/tests/test_beagle5.nf.test.snap @@ -8,10 +8,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": 
{ - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -186,10 +186,10 @@ } } ], - "timestamp": "2026-03-30T21:12:24.630260449", + "timestamp": "2026-05-17T14:08:12.4413668", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } }, "Check test_beagle5 - with chunks - with map": { @@ -201,10 +201,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -403,10 +403,10 @@ } } ], - "timestamp": "2026-03-30T21:11:27.945584828", + "timestamp": "2026-05-17T14:07:12.512029555", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/workflows/phaseimpute/tests/test_dog.nf.test.snap b/workflows/phaseimpute/tests/test_dog.nf.test.snap index 7703c473..9758ee98 100644 --- a/workflows/phaseimpute/tests/test_dog.nf.test.snap +++ b/workflows/phaseimpute/tests/test_dog.nf.test.snap @@ -17,13 +17,13 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_MPILEUP": { "bcftools": "1.22" @@ -451,10 +451,10 @@ } } ], - "timestamp": "2026-03-30T12:09:53.930296148", + "timestamp": "2026-05-17T14:09:37.4495481", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/workflows/phaseimpute/tests/test_glimpse1.nf.test.snap b/workflows/phaseimpute/tests/test_glimpse1.nf.test.snap index 8ff574a6..dc48f9bd 100644 --- a/workflows/phaseimpute/tests/test_glimpse1.nf.test.snap +++ 
b/workflows/phaseimpute/tests/test_glimpse1.nf.test.snap @@ -14,10 +14,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_MERGE": { "bcftools": "1.22" @@ -291,10 +291,10 @@ } } ], - "timestamp": "2026-03-30T21:15:34.51932143", + "timestamp": "2026-05-17T14:11:31.945106119", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } }, "Check test_glimpse1 - with chunks - with map": { @@ -312,10 +312,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_MERGE": { "bcftools": "1.22" @@ -615,10 +615,10 @@ } } ], - "timestamp": "2026-03-30T21:14:45.704218392", + "timestamp": "2026-05-17T14:10:38.598193973", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/workflows/phaseimpute/tests/test_glimpse2.nf.test.snap b/workflows/phaseimpute/tests/test_glimpse2.nf.test.snap index 5d54a34e..3d8433c8 100644 --- a/workflows/phaseimpute/tests/test_glimpse2.nf.test.snap +++ b/workflows/phaseimpute/tests/test_glimpse2.nf.test.snap @@ -11,10 +11,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -213,10 +213,10 @@ } } ], - "timestamp": "2026-03-30T21:16:21.610584845", + "timestamp": "2026-05-17T14:12:21.250835903", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } }, "Check test_glimpse2 - no chunks - no map": { @@ -231,10 +231,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - 
"bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -409,10 +409,10 @@ } } ], - "timestamp": "2026-03-30T21:17:03.924585031", + "timestamp": "2026-05-17T14:13:04.077268746", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/workflows/phaseimpute/tests/test_minimac4.nf.test.snap b/workflows/phaseimpute/tests/test_minimac4.nf.test.snap index db5261c6..d211f99a 100644 --- a/workflows/phaseimpute/tests/test_minimac4.nf.test.snap +++ b/workflows/phaseimpute/tests/test_minimac4.nf.test.snap @@ -8,10 +8,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -213,10 +213,10 @@ } } ], - "timestamp": "2026-03-30T21:18:07.290608466", + "timestamp": "2026-05-17T14:14:20.802213098", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } }, "Check test_minimac4 - no chunks - no map": { @@ -228,10 +228,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -409,10 +409,10 @@ } } ], - "timestamp": "2026-03-30T21:19:10.383525661", + "timestamp": "2026-05-17T14:15:21.729409654", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/workflows/phaseimpute/tests/test_panelprep.nf.test.snap b/workflows/phaseimpute/tests/test_panelprep.nf.test.snap index 8f427a16..c0f2702e 100644 --- a/workflows/phaseimpute/tests/test_panelprep.nf.test.snap +++ b/workflows/phaseimpute/tests/test_panelprep.nf.test.snap @@ -8,13 +8,13 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": 
"1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_NORM": { "bcftools": "1.22" @@ -215,10 +215,10 @@ ] } ], - "timestamp": "2026-03-30T21:20:00.70817045", + "timestamp": "2026-05-17T14:16:17.989373968", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } }, "Check test_panelprep - no chunks - no map": { @@ -230,13 +230,13 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_NORM": { "bcftools": "1.22" @@ -439,10 +439,10 @@ ] } ], - "timestamp": "2026-03-30T21:20:47.660994012", + "timestamp": "2026-05-17T14:17:12.552259413", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/workflows/phaseimpute/tests/test_quilt.nf.test.snap b/workflows/phaseimpute/tests/test_quilt.nf.test.snap index b18112cf..33cb22d3 100644 --- a/workflows/phaseimpute/tests/test_quilt.nf.test.snap +++ b/workflows/phaseimpute/tests/test_quilt.nf.test.snap @@ -11,7 +11,7 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -216,10 +216,10 @@ } } ], - "timestamp": "2026-03-30T21:23:34.774198475", + "timestamp": "2026-05-17T14:20:07.856977292", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } }, "Check test_quilt - with chunks - with map": { @@ -234,7 +234,7 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -475,10 +475,10 @@ } } ], - "timestamp": "2026-03-30T21:22:22.804741382", + "timestamp": 
"2026-05-17T14:19:02.290787063", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file diff --git a/workflows/phaseimpute/tests/test_stitch.nf.test.snap b/workflows/phaseimpute/tests/test_stitch.nf.test.snap index 498e41d1..24a3428b 100644 --- a/workflows/phaseimpute/tests/test_stitch.nf.test.snap +++ b/workflows/phaseimpute/tests/test_stitch.nf.test.snap @@ -11,10 +11,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -218,10 +218,10 @@ } } ], - "timestamp": "2026-03-30T21:25:22.003472554", + "timestamp": "2026-05-17T14:21:56.428487551", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } }, "Check test_stitch - no chunks - no map": { @@ -236,10 +236,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_PLUGINSPLIT": { "bcftools": "1.22" @@ -419,10 +419,10 @@ } } ], - "timestamp": "2026-03-30T21:26:08.794836205", + "timestamp": "2026-05-17T14:22:43.169082186", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file From fa2153be59f9ff2f612759c1b23c73b7f1038d06 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 17 May 2026 14:39:49 -0300 Subject: [PATCH 23/28] update geneticmapconvert module --- modules.json | 2 +- .../nf-core/custom/geneticmapconvert/main.nf | 8 ++- .../nf-core/custom/geneticmapconvert/meta.yml | 11 ++-- .../geneticmapconvert/tests/main.nf.test | 55 ++++++++++++++---- .../geneticmapconvert/tests/main.nf.test.snap | 56 +++++++++---------- .../geneticmapconvert/tests/nextflow.config | 2 +- 
6 files changed, 87 insertions(+), 47 deletions(-) diff --git a/modules.json b/modules.json index adcf3e35..bf7045d4 100644 --- a/modules.json +++ b/modules.json @@ -81,7 +81,7 @@ }, "custom/geneticmapconvert": { "branch": "master", - "git_sha": "d879be4b3adc1bce389b002bd66f6954630f57d2", + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", "installed_by": ["modules"] }, "gawk": { diff --git a/modules/nf-core/custom/geneticmapconvert/main.nf b/modules/nf-core/custom/geneticmapconvert/main.nf index ca437502..4f9070bb 100644 --- a/modules/nf-core/custom/geneticmapconvert/main.nf +++ b/modules/nf-core/custom/geneticmapconvert/main.nf @@ -3,7 +3,7 @@ process CUSTOM_GENETICMAPCONVERT { label 'process_single' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/06/062aabd31ebac6f139125e485d5566e928c1b79caf488daa596df02bd1ccbf23/data': 'community.wave.seqera.io/library/r-data.table_r-janitor_r-r.utils:c8ebef5bb002374e' }" @@ -15,7 +15,7 @@ process CUSTOM_GENETICMAPCONVERT { tuple val(meta), path("${prefix}.plink.map") , emit: plink_map tuple val(meta), path("${prefix}.stitch.map") , emit: stitch_map tuple val(meta), path("${prefix}.minimac.map"), emit: minimac_map - path "versions.yml", emit: versions_geneticmapconvert, topic: versions + path "versions.yml", emit: versions, topic: versions when: task.ext.when == null || task.ext.when @@ -24,6 +24,10 @@ process CUSTOM_GENETICMAPCONVERT { prefix = task.ext.prefix ?: "${meta.id}" args = task.ext.args ?: '' + """ + echo ${args} + """ + template 'geneticmapconvert.R' stub: diff --git a/modules/nf-core/custom/geneticmapconvert/meta.yml b/modules/nf-core/custom/geneticmapconvert/meta.yml index 63427d0c..0eee0c87 100644 --- 
a/modules/nf-core/custom/geneticmapconvert/meta.yml +++ b/modules/nf-core/custom/geneticmapconvert/meta.yml @@ -91,18 +91,21 @@ output: tab-delimited file with header and columns: chr, pos, cM pattern: "*.minimac.map" - versions_geneticmapconvert: + versions: - versions.yml: type: file description: File containing software versions pattern: "versions.yml" ontologies: - - edam: http://edamontology.org/format_3750 + - edam: http://edamontology.org/format_3750 # YAML topics: versions: - versions.yml: - type: string - description: The name of the process + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML authors: - "@LouisLeNezet" maintainers: diff --git a/modules/nf-core/custom/geneticmapconvert/tests/main.nf.test b/modules/nf-core/custom/geneticmapconvert/tests/main.nf.test index 69ea2a87..3fb055ee 100644 --- a/modules/nf-core/custom/geneticmapconvert/tests/main.nf.test +++ b/modules/nf-core/custom/geneticmapconvert/tests/main.nf.test @@ -14,6 +14,9 @@ nextflow_process { test("Convert map with pos\\tchr\\tcm - with header - meta.chr (glimpse format)") { when { + params { + module_args = "" + } process { """ input[0] = [ @@ -29,7 +32,7 @@ nextflow_process { { assert process.success }, { assert snapshot( sanitizeOutput(process.out), - path(process.out.versions_geneticmapconvert[0]).yaml, + path(process.out.versions[0]).yaml, path(process.out.glimpse_map[0][1]).readLines()[0..2], path(process.out.plink_map[0][1]).readLines()[0..2], path(process.out.stitch_map[0][1]).readLines()[0..2], @@ -54,6 +57,9 @@ nextflow_process { } } when { + params { + module_args = "" + } process { """ input[0] = TABIX_BGZIP.out.output @@ -66,7 +72,7 @@ nextflow_process { { assert process.success }, { assert snapshot( sanitizeOutput(process.out), - path(process.out.versions_geneticmapconvert[0]).yaml, + path(process.out.versions[0]).yaml, 
path(process.out.glimpse_map[0][1]).readLines()[0..2], path(process.out.plink_map[0][1]).readLines()[0..2], path(process.out.stitch_map[0][1]).readLines()[0..2], @@ -78,6 +84,9 @@ nextflow_process { test("Convert map with chr\\tpos\\tcm - with header - meta.chr (minimac format)") { when { + params { + module_args = "" + } process { """ input[0] = [ @@ -93,7 +102,7 @@ nextflow_process { { assert process.success }, { assert snapshot( sanitizeOutput(process.out), - path(process.out.versions_geneticmapconvert[0]).yaml, + path(process.out.versions[0]).yaml, path(process.out.glimpse_map[0][1]).readLines()[0..2], path(process.out.plink_map[0][1]).readLines()[0..2], path(process.out.stitch_map[0][1]).readLines()[0..2], @@ -106,7 +115,7 @@ nextflow_process { test("Convert map with pos rate cm - with header - meta.chr (stitch format)") { when { params { - geneticmapconvert_args = "--tolerance 10e-4" + module_args = "--tolerance 10e-4" } process { """ @@ -123,7 +132,7 @@ nextflow_process { { assert process.success }, { assert snapshot( sanitizeOutput(process.out), - path(process.out.versions_geneticmapconvert[0]).yaml, + path(process.out.versions[0]).yaml, path(process.out.glimpse_map[0][1]).readLines()[0..2], path(process.out.plink_map[0][1]).readLines()[0..2], path(process.out.stitch_map[0][1]).readLines()[0..2], @@ -136,7 +145,7 @@ nextflow_process { test("Convert map with pos rate cm - with header - no meta.chr (stitch format)") { when { params { - geneticmapconvert_args = "--tolerance 10e-4" + module_args = "--tolerance 10e-4" } process { """ @@ -159,7 +168,7 @@ nextflow_process { test("Convert map with pos rate cm - with header - meta.chr (stitch format) -- error rate") { when { params { - geneticmapconvert_args = "--tolerance 10e-6" + module_args = "--tolerance 10e-6" } process { """ @@ -182,7 +191,7 @@ nextflow_process { test("Convert map with pos rate cm - with header - meta.chr (stitch format) -- error tolerance not numeric") { when { params { - 
geneticmapconvert_args = "--tolerance ABC" + module_args = "--tolerance ABC" } process { """ @@ -204,6 +213,9 @@ nextflow_process { test("Convert map with chr id cm pos - no header - meta.chr (plink format)") { when { + params { + module_args = "" + } process { """ input[0] = [ @@ -219,7 +231,7 @@ nextflow_process { { assert process.success }, { assert snapshot( sanitizeOutput(process.out), - path(process.out.versions_geneticmapconvert[0]).yaml, + path(process.out.versions[0]).yaml, path(process.out.glimpse_map[0][1]).readLines()[0..2], path(process.out.plink_map[0][1]).readLines()[0..2], path(process.out.stitch_map[0][1]).readLines()[0..2], @@ -231,6 +243,9 @@ nextflow_process { test("Test error - chr not corresponding") { when { + params { + module_args = "" + } process { """ input[0] = [ @@ -251,6 +266,9 @@ nextflow_process { test("Test error - badly recognize") { when { + params { + module_args = "" + } process { """ def mapFile = file("${launchDir}/genetic_map") @@ -277,6 +295,9 @@ nextflow_process { test("Test error - unsorted file, duplicate position") { when { + params { + module_args = "" + } process { """ def mapFile = file("${launchDir}/genetic_map") @@ -304,6 +325,9 @@ nextflow_process { test("Test error - multiple chromosomes") { when { + params { + module_args = "" + } process { """ def mapFile = file("${launchDir}/genetic_map") @@ -338,6 +362,9 @@ nextflow_process { test("Test with comma separator and keep id") { when { + params { + module_args = "" + } process { """ def mapFile = file("${launchDir}/genetic_map") @@ -361,7 +388,7 @@ nextflow_process { { assert process.success }, { assert snapshot( sanitizeOutput(process.out), - path(process.out.versions_geneticmapconvert[0]).yaml, + path(process.out.versions[0]).yaml, path(process.out.glimpse_map[0][1]).readLines()[0..2], path(process.out.plink_map[0][1]).readLines()[0..2], path(process.out.stitch_map[0][1]).readLines()[0..2], @@ -373,6 +400,9 @@ nextflow_process { test("Test start with blank line") 
{ when { + params { + module_args = "" + } process { """ def mapFile = file("${launchDir}/genetic_map") @@ -400,6 +430,9 @@ nextflow_process { test("Test stub") { options "-stub" when { + params { + module_args = "" + } process { """ input[0] = [ @@ -413,7 +446,7 @@ nextflow_process { then { assert snapshot( sanitizeOutput(process.out), - path(process.out.versions_geneticmapconvert[0]).yaml + path(process.out.versions[0]).yaml ).match() } } diff --git a/modules/nf-core/custom/geneticmapconvert/tests/main.nf.test.snap b/modules/nf-core/custom/geneticmapconvert/tests/main.nf.test.snap index 6618b363..2baf6f2f 100644 --- a/modules/nf-core/custom/geneticmapconvert/tests/main.nf.test.snap +++ b/modules/nf-core/custom/geneticmapconvert/tests/main.nf.test.snap @@ -38,7 +38,7 @@ "test.stitch.map:md5,b2f6729b3c23fb949e06f3e9dcd782c0" ] ], - "versions_geneticmapconvert": [ + "versions": [ "versions.yml:md5,9e24243bcd40c742e200d57da746a8b9" ] }, @@ -70,10 +70,10 @@ "chr21\t12970435\t0.00133" ] ], - "timestamp": "2026-03-26T17:41:23.201037083", + "timestamp": "2026-04-21T17:17:09.741694079", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.02.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Convert map with chr\\tpos\\tcm - with header - meta.chr (minimac format)": { @@ -115,7 +115,7 @@ "test.stitch.map:md5,b2f6729b3c23fb949e06f3e9dcd782c0" ] ], - "versions_geneticmapconvert": [ + "versions": [ "versions.yml:md5,9e24243bcd40c742e200d57da746a8b9" ] }, @@ -147,10 +147,10 @@ "chr21\t12970435\t0.00133" ] ], - "timestamp": "2026-03-26T17:41:45.245563717", + "timestamp": "2026-04-21T17:17:40.527355799", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.02.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Convert map with pos rate cm - with header - meta.chr (stitch format)": { @@ -192,7 +192,7 @@ "test.stitch.map:md5,4123204866241e52e702d30f56ec1bd5" ] ], - "versions_geneticmapconvert": [ + "versions": [ "versions.yml:md5,9e24243bcd40c742e200d57da746a8b9" ] }, @@ -224,10 
+224,10 @@ "chr21\t9928594\t0.0418492" ] ], - "timestamp": "2026-03-26T17:41:56.289472381", + "timestamp": "2026-04-21T16:56:42.982751661", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.02.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Convert map with chr id cm pos - no header - meta.chr (plink format)": { @@ -269,7 +269,7 @@ "test.stitch.map:md5,939930334691cc1730c3c322af8c2c32" ] ], - "versions_geneticmapconvert": [ + "versions": [ "versions.yml:md5,9e24243bcd40c742e200d57da746a8b9" ] }, @@ -301,10 +301,10 @@ "chr21\t12968320\t0" ] ], - "timestamp": "2026-03-26T17:59:36.461344125", + "timestamp": "2026-04-21T17:18:47.986410489", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.02.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Test with comma separator and keep id": { @@ -342,7 +342,7 @@ "test.stitch.map:md5,6caae6ed9b5b3d68229a74f528c3e737" ] ], - "versions_geneticmapconvert": [ + "versions": [ "versions.yml:md5,9e24243bcd40c742e200d57da746a8b9" ] }, @@ -374,10 +374,10 @@ "chr21\t12970435\t0.00133" ] ], - "timestamp": "2026-03-26T17:43:28.996636872", + "timestamp": "2026-04-21T17:19:48.646532306", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.02.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Convert map with pos\\tchr\\tcM - with header - meta.chr (glimpse compressed format)": { @@ -419,7 +419,7 @@ "test.stitch.map:md5,b2f6729b3c23fb949e06f3e9dcd782c0" ] ], - "versions_geneticmapconvert": [ + "versions": [ "versions.yml:md5,9e24243bcd40c742e200d57da746a8b9" ] }, @@ -451,10 +451,10 @@ "chr21\t12970435\t0.00133" ] ], - "timestamp": "2026-03-26T17:41:34.420713696", + "timestamp": "2026-04-21T17:17:25.458379776", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.02.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } }, "Test stub": { @@ -496,7 +496,7 @@ "test.stitch.map:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions_geneticmapconvert": [ + "versions": [ "versions.yml:md5,9e24243bcd40c742e200d57da746a8b9" ] }, @@ -508,10 
+508,10 @@ } } ], - "timestamp": "2026-03-26T17:39:08.936478779", + "timestamp": "2026-04-21T17:20:09.156535388", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.02.0" + "nf-test": "0.9.5", + "nextflow": "25.10.4" } } } \ No newline at end of file diff --git a/modules/nf-core/custom/geneticmapconvert/tests/nextflow.config b/modules/nf-core/custom/geneticmapconvert/tests/nextflow.config index fbad009f..f1dda24f 100644 --- a/modules/nf-core/custom/geneticmapconvert/tests/nextflow.config +++ b/modules/nf-core/custom/geneticmapconvert/tests/nextflow.config @@ -1,5 +1,5 @@ process { withName: CUSTOM_GENETICMAPCONVERT { - ext.args = { params.geneticmapconvert_args ?: "" } + ext.args = params.module_args } } From bab06ce3ea76efdcdd198a5eb9adae26ee866c55 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 17 May 2026 15:56:23 -0300 Subject: [PATCH 24/28] update snapshots --- conf/steps/initialisation.config | 3 ++- workflows/phaseimpute/tests/test_all.nf.test.snap | 4 +--- workflows/phaseimpute/tests/test_beagle5.nf.test.snap | 4 +--- workflows/phaseimpute/tests/test_glimpse1.nf.test.snap | 4 +--- workflows/phaseimpute/tests/test_glimpse2.nf.test.snap | 4 +--- workflows/phaseimpute/tests/test_minimac4.nf.test.snap | 4 +--- workflows/phaseimpute/tests/test_panelprep.nf.test.snap | 4 +--- workflows/phaseimpute/tests/test_quilt.nf.test.snap | 4 +--- workflows/phaseimpute/tests/test_quilt2.nf.test.snap | 8 ++------ workflows/phaseimpute/tests/test_stitch.nf.test.snap | 4 +--- workflows/phaseimpute/tests/test_validate.nf.test.snap | 8 +++----- 11 files changed, 15 insertions(+), 36 deletions(-) diff --git a/conf/steps/initialisation.config b/conf/steps/initialisation.config index 230bd91d..1d840f4f 100644 --- a/conf/steps/initialisation.config +++ b/conf/steps/initialisation.config @@ -30,7 +30,8 @@ process { publishDir = [ path: { "${params.outdir}/initialisation/map_convertion" }, mode: params.publish_dir_mode, - enabled: 
params.publish_all + enabled: params.publish_all, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } } diff --git a/workflows/phaseimpute/tests/test_all.nf.test.snap b/workflows/phaseimpute/tests/test_all.nf.test.snap index 00f188dc..f9006d8e 100644 --- a/workflows/phaseimpute/tests/test_all.nf.test.snap +++ b/workflows/phaseimpute/tests/test_all.nf.test.snap @@ -1281,7 +1281,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -1586,7 +1585,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,30837458e3113f0dd225e3255f3f60ec", @@ -1991,7 +1989,7 @@ } } ], - "timestamp": "2026-05-17T14:01:40.731382998", + "timestamp": "2026-05-17T15:26:51.457738966", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/workflows/phaseimpute/tests/test_beagle5.nf.test.snap b/workflows/phaseimpute/tests/test_beagle5.nf.test.snap index e663f2e2..3f3b986a 100644 --- a/workflows/phaseimpute/tests/test_beagle5.nf.test.snap +++ b/workflows/phaseimpute/tests/test_beagle5.nf.test.snap @@ -275,7 +275,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", 
"initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -340,7 +339,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,82c393ee7646f820b76e6bf60e5824c4", @@ -403,7 +401,7 @@ } } ], - "timestamp": "2026-05-17T14:07:12.512029555", + "timestamp": "2026-05-17T15:33:01.814250242", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/workflows/phaseimpute/tests/test_glimpse1.nf.test.snap b/workflows/phaseimpute/tests/test_glimpse1.nf.test.snap index dc48f9bd..29e89969 100644 --- a/workflows/phaseimpute/tests/test_glimpse1.nf.test.snap +++ b/workflows/phaseimpute/tests/test_glimpse1.nf.test.snap @@ -431,7 +431,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -509,7 +508,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,bda6ac9e13fd82da1c4725287937332d", @@ -615,7 +613,7 @@ } } ], - "timestamp": "2026-05-17T14:10:38.598193973", + "timestamp": "2026-05-17T15:36:32.947739141", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git 
a/workflows/phaseimpute/tests/test_glimpse2.nf.test.snap b/workflows/phaseimpute/tests/test_glimpse2.nf.test.snap index 3d8433c8..1723842c 100644 --- a/workflows/phaseimpute/tests/test_glimpse2.nf.test.snap +++ b/workflows/phaseimpute/tests/test_glimpse2.nf.test.snap @@ -88,7 +88,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -156,7 +155,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,9c74a2688bd333bd9c9919476514258e", @@ -213,7 +211,7 @@ } } ], - "timestamp": "2026-05-17T14:12:21.250835903", + "timestamp": "2026-05-17T15:38:21.333264051", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/workflows/phaseimpute/tests/test_minimac4.nf.test.snap b/workflows/phaseimpute/tests/test_minimac4.nf.test.snap index d211f99a..c46915b3 100644 --- a/workflows/phaseimpute/tests/test_minimac4.nf.test.snap +++ b/workflows/phaseimpute/tests/test_minimac4.nf.test.snap @@ -85,7 +85,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -150,7 +149,6 @@ 
"1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,a6ea6fbba2fab0d9899174bee8f9c68f", @@ -213,7 +211,7 @@ } } ], - "timestamp": "2026-05-17T14:14:20.802213098", + "timestamp": "2026-05-17T15:40:08.659438195", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/workflows/phaseimpute/tests/test_panelprep.nf.test.snap b/workflows/phaseimpute/tests/test_panelprep.nf.test.snap index c0f2702e..015d1de7 100644 --- a/workflows/phaseimpute/tests/test_panelprep.nf.test.snap +++ b/workflows/phaseimpute/tests/test_panelprep.nf.test.snap @@ -63,7 +63,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -135,7 +134,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", @@ -215,7 +213,7 @@ ] } ], - "timestamp": "2026-05-17T14:16:17.989373968", + "timestamp": "2026-05-17T15:42:03.129095578", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/workflows/phaseimpute/tests/test_quilt.nf.test.snap 
b/workflows/phaseimpute/tests/test_quilt.nf.test.snap index 33cb22d3..883739dc 100644 --- a/workflows/phaseimpute/tests/test_quilt.nf.test.snap +++ b/workflows/phaseimpute/tests/test_quilt.nf.test.snap @@ -326,7 +326,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -394,7 +393,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,21bcf26940e91c117b1b35f5dff864d6", @@ -475,7 +473,7 @@ } } ], - "timestamp": "2026-05-17T14:19:02.290787063", + "timestamp": "2026-05-17T15:44:52.397915558", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap index c50d1b65..d436be73 100644 --- a/workflows/phaseimpute/tests/test_quilt2.nf.test.snap +++ b/workflows/phaseimpute/tests/test_quilt2.nf.test.snap @@ -328,7 +328,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -396,7 +395,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", 
"1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", @@ -469,7 +467,7 @@ } } ], - "timestamp": "2026-05-17T13:55:00.045383039", + "timestamp": "2026-05-17T15:50:32.683959431", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" @@ -576,7 +574,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -644,7 +641,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,46f8e72c54af931bec43d50346e6c245", @@ -725,7 +721,7 @@ } } ], - "timestamp": "2026-05-17T13:49:55.979625287", + "timestamp": "2026-05-17T15:47:44.884305878", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/workflows/phaseimpute/tests/test_stitch.nf.test.snap b/workflows/phaseimpute/tests/test_stitch.nf.test.snap index 24a3428b..1820c115 100644 --- a/workflows/phaseimpute/tests/test_stitch.nf.test.snap +++ b/workflows/phaseimpute/tests/test_stitch.nf.test.snap @@ -93,7 +93,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", 
"initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -161,7 +160,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,20f016802c19197bd01da87b5bebf585", @@ -218,7 +216,7 @@ } } ], - "timestamp": "2026-05-17T14:21:56.428487551", + "timestamp": "2026-05-17T15:52:20.555565947", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/workflows/phaseimpute/tests/test_validate.nf.test.snap b/workflows/phaseimpute/tests/test_validate.nf.test.snap index 23fc79f6..1f212f4e 100644 --- a/workflows/phaseimpute/tests/test_validate.nf.test.snap +++ b/workflows/phaseimpute/tests/test_validate.nf.test.snap @@ -69,7 +69,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -205,7 +204,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,3e44e7e7b563c851f28837f25baf90b9", @@ -262,10 +260,10 @@ ] 
} ], - "timestamp": "2026-03-30T12:27:20.192868283", + "timestamp": "2026-05-17T15:53:59.562475625", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" } } } \ No newline at end of file From 6cf3ef8ed6a94daa3b10d2f2b7c66b8f20bed136 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Sun, 17 May 2026 16:10:11 -0300 Subject: [PATCH 25/28] update snapshot --- tests/default.nf.test.snap | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 9fa8840c..e38073ff 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -14,10 +14,10 @@ "bcftools": "1.22" }, "BCFTOOLS_INDEX_LIGATE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_INDEX_PHASE": { - "bcftools": "1.22" + "bcftools": "1.23.1" }, "BCFTOOLS_MERGE": { "bcftools": "1.22" @@ -133,7 +133,6 @@ "initialisation/map_convertion/1000GP_chr22.minimac.map", "initialisation/map_convertion/1000GP_chr22.plink.map", "initialisation/map_convertion/1000GP_chr22.stitch.map", - "initialisation/map_convertion/versions.yml", "initialisation/prepare_genome", "initialisation/prepare_genome/GRCh38.s.fa.gz.fai", "initialisation/prepare_genome/GRCh38.s.fa.gz.gzi", @@ -211,7 +210,6 @@ "1000GP_chr22.minimac.map:md5,1e35445b12cf8dfb1273500223a43a70", "1000GP_chr22.plink.map:md5,b70cfdef56d870e575ab29374cfcecd3", "1000GP_chr22.stitch.map:md5,24eea1320e0033b04c5f5e6fb7cff690", - "versions.yml:md5,6d9f679747546761ee89b6411223830d", "GRCh38.s.fa.gz.fai:md5,4f4e0ff133e7a05cb469e345f766ca8c", "GRCh38.s.fa.gz.gzi:md5,09046d9646db2cc5c425f231ce4595d7", "bcftools-stats-subtypes.txt:md5,bda6ac9e13fd82da1c4725287937332d", @@ -317,10 +315,10 @@ } } ], - "timestamp": "2026-03-30T22:37:34.20438651", + "timestamp": "2026-05-17T16:07:33.820698383", "meta": { - "nf-test": "0.9.4", - "nextflow": "26.03.1" + "nf-test": "0.9.5", + "nextflow": "26.04.0" 
} } } \ No newline at end of file From ced88e56b0f737bc4bd10e4454d96545c2697eba Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 19 May 2026 13:06:54 -0300 Subject: [PATCH 26/28] fix docs --- docs/usage.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index ee95c391..ae92b08f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -484,15 +484,15 @@ Optionnaly you can provide the following flags: #### Summary table of mandatory (m) and optional (o) parameters in `--steps impute` -| | `--steps impute`(m) | `--input`(m) | `--genome` or `--fasta`(m) | `--panel`(m) | `--posfile`(m) | `--map`(o) | `--chunks`(o) | -| ---------- | ------------------- | ------------ | -------------------------- | ------------ | -------------- | ---------- | ------------- | +| | `--steps impute`(m) | `--input`(m) | `--genome` or `--fasta`(m) | `--panel`(m) | `--posfile`(m/o) | `--map`(o) | `--chunks`(o) | +| ---------- | ------------------- | ------------ | -------------------------- | ------------ | ---------------- | ---------- | ------------- | | `GLIMPSE1` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ³ | ✅ | ✅ | | `GLIMPSE2` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ | | `QUILT` | ✅ | ✅ ² | ✅ | ❌ | ✅ ⁴ | ✅ | ✅ | -| `QUILT2` | ✅ | ✅ ² | ✅ | ✅ | ❌ | ✅ | ✅ | +| `QUILT2` | ✅ | ✅ ² | ✅ | ✅ ⁵ | ❌ | ✅ | ✅ | | `STITCH` | ✅ | ✅ ² | ✅ | ❌ | ✅ ³ | ✅ | ✅ | | `BEAGLE5` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ | -| `MINIMAC4` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ⁵ | ✅ | ✅ | +| `MINIMAC4` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ⁶ | ✅ | ✅ | > ¹ Alignment files as well as variant calling format (i.e. BAM, CRAM, VCF or BCF) > ² Alignment files only (i.e. 
BAM or CRAM) From f9377dbf9760afe3157daa4083d6d7400c36ece5 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 19 May 2026 13:08:36 -0300 Subject: [PATCH 27/28] remove duplicate config --- conf/test_quilt2.config | 6 ------ 1 file changed, 6 deletions(-) diff --git a/conf/test_quilt2.config b/conf/test_quilt2.config index 0dc331e9..c124fbf8 100644 --- a/conf/test_quilt2.config +++ b/conf/test_quilt2.config @@ -16,12 +16,6 @@ process { memory: '4.GB', time: '1.h' ] - - withName: 'NFCORE_PHASEIMPUTE:PHASEIMPUTE:BAM_IMPUTE_QUILT2:QUILT_QUILT2' { - cpus = 1 - ext.args = {"--seed=${params.seed} --use_mspbwt=TRUE --impute_rare_common=FALSE" } - ext.prefix = { "${meta.id}.batch${meta.batch}.${meta.regionout ? meta.regionout.replace(':','_') : meta.chr}.quilt2" } - } } params { From 50f062d4ff6cd983d47461272cbc31443fe00a3b Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Tue, 19 May 2026 13:16:01 -0300 Subject: [PATCH 28/28] fix linting --- docs/usage.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index ae92b08f..6481534d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -486,13 +486,13 @@ Optionnaly you can provide the following flags: | | `--steps impute`(m) | `--input`(m) | `--genome` or `--fasta`(m) | `--panel`(m) | `--posfile`(m/o) | `--map`(o) | `--chunks`(o) | | ---------- | ------------------- | ------------ | -------------------------- | ------------ | ---------------- | ---------- | ------------- | -| `GLIMPSE1` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ³ | ✅ | ✅ | -| `GLIMPSE2` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ | -| `QUILT` | ✅ | ✅ ² | ✅ | ❌ | ✅ ⁴ | ✅ | ✅ | -| `QUILT2` | ✅ | ✅ ² | ✅ | ✅ ⁵ | ❌ | ✅ | ✅ | -| `STITCH` | ✅ | ✅ ² | ✅ | ❌ | ✅ ³ | ✅ | ✅ | -| `BEAGLE5` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ | -| `MINIMAC4` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ⁶ | ✅ | ✅ | +| `GLIMPSE1` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ³ | ✅ | ✅ | +| `GLIMPSE2` | ✅ | ✅ ¹ | ✅ | ✅ | 
❌ | ✅ | ✅ | +| `QUILT` | ✅ | ✅ ² | ✅ | ❌ | ✅ ⁴ | ✅ | ✅ | +| `QUILT2` | ✅ | ✅ ² | ✅ | ✅ ⁵ | ❌ | ✅ | ✅ | +| `STITCH` | ✅ | ✅ ² | ✅ | ❌ | ✅ ³ | ✅ | ✅ | +| `BEAGLE5` | ✅ | ✅ ¹ | ✅ | ✅ | ❌ | ✅ | ✅ | +| `MINIMAC4` | ✅ | ✅ ¹ | ✅ | ✅ | ✅ ⁶ | ✅ | ✅ | > ¹ Alignment files as well as variant calling format (i.e. BAM, CRAM, VCF or BCF) > ² Alignment files only (i.e. BAM or CRAM)