Skip to content
76 changes: 76 additions & 0 deletions subworkflows/nf-core/bam_impute_quilt2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
include { QUILT_QUILT2 } from '../../../modules/nf-core/quilt/quilt2/main'
include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main'
include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main'

workflow BAM_IMPUTE_QUILT2 {
take:
ch_input // channel (mandatory): [ meta, bam/cram, bai/crai, bampaths, bamnames ]
ch_reference_panel // channel (mandatory): [ meta, reference_vcf, reference_index ]
ch_chunks // channel (optional): [ meta, chr, start, end ]
ch_map // channel (optional): [ meta, genetic_map ]
ch_fasta // channel (optional): [ meta, fasta, fai ]
n_gen // integer: Number of generations since founding or mixing
buffer // integer: Buffer of region to perform imputation over

main:


ch_parameters = ch_reference_panel
.combine(ch_map, by: 0)
.combine(ch_chunks, by: 0)

ch_parameters.ifEmpty {
error("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.")
}

ch_bam_params = ch_input
.combine(ch_parameters)
.map { meta_input, bam, bai, bampath, bamname, meta_panel, reference_vcf, reference_index, genetic_map, chr, start, end ->
def regionout = "${chr}"
if (start != [] && end != []) {
regionout = "${chr}:${start}-${end}"
}

[
meta_panel + meta_input + [regionout: regionout],
bam,
bai,
bampath,
bamname,
reference_vcf,
reference_index,
[],
[],
[],
chr,
start,
end,
n_gen,
buffer,
genetic_map,
]
}

QUILT_QUILT2(ch_bam_params, ch_fasta)

ligate_input = QUILT_QUILT2.out.vcf
.join(QUILT_QUILT2.out.tbi)
.map { meta, vcf, index ->
def keys_to_keep = meta.keySet() - ['regionout']
[meta.subMap(keys_to_keep), vcf, index]
}
.groupTuple()

GLIMPSE2_LIGATE(ligate_input)

BCFTOOLS_INDEX(GLIMPSE2_LIGATE.out.merged_variants)

ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants.join(
BCFTOOLS_INDEX.out.tbi.mix(BCFTOOLS_INDEX.out.csi),
failOnMismatch: true,
failOnDuplicate: true,
)

emit:
vcf_index = ch_vcf_index // channel: [ meta, vcf, tbi/csi ]
}
128 changes: 128 additions & 0 deletions subworkflows/nf-core/bam_impute_quilt2/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "bam_impute_quilt2"
description: |
Impute low-coverage BAM or CRAM inputs with QUILT2 and ligate chunked outputs per chromosome. 'regionout' key will be used to store temporarily the region and therefore shouldn't be used in the meta maps.
keywords:
- bam
- cram
- imputation
- quilt
- quilt2
- vcf
components:
- quilt/quilt2
- glimpse2/ligate
- bcftools/index
input:
- ch_input:
description: |
Channel with target sequencing data and optional rename files.
structure:
- meta:
type: map
description: Metadata map for the input batch.
- bam_or_cram:
type: file
description: Input BAM or CRAM file, or a list of BAM or CRAM files.
pattern: "*.{bam,cram}"
- index:
type: file
description: Index for the BAM or CRAM input.
pattern: "*.{bai,crai}"
- bampaths:
type: file
description: |
Optional text file listing BAM or CRAM paths to impute.
One file per line.
pattern: "*.{txt,tsv}"
- bamnames:
type: file
description: |
Optional text file listing sample names in the same order as `bampaths`.
One file per line.
pattern: "*.{txt,tsv}"
- ch_reference_panel:
description: |
Channel with phased reference panel VCFs per chromosome.
structure:
- meta:
type: map
description: |
Metadata map that will be combined with the input metadata.
Must include `id` for the panel name and `chr` for the chromosome.
- vcf:
type: file
description: Reference panel VCF or BCF.
pattern: "*.{vcf,vcf.gz,bcf}"
- index:
type: file
description: Index for the reference panel VCF or BCF.
pattern: "*.{tbi,csi}"
- ch_chunks:
description: |
Channel with optional imputation chunks per chromosome.
structure:
- meta:
type: map
description: Metadata map with the panel id and chromosome.
- chr:
type: string
description: Chromosome name.
- start:
type: integer
description: Start position of the chunk.
- end:
type: integer
description: End position of the chunk.
- ch_map:
description: |
Channel with optional genetic maps per chromosome.
structure:
- meta:
type: map
description: Metadata map with the panel id and chromosome.
- map:
type: file
description: Genetic map used by QUILT2.
pattern: "*.{txt,map}{,gz}"
- ch_fasta:
description: |
Channel with optional reference FASTA data, required for CRAM inputs.
structure:
- meta:
type: map
description: Metadata map for the reference genome.
- fasta:
type: file
description: Reference genome FASTA.
pattern: "*.{fa,fasta}"
- fai:
type: file
description: FASTA index file.
pattern: "*.fai"
- n_gen:
type: integer
description: Number of generations since founding or mixing.
- buffer:
type: integer
description: Buffer of region to perform imputation over.
output:
- vcf_index:
description: |
Imputed and indexed VCFs after ligating chunked outputs per chromosome.
structure:
- meta:
type: map
description: Metadata map combined from the input batch and panel metadata.
- vcf:
type: file
description: Imputed VCF file.
pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
- index:
type: file
description: Index for the imputed VCF or BCF file.
pattern: "*.{tbi,csi}"
authors:
- "@atrigila"
maintainers:
- "@atrigila"
Loading
Loading