25 commits
c027350
add larger nf-core runners
LuisHeinzlmeier Feb 13, 2026
3559b14
add zenodo and citation
LuisHeinzlmeier Feb 13, 2026
bc88744
update citation
LuisHeinzlmeier Feb 13, 2026
ce91aa9
add missing bam files to docs
LuisHeinzlmeier Feb 13, 2026
a30283e
add random comment to trigger CI tests
LuisHeinzlmeier Feb 13, 2026
396a05c
remove nf-core TODO's
LuisHeinzlmeier Feb 17, 2026
b24b1ad
nf-core pipelines lint --fix rocrate_readme_sync
LuisHeinzlmeier Feb 17, 2026
f322742
use BibTeX for citations
LuisHeinzlmeier Feb 17, 2026
25c9c0c
try another style for citations
LuisHeinzlmeier Feb 20, 2026
90696cf
try another style for citations (pre-commit)
LuisHeinzlmeier Feb 20, 2026
8e41fd6
change dropdown syntax to html
LuisHeinzlmeier Feb 20, 2026
500e955
adjust syntax
LuisHeinzlmeier Feb 20, 2026
c37666b
add detailed docs for demuxlet
LuisHeinzlmeier Feb 28, 2026
fd466e3
update format
LuisHeinzlmeier Feb 28, 2026
ea3520a
update format 2
LuisHeinzlmeier Feb 28, 2026
c6220c7
Fix Numba caching error in container by setting NUMBA_CACHE_DIR
LuisHeinzlmeier Feb 28, 2026
00e4de4
setup test_full
LuisHeinzlmeier Mar 9, 2026
eb2846c
fix hash modules for full_test compatibility
LuisHeinzlmeier Mar 11, 2026
29df45c
remove demuxem TODOs
LuisHeinzlmeier Mar 11, 2026
4689f4e
update tests and snapshots
LuisHeinzlmeier Mar 13, 2026
89b5772
current test setup
LuisHeinzlmeier Mar 16, 2026
e8ff787
reduce computing/storage resources of test and test_full
LuisHeinzlmeier Mar 17, 2026
a079d9b
closes #107
LuisHeinzlmeier Mar 17, 2026
b1add81
update nextflow_schema.json
LuisHeinzlmeier Mar 17, 2026
fc9b1a1
update docs
LuisHeinzlmeier Mar 17, 2026
12 changes: 9 additions & 3 deletions .github/workflows/nf-test.yml
@@ -27,7 +27,9 @@ env:
jobs:
nf-test-changes:
name: nf-test-changes
runs-on: ubuntu-latest
runs-on: # use self-hosted runners
- runs-on=${{ github.run_id }}-nf-test-changes
- runner=4cpu-linux-x64
outputs:
shard: ${{ steps.set-shards.outputs.shard }}
total_shards: ${{ steps.set-shards.outputs.total_shards }}
@@ -59,7 +61,9 @@ jobs:
name: "${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ needs.nf-test-changes.outputs.total_shards }}"
needs: [nf-test-changes]
if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }}
runs-on: ubuntu-latest
runs-on: # use self-hosted runners
- runs-on=${{ github.run_id }}-nf-test
- runner=4cpu-linux-x64
strategy:
fail-fast: false
matrix:
@@ -115,7 +119,9 @@ jobs:
confirm-pass:
needs: [nf-test]
if: always()
runs-on: ubuntu-latest
runs-on: # use self-hosted runners
- runs-on=${{ github.run_id }}-confirm-pass
- runner=2cpu-linux-x64
steps:
- name: One or more tests failed (excluding latest-everything)
if: ${{ contains(needs.*.result, 'failure') }}
30 changes: 25 additions & 5 deletions README.md
@@ -7,7 +7,7 @@

[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/hadge)
[![GitHub Actions CI Status](https://github.com/nf-core/hadge/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/hadge/actions/workflows/nf-test.yml)
[![GitHub Actions Linting Status](https://github.com/nf-core/hadge/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/hadge/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/hadge/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
[![GitHub Actions Linting Status](https://github.com/nf-core/hadge/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/hadge/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/hadge/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.10634731-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.10634731)
[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)

[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)
@@ -102,7 +102,6 @@ We thank the following people for their extensive assistance in the development
- [Luis Heinzlmeier](https://github.com/LuisHeinzlmeier)
- [Nico Trummer](https://github.com/nictru)
- [Seo Hyon Kim](https://github.com/seohyonkim)
<!-- TODO nf-core: If applicable, make list of people who have also contributed -->

## Contributions and Support

@@ -112,10 +111,31 @@ For further information or help, don't hesitate to get in touch on the [Slack `#

## Citations

<!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
<!-- If you use nf-core/hadge for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
If you use nf-core/hadge for your analysis, please cite it as follows:

<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->
> **hadge: a comprehensive pipeline for donor deconvolution in single-cell studies.**
>
> Fabiola Curion, Xichen Wu, Lukas Heumos, Mariana Gonzales Andre, Lennard Halle, Melissa Grant-Peters, Charlotte Rich-Griffin, Hing-Yuen Yeung, Calliope A. Dendrou, Herbert B. Schiller & Fabian J. Theis.
>
> _Genome Biol._ 2024 Apr 26. doi: [10.1186/s13059-024-03249-z](https://doi.org/10.1186/s13059-024-03249-z).

<details><summary>BibTeX</summary>

```bibtex
@article{curion2024hadge,
  title={hadge: a comprehensive pipeline for donor deconvolution in single-cell studies},
  author={Curion, Fabiola and Wu, Xichen and Heumos, Lukas and Andr{\'e}, Mylene Mariana Gonzales and Halle, Lennard and Ozols, Matiss and Grant-Peters, Melissa and Rich-Griffin, Charlotte and Yeung, Hing-Yuen and Dendrou, Calliope A and others},
  journal={Genome Biology},
  volume={25},
  number={1},
  pages={109},
  year={2024},
  publisher={Springer}
}
```

</details>

An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.

4 changes: 2 additions & 2 deletions bin/update_snapshots.sh
@@ -24,7 +24,7 @@ for test_file in "${test_files[@]}"; do
test_profile="test"
fi

command="nf-test test tests/${test_file}.nf.test --profile ${test_profile},docker --update-snapshot"
command="nf-test test tests/${test_file}.nf.test --profile ${test_profile},apptainer --update-snapshot"

echo "Updating snapshot for: $test_file"
echo "Running: ${command}"
@@ -36,7 +36,7 @@ for test_file in "${test_files[@]}"; do
# test if testing is consistent
if [[ "$CHECK_CONSISTENCY" == "true" ]]; then
echo "Re-running test to verify snapshot consistency for: $test_file"
command="nf-test test tests/${test_file}.nf.test --profile ${test_profile},docker"
command="nf-test test tests/${test_file}.nf.test --profile ${test_profile},apptainer"
echo "Running: ${command}"
eval "$command"
echo "✓ Consistency check passed for: $test_file"
4 changes: 2 additions & 2 deletions conf/test.config
@@ -24,12 +24,12 @@ params {

// Input data
mode = 'rescue'
hash_tools = 'hasheddrops,bff,gmm-demux'
hash_tools = 'htodemux,demuxem,bff'
genetic_tools = 'freemuxlet,vireo,souporcell'
input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/samplesheet/samplesheet_rescue.csv'
genome = 'GRCh38'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta'
bam_qc = true
outdir = "outdir_test"

// all possible modules
// hash_tools = 'htodemux,hasheddrops,multiseq,demuxem,gmm-demux,bff,hashsolo'
9 changes: 5 additions & 4 deletions conf/test_donor_match.config
@@ -25,8 +25,9 @@ params {
// Input data
mode = 'donor_match'
input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/samplesheet/samplesheet_donor_match.csv'
demultiplexing_result = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/testdata/donor_match_assignment.csv'
vireo_filtered_variants = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/testdata/donor_match_filtered_variants.tsv'
cell_genotype = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/testdata/donor_match.cells.vcf.gz'
gt_donors = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/testdata/donor_match_GT_donors.vireo.vcf.gz'
demultiplexing_result = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/dataset_test/donor_match_assignment.csv'
vireo_filtered_variants = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/dataset_test/donor_match_filtered_variants.tsv'
cell_genotype = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/dataset_test/donor_match.cells.vcf.gz'
gt_donors = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/dataset_test/donor_match_GT_donors.vireo.vcf.gz'
outdir = "outdir_test_donor_match"
}
30 changes: 23 additions & 7 deletions conf/test_full.config
@@ -10,15 +10,31 @@
----------------------------------------------------------------------------------------
*/

process {
resourceLimits = [
cpus: 60,
memory: '64.GB',
time: '23.h'
]

withName: SOUPORCELL {
cpus = 60
memory = 64.GB
time = 8.h
}
}

params {
config_profile_name = 'Full test profile'
config_profile_description = 'Full test dataset to check pipeline function'

// Input data for full size test
// TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'

// Genome references
genome = 'R64-1-1'
// Input data
mode = 'rescue'
hash_tools = 'htodemux,hasheddrops,multiseq,demuxem,gmm-demux,bff,hashsolo'
genetic_tools = 'demuxlet,freemuxlet,vireo,souporcell'
input = '/lustre/groups/ml01/code/luis.heinzlmeier/hadge/conf/test_full_new.csv'
genome = 'GRCh38'
bam_qc = false
find_variants = false
outdir = "outdir_test_full"
}
2 changes: 1 addition & 1 deletion conf/test_genetic.config
@@ -28,5 +28,5 @@ params {
input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/samplesheet/samplesheet_genetic.csv'
genome = 'GRCh38'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/sequence/genome.fasta'
bam_qc = true
outdir = "outdir_test_genetic"
}
5 changes: 2 additions & 3 deletions conf/test_hashing.config
@@ -24,8 +24,7 @@ params {

// Input data
mode = 'hashing'
hash_tools = 'htodemux,hasheddrops,multiseq,gmm-demux,bff,hashsolo'
hash_tools = 'htodemux,hasheddrops,multiseq,demuxem,gmm-demux,bff,hashsolo'
input = 'https://github.com/nf-core/test-datasets/raw/refs/heads/hadge/samplesheet/samplesheet_hashing.csv'

// TODO demuxem: include demuxem to hash_tools if #81 is fixed
outdir = "outdir_test_hashing"
}
62 changes: 44 additions & 18 deletions docs/usage.md
@@ -41,8 +41,8 @@ Finally, it assigns SNPs to cells to determine donor identity but requires addit

```csv title="samplesheet.csv"
sample,bam,vcf,n_samples,barcodes
id1,donor_genotype_chr21.vcf,2,barcodes.tsv
id2,donor_genotype_chr21.vcf,2,barcodes.tsv
id1,chr21.bam,donor_genotype_chr21.vcf,2,barcodes.tsv
id2,chr21.bam,donor_genotype_chr21.vcf,2,barcodes.tsv
id3,chr21.bam,donor_genotype_chr21.vcf,2,barcodes.tsv
```
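
The corrected rows above can be sanity-checked before launching the pipeline. A minimal sketch in Python — the inline samplesheet and the check itself are illustrative, not part of the pipeline:

```python
import csv
import io

# Columns required by the genetic-mode samplesheet, as documented here.
REQUIRED = ["sample", "bam", "vcf", "n_samples", "barcodes"]

samplesheet = """sample,bam,vcf,n_samples,barcodes
id1,chr21.bam,donor_genotype_chr21.vcf,2,barcodes.tsv
id2,chr21.bam,donor_genotype_chr21.vcf,2,barcodes.tsv
"""

rows = list(csv.DictReader(io.StringIO(samplesheet)))
for row in rows:
    # every required column must be present and non-empty
    missing = [col for col in REQUIRED if not row.get(col)]
    assert not missing, f"{row['sample']}: missing {missing}"
    # n_samples is the number of multiplexed donors, a positive integer
    assert int(row["n_samples"]) >= 1
```

A row that omits the `bam` column (the bug fixed in this diff) fails the `missing` assertion immediately, which is cheaper than a mid-run crash.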

@@ -142,7 +142,7 @@ id3,rna.tar.gz,hto.tar.gz,chr21.bam,donor_genotype_chr21.vcf,2,barcodes.tsv
| `rna_matrix` | Full path to the RNA-Seq count matrices provided in a 10x Genomics format and compressed as `.tar.gz`. |
| `hto_matrix` | Full path to the hashing count matrices provided in a 10x Genomics format and compressed as `.tar.gz`. |
| `bam` | Full path to the alignment file (`.bam`). |
| `vcf` | Full path to the list of common SNPs (`.vcf`). |
| `vcf` | Full path to the common SNP genotypes VCF file (`.vcf`). |
| `n_samples` | The number of multiplexed donors. |
| `barcodes` | Full path to the list of cell barcodes (e.g., `barcodes.tsv` from Cell Ranger) |

@@ -155,21 +155,47 @@ id3,rna.tar.gz,hto.tar.gz,chr21.bam,donor_genotype_chr21.vcf,2,barcodes.tsv
| hashing | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
| donor_match | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ |

| Module | sample | rna_matrix | hto_matrix | bam | barcodes | n_samples | vcf |
| ----------- | :----: | :--------: | :--------: | :-: | :------: | :-------: | :-: |
| htodemux | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
| multiseq | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
| bff | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| demuxem | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
| gmm-demux | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| hasheddrops | ✅ | ✅\* | ✅ | ❌ | ❌ | ❌ | ❌ |
| hashsolo | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| vireo | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ |
| demuxlet | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ |
| freemuxlet | ✅ | ❌ | ❌ | ✅ | ❌ | ✅ | ✅ |
| souporcell | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ |

\* if `params.hasheddrops_runEmptyDrops` is true
| Module | sample | rna_matrix | hto_matrix | bam | barcodes | n_samples | vcf<sup>1</sup> |
| ----------- | :----: | :------------: | :--------: | :-: | :------: | :-------: | :-------------: |
| htodemux | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
| multiseq | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
| bff | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| demuxem | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ |
| gmm-demux | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| hasheddrops | ✅ | ✅<sup>2</sup> | ✅ | ❌ | ❌ | ❌ | ❌ |
| hashsolo | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
| vireo | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ |
| demuxlet | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅<sup>3</sup> |
| freemuxlet | ✅ | ❌ | ❌ | ✅ | ❌ | ✅ | ✅ |
| souporcell | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ |

<sup>1</sup> The requirements for the VCF file differ between genetic deconvolution methods.
Check out [Demuxafy](https://demultiplexing-doublet-detecting-docs.readthedocs.io/en/latest/DemultiplexingSoftwares.html) to find the right VCF file for the methods you want to use.
`POPSCLE_DSCPILEUP` (needed for `freemuxlet` and `demuxlet`) requires the VCF file to be sorted in the same contig order as the BAM file. If you encounter an error caused by this, consider sorting the VCF with `picard SortVcf`.

<sup>2</sup> if `params.hasheddrops_runEmptyDrops` is true

<sup>3</sup> reference SNP genotypes for each individual ([demuxlet docs](https://demultiplexing-doublet-detecting-docs.readthedocs.io/en/latest/Demuxlet.html))

:::
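
The sorting requirement in footnote 1 boils down to: contigs shared by the two files must appear in the same order in the VCF header as in the BAM header. A dependency-free sketch of that check (extracting the contig lists, e.g. via `samtools view -H` and `bcftools view -h`, is left out; the function name is ours):

```python
def contigs_in_same_order(bam_contigs, vcf_contigs):
    """POPSCLE_DSCPILEUP expects the VCF to follow the BAM contig order.
    Compare only contigs present in both lists, preserving their order."""
    bam_shared = [c for c in bam_contigs if c in set(vcf_contigs)]
    vcf_shared = [c for c in vcf_contigs if c in set(bam_contigs)]
    return bam_shared == vcf_shared

# A mismatch means the VCF needs re-sorting against the BAM's dictionary,
# e.g. with `picard SortVcf`.
assert contigs_in_same_order(["chr1", "chr2"], ["chr1", "chr2"]) is True
assert contigs_in_same_order(["chr1", "chr2"], ["chr2", "chr1"]) is False
```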

:::tip{collapse title="Recommendations for naming HTO-labels and barcodes"}

1. Avoid single DNA base letters as suffixes

- **Incorrect:** `HTO-A`, `HTO-C`, `HTO-G`, `HTO-T`
- **Reason:** The `BFF` module uses `cellhashR`'s `ProcessCountMatrix()`, which internally calls `SimplifyHtoNames()` and incorrectly strips single DNA base letters, collapsing `HTO-A`, `HTO-C`, `HTO-G` all to `HTO` and causing a crash.

2. Avoid barcode sequences as part of the label

- **Incorrect:** `HTO-1-ACTGTCTAACGG`
- **Reason:** `SimplifyHtoNames()` strips the barcode suffix in `BFF`, causing the same HTO to appear as `HTO-1` in `BFF` output but `HTO-1-ACTGTCTAACGG` in other methods, making cross-method comparison unreliable.

3. Avoid using the same trailing suffix on all barcodes

- **Incorrect:** `AAACCCAAGAAACACT-1` (`-1` appended to every barcode)
- **Reason:** In the `DEMUXEM` module, `pegasusio.read_input()` only removes the suffix from RNA barcodes, but not from HTO barcodes, which leads to a known issue (see [#21](https://github.com/lilab-bcb/demuxEM/issues/21)).

:::

An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
3 changes: 0 additions & 3 deletions main.nf
@@ -26,9 +26,6 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_hadg
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// TODO nf-core: Remove this line if you don't need a FASTA file
// This is an example of how to use getGenomeAttribute() to fetch parameters
// from igenomes.config using `--genome`
params.fasta = getGenomeAttribute('fasta')

/*
@@ -1,5 +1,8 @@
#!/usr/bin/env python3

import os
os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"

# versions
import platform
import yaml
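
For context on this fix: Numba resolves its cache location once, when it is first imported, so `NUMBA_CACHE_DIR` must be exported before any numba-dependent import. In a container whose site-packages tree is read-only, this redirects JIT caches to a writable path. A minimal sketch — the `./tmp/numba` path mirrors the diff above; nothing else is assumed about the container:

```python
import os

# Set BEFORE importing numba (or demuxEM/pegasusio, which pull it in):
# numba reads this variable at import time, not per call.
os.environ["NUMBA_CACHE_DIR"] = "./tmp/numba"
os.makedirs(os.environ["NUMBA_CACHE_DIR"], exist_ok=True)
# From here on, JIT caches land in ./tmp/numba instead of the
# read-only site-packages directory inside the container image.
```
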
14 changes: 0 additions & 14 deletions modules/local/dropletutils/mtxconvert/templates/convert.R
@@ -4,7 +4,6 @@ library(DropletUtils)

mtx_dir <- "${input_mtx_dir}"


sce <- read10xCounts(mtx_dir) # Read to SingleCellExperiment object

print(sce)
@@ -18,19 +17,6 @@ if ("${write_csv}" == "true") {
write.csv(as.matrix(count_matrix), file = "${prefix}.csv", row.names = TRUE)
}

# TODO demuxem: remove if demuxEM issue is solved (https://github.com/theislab/hadge/issues/81)
# Write to h5 file
# write10xCounts(
# path = "${prefix}.h5",
# x = counts(sce),
# barcodes = colData(sce)\$Barcode,
# gene.id = rownames(sce),
# gene.symbol = if (!is.null(rowData(sce)\$Symbol)) rowData(sce)\$Symbol else rownames(sce),
# gene.type = if (!is.null(rowData(sce)\$Type)) rowData(sce)\$Type else rep("Gene Expression", nrow(sce)),
# type = "HDF5",
# version = "3", # <-- ensures /matrix layout instead of /unknown
# overwrite = TRUE
# )
write10xCounts("${prefix}.h5", count_matrix, type = "HDF5")

################################################