// nextflow.config from ScaleBio/ScaleRna (master branch)
// Pipeline metadata
manifest {
    name        = 'ScaleRna'
    version     = '2.1.0'
    description = 'ScaleBio Seq Suite: RNA workflow'
    homePage    = 'https://scale.bio'
}

includeConfig 'modules/create_mtx.config'
includeConfig 'modules/downstream.config'
includeConfig 'modules/scale_plex.config'
includeConfig 'modules/internal.config'

//// Parameter defaults; Can be set at workflow runtime on the nextflow command-line
// See nextflow_schema.json for description
params {
    //// Required inputs
    // Sample sheet, i.e. path to samples.csv (required!)
    samples = null
    // Reference genome.json (required!)
    genome = null

    //// Sequencing data input
    // For alignment, either a runfolder (BCL) or a directory with fastq files is required!
    runFolder = null
    fastqDir = null
    // Optional samplesheet for bcl-convert; replaces the auto-generated one
    fastqSamplesheet = null
    // For QuantumScale data sequenced on Ultima
    ultimaCramDir = null

    //// 'Reporting' workflow, i.e. starting from existing alignment results
    // Run only the cell-filtering and reporting steps, with optional sample merging
    reporting = false
    // Output directory of a previous pipeline run (to combine multiple runs, specify in samples.csv)
    resultDir = null

    //// Output options
    // Workflow output directory
    outDir = 'ScaleRna.out'
    // BAM output from STAR; false skips it
    bamOut = false
    // Set to true to publish unaligned BAMs from bcParser
    bcParserBamOut = false
    // Set to true to publish bcl-converted fastq files to outDir
    fastqOut = false

    //// Library structure (barcode locations and sequences)
    // Can be an absolute path or relative to ${projectDir}/references/
    //   QuantumScale RNA: "libQuantumV1.0.json"
    //   Scale RNA v1.1:   "libV1.1.json"
    //   Scale RNA v1.0:   "libV1.json"
    libStructure = 'libQuantumV1.0.json'

    //// ScalePlex
    // Enable ScalePlex integration
    scalePlex = false
    // ScalePlex library structure
    //   3L kit:           "scaleplexlibV1.json"
    //   QuantumScale kit: "scaleplexlibQuantumV1.0.json"
    scalePlexLibStructure = 'scaleplexlibQuantumV1.0.json'
    // ScalePlex PCR to RNA PCR mapping, used to merge the ScalePlex library with the RNA library.
    // A default is given in scaleplexlibQuantumV1.0.json; setting this param overrides it
    scalePlexToRnaMapping = null

    //// Optional workflow parameters
    // Generate merged sample outputs across libraries / plates
    merge = true
    // Extra bcl-convert arguments, e.g. "--bcl-only-lane 1"
    bclConvertParams = ''
    // Run fastqc on input fastqs
    fastqc = true

    // What read to transcript overlaps STAR counts (--soloFeatures)
    starFeature = 'GeneFull_Ex50pAS'
    // How to handle reads matching multiple genes (--soloMultiMappers)
    starMulti = 'PropUnique'
    // Same as starMulti, but for barnyard datasets (--soloMultiMappers)
    starMultiBarnyard = 'Unique'
    // Strandedness of RNA reads relative to annotated genes
    starStrand = 'Forward'
    // Maximum number of loci a multimapping read can map to (--outFilterMultimapNmax)
    starMaxLoci = 6
    // Round UMI counts to nearest integer
    roundCounts = false

    // Trimming in STAR (in addition to cutadapt)
    starTrimming = ''

    //// Cell thresholding parameters
    // See modules/create_mtx.config for more options
    // Minimum transcript count to consider a barcode as a potential cell
    minUTC = 100
    // Compare cell-barcode expression profiles to background to call more cells
    cellFinder = true

    // Call the top "expectedCells" many barcodes per sample as cells (set in samples.csv)
    fixedCells = false
    // Fixed threshold above which all barcodes are called
    // (barcodes with minUTC < X < UTC go to CellFinder if enabled)
    UTC = 0

    //// Resources and parallelization
    // Split fastqs by PCR and RT wells for increased parallelization
    splitFastq = true
    // Max. resources that can be requested for a single task / process
    taskMaxMemory = 256.GB
    taskMaxCpus = 16
    taskMaxTime = 48.h
    // Number of RT barcodes per STAR job when using splitFastq
    rtBarcodesPerStarJob = 4
    // Number of bcParser jobs
    bcParserJobsPerLibName = 16
    // Number of total fastqc jobs
    totalFastqcJobs = 100
    // Number of files (input index2 fastq / input ucram) batched into a single LibraryDetection process
    filesPerLibDetection = 32

    //// Downstream analysis
    // Enable preliminary seurat clustering and azimuth cell-type classification
    seurat = false
    azimuth = false
    compSamples = false
    // Enable output of UMI count matrices in annData format
    annData = false

    // Disable some validations for small test-runs
    testRun = false
    help = false
}

process {
    // Default failure handling: retry when the exit status is in 137..140, or when the task
    // is on its first or second attempt and did not exit with status 42; otherwise 'terminate'.
    errorStrategy = {
        ((task.exitStatus in 137..140) || (task.attempt <= 2 && task.exitStatus != 42)) ? 'retry' : 'terminate'
    }
    maxRetries = 3

    // Default resources; memory grows with each attempt and every request is capped
    // by max_cpu / max_mem
    cpus = { max_cpu(2) }
    memory = { max_mem(4.GB * task.attempt) }
    time = params.taskMaxTime

    container = 'public.ecr.aws/o5l3p3e4/scalerna:2025-04-16-160428'

    // Label-based overrides
    withLabel:small {
        cpus = 1
        memory = { max_mem(2.GB * task.attempt) }
    }
    withLabel:large {
        cpus = { max_cpu(5) }
        memory = { max_mem(30.GB * task.attempt) }
    }
    // Same retry rule as the default minus the status-42 exclusion, but a final failure is ignored
    withLabel:optional {
        errorStrategy = {
            ((task.exitStatus in 137..140) || task.attempt <= 2) ? 'retry' : 'ignore'
        }
    }

    // Per-process overrides
    withName:BclConvert {
        container = 'nfcore/bclconvert:3.9.3'
        cpus = { max_cpu(16) }
        memory = { max_mem(120.GB * task.attempt) }
    }
    withName:TrimFq {
        // Both cpus and memory scale with the attempt number here
        cpus = { max_cpu(8 * task.attempt) }
        memory = { max_mem(8.GB * task.attempt) }
    }
    withName:BarcodeParser {
        cpus = { max_cpu(5) }
        memory = { max_mem(6.GB * task.attempt) }
    }
    withName:StarSolo {
        cpus = { max_cpu(16) }
        memory = { max_mem(60.GB * task.attempt) }
    }

    // Processes labelled 'report' use a separate container image
    withLabel:report {
        container = 'public.ecr.aws/o5l3p3e4/scalernareport:2025-04-16-161858'
        cpus = { max_cpu(5) }
        memory = { max_mem(30.GB * task.attempt) }
    }
}
profiles {
    // Conda environments instead of containers
    conda {
        conda.enabled = true
        process.conda = "$projectDir/envs/scalerna.conda.yml"
        process {
            // Processes labelled 'report' get their own environment file
            withLabel:report {
                conda = "$projectDir/envs/scalernareport.conda.yml"
            }
        }
    }

    docker {
        // Shared settings for all container engines go here
        docker.enabled = true
        docker.fixOwnership = true
        // Alt. to fixOwnership; match user ID in container to outside:
        // docker.runOptions = '-u $(id -u):$(id -g)'
    }

    // Singularity with auto-mounts; docker is switched off explicitly
    singularity {
        singularity.enabled = true
        singularity.autoMounts = true
        docker.enabled = false
    }

    // Podman; docker is switched off explicitly
    podman {
        podman.enabled = true
        docker.enabled = false
    }
}
// Time limit for conda environment creation
conda.createTimeout = '1 h'

// nf-core functions to ensure that resource requirements don't go
// beyond a maximum limit
def max_mem(obj) {
    // Cap a memory request at params.taskMaxMemory.
    //   obj: the requested memory; it is compared against a nextflow.util.MemoryUnit
    //   returns: params.taskMaxMemory (as a MemoryUnit) when obj exceeds it, otherwise obj unchanged
    def limit = params.taskMaxMemory as nextflow.util.MemoryUnit
    // compareTo only promises a positive value for "greater than", so test the sign
    // instead of relying on the result being exactly 1
    return obj.compareTo(limit) > 0 ? limit : obj
}
def max_cpu(obj) {
    // Cap a CPU request at params.taskMaxCpus; both values are coerced to int first.
    int requested = obj as int
    int ceiling = params.taskMaxCpus as int
    return requested <= ceiling ? requested : ceiling
}