diff --git a/DESCRIPTION b/DESCRIPTION index 29d11ae1..87fd431e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -96,14 +96,17 @@ Collate: 'tupleSelectors.R' 'GwasFineMappingResult.R' 'H2Estimate.R' + 'JointGroup.R' 'LdBlocks.R' 'LdData.R' 'LdStatistic.R' 'LdEigen.R' 'LdScore.R' + 'MashPrior.R' 'QtlDataset.R' 'MultiStudyQtlDataset.R' 'QtlFineMappingResult.R' + 'SldscData.R' 'TwasWeightsEntry.R' 'causalInferencePipeline.R' 'colocPipeline.R' @@ -118,6 +121,7 @@ Collate: 'gwasSumStats.R' 'h2Annotations.R' 'h2EstimationWrappers.R' + 'jointEngine.R' 'jointSpecification.R' 'ld.R' 'mashPipeline.R' diff --git a/NAMESPACE b/NAMESPACE index 4a82e845..995c5163 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,10 +12,12 @@ export(GenotypeHandle) export(GwasFineMappingResult) export(GwasSumStats) export(LdData) +export(MashPrior) export(MultiStudyQtlDataset) export(QtlDataset) export(QtlFineMappingResult) export(QtlSumStats) +export(SldscData) export(TwasWeights) export(TwasWeightsEntry) export(adjustPips) @@ -86,6 +88,8 @@ export(fsusieGetCs) export(fsusieWeights) export(fsusieWrapper) export(getAf) +export(getAnnotCols) +export(getAnnotData) export(getAnnotationMeta) export(getAnnotations) export(getBaseline) @@ -96,7 +100,7 @@ export(getContexts) export(getCorrelation) export(getCs) export(getCtwasMetaData) -export(getCvPerformance) +export(getCvFits) export(getCvResult) export(getDataType) export(getEigenList) @@ -104,6 +108,8 @@ export(getEnrichment) export(getFineMappingResult) export(getFits) export(getFormat) +export(getFrqData) +export(getFullFit) export(getGenome) export(getGenotypeCovariates) export(getGenotypeHandle) @@ -149,6 +155,9 @@ export(getSusieFit) export(getSusieResult) export(getTauBlocks) export(getTopLoci) +export(getTraitNames) +export(getTraitRun) +export(getTraitRuns) export(getTraits) export(getTwasWeights) export(getVarY) @@ -198,6 +207,7 @@ export(mcpRssWeights) export(mcpWeights) export(mergeCtwasBoundaryRegions) export(mergeMashData) +export(mergeSusieCs) export(mergeVariantInfo) export(metaAnalysisPerCell) export(metaSldscRandom) @@ -224,6 +234,8 @@ export(raiss) export(readAfreq) export(readAnnotations) export(readGenotypes) +export(readSldscAnnot) +export(readSldscFrq) export(readSldscTrait) export(regionDataToIndInput) export(regionDataToRssInput) @@ -270,7 +282,9 @@ exportClasses(LdData) exportClasses(LdEigen) exportClasses(LdScore) exportClasses(LdStatistic) +exportClasses(MashPrior) exportClasses(MultiStudyQtlDataset) +exportClasses(SldscData) exportClasses(SumStatsBase) exportMethods(adjustPips) exportMethods(colocboostPipeline) @@ -278,6 +292,8 @@ exportMethods(computeLdScores) exportMethods(estimateH2) exportMethods(fineMappingPipeline) exportMethods(getAf) +exportMethods(getAnnotCols) +exportMethods(getAnnotData) exportMethods(getAnnotationMeta) exportMethods(getAnnotations) exportMethods(getBlockMetadata) @@ -285,7 +301,7 @@ exportMethods(getBlocks) exportMethods(getContexts) exportMethods(getCorrelation) exportMethods(getCs) -exportMethods(getCvPerformance) +exportMethods(getCvFits) exportMethods(getCvResult) exportMethods(getDataType) exportMethods(getEigenList) @@ -293,6 +309,8 @@ exportMethods(getEnrichment) exportMethods(getFineMappingResult) exportMethods(getFits) exportMethods(getFormat) +exportMethods(getFrqData) +exportMethods(getFullFit) exportMethods(getGenome) exportMethods(getGenotypeCovariates) exportMethods(getGenotypeHandle) @@ -336,6 +354,9 @@ exportMethods(getSumstatDf) exportMethods(getSusieFit) exportMethods(getTauBlocks) exportMethods(getTopLoci) +exportMethods(getTraitNames) +exportMethods(getTraitRun) +exportMethods(getTraitRuns) exportMethods(getTraits) exportMethods(getTwasWeights) exportMethods(getVarY) @@ -445,6 +466,7 @@ importFrom(tibble,tibble) importFrom(tictoc,tic) importFrom(tictoc,toc) importFrom(tidyr,separate) +importFrom(tidyselect,all_of) importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) importFrom(utils,combn) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index db9f0c57..e435c458 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -495,15 +495,6 @@ setGeneric("getWeights", function(x, ...) standardGeneric("getWeights")) setGeneric("getStandardized", function(x, ...) standardGeneric("getStandardized")) -#' @title Get CV Performance -#' @description Extract cross-validation performance metrics. -#' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. -#' @param ... Class-specific selection arguments. -#' @return Method-specific (typically a list). -#' @export -setGeneric("getCvPerformance", - function(x, ...) standardGeneric("getCvPerformance")) - #' @title Get Model Fits #' @description Extract fitted model objects. #' @param x A \code{TwasWeightsEntry} or \code{TwasWeights}. @@ -512,6 +503,24 @@ setGeneric("getCvPerformance", #' @export setGeneric("getFits", function(x, ...) standardGeneric("getFits")) +#' @title Get the Full-Data Prior from a MashPrior +#' @description Accessor for the \code{fullFit} slot (the full-data data-driven +#' prior payload). +#' @param x A \code{MashPrior} object. +#' @param ... Unused. +#' @return The full-data prior payload, or \code{NULL}. +#' @export +setGeneric("getFullFit", function(x, ...) standardGeneric("getFullFit")) + +#' @title Get the Per-Fold Priors from a MashPrior +#' @description Accessor for the \code{cvFits} slot (per-fold priors + +#' \code{samplePartition}). +#' @param x A \code{MashPrior} object. +#' @param ... Unused. +#' @return The \code{cvFits} list, or \code{NULL}. +#' @export +setGeneric("getCvFits", function(x, ...) standardGeneric("getCvFits")) + #' @title Get Method Names #' @description Extract method names from a collection class. #' @param x A \code{FineMappingResult} or \code{TwasWeights} object. @@ -846,3 +855,68 @@ setGeneric("getTauBlocks", function(x) standardGeneric("getTauBlocks")) #' @return Numeric (length 1). #' @export setGeneric("getH2", function(x) standardGeneric("getH2")) + +# Internal generics for the unified joint-analysis engine (see R/JointGroup.R +# and dev/jointSpecification-s4-refactor.md). Not exported: the engine and its +# fitters are package-internal machinery. + +# fitJointGroup(group, pipeline, token, args) -- multiple dispatch on +# (JointGroup subclass, JointPipeline subclass). The 4 irreducible joint fits +# (individual/sumstats x fm/twas). Returns one fit entry (FineMappingEntry or +# TwasWeightsEntry). +setGeneric("fitJointGroup", + function(group, pipeline, token, args) standardGeneric("fitJointGroup")) + +# construct(pipeline, rows) -- assemble the per-pipeline result collection +# (QtlFineMappingResult vs TwasWeights) from accumulated joint rows. The joint +# row identity (which axes collapse to "joint" + jointStudies/Contexts/Traits) +# is derived from each group's `conditions` by the rows accumulator. +setGeneric("construct", + function(pipeline, rows, ...) standardGeneric("construct")) + +# ---- SldscData accessors ---- +#' @title Get the annotation table from an SldscData +#' @param x An \code{\link{SldscData}} object. +#' @return A \code{data.frame} of annotations (CHR, SNP, annotation columns). +#' @rdname getAnnotData +#' @export +setGeneric("getAnnotData", function(x) standardGeneric("getAnnotData")) + +#' @title Get the allele-frequency table from an SldscData +#' @param x An \code{\link{SldscData}} object. +#' @return A \code{data.frame} of reference-panel frequencies (SNP, MAF). +#' @rdname getFrqData +#' @export +setGeneric("getFrqData", function(x) standardGeneric("getFrqData")) + +#' @title Get the per-trait runs list from an SldscData +#' @param x An \code{\link{SldscData}} object. +#' @return The named list of per-trait \code{single}/\code{joint} runs. +#' @rdname getTraitRuns +#' @export +setGeneric("getTraitRuns", function(x) standardGeneric("getTraitRuns")) + +#' @title Get the trait names from an SldscData +#' @param x An \code{\link{SldscData}} object. +#' @return A character vector of trait names. +#' @rdname getTraitNames +#' @export +setGeneric("getTraitNames", function(x) standardGeneric("getTraitNames")) + +#' @title Get the annotation column names from an SldscData +#' @param x An \code{\link{SldscData}} object. +#' @return A character vector of annotation column names. +#' @rdname getAnnotCols +#' @export +setGeneric("getAnnotCols", function(x) standardGeneric("getAnnotCols")) + +#' @title Get one trait's run from an SldscData +#' @param x An \code{\link{SldscData}} object. +#' @param trait Character. Trait name. +#' @param ... Further arguments: \code{mode} (\code{"single"}/\code{"joint"}) +#' and \code{idx} (which single run). +#' @return A single run list, the list of single runs, or \code{NULL}. +#' @rdname getTraitRun +#' @export +setGeneric("getTraitRun", + function(x, trait, ...) standardGeneric("getTraitRun")) diff --git a/R/JointGroup.R b/R/JointGroup.R new file mode 100644 index 00000000..3003375a --- /dev/null +++ b/R/JointGroup.R @@ -0,0 +1,111 @@ +# ============================================================================= +# JointGroup S4 hierarchy + dispatch scaffolding +# ----------------------------------------------------------------------------- +# The intermediate contract for the unified joint-analysis engine (see +# dev/jointSpecification-s4-refactor.md). Every enumerator emits a list of +# `JointGroup`s; every fitter consumes one. The grammar/parsing half of +# jointSpecification.R and the auto-detection paths both funnel through this. +# +# JointGroup (VIRTUAL) the conditions fitted jointly: a data.frame with +# one row per fitted condition (= per Y/Z column), +# carrying its (study, context, trait) identity. +# IndividualJointGroup design = individual-level (X, Y) +# SumStatsJointGroup design = summary-statistic (Z, R, N) +# +# The OUTPUT row identity is DERIVED from `conditions`: an axis that takes one +# value across all conditions is fixed (that value); an axis that varies is +# collapsed to "joint" with the distinct members recorded in jointStudies / +# jointContexts / jointTraits. So cross-context / cross-trait / cross-study are +# the single-varying-axis case and composed is the >1-varying-axis case -- +# uniformly, with the actual fitted tuples preserved (composed loses nothing). +# +# JointDispatchCell one row of the wiring table: (pattern, dataForm) +# -> enumerator + minGroup +# JointPipeline (VIRTUAL) pipeline marker carrying per-pipeline config +# FmJointPipeline fine-mapping -> QtlFineMappingResult +# TwasJointPipeline twas weights -> TwasWeights +# +# Construction is validated (new() runs validity), so an enumerator cannot emit +# a malformed group and a mistyped dispatch cell fails at package load. +# ============================================================================= + +#' @include AllGenerics.R +NULL + +# ---- JointGroup virtual base ------------------------------------------------ +setClass("JointGroup", + contains = "VIRTUAL", + representation(conditions = "data.frame"), # one row per condition (Y/Z column) + validity = function(object) { + errors <- character() + if (!all(c("study", "context", "trait") %in% names(object@conditions))) { + errors <- c(errors, + "'conditions' must have columns 'study', 'context', 'trait'") + } else if (nrow(object@conditions) < 1L) { + errors <- c(errors, "a group needs >= 1 condition (Y/Z column)") + } + if (length(errors) == 0L) TRUE else errors + }) + +# ---- IndividualJointGroup --------------------------------------------------- +# `pos` is the per-condition functional position (one per Y column), set only by +# the cross-trait enumerator for fsusie (functional SuSiE over the trait domain); +# empty for every other pattern/method. +setClass("IndividualJointGroup", + contains = "JointGroup", + representation(X = "matrix", Y = "matrix", pos = "numeric"), + validity = function(object) { + errors <- character() + if (nrow(object@X) != nrow(object@Y)) + errors <- c(errors, "X and Y must share the sample (row) dimension") + if (ncol(object@Y) != nrow(object@conditions)) + errors <- c(errors, "ncol(Y) must equal nrow(conditions)") + if (length(object@pos) > 0L && length(object@pos) != ncol(object@Y)) + errors <- c(errors, "when set, 'pos' must have one entry per Y column") + if (length(errors) == 0L) TRUE else errors + }) + +# ---- SumStatsJointGroup ----------------------------------------------------- +setClass("SumStatsJointGroup", + contains = "JointGroup", + representation(Z = "matrix", R = "matrix", N = "numeric"), + validity = function(object) { + errors <- character() + if (nrow(object@R) != ncol(object@R)) + errors <- c(errors, "'R' (LD) must be square") + if (nrow(object@Z) != nrow(object@R)) + errors <- c(errors, "'Z' rows (variants) must match the 'R' dimension") + if (ncol(object@Z) != nrow(object@conditions)) + errors <- c(errors, "ncol(Z) must equal nrow(conditions)") + if (length(errors) == 0L) TRUE else errors + }) + +# ---- JointDispatchCell ------------------------------------------------------ +setClass("JointDispatchCell", + representation( + pattern = "character", # context / trait / study / composed (a label) + dataForm = "character", # individual / sumstats + enumerate = "function", # (data, scope, args) -> list + minGroup = "integer"), # smallest fittable condition count (joint cells + # use >= 2; the univariate cell uses 1) + validity = function(object) { + errors <- character() + if (length(object@dataForm) != 1L || + !object@dataForm %in% c("individual", "sumstats")) + errors <- c(errors, "'dataForm' must be 'individual' or 'sumstats'") + if (length(object@minGroup) != 1L || object@minGroup < 1L) + errors <- c(errors, "'minGroup' must be a single integer >= 1") + if (length(errors) == 0L) TRUE else errors + }) + +# ---- Pipeline markers ------------------------------------------------------- +# Not empty: the `config` list carries the per-pipeline parameter tail +# (coverage/cvFolds/samplePartition/fitFullData/retainFit/... for fm; +# retainFit/retainFitDetail/cvFolds/... for twas), and dispatch on the concrete +# class selects the result type via `construct()`. +setClass("JointPipeline", + contains = "VIRTUAL", + representation(config = "list")) + +setClass("FmJointPipeline", contains = "JointPipeline") +setClass("TwasJointPipeline", contains = "JointPipeline") diff --git a/R/MashPrior.R b/R/MashPrior.R new file mode 100644 index 00000000..1d22abe1 --- /dev/null +++ b/R/MashPrior.R @@ -0,0 +1,117 @@ +# ============================================================================= +# MashPrior S4 class +# ----------------------------------------------------------------------------- +# Data-driven (mash) prior bundle consumed by twasWeightsPipeline (mr.mash) and, +# transitively, by fineMappingPipeline (mvSuSiE). It is an INPUT-only container: +# mashPipeline() produces the prior payload(s); this class packages a full-data +# prior together with optional per-fold (cross-validated) priors plus the fold +# partition they were computed on, so honest CV reuses the SAME folds. +# +# fullFit the full-data data-driven prior payload: the mashPipeline() output +# list(U = , w = ), handed to +# mr.mash as `dataDrivenPriorMatrices` for the full-data fit. +# cvFits NULL, or a list with: +# samplePartition data.frame(Sample, Fold) — the CV folds the +# per-fold priors were computed on. +# perFoldFits list of per-fold prior payloads (each the same +# shape as `fullFit`); perFoldFits[[j]] is the +# prior for fold j, ordered to match +# sort(unique(Fold)). +# ============================================================================= + +#' @include AllGenerics.R +NULL + +#' @title Data-Driven (mash) Prior Bundle +#' @description Input container packaging a full-data data-driven prior with +#' optional per-fold (cross-validated) priors and the fold partition they were +#' computed on. Produced (eventually) by \code{mashPipeline()} and consumed by +#' \code{twasWeightsPipeline()} (mr.mash); the per-fold fits then flow to +#' \code{fineMappingPipeline()} (mvSuSiE) via the resulting +#' \code{\link{TwasWeights}}. +#' @slot fullFit The full-data data-driven prior payload — the +#' \code{mashPipeline()} output \code{list(U, w)}; fed to mr.mash as +#' \code{dataDrivenPriorMatrices} for the full-data fit. \code{NULL} when only +#' per-fold priors are supplied (a CV-only run). +#' @slot cvFits \code{NULL}, or a list with \code{samplePartition} +#' (\code{data.frame(Sample, Fold)}) and \code{perFoldFits} (a list of per-fold +#' prior payloads, \code{perFoldFits[[j]]} for fold \code{j}). +#' @export +setClass("MashPrior", + representation( + fullFit = "ANY", + cvFits = "ANY"), + validity = function(object) { + errors <- character() + if (is.null(object@fullFit) && is.null(object@cvFits)) { + errors <- c(errors, + "a MashPrior must carry at least one of `fullFit` or `cvFits`") + } + cv <- object@cvFits + if (!is.null(cv)) { + if (!is.list(cv) || is.null(cv$perFoldFits)) { + errors <- c(errors, + "`cvFits` must be a list with a `perFoldFits` element") + } else { + if (!is.list(cv$perFoldFits) || length(cv$perFoldFits) == 0L) { + errors <- c(errors, "`cvFits$perFoldFits` must be a non-empty list") + } + sp <- cv$samplePartition + if (!is.null(sp)) { + if (!is.data.frame(sp) || !all(c("Sample", "Fold") %in% names(sp))) { + errors <- c(errors, + "`cvFits$samplePartition` must be a data.frame with `Sample` and `Fold` columns") + } else if (is.list(cv$perFoldFits)) { + nF <- length(unique(sp$Fold)) + if (length(cv$perFoldFits) != nF) { + errors <- c(errors, sprintf( + "`cvFits$perFoldFits` has %d element(s) but the partition defines %d fold(s)", + length(cv$perFoldFits), nF)) + } + } + } + } + } + if (length(errors) == 0L) TRUE else errors + } +) + +#' @title Create a MashPrior Object +#' @description Construct a \code{\link{MashPrior}} bundling a full-data +#' data-driven prior with optional per-fold (cross-validated) priors. +#' @param fullFit Full-data data-driven prior payload (the +#' \code{mashPipeline()} \code{list(U, w)} output), or \code{NULL} for a +#' CV-only bundle. +#' @param cvFits \code{NULL}, or a list with \code{perFoldFits} (a non-empty +#' list of per-fold prior payloads) and optionally \code{samplePartition} +#' (\code{data.frame(Sample, Fold)}). +#' @return A \code{MashPrior} object. +#' @export +MashPrior <- function(fullFit = NULL, cvFits = NULL) { + obj <- new("MashPrior", fullFit = fullFit, cvFits = cvFits) + validObject(obj) + obj +} + +#' @rdname getFullFit +#' @export +setMethod("getFullFit", "MashPrior", function(x, ...) x@fullFit) + +#' @rdname getCvFits +#' @export +setMethod("getCvFits", "MashPrior", function(x, ...) x@cvFits) + +#' @export +setMethod("show", "MashPrior", function(object) { + cat("MashPrior\n") + cat(sprintf(" fullFit: %s\n", + if (is.null(object@fullFit)) "none" else "present")) + cv <- object@cvFits + if (is.null(cv)) { + cat(" cvFits: none\n") + } else { + nF <- if (!is.null(cv$perFoldFits)) length(cv$perFoldFits) else 0L + cat(sprintf(" cvFits: %d per-fold prior(s)%s\n", nF, + if (!is.null(cv$samplePartition)) " + samplePartition" else "")) + } +}) diff --git a/R/SldscData.R b/R/SldscData.R new file mode 100644 index 00000000..721a15e7 --- /dev/null +++ b/R/SldscData.R @@ -0,0 +1,143 @@ +#' @title S-LDSC input data container +#' @description An in-memory bundle of the loaded S-LDSC inputs, produced from +#' the reader functions (\code{\link{readSldscAnnot}}, \code{\link{readSldscFrq}}, +#' \code{\link{readSldscTrait}}) and consumed by +#' \code{\link{sldscPostprocessingPipeline}}. The class itself performs no +#' file I/O: the user runs the readers, then constructs an \code{SldscData} +#' from those in-memory objects, and the pipeline does all computation on it. +#' @slot annot A \code{data.frame} of target annotations with at least +#' \code{CHR} and \code{SNP} columns plus one or more annotation columns +#' (\code{BP}/\code{CM} optional). +#' @slot frq A \code{data.frame} of reference-panel allele frequencies with +#' \code{SNP} and \code{MAF} columns (a 0-row frame when no \code{.frq} data +#' was supplied). +#' @slot traits A named list, one entry per trait, each a list with a +#' \code{single} element (list of per-target \code{\link{readSldscTrait}} +#' runs) and an optional \code{joint} element (a single run, or \code{NULL}). +#' @name SldscData-class +#' @include AllGenerics.R +#' @importFrom methods new validObject is +#' @exportClass SldscData +NULL + +setClass("SldscData", + slots = c( + annot = "data.frame", + frq = "data.frame", + traits = "list" + ), + prototype = list( + annot = data.frame(), + frq = data.frame(), + traits = list() + )) + +setValidity("SldscData", function(object) { + errs <- character(0) + + annot <- object@annot + if (!all(c("CHR", "SNP") %in% names(annot))) + errs <- c(errs, "`annot` must have columns CHR and SNP.") + annotCols <- setdiff(names(annot), c("CHR", "SNP", "BP", "CM")) + if (length(annotCols) == 0L) + errs <- c(errs, "`annot` must have at least one annotation column beyond CHR/SNP/BP/CM.") + + frq <- object@frq + if (nrow(frq) > 0L && !all(c("SNP", "MAF") %in% names(frq))) + errs <- c(errs, "non-empty `frq` must have columns SNP and MAF.") + + tr <- object@traits + if (length(tr) > 0L) { + if (is.null(names(tr)) || any(!nzchar(names(tr)))) + errs <- c(errs, "`traits` must be a named list (one entry per trait).") + for (nm in names(tr)) { + t <- tr[[nm]] + if (!is.list(t) || !("single" %in% names(t))) + errs <- c(errs, sprintf( + "traits[['%s']] must be a list with a `single` element.", nm)) + else if (!is.list(t$single)) + errs <- c(errs, sprintf( + "traits[['%s']]$single must be a list of runs.", nm)) + } + } + + if (length(errs)) errs else TRUE +}) + +#' Construct an SldscData object +#' +#' Bundles the in-memory outputs of the S-LDSC readers into a single object for +#' \code{\link{sldscPostprocessingPipeline}}. Performs no file I/O. +#' +#' @param annot A target-annotation \code{data.frame} (e.g. from +#' \code{\link{readSldscAnnot}}): \code{CHR}, \code{SNP}, and one or more +#' annotation columns. +#' @param frq Optional reference-panel allele-frequency \code{data.frame} (e.g. +#' from \code{\link{readSldscFrq}}): \code{SNP}, \code{MAF}. \code{NULL} (the +#' default) stores an empty frame, which disables MAF-based filtering. +#' @param traits A named list of per-trait runs; each entry a list with a +#' \code{single} list (per-target \code{\link{readSldscTrait}} outputs) and an +#' optional \code{joint} run. +#' @return An \code{SldscData} object. +#' @seealso \code{\link{readSldscAnnot}}, \code{\link{readSldscFrq}}, +#' \code{\link{readSldscTrait}}, \code{\link{sldscPostprocessingPipeline}} +#' @rdname SldscData +#' @export +SldscData <- function(annot, frq = NULL, traits = list()) { + if (missing(annot)) stop("SldscData: `annot` is required.") + if (is.null(frq)) frq <- data.frame() + obj <- new("SldscData", + annot = as.data.frame(annot), + frq = as.data.frame(frq), + traits = traits) + validObject(obj) + obj +} + +# ---- accessors ---- + +#' @rdname getAnnotData +#' @export +setMethod("getAnnotData", "SldscData", function(x) x@annot) + +#' @rdname getFrqData +#' @export +setMethod("getFrqData", "SldscData", function(x) x@frq) + +#' @rdname getTraitRuns +#' @export +setMethod("getTraitRuns", "SldscData", function(x) x@traits) + +#' @rdname getTraitNames +#' @export +setMethod("getTraitNames", "SldscData", function(x) names(x@traits)) + +#' @rdname getAnnotCols +#' @export +setMethod("getAnnotCols", "SldscData", + function(x) setdiff(names(x@annot), c("CHR", "SNP", "BP", "CM"))) + +#' @rdname getTraitRun +#' @export +setMethod("getTraitRun", "SldscData", + function(x, trait, mode = c("single", "joint"), idx = NULL) { + mode <- match.arg(mode) + t <- x@traits[[trait]] + if (is.null(t)) return(NULL) + if (mode == "joint") return(t$joint) + if (is.null(idx)) return(t$single) + if (idx > length(t$single)) return(NULL) + t$single[[idx]] + }) + +#' @rdname SldscData +setMethod("show", "SldscData", function(object) { + cat("SldscData\n") + cat(" annotations (", length(getAnnotCols(object)), "): ", + paste(getAnnotCols(object), collapse = ", "), "\n", sep = "") + cat(" annot SNPs: ", nrow(object@annot), + " | frq SNPs: ", nrow(object@frq), "\n", sep = "") + cat(" traits (", length(object@traits), "): ", + paste(names(object@traits), collapse = ", "), "\n", sep = "") + invisible(object) +}) diff --git a/R/TwasWeightsEntry.R b/R/TwasWeightsEntry.R index 914a924c..d70c7fbd 100644 --- a/R/TwasWeightsEntry.R +++ b/R/TwasWeightsEntry.R @@ -15,7 +15,7 @@ setClass("TwasWeightsEntry", variantIds = "character", weights = "ANY", fits = "ANY", - cvPerformance = "ANY", + cvResult = "ANY", standardized = "logical", dataType = "ANY" ), @@ -81,20 +81,25 @@ setClass("TwasWeightsEntry", #' collection. #' @param variantIds Character vector of variant IDs. #' @param weights Numeric vector or matrix. -#' @param fits Optional method-specific fit object. -#' @param cvPerformance Optional list of CV metrics. +#' @param fits Optional method-specific fit object (the full-data fit; e.g. the +#' mr.mash fit's \code{{dataDrivenPriorMatrices, w0, V}}). +#' @param cvResult Optional cross-validation payload: a list mirroring +#' \code{FineMappingEntry@cvResult} with \code{samplePartition}, +#' \code{predictions}, \code{performance}, and (mr.mash only) \code{foldFits} +#' — the per-fold fits that \code{fineMappingPipeline}'s mvSuSiE path consumes +#' as per-fold priors. #' @param standardized Logical (length 1). #' @param dataType Optional data-type tag. #' @return A \code{TwasWeightsEntry} object. #' @export TwasWeightsEntry <- function(variantIds, weights, fits = NULL, - cvPerformance = NULL, standardized = FALSE, + cvResult = NULL, standardized = FALSE, dataType = NULL) { obj <- new("TwasWeightsEntry", variantIds = as.character(variantIds), weights = weights, fits = fits, - cvPerformance = cvPerformance, + cvResult = cvResult, standardized = isTRUE(standardized), dataType = dataType) validObject(obj) @@ -116,10 +121,10 @@ setMethod("getVariantIds", "TwasWeightsEntry", setMethod("getFits", "TwasWeightsEntry", function(x, ...) x@fits) -#' @rdname getCvPerformance +#' @rdname getCvResult #' @export -setMethod("getCvPerformance", "TwasWeightsEntry", - function(x, ...) x@cvPerformance) +setMethod("getCvResult", "TwasWeightsEntry", + function(x, ...) x@cvResult) #' @rdname getStandardized #' @export @@ -135,6 +140,6 @@ setMethod("getDataType", "TwasWeightsEntry", setMethod("show", "TwasWeightsEntry", function(object) { cat(sprintf("TwasWeightsEntry: %d variants, standardized=%s\n", length(object@variantIds), object@standardized)) - hasCv <- !is.null(object@cvPerformance) + hasCv <- !is.null(object@cvResult) cat(sprintf(" CV performance: %s\n", hasCv)) }) diff --git a/R/causalInferencePipeline.R b/R/causalInferencePipeline.R index ae34c843..f3b4cad9 100644 --- a/R/causalInferencePipeline.R +++ b/R/causalInferencePipeline.R @@ -52,24 +52,24 @@ #' @param rsqCutoff Numeric (length 1). When \code{> 0}, performs CV weight #' selection (ports the legacy \code{twas_pipeline} \code{pick_best_model} + #' \code{update_twas_method}): per \code{(study, context, trait, gwasStudy)} -#' keep only the method whose \code{cvPerformance} \code{rsqOption} metric is +#' keep only the method whose \code{cvResult} \code{rsqOption} metric is #' highest among methods that clear both \code{rsqCutoff} and the #' \code{rsqPvalCutoff} gate AND that produced a finite TWAS Z (the NA/Inf #' re-selection); groups where no method clears the cutoffs are dropped. A -#' group whose methods carry no usable \code{cvPerformance} (the SS-TWAS -#' path) keeps all methods. Needs the \code{twasWeights} \code{cvPerformance}, +#' group whose methods carry no usable \code{cvResult} (the SS-TWAS +#' path) keeps all methods. Needs the \code{twasWeights} \code{cvResult}, #' so selection is a no-op on the fineMappingResult-only path. Default #' \code{0} (no selection; score every method). #' @param rsqPvalCutoff Numeric (length 1). CV-p-value gate for weight #' selection (ports legacy \code{rsq_pval_cutoff}): a method is eligible only -#' when its \code{cvPerformance} \code{rsqPvalOption} metric is +#' when its \code{cvResult} \code{rsqPvalOption} metric is #' \code{< rsqPvalCutoff}. Default \code{Inf} (no p-value gate). A finite #' value activates selection even when \code{rsqCutoff = 0}. -#' @param rsqOption Character. Which \code{cvPerformance} metric is the +#' @param rsqOption Character. Which \code{cvResult} metric is the #' "r-squared" used for the cutoff and ranking (ports legacy #' \code{rsq_option}); typically \code{"rsq"} or \code{"adj_rsq"}. #' Default \code{"rsq"}. -#' @param rsqPvalOption Character vector of candidate \code{cvPerformance} +#' @param rsqPvalOption Character vector of candidate \code{cvResult} #' metric names for the p-value gate (ports legacy \code{rsq_pval_option}); #' the first one present in a tuple's metrics is used. Default #' \code{c("adj_rsq_pval", "pval")}. @@ -308,14 +308,14 @@ causalInferencePipeline <- function(gwasSumStats, } # Resolve one CV metric (rsqOption / rsqPvalOption) for a single tuple from the -# TwasWeights cvPerformance, which the individual-level CV path stores as a list +# TwasWeights cvResult, which the individual-level CV path stores as a list # with a named $metrics vector (corr, rsq, adj_rsq, pval, RMSE, MAE); a bare # metrics vector / data frame is tolerated too. `which` is a vector of candidate # metric names; the first present is used. Returns NA when no usable metric. # @noRd .cipCvMetric <- function(twasWeights, study, context, trait, method, which) { perf <- tryCatch( - getCvPerformance(twasWeights, study = study, context = context, + getCvResult(twasWeights, study = study, context = context, trait = trait, method = method), error = function(e) NULL) if (is.null(perf)) return(NA_real_) diff --git a/R/colocPipeline.R b/R/colocPipeline.R index 8e27de0c..bf6504c9 100644 --- a/R/colocPipeline.R +++ b/R/colocPipeline.R @@ -142,8 +142,10 @@ colocPipeline <- function(qtlFineMappingResult, paste(missingCols, collapse = ", ")) } if (!requireNamespace("coloc", quietly = TRUE)) { + # nocov start stop("Package 'coloc' is required for colocPipeline. ", "Install with: install.packages('coloc').") + # nocov end } if (!methods::is(qtlFineMappingResult, "QtlFineMappingResult")) { stop("`qtlFineMappingResult` must be a QtlFineMappingResult ", diff --git a/R/colocboostPipeline.R b/R/colocboostPipeline.R index d6e8af59..b97298d9 100644 --- a/R/colocboostPipeline.R +++ b/R/colocboostPipeline.R @@ -111,7 +111,9 @@ setGeneric("colocboostPipeline", # Run colocboost() with tryCatch + timing. .cbRun <- function(label, args) { if (!requireNamespace("colocboost", quietly = TRUE)) { + # nocov start stop("The colocboost package is required for colocboostPipeline().") + # nocov end } t1 <- Sys.time() args <- Filter(Negate(is.null), args) @@ -170,10 +172,6 @@ setGeneric("colocboostPipeline", # Returns the retained Y (NULL when no outcome clears the threshold). .cbPipSkipOutcomes <- function(X, Y, cutoff) { if (is.null(cutoff) || is.na(cutoff) || cutoff == 0) return(Y) - if (!requireNamespace("susieR", quietly = TRUE)) { - warning("susieR not available; pipCutoffToSkip filter not applied.") - return(Y) - } if (!is.double(X)) storage.mode(X) <- "double" # susieR needs double X thr <- if (cutoff < 0) 3 / ncol(X) else cutoff keep <- logical(ncol(Y)) diff --git a/R/ctwasPipeline.R b/R/ctwasPipeline.R index dd8bf142..a1e94320 100644 --- a/R/ctwasPipeline.R +++ b/R/ctwasPipeline.R @@ -163,8 +163,10 @@ assembleCtwasInputs <- function(gwasSumStats, twasWeights, minPipCutoff = 0, maxNumVariants = Inf) { if (!requireNamespace("ctwas", quietly = TRUE)) { + # nocov start stop("Package 'ctwas' is required for the cTWAS pipeline. ", "Install from https://github.com/xinhe-lab/ctwas .") + # nocov end } if (missing(gwasSumStats) || !is.list(gwasSumStats) || methods::is(gwasSumStats, "GwasSumStats")) @@ -339,7 +341,9 @@ estCtwasParam <- function(inputs, fallbackToPrefit = FALSE, ...) { if (!requireNamespace("ctwas", quietly = TRUE)) { + # nocov start stop("Package 'ctwas' is required for estCtwasParam.") + # nocov end } groupPriorVarStructure <- match.arg(groupPriorVarStructure) # ctwas::assemble_region_data assumes z_gene is non-NULL; when the @@ -440,7 +444,9 @@ screenCtwasRegions <- function(estResult, ncore = 1L, ...) { if (!requireNamespace("ctwas", quietly = TRUE)) { + # nocov start stop("Package 'ctwas' is required for screenCtwasRegions.") + # nocov end } # ctwas::screen_regions requires thin = 1 region_data; expand the # thinned set first when assemble_region_data was called with thin < 1 @@ -492,7 +498,9 @@ finemapCtwasRegions <- function(screenResult, ncore = 1L, ...) { if (!requireNamespace("ctwas", quietly = TRUE)) { + # nocov start stop("Package 'ctwas' is required for finemapCtwasRegions.") + # nocov end } rd <- screenResult$screened_region_data fmRes <- if (length(rd) == 0L) { @@ -574,8 +582,10 @@ mergeCtwasBoundaryRegions <- function(finemapResult, L = 5L, ncore = 1L, ...) { + # nocov start if (!requireNamespace("ctwas", quietly = TRUE)) stop("Package 'ctwas' is required for mergeCtwasBoundaryRegions.") + # nocov end fmRes <- finemapResult$finemap_res if (is.null(fmRes) || nrow(fmRes) == 0L) { message("mergeCtwasBoundaryRegions: no first-pass finemap result; ", diff --git a/R/fineMappingPipeline.R b/R/fineMappingPipeline.R index fb36c578..2a756e02 100644 --- a/R/fineMappingPipeline.R +++ b/R/fineMappingPipeline.R @@ -263,9 +263,9 @@ setGeneric("fineMappingPipeline", # susieR::susie_rss to switch between susie / susieInf / # susieAsh variants. NA for non-SuSiE-family methods. # -# `mrmash` is intentionally listed with both impls NULL so the capability -# checker emits a clear rejection ("mr.mash is a TWAS-weight-oriented -# method — use twasWeightsPipeline()"). +# This table lists ONLY fine-mapping methods. TWAS-weight-oriented tokens +# (e.g. mr.mash) are not here -- they live in .fmTwasOnlyTokens and are rejected +# with a clear pointer to twasWeightsPipeline() (see .fmCheckMethodCapabilities). # # @noRd .fineMappingMethodCapabilities <- list( @@ -303,15 +303,13 @@ setGeneric("fineMappingPipeline", multivariate = TRUE, gwasAllowed = FALSE, unmappableEffects = NA_character_, - args = list()), - mrmash = list( - individualImpl = NULL, - sumstatImpl = NULL, - multivariate = TRUE, - gwasAllowed = FALSE, - unmappableEffects = NA_character_, args = list())) +# TWAS-weight-oriented method tokens. NOT fine-mapping methods (they belong to +# twasWeightsPipeline); enumerated only so .fmCheckMethodCapabilities rejects +# them with a clear pointer rather than an "unknown token" error. +.fmTwasOnlyTokens <- c("mrmash") + # Normalize a user-supplied `methods` argument into a character vector of # canonical tokens. Mirrors `.twasNormalizeMethods` but the fine-mapping @@ -330,7 +328,7 @@ setGeneric("fineMappingPipeline", # Mirrors the convention of .twasNormalizeMethods so the two pipelines # expose the same shape on the user side. # @noRd -.fmNormalizeMethods <- function(methods) { +.fmNormalizeMethods <- function(methods, L = 20L, Lgreedy = 5L) { if (is.null(methods) || length(methods) == 0L) { stop("fineMappingPipeline: `methods` must be a non-empty character ", "vector or named list of = entries.") @@ -355,35 +353,43 @@ setGeneric("fineMappingPipeline", stop("fineMappingPipeline: `methods` must be a character vector or ", "named list. Got class '", class(methods)[[1L]], "'.") } + # SuSiE-family fit defaults live here (the single source of truth), not in + # CLI wrappers: seed L / L_greedy on every susie-family token whose kwargs did + # not already set them. + for (tk in intersect(tokens, c("susie", "susieInf", "susieAsh"))) { + if (is.null(methodArgs[[tk]][["L"]])) methodArgs[[tk]][["L"]] <- L + if (is.null(methodArgs[[tk]][["L_greedy"]])) methodArgs[[tk]][["L_greedy"]] <- Lgreedy + } list(tokens = tokens, methodArgs = methodArgs) } # Enforce input-class / method compatibility against the fine-mapping -# capability table. Hard-rejects `mrmash` (a TWAS-weight-oriented -# method). Routes the input class through individual / sumstat / GWAS -# branches and emits a single error listing every offending token. +# capability table. Rejects TWAS-weight-oriented tokens (.fmTwasOnlyTokens, +# e.g. mr.mash) with a clear pointer to twasWeightsPipeline(). Routes the input +# class through individual / sumstat / GWAS branches and emits a single error +# listing every offending token. # @noRd .fmCheckMethodCapabilities <- function(tokens, inputKind) { if (length(tokens) == 0L) return(invisible(NULL)) caps <- .fineMappingMethodCapabilities - unknown <- setdiff(tokens, names(caps)) + unknown <- setdiff(tokens, c(names(caps), .fmTwasOnlyTokens)) if (length(unknown) > 0L) { stop(sprintf( "fineMappingPipeline: unknown method token(s): %s. Known tokens: %s.", paste(unknown, collapse = ", "), paste(names(caps), collapse = ", "))) } - hardRejections <- list( - mrmash = "mr.mash is a TWAS-weight-oriented method; use twasWeightsPipeline()") individualKinds <- c("QtlDataset", "MultiStudyQtlDataset") bad <- character(0); reason <- character(0) for (tk in tokens) { - info <- caps[[tk]] - if (tk %in% names(hardRejections)) { - bad <- c(bad, tk); reason <- c(reason, hardRejections[[tk]]) + if (tk %in% .fmTwasOnlyTokens) { + bad <- c(bad, tk) + reason <- c(reason, + "is a TWAS-weight-oriented method; use twasWeightsPipeline()") next } + info <- caps[[tk]] if (inputKind %in% individualKinds) { if (is.null(info$individualImpl)) { bad <- c(bad, tk) @@ -654,7 +660,7 @@ setGeneric("fineMappingPipeline", coverage, secondaryCoverage, signalCutoff, minAbsCorr, csInput = NULL, af = NULL, region = NULL, trim = NULL, - medianAbsCorr = NULL) { + medianAbsCorr = NULL, conditionIdx = NULL) { # Inherit `trim` from the calling method's frame if not passed in # explicitly. The 10 internal call sites don't currently forward it # (they predate the trim knob) so we look it up from the caller. This @@ -678,7 +684,7 @@ setGeneric("fineMappingPipeline", signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, medianAbsCorr = medianAbsCorr, region = region, - csInput = csInput, trim = isTRUE(trim)) + csInput = csInput, conditionIdx = conditionIdx, trim = isTRUE(trim)) out <- formatFinemappingOutput(post, primaryMethod = method) # `formatFinemappingOutput` returns a list with $finemappingEntry as a # bare FineMappingEntry per the helper's contract. @@ -725,14 +731,18 @@ setGeneric("fineMappingPipeline", # when no fit was supplied at all). # @noRd .buildMvsusieReweightedPrior <- function(fitParts, conditionNames, - weightsTol = 1e-10) { + weightsTol = 1e-10, overrideU = NULL) { R <- length(conditionNames) canonical <- function(V) list( priorVariance = mvsusieR::create_mixture_prior( R = R, include_indices = conditionNames), residualVariance = V) if (is.null(fitParts)) return(canonical(NULL)) - ddpm <- fitParts$dataDrivenPriorMatrices + # `overrideU` (mode C / hybrid): reuse this fit's reweighted mixture weights + # (w0) and residual variance (V) but swap in a different set of data-driven + # covariance matrices -- the per-fold mash prior U. Components are matched to + # w0 by name, so the override U must share component names with the fit. + ddpm <- if (!is.null(overrideU)) overrideU else fitParts$dataDrivenPriorMatrices if (is.null(ddpm) || is.null(ddpm$U)) return(canonical(fitParts$V)) w0Updated <- rescaleCovW0(fitParts$w0) w0Updated <- w0Updated[names(w0Updated) %in% names(ddpm$U)] @@ -753,19 +763,23 @@ setGeneric("fineMappingPipeline", # first non-NULL payload. The fit may span more conditions than the mvsusie # block fits -- `.buildMvsusieReweightedPrior(include_indices=)` subsets it. # -# `context` is optional and disambiguates joint fits, which key differently: -# * per-context / cross-context mvsusie -> (study, trait=tid) [context = NULL] -# * cross-trait joint mvsusie -> (study, trait="joint", context=cx) -# Returns NULL when no TwasWeights is supplied or it carries no matching mr.mash -# fit (caller then falls back to the canonical prior). +# Each axis is optional: a NULL axis is NOT filtered (match-any). A joint fit is +# shared across all its per-context rows, so the consumer fixes the constant axes +# and leaves the jointed (varying) axis NULL -- e.g. cross-context mvsusie keys on +# (study, trait) with context = NULL; cross-trait keys on (study, context) with +# trait = NULL (see .jointPriorKey). Returns NULL when no TwasWeights is supplied +# or it carries no matching mr.mash fit (caller falls back to the canonical prior). # @noRd -.fmLookupMrmashFit <- function(twasWeights, study, trait, context = NULL) { +.fmLookupMrmashFit <- function(twasWeights, study = NULL, trait = NULL, + context = NULL) { if (is.null(twasWeights)) return(NULL) - sel <- as.character(twasWeights$study) == study & - as.character(twasWeights$trait) == trait & - as.character(twasWeights$method) == "mrmash" - if (!is.null(context)) - sel <- sel & as.character(twasWeights$context) == context + # Each per-context mr.mash row of a joint group carries the SHARED joint fit, + # so the consumer matches the FIXED axes and leaves the jointed axis NULL + # (match-any). study/trait/context = NULL means "do not filter that axis". + sel <- as.character(twasWeights$method) == "mrmash" + if (!is.null(study)) sel <- sel & as.character(twasWeights$study) == study + if (!is.null(trait)) sel <- sel & as.character(twasWeights$trait) == trait + if (!is.null(context)) sel <- sel & as.character(twasWeights$context) == context for (i in which(sel)) { f <- getFits(twasWeights$entry[[i]]) if (!is.null(f)) return(f) @@ -773,6 +787,58 @@ setGeneric("fineMappingPipeline", NULL } +# Locate the retained per-fold mr.mash CV payload for one (study, trait[, +# context]) inside a `TwasWeights` collection: the mrmash entry's `cvResult`, +# carrying `foldFits` (per-fold lean payloads) + `samplePartition` (the folds +# the per-fold priors were computed on). These let the mvSuSiE CV use an honest +# per-fold prior instead of reusing the full-data prior on every fold. Returns +# NULL when no TwasWeights / no matching mr.mash CV result with fold fits. +# @noRd +.fmLookupMrmashCv <- function(twasWeights, study = NULL, trait = NULL, + context = NULL) { + if (is.null(twasWeights)) return(NULL) + sel <- as.character(twasWeights$method) == "mrmash" + if (!is.null(study)) sel <- sel & as.character(twasWeights$study) == study + if (!is.null(trait)) sel <- sel & as.character(twasWeights$trait) == trait + if (!is.null(context)) sel <- sel & as.character(twasWeights$context) == context + for (i in which(sel)) { + cv <- getCvResult(twasWeights$entry[[i]]) + if (!is.null(cv) && !is.null(cv$foldFits)) return(cv) + } + NULL +} + +# Build the per-fold mvSuSiE reweighted priors for cross-validation from a +# TwasWeights mr.mash CV payload (`mvCv` from .fmLookupMrmashCv). For each fold: +# * full per-fold fit (carries its own w0) -> reweight that fit [mode B] +# * prior-only stub (U but no w0) -> reuse `fullFitParts` w0/V with +# the fold's U via overrideU [mode C] +# Returns a list named by fold id (as character, matching samplePartition$Fold); +# each element a list(priorVariance, residualVariance). NULL if no fold fits. +# @noRd +.fmBuildMvsusiePriorCv <- function(mvCv, fullFitParts, conditionNames, + weightsTol = 1e-10) { + if (is.null(mvCv) || is.null(mvCv$foldFits)) return(NULL) + foldFits <- mvCv$foldFits + sp <- mvCv$samplePartition + foldIds <- if (!is.null(sp)) sort(unique(sp$Fold)) else seq_along(foldFits) + out <- setNames(vector("list", length(foldIds)), as.character(foldIds)) + for (i in seq_along(foldIds)) { + # Match the fold fit by name ("fold_") when available, else by position. + nm <- paste0("fold_", foldIds[[i]]) + ff <- if (!is.null(names(foldFits)) && nm %in% names(foldFits)) foldFits[[nm]] + else if (length(foldFits) >= i) foldFits[[i]] else NULL + if (is.null(ff)) next + out[[i]] <- if (!is.null(ff$w0)) { + .buildMvsusieReweightedPrior(ff, conditionNames, weightsTol) + } else { + .buildMvsusieReweightedPrior(fullFitParts, conditionNames, weightsTol, + overrideU = ff$dataDrivenPriorMatrices) + } + } + out +} + # PCA-reduce a (samples x traits) phenotype matrix to its top `nPCs` principal # component scores, for the `usePCA` top-PC susie path. Centers + scales # (matching the legacy fsusie.R susie_on_top_pc), dropping incomplete rows and @@ -1182,7 +1248,8 @@ setGeneric("fineMappingPipeline", # @noRd .fmCrossValidate <- function(X, Y, tokens, methodArgs, fold, samplePartition = NULL, coverage = 0.95, - pos = NULL, verbose = 1, mvPrior = NULL) { + pos = NULL, verbose = 1, mvPrior = NULL, + mvPriorCv = NULL) { if (length(tokens) == 0L) return(NULL) if (!is.matrix(Y)) { Y <- matrix(Y, ncol = 1L, @@ -1207,12 +1274,19 @@ setGeneric("fineMappingPipeline", Xtr <- X[!isTest, , drop = FALSE] Xte <- X[isTest, , drop = FALSE] Ytr <- Y[!isTest, , drop = FALSE] + # Honest per-fold mvSuSiE prior when supplied (the fold's own mr.mash-derived + # prior); otherwise the single full-data prior is reused on every fold. + mvPriorThisFold <- if (!is.null(mvPriorCv)) { + p <- mvPriorCv[[as.character(j)]] + if (is.null(p)) mvPrior else p + } else mvPrior # Drop columns with zero variance in this training fold. keepCol <- .nonzeroVarColumns(Xtr) XtrK <- Xtr[, keepCol, drop = FALSE] for (tk in tokens) { W <- tryCatch( - .fmFoldWeights(tk, XtrK, Ytr, coverage, methodArgs[[tk]], pos, mvPrior), + .fmFoldWeights(tk, XtrK, Ytr, coverage, methodArgs[[tk]], pos, + mvPriorThisFold), error = function(e) { if (verbose >= 1) message(sprintf(" CV fold %s, method %s failed: %s", @@ -1283,6 +1357,8 @@ setMethod("fineMappingPipeline", "QtlDataset", jointRegions = FALSE, jointSpecification = NULL, addSusieInf = TRUE, + L = 20L, + Lgreedy = 5L, coverage = 0.95, secondaryCoverage = c(0.7, 0.5), signalCutoff = 0.025, @@ -1316,7 +1392,7 @@ setMethod("fineMappingPipeline", "QtlDataset", } xRegions <- .makeXRegions(region, jointRegions) parsedJointSpec <- parseJointSpecification(jointSpecification, data) - norm <- .fmNormalizeMethods(methods) + norm <- .fmNormalizeMethods(methods, L = L, Lgreedy = Lgreedy) tokens <- norm$tokens methodArgs <- norm$methodArgs .fmCheckMethodCapabilities(tokens, "QtlDataset") @@ -1334,7 +1410,10 @@ setMethod("fineMappingPipeline", "QtlDataset", coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, methodArgs = methodArgs, xRegions = xRegions, twasWeights = twasWeights, - dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff) + dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff, + cvFolds = cvFolds, samplePartition = samplePartition, + pipCutoffToSkip = pipCutoffToSkip, + fineMappingResult = fineMappingResult) tokens <- setdiff(tokens, c("mvsusie", "fsusie")) methodArgs <- methodArgs[tokens] if (length(tokens) == 0L) { @@ -1532,361 +1611,30 @@ setMethod("fineMappingPipeline", "QtlDataset", } } - # ---- mvsusie dispatch: joint over selected (contexts, traits). - if (length(mvTokens) > 0L) { - if (!requireNamespace("mvsusieR", quietly = TRUE)) { - stop("mvsusie requires the mvsusieR package. Install with: ", - "devtools::install_github('stephenslab/mvsusieR')") - } - # Detection: when multiple contexts AND single trait => multi-context - # mvsusie (group by trait). When single context AND multiple traits => - # multi-trait mvsusie (group by context). When both multi, iterate per - # context for the multi-trait fit (same convention as the design doc: - # "sequential per-context multi-trait when both are multi"). - mvJobs <- list() - if (nCtx >= 2L && nTraits == 1L) { - # Single trait across many contexts. - mvJobs[[length(mvJobs) + 1L]] <- list( - mode = "multiContext", trait = allTraits[[1L]], - contexts = useCtx) - } else if (nCtx == 1L && nTraits >= 2L) { - mvJobs[[length(mvJobs) + 1L]] <- list( - mode = "multiTrait", context = useCtx[[1L]], - traits = perCtxTraits[[useCtx[[1L]]]]) - } else { - # Both multi => sequential per-context multi-trait fit. - for (ctx in useCtx) { - tr <- perCtxTraits[[ctx]] - if (length(tr) < 2L) next - mvJobs[[length(mvJobs) + 1L]] <- list( - mode = "multiTrait", context = ctx, traits = tr) - } - } - - for (job in mvJobs) { - if (identical(job$mode, "multiContext")) { - tid <- job$trait - # Joint Y across contexts for this single trait. X is drawn from each - # region block (cis or explicit region) and merged across regions. - contextsHere <- job$contexts - Yres <- .fmResidPheno( - data, contexts = contextsHere, traitId = tid, naAction = naAction) - if (length(contextsHere) == 1L) - Yres <- setNames(list(Yres), contextsHere) - baseSamples <- Reduce(intersect, lapply(Yres, rownames)) - - # Resume cache: every (study, ctx, tid, mvsusie) row. - allCached <- TRUE - for (ctx in contextsHere) { - if (is.null(.fmCacheLookup(fineMappingResult, study, ctx, tid, "mvsusie"))) { - allCached <- FALSE; break - } - } - if (allCached) { - for (ctx in contextsHere) { - pushRow(study, ctx, tid, "mvsusie", - .fmCacheLookup(fineMappingResult, study, ctx, tid, "mvsusie")) - } - next - } - - # SER pre-screen: drop contexts with no single-effect signal before - # the joint fit (faithful port of skipConditions). Screen the first - # region block; skip the trait entirely when < 2 contexts survive. - if (.fmScreenActive(pipCutoffToSkip)) { - rg0 <- xRegions[[1L]] - Xscr <- if (is.null(rg0)) { - .fmResidGeno(data, contexts = contextsHere, traitId = tid, - cisWindow = cisWindow, samples = baseSamples) - } else { - .fmResidGeno(data, contexts = contextsHere, region = rg0, - samples = baseSamples) - } - csS <- intersect(baseSamples, rownames(Xscr)) - if (length(csS) >= 2L) { - Yscr <- do.call(cbind, lapply(contextsHere, - function(ctx) Yres[[ctx]][csS, 1L])) - kept <- contextsHere[.fmSerScreenColumns( - Xscr[csS, , drop = FALSE], Yscr, pipCutoffToSkip)] - if (length(kept) < 2L) { - if (verbose >= 1) - message(sprintf( - "Skipping mvsusie (multi-context) for trait='%s': < 2 contexts pass the SER pre-screen.", - tid)) - next - } - if (length(kept) < length(contextsHere)) { - if (verbose >= 1) - message(sprintf( - "mvsusie (multi-context) trait='%s': SER pre-screen kept %d of %d contexts.", - tid, length(kept), length(contextsHere))) - contextsHere <- kept - } - } - } - - if (verbose >= 1) - message(sprintf("Fitting mvsusie (multi-context) for trait='%s' ...", tid)) - # Data-driven mvSuSiE prior: if a prior mr.mash run was supplied via - # `twasWeights`, reuse its fitted mixture weights + residual covariance - # for this (study, trait) -> reweighted create_mixture_prior; else the - # lookup returns NULL and `.buildMvsusieReweightedPrior` falls back to - # the canonical prior (unchanged behavior). Keyed on (study, trait): - # the fit may span more contexts than survive the SER pre-screen, and - # `include_indices = colnames(Yc)` subsets it to the fitted contexts. - mvFitParts <- .fmLookupMrmashFit(twasWeights, study, tid) - fitOneRegion <- function(rg) { - X <- if (is.null(rg)) { - .fmResidGeno(data, contexts = contextsHere, traitId = tid, - cisWindow = cisWindow, samples = baseSamples) - } else { - .fmResidGeno(data, contexts = contextsHere, region = rg, - samples = baseSamples) - } - cs <- intersect(baseSamples, rownames(X)) - if (length(cs) < 2L) { - stop("fineMappingPipeline(QtlDataset, mvsusie multi-context): ", - "insufficient shared samples across selected contexts.") - } - Xc <- X[cs, , drop = FALSE] - afVec <- .fmAfForX(data, Xc, traitId = tid, region = rg, - cisWindow = cisWindow) - Yc <- do.call(cbind, lapply(contextsHere, function(ctx) { - ym <- Yres[[ctx]][cs, , drop = FALSE] - colnames(ym) <- ctx - ym - })) - mvPrior <- .buildMvsusieReweightedPrior( - mvFitParts, colnames(Yc), dataDrivenPriorWeightsCutoff) - mvBaseArgs <- list( - X = Xc, Y = Yc, - prior_variance = mvPrior$priorVariance, - coverage = coverage) - if (!is.null(mvPrior$residualVariance)) - mvBaseArgs$residual_variance <- mvPrior$residualVariance - fit <- do.call(fitMvsusie, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", dataX = Xc, dataY = NULL, - coverage = coverage, secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, - af = afVec, csInput = "X") - if (cvFolds > 1L) { - cv <- .fmCrossValidate(Xc, Yc, "mvsusie", methodArgs, cvFolds, - samplePartition = samplePartition, - coverage = coverage, verbose = verbose, - mvPrior = mvPrior) - entry <- .fmAttachCv(entry, .fmSliceCv(cv, "mvsusie")) - } - entry - } - entry <- .fmJointBlocks(xRegions, fitOneRegion) - # Share the joint (merged) entry across contexts via copy-on-modify. - for (ctx in contextsHere) { - pushRow(study, ctx, tid, "mvsusie", entry) - } - - } else { # multiTrait - ctx <- job$context - traits <- job$traits - # Resume cache: every (study, ctx, trait, mvsusie) row. - allCached <- TRUE - for (tid in traits) { - if (is.null(.fmCacheLookup(fineMappingResult, study, ctx, tid, "mvsusie"))) { - allCached <- FALSE; break - } - } - if (allCached) { - for (tid in traits) { - pushRow(study, ctx, tid, "mvsusie", - .fmCacheLookup(fineMappingResult, study, ctx, tid, "mvsusie")) - } - next - } - - Y <- .fmResidPheno( - data, contexts = ctx, traitId = traits, naAction = naAction) - - # SER pre-screen: drop traits with no single-effect signal before the - # joint fit (faithful port of skipConditions). Skip the context's - # mvsusie when < 2 traits survive. - if (.fmScreenActive(pipCutoffToSkip)) { - rg0 <- xRegions[[1L]] - Xscr <- if (is.null(rg0)) { - .fmResidGeno(data, contexts = ctx, traitId = traits, - cisWindow = cisWindow, samples = rownames(Y)) - } else { - .fmResidGeno(data, contexts = ctx, region = rg0, - samples = rownames(Y)) - } - csS <- intersect(rownames(Xscr), rownames(Y)) - if (length(csS) >= 2L) { - keep <- .fmSerScreenColumns( - Xscr[csS, , drop = FALSE], Y[csS, , drop = FALSE], - pipCutoffToSkip) - if (sum(keep) < 2L) { - if (verbose >= 1) - message(sprintf( - "Skipping mvsusie (multi-trait) for context='%s': < 2 traits pass the SER pre-screen.", - ctx)) - next - } - if (sum(keep) < length(traits)) { - if (verbose >= 1) - message(sprintf( - "mvsusie (multi-trait) context='%s': SER pre-screen kept %d of %d traits.", - ctx, sum(keep), length(traits))) - traits <- traits[keep] - Y <- Y[, keep, drop = FALSE] - } - } - } - - if (verbose >= 1) - message(sprintf("Fitting mvsusie (multi-trait) for context='%s' ...", ctx)) - fitOneRegion <- function(rg) { - X <- if (is.null(rg)) { - .fmResidGeno(data, contexts = ctx, traitId = traits, - cisWindow = cisWindow, samples = rownames(Y)) - } else { - .fmResidGeno(data, contexts = ctx, region = rg, - samples = rownames(Y)) - } - common <- intersect(rownames(X), rownames(Y)) - if (length(common) < 2L) { - stop(sprintf( - "fineMappingPipeline(QtlDataset, mvsusie multi-trait): too few shared samples in context '%s'.", - ctx)) - } - Xc <- X[common, , drop = FALSE] - Yc <- Y[common, , drop = FALSE] - afVec <- .fmAfForX(data, Xc, traitId = traits, region = rg, - cisWindow = cisWindow) - # Multi-trait mvsusie conditions are traits (one context), so there - # is no mr.mash-over-contexts fit to reweight from -- the data-driven - # prior (keyed on a single (study, trait)) does not apply here. Keep - # the canonical prior. - mvBaseArgs <- list( - X = Xc, Y = Yc, - prior_variance = mvsusieR::create_mixture_prior(R = ncol(Yc)), - coverage = coverage) - fit <- do.call(fitMvsusie, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", dataX = Xc, dataY = NULL, - coverage = coverage, secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, - af = afVec, csInput = "X") - if (cvFolds > 1L) { - cv <- .fmCrossValidate(Xc, Yc, "mvsusie", methodArgs, cvFolds, - samplePartition = samplePartition, - coverage = coverage, verbose = verbose) - entry <- .fmAttachCv(entry, .fmSliceCv(cv, "mvsusie")) - } - entry - } - entry <- .fmJointBlocks(xRegions, fitOneRegion) - for (tid in traits) { - pushRow(study, ctx, tid, "mvsusie", entry) - } - } - } - } - - # ---- fsusie dispatch: joint multi-trait per context. - if (length(fsTokens) > 0L) { - if (!requireNamespace("fsusieR", quietly = TRUE)) { - stop("fsusie requires the fsusieR package. Install with: ", - "devtools::install_github('stephenslab/fsusieR')") - } - for (ctx in useCtx) { - traits <- perCtxTraits[[ctx]] - if (length(traits) < 2L) { - stop(sprintf( - "fineMappingPipeline(QtlDataset, fsusie): context '%s' has %d trait(s); fsusie needs at least 2 within a context.", - ctx, length(traits))) - } - # Resume cache. - allCached <- TRUE - for (tid in traits) { - if (is.null(.fmCacheLookup(fineMappingResult, study, ctx, tid, "fsusie"))) { - allCached <- FALSE; break - } - } - if (allCached) { - for (tid in traits) { - pushRow(study, ctx, tid, "fsusie", - .fmCacheLookup(fineMappingResult, study, ctx, tid, "fsusie")) - } - next - } - - Y <- .fmResidPheno( - data, contexts = ctx, traitId = traits, naAction = naAction) - - # Per-trait genomic positions for the wavelet model. Region-independent - # (depends on the trait set / Y columns): midpoint of each trait range. - se <- getPhenotypes(data, contexts = ctx, traitId = traits) - rrIds <- rownames(se) - ord <- match(colnames(Y), rrIds) - if (anyNA(ord)) { - stop("fineMappingPipeline(QtlDataset, fsusie): unable to align trait positions to Y columns.") - } - rr <- SummarizedExperiment::rowRanges(se)[ord] - pos <- (GenomicRanges::start(rr) + GenomicRanges::end(rr)) / 2 - - if (verbose >= 1) - message(sprintf("Fitting fsusie for context='%s' (multi-trait, %d traits) ...", - ctx, length(traits))) - fitOneRegion <- function(rg) { - X <- if (is.null(rg)) { - .fmResidGeno(data, contexts = ctx, traitId = traits, - cisWindow = cisWindow, samples = rownames(Y)) - } else { - .fmResidGeno(data, contexts = ctx, region = rg, - samples = rownames(Y)) - } - common <- intersect(rownames(X), rownames(Y)) - if (length(common) < 2L) { - stop(sprintf("fineMappingPipeline(QtlDataset, fsusie): too few shared samples in context '%s'.", ctx)) - } - Xc <- X[common, , drop = FALSE] - Yc <- Y[common, , drop = FALSE] - afVec <- .fmAfForX(data, Xc, traitId = traits, region = rg, - cisWindow = cisWindow) - fit <- do.call(fitFsusie, - .fmMergeUserArgs(list(X = Xc, Y = Yc, pos = pos), - "fsusie", methodArgs[["fsusie"]])) - # Collapse the functional fit to a variants x features TWAS weight - # matrix now, while fitted_wc/csd_X are still present (trimming drops - # them). Stored on $coef so a trimmed fit can still yield weights. - fit$coef <- tryCatch( - fsusieWeights(fsusieFit = fit, variantIds = colnames(Xc)), - error = function(e) NULL) - fit <- .setFinemappingFitClass(fit, "fsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "fsusie", dataX = Xc, dataY = NULL, - coverage = coverage, secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, - af = afVec, csInput = "fsusie") - if (cvFolds > 1L) { - cv <- .fmCrossValidate(Xc, Yc, "fsusie", methodArgs, cvFolds, - samplePartition = samplePartition, - coverage = coverage, pos = pos, - verbose = verbose) - entry <- .fmAttachCv(entry, .fmSliceCv(cv, "fsusie")) - } - entry - } - entry <- .fmJointBlocks(xRegions, fitOneRegion) - for (tid in traits) { - pushRow(study, ctx, tid, "fsusie", entry) - } - } + # ---- Multivariate dispatch via the joint engine (auto-detected shape). + # mvsusie / fsusie WITHOUT an explicit jointSpecification: synthesize the + # natural joint spec from the data shape (.fmSynthesizeJointSpec) and route + # through the SAME engine as an explicit jointSpecification. This unifies the + # two formerly-separate joint paths and gives the multi-trait fit the data- + # driven mr.mash prior the old auto-detection path lacked. (When an explicit + # jointSpecification ran above, mvsusie/fsusie were already removed from the + # token set, so mvTokens/fsTokens are empty here.) Univariate tokens kept + # their per-(context, trait) iteration above; their rows merge below. + if (length(mvTokens) > 0L || length(fsTokens) > 0L) { + autoJoint <- .fmDispatchJointSpecsQtlDataset( + .fmSynthesizeJointSpec(nCtx, nTraits), data, + c(mvTokens, fsTokens), contexts, traitId, cisWindow, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs, xRegions = xRegions, + twasWeights = twasWeights, + dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff, + cvFolds = cvFolds, samplePartition = samplePartition, + pipCutoffToSkip = pipCutoffToSkip, + fineMappingResult = fineMappingResult) + jointResult <- if (is.null(jointResult)) autoJoint + else if (is.null(autoJoint)) jointResult + else .rbindFineMappingResult(jointResult, autoJoint, + ldSketch = NULL) } perTupleResult <- if (length(rowEntries) > 0L) @@ -2098,7 +1846,8 @@ setMethod("fineMappingPipeline", "QtlSumStats", coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, methodArgs = methodArgs, twasWeights = twasWeights, - dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff) + dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff, + fineMappingResult = fineMappingResult) tokens <- setdiff(tokens, c("mvsusie", "fsusie")) methodArgs <- methodArgs[tokens] if (length(tokens) == 0L) { @@ -2224,87 +1973,26 @@ setMethod("fineMappingPipeline", "QtlSumStats", } } - # ---- mvsusie dispatch: per (study, trait) across selected contexts. + # ---- Multivariate dispatch via the joint engine (auto-detected shape). + # mvsusie WITHOUT an explicit jointSpecification: route the cross-context RSS + # joint (one fit per (study, trait) with >= 2 contexts) through the SAME + # engine as an explicit jointSpecification, replacing the duplicated per- + # group mvsusie_rss loop. The guard above already rejected the all-single- + # context case; the cross-context enumerator skips any 1-context group. (When + # an explicit jointSpecification ran above, mvTokens is empty here.) if (length(mvTokens) > 0L) { - if (!requireNamespace("mvsusieR", quietly = TRUE)) { - stop("mvsusie requires the mvsusieR package. Install with: ", - "devtools::install_github('stephenslab/mvsusieR')") - } - groupKey <- paste(studyCol[selRows], traitCol[selRows], sep = "||") - groups <- split(selRows, groupKey) - for (gkey in names(groups)) { - gIdx <- groups[[gkey]] - if (length(gIdx) < 2L) next - st <- studyCol[gIdx[[1L]]] - tr <- traitCol[gIdx[[1L]]] - ctxNames <- contextCol[gIdx] - - # Resume cache. - allCached <- TRUE - for (ctx in ctxNames) { - if (is.null(.fmCacheLookup(fineMappingResult, st, ctx, tr, "mvsusie"))) { - allCached <- FALSE; break - } - } - if (allCached) { - for (ctx in ctxNames) { - pushRow(st, ctx, tr, "mvsusie", - .fmCacheLookup(fineMappingResult, st, ctx, tr, "mvsusie")) - } - next - } - - firstMc <- S4Vectors::mcols(data$entry[[gIdx[[1L]]]]) - if (!"SNP" %in% colnames(firstMc)) - stop("fineMappingPipeline(QtlSumStats, mvsusie): entry has no SNP mcol.") - variantIds <- as.character(firstMc$SNP) - Z <- matrix(NA_real_, nrow = length(variantIds), ncol = length(gIdx), - dimnames = list(variantIds, ctxNames)) - nVec <- numeric(length(gIdx)) - for (kk in seq_along(gIdx)) { - mc <- S4Vectors::mcols(data$entry[[gIdx[kk]]]) - if (!identical(as.character(mc$SNP), variantIds)) { - stop("fineMappingPipeline(QtlSumStats, mvsusie): every entry in ", - "the (study='", st, "', trait='", tr, "') group must share an ", - "identical SNP order after summaryStatsQc().") - } - Z[, kk] <- as.numeric(mc$Z) - nVec[kk] <- stats::median(as.numeric(mc$N), na.rm = TRUE) - } - ldMat <- .fmLdFromSketch(ldSketch, variantIds) - - if (verbose >= 1) - message(sprintf("Fitting mvsusie (RSS) for (study='%s', trait='%s', %d contexts) ...", - st, tr, length(ctxNames))) - # Data-driven reweighted prior from a prior mr.mash (RSS) run, looked up - # on (study, trait); mvsusie_rss takes the same create_mixture_prior + - # residual_variance (K x K condition residual covariance) as fitMvsusie. - # NULL twasWeights / no fit -> canonical prior (unchanged behavior). - mvPrior <- .buildMvsusieReweightedPrior( - .fmLookupMrmashFit(twasWeights, st, tr), colnames(Z), - dataDrivenPriorWeightsCutoff) - mvBaseArgs <- list( - Z = Z, R = ldMat, N = as.numeric(stats::median(nVec)), - prior_variance = mvPrior$priorVariance, - coverage = coverage) - if (!is.null(mvPrior$residualVariance)) - mvBaseArgs$residual_variance <- mvPrior$residualVariance - fit <- do.call(fitMvsusieRss, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - ent <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = ldMat, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "Xcorr") - for (ctx in ctxNames) { - pushRow(st, ctx, tr, "mvsusie", ent) - } - } + autoJoint <- .fmDispatchJointSpecsQtlSumStats( + list(list(axes = "context", scope = NULL)), data, mvTokens, + contexts, traitId, + coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, + methodArgs = methodArgs, + twasWeights = twasWeights, + dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff, + fineMappingResult = fineMappingResult) + jointResult <- if (is.null(jointResult)) autoJoint + else if (is.null(autoJoint)) jointResult + else .rbindFineMappingResult(jointResult, autoJoint, + ldSketch = ldSketch) } perTupleResult <- if (length(rowEntries) > 0L) @@ -2331,6 +2019,8 @@ setMethod("fineMappingPipeline", "GwasSumStats", function(data, methods, addSusieInf = TRUE, + L = 20L, + Lgreedy = 5L, coverage = 0.95, secondaryCoverage = c(0.7, 0.5), signalCutoff = 0.025, @@ -2341,7 +2031,7 @@ setMethod("fineMappingPipeline", "GwasSumStats", trim = TRUE, ...) { .fmAssertQcd(data) - norm <- .fmNormalizeMethods(methods) + norm <- .fmNormalizeMethods(methods, L = L, Lgreedy = Lgreedy) tokens <- norm$tokens methodArgs <- norm$methodArgs .fmCheckMethodCapabilities(tokens, "GwasSumStats") diff --git a/R/fineMappingWrappers.R b/R/fineMappingWrappers.R index 1a199f79..814e2608 100644 --- a/R/fineMappingWrappers.R +++ b/R/fineMappingWrappers.R @@ -259,6 +259,7 @@ postprocessFinemappingFits <- function(fits, dataX, dataY = NULL, minAbsCorr = 0.8, medianAbsCorr = NULL, csInput = NULL, + conditionIdx = NULL, trim = TRUE) { fits <- fits[!vapply(fits, is.null, logical(1))] if (length(fits) == 0) stop("At least one fine-mapping fit must be supplied.") @@ -280,6 +281,7 @@ postprocessFinemappingFits <- function(fits, dataX, dataY = NULL, priorEffTol = priorEffTol, minAbsCorr = minAbsCorr, medianAbsCorr = medianAbsCorr, csInput = csInput, + conditionIdx = conditionIdx, trim = trim ) }) @@ -349,6 +351,7 @@ postprocessFinemappingFit.susiF <- function(fit, method = "fsusie", csInput = NU trim = TRUE, minAbsCorr = 0.8, medianAbsCorr = NULL, + conditionIdx = NULL, csInput = c("X", "Xcorr", "fsusie")) { csInput <- match.arg(csInput) variantNames <- extractVariantNames(fit) @@ -367,7 +370,7 @@ postprocessFinemappingFit.susiF <- function(fit, method = "fsusie", csInput = NU fit, csTables, variantNames = variantNames, sumstats = sumstats, af = af, method = method, signalCutoff = 0, dataX = dataX, dataY = dataY, otherQuantities = otherQuantities, - region = region + region = region, conditionIdx = conditionIdx ) # When `trim = TRUE` we store a minimal subset of the fit on the @@ -570,7 +573,7 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL, af = NULL, method, signalCutoff = 0, dataX = NULL, dataY = NULL, otherQuantities = NULL, - region = NULL) { + region = NULL, conditionIdx = NULL) { if (missing(method) || is.null(method) || length(method) != 1L || is.na(method) || !nzchar(method)) { stop("buildTopLoci: `method` is required (e.g. \"susie\", \"susieInf\").") @@ -591,10 +594,22 @@ buildTopLoci <- function(fit, csTables, variantNames, sumstats = NULL, } else NA_character_ grange <- .parseGrange(region) - # Per-variant posterior effect / SE, computed once across all variants. + # Per-variant posterior effect / SE. For a multi-condition (mvsusie) fit `mu`/ + # `mu2` are 3-D (L x variants x conditions); `conditionIdx` selects one + # condition's slice so the 2-D collapse below yields THAT condition's posterior + # (the per-context-row representation). conditionIdx = NULL keeps the 2-D path + # (univariate); a 3-D fit without a conditionIdx leaves posterior NA. alpha <- as.matrix(fit$alpha) - mu <- if (!is.null(fit$mu)) as.matrix(fit$mu) else NULL - mu2 <- if (!is.null(fit$mu2)) as.matrix(fit$mu2) else NULL + sliceCond <- function(arr) { + if (is.null(arr)) return(NULL) + if (length(dim(arr)) == 3L) { + if (is.null(conditionIdx)) return(NULL) + return(as.matrix(arr[, , conditionIdx])) + } + as.matrix(arr) + } + mu <- sliceCond(fit$mu) + mu2 <- sliceCond(fit$mu2) postMean <- if (!is.null(mu) && all(dim(alpha) == dim(mu))) { colSums(alpha * mu) } else rep(NA_real_, length(variantNames)) @@ -1038,7 +1053,9 @@ fsusieGetCs <- function(fsusieObj, X, requestedCoverage = 0.95) { fsusieWrapper <- function(X, Y, pos, L, prior, maxSnpEm, covLev, minPurity, maxScale, ...) { # Make sure fsusieR installed if (!requireNamespace("fsusieR", quietly = TRUE)) { + # nocov start stop("To use this function, please install fsusieR: https://github.com/stephenslab/fsusieR") + # nocov end } # Run fsusie fsusieObj <- fsusieR::susiF( @@ -1129,3 +1146,603 @@ fitMvsusieRss <- function(Z, R, N, prior_variance, coverage = 0.95, ...) { fitFsusie <- function(X, Y, pos, ...) { fsusieR::susiF(X = X, Y = Y, pos = pos, ...) } + +# ============================================================================= +# SuSiE / mvSuSiE / fSuSiE TWAS weight extractors +# (relocated from regularizedRegressionWrappers.R: the SuSiE-family weight +# extractors live with the rest of the fine-mapping/SuSiE wrappers). +# ============================================================================= + +# Shared helper for susie/susieAsh/susieInf weight extraction. +# @param fit A susie fit object (or NULL to fit from X, y). +# @param X Genotype matrix (optional). +# @param y Phenotype vector (optional). +# @param requiredFields Fields that must be present in the fit to extract weights. +# @param fitArgs Extra arguments passed to susieR::susie when fit is NULL. +# @param ... Additional arguments forwarded to susieR::susie. +#' @importFrom susieR coef.susie susie +#' @noRd +.susieExtractWeights <- function(fit, X, y, requiredFields, fitArgs = list(), retainFit = FALSE, ...) { + if (is.null(fit)) { + fit <- do.call(susie, c(list(X = X, y = y), fitArgs, list(...))) + } + if (!is.null(X) && length(fit$pip) != ncol(X)) { + stop(paste0( + "Dimension mismatch on number of variant in susie fit ", length(fit$pip), + " and TWAS weights ", ncol(X), ". " + )) + } + if (all(requiredFields %in% names(fit))) { + fit$intercept <- 0 + weights <- coef.susie(fit)[-1] + } else { + weights <- rep(0, length(fit$pip)) + } + if (retainFit) attr(weights, "fit") <- fit + return(weights) +} + +#' Compute SuSiE TWAS weights +#' +#' Extracts coefficients from an existing SuSiE fit or fits `susieR::susie()` +#' from `X` and `y` before extracting weights. +#' +#' @param X Genotype matrix. Required when `susieFit` is NULL. +#' @param y Phenotype vector. Required when `susieFit` is NULL. +#' @param susieFit Optional fitted SuSiE object. +#' @param retainFit If TRUE, stores the fitted object as an attribute on the returned weights. +#' @param ... Additional arguments passed to `susieR::susie()` when fitting. +#' @return Numeric vector of variant weights. +#' @export +susieWeights <- function(X = NULL, y = NULL, susieFit = NULL, retainFit = FALSE, ...) { + .susieExtractWeights(susieFit, X, y, + requiredFields = c("alpha", "mu", "X_column_scale_factors"), + retainFit = retainFit, ...) +} + +#' Compute SuSiE-ASH TWAS weights +#' +#' Extracts coefficients from an existing SuSiE-ASH fit or fits `susieR::susie()` +#' with `unmappable_effects = "ash"`. +#' +#' @param X Genotype matrix. Required when `susieAshFit` is NULL. +#' @param y Phenotype vector. Required when `susieAshFit` is NULL. +#' @param susieAshFit Optional fitted SuSiE-ASH object. +#' @param retainFit If TRUE, stores the fitted object as an attribute on the returned weights. +#' @param ... Additional arguments passed to `susieR::susie()` when fitting. +#' @return Numeric vector of variant weights. +#' @export +susieAshWeights <- function(X = NULL, y = NULL, susieAshFit = NULL, retainFit = FALSE, ...) { + .susieExtractWeights(susieAshFit, X, y, + requiredFields = c("alpha", "mu", "theta", "X_column_scale_factors"), + fitArgs = list(unmappable_effects = "ash", convergence_method = "pip"), + retainFit = retainFit, ...) +} + +#' Compute SuSiE-inf TWAS weights +#' +#' Extracts coefficients from an existing SuSiE-inf fit or fits `susieR::susie()` +#' with `unmappable_effects = "inf"`. +#' +#' @section Non-zero weights with zero PIPs: +#' SuSiE-inf decomposes effects into a mappable component (driven by `alpha * +#' mu`, reported as per-variant PIPs) and an infinitesimal component (driven by +#' `theta`). When the fit converges with no mappable effects -- all `V` and `mu` +#' zero, so every `pip == 0` -- the returned weights are still non-zero because +#' `susieR::coef.susie` adds `theta / X_column_scale_factors` to the mappable +#' coefficient. This is intentional: it captures diffuse polygenic signal that +#' the mappable component could not localize to any credible set. Consumers +#' that interpret per-variant PIPs as a gate on whether to use the weights +#' should be aware that low or zero PIPs do not imply zero TWAS weights here. +#' +#' @param X Genotype matrix. Required when `susieInfFit` is NULL. +#' @param y Phenotype vector. Required when `susieInfFit` is NULL. +#' @param susieInfFit Optional fitted SuSiE-inf object. +#' @param retainFit If TRUE, stores the fitted object as an attribute on the returned weights. +#' @param ... Additional arguments passed to `susieR::susie()` when fitting. +#' @return Numeric vector of variant weights. +#' @export +susieInfWeights <- function(X = NULL, y = NULL, susieInfFit = NULL, retainFit = FALSE, ...) { + .susieExtractWeights(susieInfFit, X, y, + requiredFields = c("alpha", "mu", "theta", "X_column_scale_factors"), + fitArgs = list(unmappable_effects = "inf", convergence_method = "pip"), + retainFit = retainFit, ...) +} +# Internal helper: extract weights from a susieRss fit. +# Mirrors .susie_extract_weights but uses the RSS interface. +#' @importFrom susieR coef.susie susie_rss +#' @noRd +.susieRssExtractWeights <- function(fit, z, R, n, + requiredFields, fitArgs = list(), + retainFit = FALSE) { + if (is.null(fit)) { + fit <- do.call(susie_rss, c(list(z = z, R = R, n = n), fitArgs)) + } + if (length(fit$pip) != nrow(R)) { + stop(paste0( + "Dimension mismatch: susieRss fit has ", length(fit$pip), + " variants but R has ", nrow(R), " rows.")) + } + if (all(requiredFields %in% names(fit))) { + fit$intercept <- 0 + weights <- coef.susie(fit)[-1] + } else { + weights <- rep(0, length(fit$pip)) + } + if (retainFit) attr(weights, "fit") <- fit + return(weights) +} + +#' Compute SuSiE-RSS TWAS weights +#' +#' Extracts coefficients from an existing SuSiE-RSS fit or fits +#' \code{susieR::susie_rss()} from summary statistics and LD. +#' +#' @param stat List with components \code{z} (z-scores), \code{n} (sample sizes). +#' @param LD LD correlation matrix. +#' @param susieRssFit Optional pre-fitted SuSiE-RSS object. +#' @param retainFit If TRUE, stores the fitted object as an attribute. +#' @param methodArgs Named list of additional arguments passed to +#' \code{susieR::susie_rss()}. Use this instead of \code{...} to avoid +#' partial matching of short argument names (e.g. \code{L}) to the +#' \code{LD} parameter. +#' @return Numeric vector of variant weights. +#' @export +susieRssWeights <- function(stat, LD, susieRssFit = NULL, retainFit = TRUE, + methodArgs = list()) { + .susieRssExtractWeights(fit = susieRssFit, z = stat$z, R = LD, n = median(stat$n), + requiredFields = c("alpha", "mu", "X_column_scale_factors"), + fitArgs = methodArgs, + retainFit = retainFit) +} + +#' Compute SuSiE-inf-RSS TWAS weights +#' +#' Extracts coefficients from an existing SuSiE-inf-RSS fit or fits +#' \code{susieR::susie_rss()} with \code{unmappable_effects = "inf"}. +#' +#' @inheritParams susieRssWeights +#' @param susieInfRssFit Optional pre-fitted SuSiE-inf-RSS object. +#' @return Numeric vector of variant weights. +#' @export +susieInfRssWeights <- function(stat, LD, susieInfRssFit = NULL, retainFit = TRUE, + methodArgs = list()) { + .susieRssExtractWeights(fit = susieInfRssFit, z = stat$z, R = LD, n = median(stat$n), + requiredFields = c("alpha", "mu", "theta", "X_column_scale_factors"), + fitArgs = c(list(unmappable_effects = "inf", convergence_method = "pip"), methodArgs), + retainFit = retainFit) +} + +#' Compute SuSiE-ASH-RSS TWAS weights +#' +#' Extracts coefficients from an existing SuSiE-ASH-RSS fit or fits +#' \code{susieR::susie_rss()} with \code{unmappable_effects = "ash"}. +#' +#' @inheritParams susieRssWeights +#' @param susieAshRssFit Optional pre-fitted SuSiE-ASH-RSS object. +#' @return Numeric vector of variant weights. +#' @export +susieAshRssWeights <- function(stat, LD, susieAshRssFit = NULL, retainFit = TRUE, + methodArgs = list()) { + .susieRssExtractWeights(fit = susieAshRssFit, z = stat$z, R = LD, n = median(stat$n), + requiredFields = c("alpha", "mu", "theta", "X_column_scale_factors"), + fitArgs = c(list(unmappable_effects = "ash", convergence_method = "pip"), methodArgs), + retainFit = retainFit) +} +#' Compute mvSuSiE TWAS weights +#' +#' Extracts coefficients from an existing mvSuSiE fit or fits `fitMvsusie()` +#' from `X` and `Y`. +#' +#' @param mvsusieFit Optional fitted mvSuSiE object. +#' @param X Genotype matrix. Required when `mvsusieFit` is NULL. +#' @param Y Phenotype matrix. Required when `mvsusieFit` is NULL. +#' @param priorVariance Optional mvSuSiE prior variance list. +#' @param residualVariance Optional residual variance matrix. +#' @param L Maximum number of components. +#' @param LGreedy Initial greedy number of components. +#' @param verbose If TRUE, prints mvSuSiE fitting progress. +#' @param ... Additional arguments passed to `fitMvsusie()` when fitting. +#' @return Matrix of variant weights. +#' @export +mvsusieWeights <- function(mvsusieFit = NULL, X = NULL, Y = NULL, + priorVariance = NULL, residualVariance = NULL, + L = 30, LGreedy = 5, verbose = FALSE, ...) { + if (!requireNamespace("mvsusieR", quietly = TRUE)) { + # nocov start + stop("Package 'mvsusieR' is required. Install with: devtools::install_github('stephenslab/mvsusieR')") + # nocov end + } + if (is.null(mvsusieFit)) { + message("mvsusieFit is not provided; fitting mvSuSiE now ...") + if (is.null(X) || is.null(Y)) { + stop("Both X and Y must be provided if mvsusieFit is NULL.") + } + if (is.null(priorVariance)) priorVariance <- mvsusieR::create_mixture_prior(R = ncol(Y)) + if (!is.null(LGreedy)) LGreedy <- min(LGreedy, L) + + mvsusieFit <- fitMvsusie( + X = X, Y = Y, L = L, L_greedy = LGreedy, prior_variance = priorVariance, + residual_variance = residualVariance, + estimate_residual_variance = TRUE, + verbose = verbose, ... + ) + } + return(mvsusieR::coef.mvsusie(mvsusieFit)[-1, ]) +} + +# Build the wavelet synthesis (inverse-DWT) matrix S (n_wac x nFeat) for the +# basis fSuSiE uses, by reconstructing each unit wavelet coefficient through the +# SAME $D / $C assignment as out_prep.susiF (detail columns -> $D, the coarsest +# scaling column -> last $C entry), then `wavethresh::wr`. A wavelet-coefficient +# row `c` then maps to the feature domain as `c %*% S`. `scaleCols` is the +# column index of the scaling coefficient(s) (per the prior family). fSuSiE's +# default basis (DaubLeAsymm, filter 10) matches `wavethresh::wd`'s default, the +# same one out_prep uses, so the plain `wd(rep(0, nWac))` template is consistent. +# @noRd +.fsusieSynthesisMatrix <- function(nWac, scaleCols) { + template <- wavethresh::wd(rep(0, nWac)) + reconstructUnit <- function(k) { + coeffRow <- numeric(nWac) + coeffRow[k] <- 1 + temp <- template + temp$D <- coeffRow[-scaleCols] + temp$C[length(temp$C)] <- sum(coeffRow[scaleCols]) + as.numeric(wavethresh::wr(temp)) + } + do.call(rbind, lapply(seq_len(nWac), reconstructUnit)) +} + +#' Compute fSuSiE feature-level TWAS weights +#' +#' Collapses a functional SuSiE (\code{fsusieR::susiF}) fit back to a +#' \code{variants x features} weight matrix usable for TWAS prediction of each +#' molecular feature. fSuSiE fits the regression in the wavelet domain, storing +#' per-SNP posterior-mean wavelet effects \code{fitted_wc[[l]]} +#' (\code{nSNP x n_wac}) and inclusion probabilities \code{alpha[[l]]}. Because +#' the inverse wavelet transform \code{wr()} is linear, the posterior-mean +#' prediction pushes through to a per-SNP, per-feature weight matrix: +#' \deqn{W[j, f] = \sum_l alpha[[l]][j] \cdot +#' \mathrm{wr}\!\left(fitted\_wc[[l]][j, ] / csd\_X[j]\right)[f].} +#' This is the exact analog of \code{coef.susie} for scalar SuSiE (all SNPs, +#' alpha-weighted), which spreads weight across the credible set — more robust +#' for out-of-sample TWAS than fSuSiE's in-sample lead-SNP summary +#' (\code{update_cal_indf}). +#' +#' The reconstruction uses the raw posterior wavelet coefficients +#' \code{fitted_wc}, so it is independent of the \code{post_processing} mode +#' (\code{"smash"}/\code{"TI"}/\code{"HMM"}/\code{"none"}) — that smoothing only +#' denoises the alpha-collapsed display curve \code{fitted_func}, never the +#' per-SNP predictive coefficients. The \code{$D}/\code{$C} coefficient layout +#' and wavelet basis mirror \code{out_prep.susiF}, so the feature-domain output +#' matches fSuSiE's own conventions. +#' +#' @param fsusieFit A fitted \code{fsusieR::susiF} object. Must retain +#' \code{fitted_wc}, \code{alpha}, \code{csd_X}, \code{n_wac}, and +#' \code{outing_grid} (i.e. an untrimmed fit). Required. +#' @param X,Y Accepted for call-compatibility with the multivariate +#' weight-method dispatch in \code{\link{learnTwasWeights}}, which invokes +#' every method as \code{fn(X = ., Y = ., ...)}. fSuSiE is a functional method +#' that cannot be refit from a bare \code{(X, Y)} pair (it needs feature +#' positions and the wavelet model), so these are ignored: a fitted +#' \code{fsusieFit} is always required. +#' @param variantIds Optional character vector of variant IDs (length = number +#' of SNPs in the fit) for the matrix row names. Defaults to +#' \code{names(fsusieFit$csd_X)} / \code{names(fsusieFit$pip)}. +#' @param featureNames Optional character vector of feature (outcome) names for +#' the matrix column names. Defaults to the fit's \code{outing_grid}. +#' @param retainFit If TRUE, stores the fit as an attribute on the result. +#' @return A numeric matrix of variant (rows) by feature (columns) weights. +#' @export +fsusieWeights <- function(fsusieFit = NULL, X = NULL, Y = NULL, + variantIds = NULL, featureNames = NULL, + retainFit = FALSE) { + if (is.null(fsusieFit)) { + stop("fsusieWeights: `fsusieFit` is required. fSuSiE is functional and ", + "cannot be refit from a bare (X, Y); fit it via fineMappingPipeline() ", + "and pass the fitted fsusieR::susiF object.") + } + # Fast path: a trimmed fit carries the precomputed variants x features weight + # matrix in `$coef` (fineMappingPipeline computes it eagerly while the full + # fit is in hand, because trimming drops fitted_wc/csd_X/...). Return it. + if (is.matrix(fsusieFit$coef) && + is.null(fsusieFit$fitted_wc)) { + W <- fsusieFit$coef + if (!is.null(variantIds) && length(variantIds) == nrow(W)) + rownames(W) <- variantIds + if (retainFit) attr(W, "fit") <- fsusieFit + return(W) + } + if (!requireNamespace("fsusieR", quietly = TRUE)) { + # nocov start + stop("Package 'fsusieR' is required for fsusieWeights().") + # nocov end + } + if (!requireNamespace("wavethresh", quietly = TRUE)) { + # nocov start + stop("Package 'wavethresh' is required for fsusieWeights().") + # nocov end + } + fit <- fsusieFit + missingSlots <- setdiff(c("fitted_wc", "alpha", "csd_X", "n_wac", + "outing_grid"), names(fit)) + if (length(missingSlots) > 0L) { + stop("fsusieWeights: the fSuSiE fit is missing required slot(s): ", + paste(missingSlots, collapse = ", "), + ". Pass an untrimmed fit (these are dropped when trimmed).") + } + + csdX <- as.numeric(fit$csd_X) + p <- length(csdX) + nWac <- fit$n_wac + + # alpha may be a list (one vector per effect, the fsusieR::susiF default) or + # a matrix/data.frame (L x nSNP) after fsusieWrapper reshaping. Normalize to + # a list of per-effect vectors. + alpha <- fit$alpha + alphaList <- if (is.list(alpha) && !is.data.frame(alpha)) { + lapply(alpha, as.numeric) + } else { + am <- as.matrix(alpha) + lapply(seq_len(nrow(am)), function(l) as.numeric(am[l, ])) + } + L <- length(fit$fitted_wc) + + # Scaling-coefficient column(s): the coarsest level for a per-scale prior, + # else the last column. Mirrors the two branches of out_prep.susiF. + perScale <- "mixture_normal_per_scale" %in% class(fsusieR::get_G_prior(fit)) + indxLst <- fsusieR::gen_wavelet_indx(log2(length(fit$outing_grid))) + scaleCols <- if (perScale) indxLst[[length(indxLst)]] + else ncol(as.matrix(fit$fitted_wc[[1L]])) + + # One inverse transform per wavelet coefficient (built once), then every SNP / + # effect is a matrix multiply: W = sum_l (alpha_l/csd_X-scaled fitted_wc_l) %*% S. + S <- .fsusieSynthesisMatrix(nWac, scaleCols) + nFeat <- ncol(S) + invCsd <- 1 / csdX + + W <- matrix(0, nrow = p, ncol = nFeat) + for (l in seq_len(L)) { + wc <- as.matrix(fit$fitted_wc[[l]]) + rowScale <- alphaList[[l]] * invCsd + W <- W + (rowScale * wc) %*% S + } + + rn <- variantIds + if (is.null(rn)) rn <- names(fit$csd_X) + if (is.null(rn)) rn <- names(fit$pip) + if (!is.null(rn) && length(rn) == p) rownames(W) <- rn + cn <- featureNames + if (is.null(cn) && !is.null(fit$outing_grid) && + length(fit$outing_grid) == nFeat) { + cn <- as.character(fit$outing_grid) + } + if (!is.null(cn) && length(cn) == nFeat) colnames(W) <- cn + if (retainFit) attr(W, "fit") <- fit + W +} +#' Compute mvSuSiE-RSS TWAS weights from summary statistics +#' +#' Multi-context summary-statistics analog of \code{\link{mvsusieWeights}}: +#' extracts coefficients from an existing \code{mvsusieR::mvsusie_rss} fit, +#' or fits one from \code{stat$z} (variants x conditions) and \code{LD}. +#' +#' Follows the \code{*_rss_weights(stat, LD, ...)} contract. Expects +#' \code{stat$z} to be a numeric matrix (variants x conditions) and +#' \code{stat$n} a per-context vector or scalar. +#' +#' @param stat A list with \code{z} (matrix variants x conditions) and +#' \code{n} (numeric vector or scalar). +#' @param LD LD correlation matrix. +#' @param mvsusieRssFit Optional pre-fitted \code{mvsusieRss} object. +#' @param priorVariance Optional mvSuSiE prior variance specification. +#' When NULL, \code{mvsusieR::create_mixture_prior()} is used with +#' \code{R = ncol(stat$z)}. +#' @param residualVariance Optional residual covariance matrix. +#' @param L Maximum number of single effects (default 30). +#' @param LGreedy Initial greedy effect count (default 5). +#' @param retainFit If TRUE, attaches the fitted object as an attribute. +#' @param ... Additional arguments forwarded to \code{mvsusieR::mvsusie_rss}. +#' +#' @return A numeric matrix of per-variant per-context weights +#' (variants x conditions). +#' @export +mvsusieRssWeights <- function(stat, LD, mvsusieRssFit = NULL, + priorVariance = NULL, + residualVariance = NULL, + L = 30, LGreedy = 5, + retainFit = FALSE, ...) { + if (!requireNamespace("mvsusieR", quietly = TRUE)) { + # nocov start + stop("Package 'mvsusieR' is required. ", + "Install with: devtools::install_github('stephenslab/mvsusieR')") + # nocov end + } + if (is.null(mvsusieRssFit)) { + Z <- if (is.matrix(stat$z)) stat$z else as.matrix(stat$z) + if (ncol(Z) < 2) { + stop("mvsusieRssWeights expects stat$z to have >= 2 columns ", + "(one per context). For single-context use susieRssWeights().") + } + # mvsusieR::mvsusie_rss expects N to be a single scalar + nScalar <- as.numeric(stats::median(stat$n)) + if (is.null(priorVariance)) { + priorVariance <- mvsusieR::create_mixture_prior(R = ncol(Z)) + } + if (!is.null(LGreedy)) LGreedy <- min(LGreedy, L) + mvsusieRssFit <- fitMvsusieRss( + Z = Z, R = LD, N = nScalar, + prior_variance = priorVariance, + residual_variance = residualVariance, ... + ) + } + weights <- mvsusieR::coef.mvsusie(mvsusieRssFit)[-1, , drop = FALSE] + if (retainFit) attr(weights, "fit") <- mvsusieRssFit + weights +} + +# ============================================================================= +# Cross-condition credible-set merging +# ============================================================================= + +#' Merge SuSiE credible sets across conditions +#' +#' Reconciles per-condition (univariate) SuSiE fine-mapping into a single set of +#' merged credible sets. Each row of the supplied +#' \code{\link{QtlFineMappingResult}} is treated as one condition (its +#' \code{topLoci} carrying that condition's credible sets); credible sets that +#' share variants across conditions are unioned via connected components, and +#' every variant is reported with its merged credible-set label plus the maximum +#' and median PIP across the conditions it appears in. A typical use is selecting +#' a representative lead variant per merged credible set to assemble the +#' \code{"strong"} input for \code{\link{mashPipeline}}. +#' +#' @param fineMappingResult A \code{\link{QtlFineMappingResult}} (or any +#' \code{FineMappingResult}) produced by per-condition SuSiE fine-mapping. Each +#' entry's \code{topLoci} must carry a credible-set column +#' (\code{cs_}, e.g. \code{cs_95}, with values such as +#' \code{"susie_1"} where the trailing integer is the set index and \code{_0} +#' means "not in a credible set") and a PIP column. +#' @param coverage Credible-set coverage level selecting the \code{cs_*} column +#' (default \code{0.95} -> \code{cs_95}). +#' @return A \code{data.frame} with one row per variant: \code{variant_id}, +#' \code{credibleSetNames} (the merged credible-set label), \code{maxPip} and +#' \code{medianPip}; or \code{NULL} when no credible sets are present. +#' @seealso \code{\link{fineMappingPipeline}}, \code{\link{mashPipeline}} +#' @importFrom purrr map_dfr +#' @importFrom stats median +#' @export +mergeSusieCs <- function(fineMappingResult, coverage = 0.95) { + if (!is(fineMappingResult, "FineMappingResultBase")) { + stop("`fineMappingResult` must be a QtlFineMappingResult (or FineMappingResult).") + } + csCol <- paste0("cs_", as.integer(round(coverage * 100))) + + # Identify variant IDs that are associated with more than one credible set. + identifyOverlapSets <- function(variantsSetsAndPipsList) { + overlapSets <- list() + for (variantId in names(variantsSetsAndPipsList)) { + sets <- variantsSetsAndPipsList[[variantId]][["sets"]] + if (length(sets) > 1) { + overlapSets[[variantId]] <- sets + } + } + return(overlapSets) + } + # Merge overlapping credible sets using connected components. + mergeAndUpdateOverlapSets <- function(variantsSetsAndPipsList, overlapSets) { + allSets <- unique(unlist(overlapSets)) + if (length(allSets) == 0) return(list()) + + parent <- setNames(allSets, allSets) + findRoot <- function(x) { + while (!identical(parent[[x]], x)) x <- parent[[x]] + x + } + unionSets <- function(a, b) { + rootA <- findRoot(a) + rootB <- findRoot(b) + if (!identical(rootA, rootB)) parent[[rootB]] <<- rootA + } + + for (sets in overlapSets) { + if (length(sets) > 1) { + for (s in sets[-1]) unionSets(sets[[1]], s) + } + } + + components <- split(names(parent), vapply(names(parent), findRoot, character(1))) + setNameMap <- list() + for (members in components) { + label <- paste(sort(members), collapse = ",") + for (s in members) { + setNameMap[[s]] <- label + } + } + + # Update each variant's credible set names + updatedCredibleSets <- lapply( + setNames(names(variantsSetsAndPipsList), names(variantsSetsAndPipsList)), + function(variantId) { + currentSets <- variantsSetsAndPipsList[[variantId]][["sets"]] + mapped <- intersect(currentSets, names(setNameMap)) + if (length(mapped) > 0) { + setNameMap[[mapped[1]]] + } else { + paste(sort(unique(currentSets)), collapse = ",") + } + } + ) + return(updatedCredibleSets) + } + + # Each row (entry) of the fine-mapping result is one condition. Build a flat + # data frame of (variant_id, pip, set_name) across conditions, giving each + # condition's credible sets a unique "cs__" label. + extractTopLoci <- function() { + entries <- fineMappingResult$entry + rows <- map_dfr(seq_along(entries), function(i) { + topLoci <- .translateLegacyTopLociCsColumns(getTopLoci(entries[[i]])) + if (is.null(topLoci) || nrow(topLoci) == 0 || !(csCol %in% names(topLoci))) + return(NULL) + pipCol <- resolvePipColumn(topLoci) + if (is.null(pipCol)) return(NULL) + csIdx <- .fmCsIdx(topLoci[[csCol]]) + setNum <- unique(csIdx) + setNum <- setNum[!is.na(setNum) & setNum != 0] + if (length(setNum) == 0) return(NULL) + + map_dfr(setNum, function(sn) { + keep <- !is.na(csIdx) & csIdx == sn + df <- topLoci[keep, c("variant_id", pipCol), drop = FALSE] + names(df)[names(df) == pipCol] <- "pip" + df$set_name <- paste0("cs_", i, "_", sn) + df + }) + }) + + if (is.null(rows) || nrow(rows) == 0) return(list()) + + # Aggregate by variant_id preserving first-seen order. + seenOrder <- unique(rows$variant_id) + splitRows <- split(rows, factor(rows$variant_id, levels = seenOrder)) + lapply(splitRows, function(df) { + list(sets = df$set_name, pips = df$pip) + }) + } + + combineTopLoci <- function(extractedResult) { + if (length(extractedResult) == 0) return(NULL) + + overlapSets <- identifyOverlapSets(extractedResult) + hasOverlaps <- length(overlapSets) != 0 + mergedSets <- if (hasOverlaps) { + mergeAndUpdateOverlapSets(extractedResult, overlapSets = overlapSets) + } else { + NULL + } + + topLociDf <- do.call(rbind, lapply(names(extractedResult), function(variantId) { + maxPip <- max(unlist(extractedResult[[variantId]]$pips)) + medianPip <- median(unlist(extractedResult[[variantId]]$pips)) + credibleSetNames <- if (hasOverlaps) { + mergedSets[[variantId]] + } else { + paste(sort(unique(unlist(extractedResult[[variantId]]$sets))), collapse = ",") + } + data.frame( + variant_id = variantId, credibleSetNames = credibleSetNames, + maxPip = maxPip, medianPip = medianPip, stringsAsFactors = FALSE + ) + })) + return(topLociDf) + } + + extractedTopLoci <- extractTopLoci() + if (length(extractedTopLoci) == 0) return(NULL) + combinedTopLociDf <- combineTopLoci(extractedTopLoci) + if (is.null(combinedTopLociDf) || nrow(combinedTopLociDf) == 0) return(NULL) + combinedTopLociDf <- combinedTopLociDf[!duplicated(combinedTopLociDf$variant_id), ] + rownames(combinedTopLociDf) <- NULL + return(combinedTopLociDf) +} diff --git a/R/genotypeIo.R b/R/genotypeIo.R index 68c6d975..c8d481c0 100644 --- a/R/genotypeIo.R +++ b/R/genotypeIo.R @@ -40,10 +40,12 @@ setMethod("readGenotypes", #' @keywords internal .makeGdsHandle <- function(path) { + # nocov start if (!requireNamespace("SNPRelate", quietly = TRUE)) stop("Package 'SNPRelate' is required for reading GDS files.") if (!requireNamespace("gdsfmt", quietly = TRUE)) stop("Package 'gdsfmt' is required for reading GDS files.") + # nocov end if (!file.exists(path)) stop("GDS file not found: ", path) @@ -67,8 +69,10 @@ setMethod("readGenotypes", #' @keywords internal .makeVcfHandle <- function(path, ...) { + # nocov start if (!requireNamespace("VariantAnnotation", quietly = TRUE)) stop("Package 'VariantAnnotation' is required for reading VCF files.") + # nocov end if (!file.exists(path)) stop("VCF file not found: ", path) @@ -103,8 +107,10 @@ setMethod("readGenotypes", #' @keywords internal .makePlink1Handle <- function(path, ...) { + # nocov start if (!requireNamespace("snpStats", quietly = TRUE)) stop("Package 'snpStats' is required for reading plink1 files.") + # nocov end stem <- .plinkStem(path) bedFile <- paste0(stem, ".bed") @@ -150,8 +156,10 @@ setMethod("readGenotypes", #' @keywords internal .makePlink2Handle <- function(path, ...) { + # nocov start if (!requireNamespace("pgenlibr", quietly = TRUE)) stop("Package 'pgenlibr' is required for reading plink2 files.") + # nocov end stem <- .plinkStem(path) @@ -624,7 +632,9 @@ readFam <- function(bed) { # open bed/bim/fam: A PLINK 1 .bed is a valid .pgen openBed <- function(bed) { if (!requireNamespace("pgenlibr", quietly = TRUE)) { + # nocov start stop("To use this function, please install pgenlibr: https://cran.r-project.org/web/packages/pgenlibr/index.html") + # nocov end } rawSCt <- nrow(readFam(bed)) return(pgenlibr::NewPgen(bed, raw_sample_ct = rawSCt)) @@ -798,7 +808,9 @@ resolvePlink2Paths <- function(prefix) { #' @noRd readPvar <- function(pvarPath) { if (!requireNamespace("pgenlibr", quietly = TRUE)) { + # nocov start stop("pgenlibr is required. Install from https://cran.r-project.org/web/packages/pgenlibr/index.html") + # nocov end } pvar <- pgenlibr::NewPvar(pvarPath) on.exit(pgenlibr::ClosePvar(pvar), add = TRUE) diff --git a/R/jointEngine.R b/R/jointEngine.R new file mode 100644 index 00000000..9ec0e747 --- /dev/null +++ b/R/jointEngine.R @@ -0,0 +1,905 @@ +# ============================================================================= +# Joint-analysis engine (Phase 2; dev/jointSpecification-s4-refactor.md) +# ----------------------------------------------------------------------------- +# Replaces the ~14 hand-written joint-dispatch leaf functions with: the uniform +# JointGroup contract (R/JointGroup.R), per-(dataForm, pipeline) `fitJointGroup` +# methods, one enumerator per (pattern, dataForm), one `.jointDispatchTable` +# wiring row per valid cell, and the `.runJointCell` engine. +# +# Identity model: a group's `conditions` data.frame (one row per Y/Z column) +# carries each fitted condition's (study, context, trait). The output row keying +# is DERIVED -- an axis that varies across conditions collapses to "joint" with +# members in jointStudies/jointContexts/jointTraits; a constant axis keeps its +# value. cross-context / cross-trait / cross-study are the single-varying-axis +# case; composed is >1 varying axis. Fitters are shared across patterns; only +# enumeration differs. +# ============================================================================= + +#' @include AllGenerics.R JointGroup.R +NULL + +# ---- identity derivation ---------------------------------------------------- + +# The data-driven-prior LOOKUP key for a group's conditions: a varying (jointed) +# axis -> NULL (match-any, because the shared joint mr.mash fit lives on every +# per-context row), a constant axis -> its single value. Used only to find the +# mr.mash fit; the OUTPUT rows carry each condition's REAL (study, context, +# trait). +.jointPriorKey <- function(conditions) { + axisVal <- function(ax) { + u <- unique(as.character(conditions[[ax]])) + if (length(u) > 1L) NULL else u[[1L]] + } + list(study = axisVal("study"), context = axisVal("context"), + trait = axisVal("trait")) +} + +# The ";"-joined distinct members of a varying axis (the per-row provenance tag +# jointStudies/Contexts/Traits), or NA when the axis is constant. +.jointAxisMembers <- function(conditions, ax) { + u <- unique(as.character(conditions[[ax]])) + if (length(u) > 1L) paste(u, collapse = ";") else NA_character_ +} + +# Slice a fine-mapping per-method CV payload (.fmSliceCv output: +# list(samplePartition, prediction = list(_predicted = sample x condition), +# performance = list(_performance = condition x 6))) down to one condition r, +# so each per-context FineMappingEntry carries that context's CV. +.fmSliceCvCondition <- function(cv, r) { + if (is.null(cv)) return(NULL) + out <- list(samplePartition = cv$samplePartition) + if (!is.null(cv$prediction)) + out$prediction <- lapply(cv$prediction, function(m) m[, r, drop = FALSE]) + if (!is.null(cv$performance)) + out$performance <- lapply(cv$performance, function(m) m[r, , drop = FALSE]) + out +} + +# Slice a twas joint cvResult (.jointTwasCvResult output: list(samplePartition, +# predictions = sample x condition, metrics = condition x 6, foldFits)) to one +# condition r. The per-fold mr.mash fits span all conditions, so foldFits is +# shared unchanged. +.sliceTwasCvResultToCondition <- function(cvRes, r) { + if (is.null(cvRes)) return(NULL) + list(samplePartition = cvRes$samplePartition, + predictions = if (!is.null(cvRes$predictions)) + cvRes$predictions[, r, drop = TRUE] else NULL, + metrics = if (!is.null(cvRes$metrics)) + cvRes$metrics[r, , drop = TRUE] else NULL, + foldFits = cvRes$foldFits) +} + +# Mutable accumulator for the joint rows the engine assembles; each add() +# appends one fitted group as one result row, deriving its identity + joint* +# members from the group's conditions. +.jointRows <- function() { + e <- new.env(parent = emptyenv()) + e$study <- character(0); e$context <- character(0); e$trait <- character(0) + e$method <- character(0); e$entries <- list() + e$jointStudies <- character(0); e$jointContexts <- character(0) + e$jointTraits <- character(0) + e$add <- function(study, context, trait, method, entry, + jointStudies = NA_character_, + jointContexts = NA_character_, + jointTraits = NA_character_) { + e$study <- c(e$study, study) + e$context <- c(e$context, context) + e$trait <- c(e$trait, trait) + e$method <- c(e$method, method) + e$entries[[length(e$entries) + 1L]] <- entry + e$jointStudies <- c(e$jointStudies, jointStudies) + e$jointContexts <- c(e$jointContexts, jointContexts) + e$jointTraits <- c(e$jointTraits, jointTraits) + } + e +} + +# ---- fitters (fitJointGroup) ------------------------------------------------ + +# (individual, fine-mapping) -> mvSuSiE joint fit + honest per-fold CV prior. +setMethod("fitJointGroup", signature("IndividualJointGroup", "FmJointPipeline"), + function(group, pipeline, token, args) { + cfg <- pipeline@config + Xc <- group@X; Yc <- group@Y; nCond <- ncol(Yc) + # fsusie: functional SuSiE over the trait domain (cross-trait, individual- + # level only -- no multi-context, no RSS). One per-condition entry per trait + # (no data-driven prior); uses the enumerator-stored per-trait `pos`. + if (identical(token, "fsusie")) { + if (length(group@pos) != nCond) + stop("fitJointGroup: fsusie requires per-trait positions ('pos'); ", + "it is cross-trait individual-level only.") + verbose <- if (is.null(cfg$verbose)) 1 else cfg$verbose + fit <- do.call(fitFsusie, + .fmMergeUserArgs(list(X = Xc, Y = Yc, pos = group@pos), + "fsusie", args$methodArgs[["fsusie"]])) + # Collapse the functional fit to a variants x features weight matrix now, + # while fitted_wc/csd_X are still present (trimming drops them); store on + # $coef so a trimmed fit can still yield TWAS weights (port of mvJobs). + fit$coef <- tryCatch( + fsusieWeights(fsusieFit = fit, variantIds = colnames(Xc)), + error = function(e) NULL) + fit <- .setFinemappingFitClass(fit, "fsusie") + cvM <- NULL + cvFolds <- if (is.null(cfg$cvFolds)) 0L else cfg$cvFolds + if (cvFolds > 1L) { + cv <- .fmCrossValidate(Xc, Yc, "fsusie", args$methodArgs, cvFolds, + samplePartition = cfg$samplePartition, + coverage = cfg$coverage, pos = group@pos, + verbose = verbose) + cvM <- .fmSliceCv(cv, "fsusie") + } + return(lapply(seq_len(nCond), function(r) { + e <- .fmPostprocessOne( + fit = fit, method = "fsusie", dataX = Xc, dataY = NULL, conditionIdx = r, + coverage = cfg$coverage, secondaryCoverage = cfg$secondaryCoverage, + signalCutoff = cfg$signalCutoff, minAbsCorr = cfg$minAbsCorr, + csInput = "fsusie") + if (!is.null(cvM)) e <- .fmAttachCv(e, .fmSliceCvCondition(cvM, r)) + e + })) + } + if (!identical(token, "mvsusie")) + stop("fitJointGroup(IndividualJointGroup, FmJointPipeline): unsupported ", + "token '", token, "' (expected 'mvsusie' or 'fsusie').") + ddCut <- if (is.null(cfg$dataDrivenPriorWeightsCutoff)) 1e-10 + else cfg$dataDrivenPriorWeightsCutoff + verbose <- if (is.null(cfg$verbose)) 1 else cfg$verbose + # SER pre-screen: drop conditions with no single-effect signal before the + # joint fit (port of mvJobs' skipConditions). < 2 survivors -> skip the whole + # joint (an all-NULL list, which .runJointCell turns into zero rows). The fit, + # prior, and CV all run on the surviving columns only; each survivor's per- + # condition posterior is later sliced at its position in the fitted set. + keep <- rep(TRUE, nCond) + if (.fmScreenActive(args$pipCutoffToSkip)) { + keep <- as.logical(.fmSerScreenColumns(Xc, Yc, args$pipCutoffToSkip)) + if (sum(keep) < 2L) { + if (verbose >= 1) + message(sprintf( + "Skipping mvsusie joint fit: < 2 of %d conditions pass the SER pre-screen.", + nCond)) + return(vector("list", nCond)) + } + if (sum(keep) < nCond && verbose >= 1) + message(sprintf( + "mvsusie joint fit: SER pre-screen kept %d of %d conditions.", + sum(keep), nCond)) + } + survivors <- which(keep) + Ys <- Yc[, survivors, drop = FALSE] + key <- .jointPriorKey(group@conditions) + mvFitParts <- .fmLookupMrmashFit(args$twasWeights, key$study, key$trait, + context = key$context) + mvCv <- .fmLookupMrmashCv(args$twasWeights, key$study, key$trait, + context = key$context) + mvPrior <- .buildMvsusieReweightedPrior(mvFitParts, colnames(Ys), ddCut) + mvBaseArgs <- list(X = Xc, Y = Ys, + prior_variance = mvPrior$priorVariance, + coverage = cfg$coverage) + if (!is.null(mvPrior$residualVariance)) + mvBaseArgs$residual_variance <- mvPrior$residualVariance + fit <- do.call(fitMvsusie, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + args$methodArgs[["mvsusie"]])) + fit <- .setFinemappingFitClass(fit, "mvsusie") + cvM <- NULL + cvFolds <- if (is.null(cfg$cvFolds)) 0L else cfg$cvFolds + if (cvFolds > 1L) { + sp <- cfg$samplePartition + if (is.null(sp) && !is.null(mvCv)) sp <- mvCv$samplePartition + mvPriorCv <- .fmBuildMvsusiePriorCv(mvCv, mvFitParts, colnames(Ys), ddCut) + cv <- .fmCrossValidate(Xc, Ys, "mvsusie", args$methodArgs, cvFolds, + samplePartition = sp, coverage = cfg$coverage, + verbose = verbose, mvPrior = mvPrior, + mvPriorCv = mvPriorCv) + cvM <- .fmSliceCv(cv, "mvsusie") + } + # One per-context entry per ORIGINAL condition: NULL for screened-out columns + # (skipped downstream), else that condition's posterior (sliced at its + # position in the fitted survivor set) with shared pip/cs + its CV slice. + lapply(seq_len(nCond), function(i) { + if (!keep[i]) return(NULL) + r <- match(i, survivors) + e <- .fmPostprocessOne( + fit = fit, method = "mvsusie", dataX = Xc, dataY = NULL, conditionIdx = r, + coverage = cfg$coverage, secondaryCoverage = cfg$secondaryCoverage, + signalCutoff = cfg$signalCutoff, minAbsCorr = cfg$minAbsCorr, + csInput = "X") + if (!is.null(cvM)) e <- .fmAttachCv(e, .fmSliceCvCondition(cvM, r)) + e + }) + }) + +# (sumstats, fine-mapping) -> mvSuSiE-rss joint fit. RSS has no sample folds and +# no fsusie variant. +setMethod("fitJointGroup", signature("SumStatsJointGroup", "FmJointPipeline"), + function(group, pipeline, token, args) { + if (identical(token, "fsusie")) + stop("fsusie has no RSS variant; it requires individual-level input.") + if (!identical(token, "mvsusie")) + stop("fitJointGroup(SumStatsJointGroup, FmJointPipeline): unsupported ", + "token '", token, "' (expected 'mvsusie').") + cfg <- pipeline@config + ddCut <- if (is.null(cfg$dataDrivenPriorWeightsCutoff)) 1e-10 + else cfg$dataDrivenPriorWeightsCutoff + key <- .jointPriorKey(group@conditions) + mvFitParts <- .fmLookupMrmashFit(args$twasWeights, key$study, key$trait, + context = key$context) + mvPrior <- .buildMvsusieReweightedPrior(mvFitParts, colnames(group@Z), ddCut) + mvBaseArgs <- list(Z = group@Z, R = group@R, + N = as.numeric(stats::median(group@N)), + prior_variance = mvPrior$priorVariance, + coverage = cfg$coverage) + if (!is.null(mvPrior$residualVariance)) + mvBaseArgs$residual_variance <- mvPrior$residualVariance + fit <- do.call(fitMvsusieRss, + .fmMergeUserArgs(mvBaseArgs, "mvsusie", + args$methodArgs[["mvsusie"]])) + fit <- .setFinemappingFitClass(fit, "mvsusie") + # One per-condition entry (RSS has no sample folds). + lapply(seq_len(ncol(group@Z)), function(r) .fmPostprocessOne( + fit = fit, method = "mvsusie", dataX = group@R, dataY = NULL, + conditionIdx = r, coverage = cfg$coverage, + secondaryCoverage = cfg$secondaryCoverage, signalCutoff = cfg$signalCutoff, + minAbsCorr = cfg$minAbsCorr, csInput = "Xcorr")) + }) + +# Reshape a twasWeightsCv() result into the single joint entry's cvResult: the +# out-of-fold prediction matrix, the per-condition metric rows, and the per-fold +# mr.mash fits (named fold_) that fineMappingPipeline's mvSuSiE path consumes. +.jointTwasCvResult <- function(cv, token) { + if (is.null(cv)) return(NULL) + pickByBase <- function(lst) { + if (is.null(lst) || length(lst) == 0L) return(NULL) + bare <- sub("(_predicted|Predicted|_performance|Performance)$", "", names(lst)) + hit <- which(bare == token) + if (length(hit) == 0L) NULL else lst[[hit[[1L]]]] + } + ffKey <- paste0(token, "_weights") + foldFits <- if (!is.null(cv$foldFits)) { + ff <- lapply(cv$foldFits, function(f) f[[ffKey]]) + if (all(vapply(ff, is.null, logical(1)))) NULL else ff + } else NULL + list(samplePartition = cv$samplePartition, + predictions = pickByBase(cv$prediction), + metrics = pickByBase(cv$performance), + foldFits = foldFits) +} + +# learnTwasWeights key for a bare token (fine-mapping tokens key differently, +# e.g. susieInf -> susie_inf_weights). +.twasMethodKey <- function(token) { + ad <- .twasFineMappingMethodAdapters[[token]] + if (!is.null(ad)) ad$methodKey else paste0(token, "_weights") +} + +# Fine-mapping CV handoff for one twas method: extract that method's out-of-fold +# predictions + performance from fineMappingPipeline's retained CV (shared fold +# partition), shaped like .jointTwasCvResult so the per-condition slice reuses it +# instead of re-cross-validating an FM-derived method (susie / mvsusie / ...). +.twasFmHandoffCv <- function(fineMappingCv, token) { + if (is.null(fineMappingCv) || is.null(fineMappingCv$prediction)) return(NULL) + base <- sub("(_predicted|Predicted)$", "", names(fineMappingCv$prediction)) + hit <- which(base == token) + if (length(hit) == 0L) return(NULL) + pBase <- sub("(_performance|Performance)$", "", names(fineMappingCv$performance)) + pHit <- which(pBase == token) + list(samplePartition = fineMappingCv$samplePartition, + predictions = fineMappingCv$prediction[[hit[[1L]]]], + metrics = if (length(pHit)) fineMappingCv$performance[[pHit[[1L]]]] else NULL, + foldFits = NULL) +} + +# (individual, twas) -> ONE weight method fit over the group's conditions, as +# per-condition entries (sliced from the variants x conditions weight matrix), +# each with its full-data weights + retained fit + per-condition CV slice. This +# is the SHARED per-method twas fitting (one method per call, like the FM +# fitters); the SR-TWAS ensemble combines methods in a layer above (see +# .twasEnsembleLayer). Owns the orchestration formerly in .twasWeightsPipelineMatrix: +# FM-fit injection (FM-derived tokens extract from the precomputed fit), the FM +# CV handoff (reuse fine-mapping's own CV), spike-and-slab pi from an internal +# mr.ash fit, CV knobs, and fitFullData = FALSE (CV-only) entries. +setMethod("fitJointGroup", signature("IndividualJointGroup", "TwasJointPipeline"), + function(group, pipeline, token, args) { + cfg <- pipeline@config + Xc <- group@X; Yc <- group@Y; nCond <- ncol(Yc) + cond <- group@conditions + fitFullData <- if (is.null(cfg$fitFullData)) TRUE else isTRUE(cfg$fitFullData) + cvFolds <- if (is.null(cfg$cvFolds)) 0L else cfg$cvFolds + rfd <- if (is.null(cfg$retainFitDetail)) "slim" else cfg$retainFitDetail + verbose <- if (is.null(cfg$verbose)) 1 else cfg$verbose + stdz <- isTRUE(cfg$standardized) + estimatePi <- isTRUE(cfg$estimatePi) + methodKey <- .twasMethodKey(token) + # Method args: prefer the full methodList (the unified pipeline path), fall + # back to methodArgs (the explicit-jointSpec dispatchers). + ma <- if (!is.null(args$methodList) && methodKey %in% names(args$methodList)) + args$methodList[[methodKey]] + else if (!is.null(args$methodArgs)) args$methodArgs[[methodKey]] else NULL + if (is.null(ma)) ma <- list() + fittedModels <- if (!is.null(args$fittedModels)) args$fittedModels else list() + # FM-fit injection: an FM-derived token extracts its weights from the + # precomputed fine-mapping fit rather than refitting. + adapter <- .twasFineMappingMethodAdapters[[token]] + if (!is.null(adapter) && !is.null(fittedModels[[token]]) && + is.null(ma[[adapter$fitArg]])) { + ma[[adapter$fitArg]] <- fittedModels[[token]] + } + # Spike-and-slab pi from an internal mr.ash fit (self-contained per method). + if (estimatePi && token %in% c("bayes_c", "bayes_b")) { + mrA <- learnTwasWeights(Xc, Yc, weightMethods = list(mrash_weights = list()), + study = as.character(cond$study[1L]), + context = as.character(cond$context[1L]), + trait = as.character(cond$trait[1L]), + retainFits = TRUE, standardized = stdz, + dataType = cfg$dataType, verbose = 0) + piHat <- as.numeric(estimateSparsity(mrA)) + if (token == "bayes_c" && is.null(ma$pi)) ma$pi <- piHat + if (token == "bayes_b" && is.null(ma$probIn)) ma$probIn <- piHat + } + wm <- setNames(list(ma), methodKey) + + W <- NULL; fitParts <- NULL; vids <- colnames(Xc) + if (fitFullData) { + tw <- learnTwasWeights(Xc, Yc, weightMethods = wm, + study = as.character(cond$study[1L]), + context = as.character(cond$context[1L]), + trait = as.character(cond$trait[1L]), + fittedModels = fittedModels, + retainFits = TRUE, retainFitDetail = rfd, + standardized = stdz, dataType = cfg$dataType, verbose = verbose) + base <- tw$entry[[1L]] + W <- getWeights(base) + if (!is.matrix(W)) + W <- matrix(W, ncol = nCond, dimnames = list(getVariantIds(base), NULL)) + fitParts <- getFits(base); vids <- getVariantIds(base) + } + cvRes <- NULL + if (cvFolds > 1L) { + # FM-derived method: reuse fine-mapping's own CV (shared partition); a + # method whose full-data weights are all zero is skipped (nothing to CV). + cvRes <- .twasFmHandoffCv(args$fineMappingCv, token) + if (is.null(cvRes) && !(!is.null(W) && all(W == 0))) { + # Leakage guard: a single full-data data-driven mr.mash prior reused + # across folds means each fold's prior saw its own held-out samples. + if (is.null(args$dataDrivenPriorMatricesCv) && + !is.null(ma$dataDrivenPriorMatrices)) { + warning("Cross-validating mr.mash with a single data-driven prior ", + "computed on the full data: the same prior is reused for ", + "every fold, so each fold's prior was informed by its own ", + "held-out samples (information leakage). Supply per-fold ", + "priors via dataDrivenPriorMatricesCv (--mixture-prior-cv) ", + "for honest cross-validation.") + } + sp <- if (!is.null(args$samplePartition)) args$samplePartition + else cfg$samplePartition + mcv <- if (is.null(cfg$maxCvVariants) || cfg$maxCvVariants <= 0) Inf + else cfg$maxCvVariants + cv <- twasWeightsCv(Xc, Yc, fold = cvFolds, samplePartitions = sp, + weightMethods = wm, retainFits = TRUE, maxNumVariants = mcv, + numThreads = if (is.null(cfg$cvThreads)) 1 else cfg$cvThreads, + data_driven_prior_matrices_cv = args$dataDrivenPriorMatricesCv, + verbose = verbose) + cvRes <- .jointTwasCvResult(cv, token) + } + } + # One per-condition entry: that condition's weight column + the shared fit + + # its CV slice. fitFullData = FALSE -> CV-only entry. + lapply(seq_len(nCond), function(r) { + cvR <- if (!is.null(cvRes)) .sliceTwasCvResultToCondition(cvRes, r) else NULL + if (is.null(W)) { + TwasWeightsEntry(variantIds = character(0), weights = NULL, + cvResult = cvR, standardized = stdz, + dataType = cfg$dataType) + } else { + TwasWeightsEntry(variantIds = vids, weights = W[, r], fits = fitParts, + cvResult = cvR, standardized = stdz, + dataType = cfg$dataType) + } + }) + }) + +# (sumstats, twas) -> mr.mash-rss joint fit as ONE matrix entry. No sample folds. +setMethod("fitJointGroup", signature("SumStatsJointGroup", "TwasJointPipeline"), + function(group, pipeline, token, args) { + cfg <- pipeline@config + rfd <- if (is.null(cfg$retainFitDetail)) "slim" else cfg$retainFitDetail + weights <- mrmashRssWeights(stat = list(z = group@Z, N = group@N), + LD = group@R, retainFit = TRUE, fitDetail = rfd) + vids <- rownames(weights); if (is.null(vids)) vids <- rownames(group@Z) + fitParts <- attr(weights, "fit") + if (!is.matrix(weights)) + weights <- matrix(weights, ncol = ncol(group@Z), dimnames = list(vids, NULL)) + # One per-condition entry: that condition's weight column + the shared fit. + lapply(seq_len(ncol(weights)), function(r) + TwasWeightsEntry(variantIds = vids, weights = weights[, r], fits = fitParts, + standardized = TRUE, dataType = cfg$dataType)) + }) + +# ---- result construction (construct) ---------------------------------------- + +# Both pipelines assemble identically-shaped joint rows; only the result +# collection differs (the axis-3 divergence the markers encode). Only the joint* +# columns for axes that actually vary are attached. +.constructJointArgs <- function(pipeline, rows) { + a <- list(study = rows$study, context = rows$context, trait = rows$trait, + method = rows$method, entry = rows$entries, + ldSketch = pipeline@config$ldSketch) + if (any(!is.na(rows$jointStudies))) a$jointStudies <- rows$jointStudies + if (any(!is.na(rows$jointContexts))) a$jointContexts <- rows$jointContexts + if (any(!is.na(rows$jointTraits))) a$jointTraits <- rows$jointTraits + a +} + +setMethod("construct", "FmJointPipeline", + function(pipeline, rows, ...) { + if (length(rows$entries) == 0L) return(NULL) + do.call(QtlFineMappingResult, .constructJointArgs(pipeline, rows)) + }) + +setMethod("construct", "TwasJointPipeline", + function(pipeline, rows, ...) { + if (length(rows$entries) == 0L) return(NULL) + do.call(TwasWeights, .constructJointArgs(pipeline, rows)) + }) + +# ---- enumerators (pattern x dataForm -> list) -------------------- + +# cross-context / individual: one group per scoped trait present in >= 2 scoped +# contexts (the conditions are those (study, context, trait) rows). +.enumCrossContextIndividual <- function(data, scope, args = list()) { + study <- getStudy(data) + if (!(study %in% scope$studies)) return(list()) + scopedContexts <- scope$contexts[[study]] + scopedTraits <- scope$traits[[study]] + if (length(scopedContexts) < 2L) return(list()) + verbose <- if (is.null(args$verbose)) 1 else args$verbose + groups <- list() + for (tid in scopedTraits) { + xy <- .buildIndividualCrossContextXY( + data, tid, scopedContexts, args$cisWindow, verbose, + label = "jointCrossContext", region = args$region) + if (is.null(xy)) next + groups[[length(groups) + 1L]] <- new("IndividualJointGroup", + conditions = data.frame(study = study, context = xy$perTraitContexts, + trait = tid, stringsAsFactors = FALSE), + X = xy$X, Y = xy$Y) + } + groups +} + +# cross-context / sumstats. +.enumCrossContextSumstats <- function(data, scope, args = list()) { + ldSketch <- getLdSketch(data) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + groups <- list() + for (s in scope$studies) { + scopedContexts <- scope$contexts[[s]] + scopedTraits <- scope$traits[[s]] + if (length(scopedContexts) < 2L) next + for (tid in scopedTraits) { + tupleRows <- which(studyCol == s & traitCol == tid & + contextCol %in% scopedContexts) + if (length(tupleRows) < 2L) next + ctxNames <- contextCol[tupleRows] + jz <- .buildJointSumstatZMatrix( + data, tupleRows, ctxNames, + errorLabel = "jointCrossContext (QtlSumStats)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + groups[[length(groups) + 1L]] <- new("SumStatsJointGroup", + conditions = data.frame(study = s, context = ctxNames, trait = tid, + stringsAsFactors = FALSE), + Z = jz$Z, R = ldMat, N = jz$nVec) + } + } + groups +} + +# cross-trait / individual: one group per scoped context with >= 2 scoped traits. +.enumCrossTraitIndividual <- function(data, scope, args = list()) { + study <- getStudy(data) + if (!(study %in% scope$studies)) return(list()) + scopedContexts <- scope$contexts[[study]] + scopedTraits <- scope$traits[[study]] + verbose <- if (is.null(args$verbose)) 1 else args$verbose + groups <- list() + for (cx in scopedContexts) { + xy <- .buildIndividualCrossTraitXY( + data, cx, scopedTraits, args$cisWindow, verbose, + label = "jointCrossTrait", study = study, region = args$region) + if (is.null(xy)) next + # Functional positions (one per trait column) for fsusie's domain; mvsusie + # ignores them. Matches the trait order of Y. + rr <- SummarizedExperiment::rowRanges(xy$se) + rr <- rr[match(colnames(xy$Y), rownames(xy$se))] + pos <- (GenomicRanges::start(rr) + GenomicRanges::end(rr)) / 2 + groups[[length(groups) + 1L]] <- new("IndividualJointGroup", + conditions = data.frame(study = study, context = cx, + trait = xy$traitsHere, stringsAsFactors = FALSE), + X = xy$X, Y = xy$Y, pos = as.numeric(pos)) + } + groups +} + +# cross-trait / sumstats. +.enumCrossTraitSumstats <- function(data, scope, args = list()) { + ldSketch <- getLdSketch(data) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + groups <- list() + for (s in scope$studies) { + scopedContexts <- scope$contexts[[s]] + scopedTraits <- scope$traits[[s]] + for (cx in scopedContexts) { + tupleRows <- which(studyCol == s & contextCol == cx & + traitCol %in% scopedTraits) + if (length(tupleRows) < 2L) next + trNames <- traitCol[tupleRows] + jz <- .buildJointSumstatZMatrix( + data, tupleRows, trNames, errorLabel = "jointCrossTrait (QtlSumStats)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + groups[[length(groups) + 1L]] <- new("SumStatsJointGroup", + conditions = data.frame(study = s, context = cx, trait = trNames, + stringsAsFactors = FALSE), + Z = jz$Z, R = ldMat, N = jz$nVec) + } + } + groups +} + +# cross-study / sumstats (no individual form: individual-level studies have +# disjoint samples). One group per (context, trait) present in >= 2 scoped +# studies; the study axis varies -> "joint" + jointStudies. +.enumCrossStudySumstats <- function(data, scope, args = list()) { + ldSketch <- getLdSketch(data) + studyCol <- as.character(data$study) + contextCol <- as.character(data$context) + traitCol <- as.character(data$trait) + allCtxs <- unique(unlist(scope$contexts, use.names = FALSE)) + allTrs <- unique(unlist(scope$traits, use.names = FALSE)) + groups <- list() + for (cx in allCtxs) { + for (tid in allTrs) { + tupleRows <- which(contextCol == cx & traitCol == tid & + studyCol %in% scope$studies) + keep <- vapply(tupleRows, function(r) { + s <- studyCol[r] + (cx %in% scope$contexts[[s]]) && (tid %in% scope$traits[[s]]) + }, logical(1)) + tupleRows <- tupleRows[keep] + if (length(tupleRows) < 2L) next + stNames <- studyCol[tupleRows] + jz <- .buildJointSumstatZMatrix( + data, tupleRows, stNames, errorLabel = "jointCrossStudy") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + groups[[length(groups) + 1L]] <- new("SumStatsJointGroup", + conditions = data.frame(study = stNames, context = cx, trait = tid, + stringsAsFactors = FALSE), + Z = jz$Z, R = ldMat, N = jz$nVec) + } + } + groups +} + +# composed / individual: ONE group joining every scoped (context, trait) tuple +# for the study. Both context and trait vary across conditions, so both collapse +# to "joint" (the conditions model handles multi-varying-axis uniformly; if the +# tuples happen to share a context it degrades to cross-trait, and vice versa). +.enumComposedIndividual <- function(data, scope, args = list()) { + study <- getStudy(data) + if (!(study %in% scope$studies)) return(list()) + verbose <- if (is.null(args$verbose)) 1 else args$verbose + xy <- .buildComposedIndividualXY( + data, scope, study, args$cisWindow, verbose, + label = "composed", region = args$region) + if (is.null(xy)) return(list()) + # Conditions follow the fitted Y columns ("context:trait"), so dropped tuples + # don't desync conditions from Y. Split on the first ":" (contexts are simple + # labels; trait ids may themselves contain ":"). + labs <- colnames(xy$Y) + conds <- data.frame( + study = study, + context = sub(":.*$", "", labs), + trait = sub("^[^:]*:", "", labs), + stringsAsFactors = FALSE) + list(new("IndividualJointGroup", conditions = conds, X = xy$X, Y = xy$Y)) +} + +# univariate / individual: one 1-condition group per (study, context, trait) in +# scope -- the per-(context, trait) iteration expressed as engine groups, so +# univariate methods (lasso / enet / susie / ...) flow through the SAME per- +# method fitter + ensemble layer as the joint ones (minGroup = 1). +.enumUnivariateIndividual <- function(data, scope, args = list()) { + study <- getStudy(data) + if (!(study %in% scope$studies)) return(list()) + naAction <- if (is.null(args$naAction)) "drop" else args$naAction + groups <- list() + for (cx in scope$contexts[[study]]) { + se <- getPhenotypes(data, contexts = cx) + for (tid in intersect(scope$traits[[study]], rownames(se))) { + Y <- .fmResidPheno(data, contexts = cx, traitId = tid, naAction = naAction) + X <- if (is.null(args$region)) + .fmResidGeno(data, contexts = cx, traitId = tid, + cisWindow = args$cisWindow) + else .fmResidGeno(data, contexts = cx, region = args$region) + common <- intersect(rownames(X), rownames(Y)) + if (length(common) < 2L) next + groups[[length(groups) + 1L]] <- new("IndividualJointGroup", + conditions = data.frame(study = study, context = cx, trait = tid, + stringsAsFactors = FALSE), + X = X[common, , drop = FALSE], Y = Y[common, , drop = FALSE]) + } + } + groups +} + +# composed / sumstats: general N-axis joint. `args$axes` (subset of study / +# context / trait) names the collapsed axes; rows split by the complement +# (fixed) axes form one group each. Reuses .enumerateComposedSumstatGroups. +.enumComposedSumstats <- function(data, scope, args = list()) { + axes <- args$axes + if (is.null(axes)) axes <- c("context", "trait") + ldSketch <- getLdSketch(data) + gi <- .enumerateComposedSumstatGroups(list(axes = axes), data, scope) + if (is.null(gi)) return(list()) + groups <- list() + for (gIdx in gi$groups) { + if (length(gIdx) < 2L) next + colLabels <- vapply(gIdx, function(i) + paste(gi$studyCol[i], gi$contextCol[i], gi$traitCol[i], sep = ":"), + character(1L)) + jz <- .buildJointSumstatZMatrix( + data, gIdx, colLabels, errorLabel = "composed (QtlSumStats)") + ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) + groups[[length(groups) + 1L]] <- new("SumStatsJointGroup", + conditions = data.frame(study = gi$studyCol[gIdx], + context = gi$contextCol[gIdx], + trait = gi$traitCol[gIdx], stringsAsFactors = FALSE), + Z = jz$Z, R = ldMat, N = jz$nVec) + } + groups +} + +# ---- engine ----------------------------------------------------------------- + +# Twas per-group args: resolve the group's fine-mapping fits + CV (keyed on its +# first condition -- the joint fit is shared across conditions) and fix ONE +# shared fold partition (so every method's out-of-fold CV predictions align for +# the ensemble layer). Returns `args` unchanged for fine-mapping pipelines. +.twasGroupArgs <- function(g, pipeline, args) { + if (!is(pipeline, "TwasJointPipeline")) return(args) + cfg <- pipeline@config + cond <- g@conditions + out <- args + fmRes <- args$fineMappingResult + if (!is.null(fmRes)) { + s1 <- as.character(cond$study[[1L]]) + c1 <- as.character(cond$context[[1L]]) + t1 <- as.character(cond$trait[[1L]]) + nR <- if (is.null(args$nRegions)) 1L else args$nRegions + bi <- if (is.null(args$regionIndex)) 1L else args$regionIndex + af <- .twasFineMappingFits(fmRes, study = s1, context = c1, trait = t1) + out$fittedModels <- if (is.null(af)) list() else .twasFitsForRegion(af, bi, nR) + out$fineMappingCv <- .twasCvResultFor(fmRes, s1, c1, t1) + } + cvF <- if (is.null(cfg$cvFolds)) 0L else cfg$cvFolds + if (cvF > 1L && is(g, "IndividualJointGroup")) { + sp <- args$samplePartition + if (is.null(sp)) sp <- cfg$samplePartition + if (is.null(sp) && !is.null(out$fineMappingCv)) + sp <- out$fineMappingCv$samplePartition + if (is.null(sp)) + sp <- .normalizeCvFolds(cvF, NULL, rownames(g@X))$samplePartition + out$samplePartition <- sp + } + out +} + +# Run one dispatch cell: enumerate joint groups, fit each method (S4 dispatch on +# the group x pipeline pair) per group, accumulate per-context rows, build the +# per-pipeline result. The loop is GROUP-outer / token-inner so the twas ensemble +# layer can combine a group's per-method fits in place (FM is unaffected by the +# loop order). Per-method fitting is identical for FM and twas -- one method -> +# per-condition entries; the SR-TWAS ensemble is a layer ON TOP of that. +.runJointCell <- function(cell, pipeline, data, scope, tokens, args = list()) { + groups <- cell@enumerate(data, scope, args) + groups <- Filter(function(g) nrow(g@conditions) >= cell@minGroup, groups) + if (length(groups) == 0L) return(NULL) + doEnsemble <- is(pipeline, "TwasJointPipeline") && + isTRUE(pipeline@config$ensemble) + rows <- .jointRows() + for (g in groups) { + cond <- g@conditions + # Provenance: the ";"-joined members of each varying axis, identical on every + # per-context row of this joint group. + js <- .jointAxisMembers(cond, "study") + jc <- .jointAxisMembers(cond, "context") + jt <- .jointAxisMembers(cond, "trait") + addEntries <- function(entries, method) { + for (i in seq_len(min(length(entries), nrow(cond)))) { + e <- entries[[i]] + if (is.null(e)) next + rows$add(study = as.character(cond$study[[i]]), + context = as.character(cond$context[[i]]), + trait = as.character(cond$trait[[i]]), + method = method, entry = e, + jointStudies = js, jointContexts = jc, jointTraits = jt) + } + } + # Twas: resolve this group's fine-mapping fits + CV (keyed on its first + # condition; the joint fit is shared across conditions) and fix ONE fold + # partition up front, so every method's out-of-fold CV predictions are + # aligned for the ensemble layer. FM leaves args untouched. + fitArgs <- .twasGroupArgs(g, pipeline, args) + # Per-method fit -> per-condition entries -> rows (shared FM + twas). Retain + # each method's entries so the twas ensemble layer can combine them. + perTokenEntries <- list() + for (token in tokens) { + # Resume cache: if every condition of this group is already present in the + # prior partial result (args$cache), reuse those entries instead of + # refitting. All-or-nothing per group. FM passes a QtlFineMappingResult; + # twas passes a TwasWeights -- the lookup matches the pipeline. + entries <- NULL + if (!is.null(args$cache)) { + lookup <- if (is(pipeline, "TwasJointPipeline")) .twasCacheLookup + else .fmCacheLookup + cached <- lapply(seq_len(nrow(cond)), function(i) + lookup(args$cache, as.character(cond$study[[i]]), + as.character(cond$context[[i]]), + as.character(cond$trait[[i]]), token)) + if (!any(vapply(cached, is.null, logical(1)))) entries <- cached + } + if (is.null(entries)) entries <- fitJointGroup(g, pipeline, token, fitArgs) + if (is.null(entries) || length(entries) == 0L) next + perTokenEntries[[token]] <- entries + addEntries(entries, token) + } + # SR-TWAS ensemble layer: combine the group's per-method per-condition fits + # (CV predictions + weights) into ensemble per-context rows -- built ON TOP + # of the shared per-method fitting above, never inside it. + if (doEnsemble && length(perTokenEntries) >= 2L) { + addEntries(.twasEnsembleLayer(g, perTokenEntries, pipeline@config), + "ensemble") + } + } + construct(pipeline, rows) +} + +# SR-TWAS ensemble LAYER (twas only): combine a group's per-method per-condition +# fits into ensemble per-condition entries -- built ON TOP of the shared per- +# method fitting, never inside it. For each condition r, gather the methods' +# retained out-of-fold CV predictions + weights + R^2, drop methods below the +# R^2 cutoff (stacking needs >= 2), and combine via the `ensembleWeights` +# primitive PER CONTEXT (the sliced single-condition inputs -> contextIndex = 1). +# Returns a length-nCond list of ensemble TwasWeightsEntry (NULL where < 2 +# methods qualify). All methods share the group's fold partition (the runner +# fixes it before fitting), so their out-of-fold predictions are comparable. +.twasEnsembleLayer <- function(group, perTokenEntries, cfg) { + tokens <- names(perTokenEntries) + Y <- group@Y + r2Cut <- if (is.null(cfg$ensembleR2Threshold)) 0.01 else cfg$ensembleR2Threshold + solver <- if (is.null(cfg$ensembleSolver)) "quadprog" else cfg$ensembleSolver + alpha <- if (is.null(cfg$ensembleAlpha)) 1 else cfg$ensembleAlpha + stdz <- isTRUE(cfg$standardized) + lapply(seq_len(nrow(group@conditions)), function(r) { + preds <- list(); wts <- list(); rsq <- c() + for (tk in tokens) { + e <- perTokenEntries[[tk]][[r]] + if (is.null(e)) next + cv <- getCvResult(e); w <- getWeights(e) + if (is.null(cv) || is.null(cv$predictions) || is.null(w)) next + pr <- cv$predictions + preds[[paste0(tk, "_predicted")]] <- + matrix(as.numeric(pr), ncol = 1L, dimnames = list(names(pr), NULL)) + wts[[paste0(tk, "_weights")]] <- + matrix(as.numeric(w), ncol = 1L, dimnames = list(getVariantIds(e), NULL)) + mt <- cv$metrics + rsq[tk] <- if (!is.null(mt) && "rsq" %in% names(mt)) mt[["rsq"]] else NA_real_ + } + passing <- names(rsq)[!is.na(rsq) & rsq >= r2Cut] + if (length(passing) < 2L) return(NULL) + ens <- tryCatch(ensembleWeights( + cvResults = list(prediction = preds[paste0(passing, "_predicted")]), + Y = Y[, r], twasWeightList = wts[paste0(passing, "_weights")], + contextIndex = 1, solver = solver, alpha = alpha), + error = function(err) NULL) + if (is.null(ens) || is.null(ens$ensembleTwasWeights)) return(NULL) + ew <- ens$ensembleTwasWeights + vids <- if (!is.null(names(ew))) names(ew) else rownames(ew) + if (is.null(vids)) vids <- getVariantIds(perTokenEntries[[passing[1L]]][[r]]) + TwasWeightsEntry( + variantIds = vids, weights = as.numeric(ew), + cvResult = list(methodCoef = ens$methodCoef, + methodPerformance = ens$methodPerformance), + standardized = stdz, dataType = cfg$dataType) + }) +} + +# ---- wiring table ----------------------------------------------------------- +# Valid cells are rows; invalid cells are absences (a lookup miss is the error). +.jointDispatchTable <- list( + new("JointDispatchCell", pattern = "context", dataForm = "individual", + enumerate = .enumCrossContextIndividual, minGroup = 2L), + new("JointDispatchCell", pattern = "context", dataForm = "sumstats", + enumerate = .enumCrossContextSumstats, minGroup = 2L), + new("JointDispatchCell", pattern = "trait", dataForm = "individual", + enumerate = .enumCrossTraitIndividual, minGroup = 2L), + new("JointDispatchCell", pattern = "trait", dataForm = "sumstats", + enumerate = .enumCrossTraitSumstats, minGroup = 2L), + new("JointDispatchCell", pattern = "study", dataForm = "sumstats", + enumerate = .enumCrossStudySumstats, minGroup = 2L), + new("JointDispatchCell", pattern = "composed", dataForm = "individual", + enumerate = .enumComposedIndividual, minGroup = 2L), + new("JointDispatchCell", pattern = "composed", dataForm = "sumstats", + enumerate = .enumComposedSumstats, minGroup = 2L), + # Univariate: per-(context, trait) 1-condition groups (twas individual only), + # so univariate methods route through the same engine fitter + ensemble layer. + new("JointDispatchCell", pattern = "univariate", dataForm = "individual", + enumerate = .enumUnivariateIndividual, minGroup = 1L) +) + +.lookupJointCell <- function(pattern, dataForm) { + for (cell in .jointDispatchTable) + if (cell@pattern == pattern && cell@dataForm == dataForm) return(cell) + stop(sprintf("No joint dispatch cell for pattern='%s', dataForm='%s'.", + pattern, dataForm)) +} + +# Run a parsed jointSpecification through the engine: for each spec resolve its +# scope, map its axes to a (pattern, dataForm) cell, and run every requested +# joint method (token) through `.runJointCell`, rbinding the per-spec results. +# Shared by the fm + twas QtlDataset / QtlSumStats / MultiStudy dispatchers -- +# the marker (pipeline) selects the result type and the rbind. `args` is the +# per-run engine payload (twasWeights, methodArgs, cisWindow, region, ...). +.runJointSpecs <- function(parsedJointSpec, data, dataForm, pipeline, + jointMethods, contexts, traitIds, args = list()) { + if (length(jointMethods) == 0L || length(parsedJointSpec) == 0L) return(NULL) + ldSketch <- pipeline@config$ldSketch + isFm <- is(pipeline, "FmJointPipeline") + out <- NULL + for (spec in parsedJointSpec) { + scope <- .fmResolveSpecScope(spec, data, contexts = contexts, + traitIds = traitIds) + # Region mode WITHOUT an explicit traitId: restrict scoped traits to the + # genes overlapping the locus (matches fineMappingPipeline's univariate + # region trait selection, where traitId -- when given -- already pins the + # gene set and takes precedence over region). Gene coordinates are context- + # independent, so the first scoped context's SE provides them. + if (dataForm == "individual" && is.null(traitIds) && !is.null(args$region)) { + for (st in names(scope$traits)) { + ctxs <- scope$contexts[[st]] + if (length(ctxs) == 0L) next + se <- getPhenotypes(data, contexts = ctxs[[1L]]) + scope$traits[[st]] <- .fmTraitsInRegion( + se, intersect(scope$traits[[st]], rownames(se)), args$region) + } + } + pattern <- if (length(spec$axes) > 1L) "composed" else spec$axes[[1L]] + cell <- .lookupJointCell(pattern, dataForm) + spArgs <- c(args, list(axes = spec$axes)) + # One call per spec with ALL methods: .runJointCell loops them per group so + # the twas ensemble layer can combine a group's per-method fits. + res <- .runJointCell(cell, pipeline, data, scope, jointMethods, spArgs) + if (is.null(res)) next + out <- if (is.null(out)) res + else if (isFm) .rbindFineMappingResult(out, res, ldSketch = ldSketch) + else .rbindTwasWeights(out, res, ldSketch = ldSketch) + } + out +} + +# Individual-level (QtlDataset) input cannot joint over study: studies have +# disjoint samples (cross-study joints live on the sumstats slot). Preserve the +# historical axis-specific error messages. +.jointRejectStudyOnIndividual <- function(parsedJointSpec) { + for (spec in parsedJointSpec) { + if ("study" %in% spec$axes) { + if (length(spec$axes) > 1L) + stop("composed joint axes including 'study' require sumstats input.") + stop("jointSpecification with axis 'study' requires sumstats input ", + "(QtlDataset is a single individual-level study).") + } + } +} diff --git a/R/jointSpecification.R b/R/jointSpecification.R index 6612a94f..f0a73f71 100644 --- a/R/jointSpecification.R +++ b/R/jointSpecification.R @@ -107,17 +107,20 @@ if (is(data, "MultiStudyQtlDataset")) { indDatasets <- getQtlDatasets(data) ss <- getSumStats(data) - if (!is.null(study) && study %in% names(indDatasets)) - return(.spListTraits(indDatasets[[study]], context = context)) - if (!is.null(ss) && - (is.null(study) || study %in% unique(as.character(ss$study)))) - return(.spListTraits(ss, study = study, context = context)) + # No study filter: aggregate traits across every component (individual + + # sumstats). This must precede the per-study branches -- otherwise a present + # sumStats slot short-circuits and shadows the individual-level studies' + # traits when study = NULL. if (is.null(study)) { out <- character(0) - for (qd in indDatasets) out <- c(out, .spListTraits(qd)) - if (!is.null(ss)) out <- c(out, .spListTraits(ss)) + for (qd in indDatasets) out <- c(out, .spListTraits(qd, context = context)) + if (!is.null(ss)) out <- c(out, .spListTraits(ss, context = context)) return(unique(out)) } + if (study %in% names(indDatasets)) + return(.spListTraits(indDatasets[[study]], context = context)) + if (!is.null(ss) && study %in% unique(as.character(ss$study))) + return(.spListTraits(ss, study = study, context = context)) return(character(0)) } stop(".spListTraits: unsupported class: ", class(data)[[1L]]) @@ -687,6 +690,18 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { } +# Subset `traits` to those whose phenotype coordinates overlap `region` +# (the genes at a locus). region = NULL -> all `traits` unchanged (gene/cisWindow +# mode does not region-filter). Mirrors fineMappingPipeline's univariate region +# trait selection (ids[overlapsAny(rowRanges(se), region)]) so the joint-engine +# region path joins the same gene set. +# @noRd +.fmTraitsInRegion <- function(se, traits, region) { + if (is.null(region) || length(traits) == 0L) return(traits) + rr <- SummarizedExperiment::rowRanges(se) + traits[IRanges::overlapsAny(rr[traits], region)] +} + # Build a multi-trait Y matrix for a single (study, context) from an # individual-level QtlDataset. Returns list(X, Y, traitsHere, se) or NULL # when fewer than 2 traits live in the context or the sample / complete-Y @@ -696,6 +711,8 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { cisWindow, verbose, label, study, region = NULL) { se <- getPhenotypes(data, contexts = cx) + # scopedTraits is already region-restricted upstream (.runJointSpecs) when + # region mode is used without an explicit traitId. traitsHere <- intersect(scopedTraits, rownames(se)) if (length(traitsHere) < 2L) { if (verbose >= 1) @@ -732,6 +749,7 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { tuples <- list() for (cx in scopedContexts) { se <- getPhenotypes(data, contexts = cx) + # scopedTraits is region-restricted upstream (.runJointSpecs) when needed. for (tid in intersect(scopedTraits, rownames(se))) { tuples[[length(tuples) + 1L]] <- list(context = cx, trait = tid) } @@ -814,684 +832,6 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { # Fine-mapping dispatchers # ============================================================================= -# Cross-context joint dispatcher for QtlDataset. For each trait in scope -# with >= 2 contexts in scope, fits mvsusieR::mvsusie on the multi-column -# Y matrix and emits ONE result row with context = "joint" and -# jointContexts = "ctx1;ctx2;...". -# @noRd -.fmDispatchCrossContextQtlDataset <- function(spec, data, methods, - contexts, traitIds, - cisWindow, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose, - methodArgs = list(), - region = NULL, - twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - scopedContexts <- scope$contexts[[study]] - scopedTraits <- scope$traits[[study]] - if (length(scopedContexts) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext: study '%s' has %d context(s) in scope; skipping cross-context fits.", - study, length(scopedContexts))) - return(NULL) - } - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointContexts <- character(0) - - for (tid in scopedTraits) { - xy <- .buildIndividualCrossContextXY( - data, tid, scopedContexts, cisWindow, verbose, - label = "jointCrossContext", region = region) - if (is.null(xy)) next - - if (verbose >= 1) - message(sprintf( - "jointCrossContext: fitting mvsusie for (study='%s', trait='%s') across contexts (%s) ...", - study, tid, paste(xy$perTraitContexts, collapse = ", "))) - # Reweighted prior from a cross-context mr.mash joint twas run, keyed on - # (study, trait=tid, context="joint"); conditions are the contexts. - mvPrior <- .buildMvsusieReweightedPrior( - .fmLookupMrmashFit(twasWeights, study, tid, context = "joint"), - colnames(xy$Y), dataDrivenPriorWeightsCutoff) - mvBaseArgs <- list( - X = xy$X, Y = xy$Y, - prior_variance = mvPrior$priorVariance, - coverage = coverage) - if (!is.null(mvPrior$residualVariance)) - mvBaseArgs$residual_variance <- mvPrior$residualVariance - fit <- do.call(fitMvsusie, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = xy$X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "X") - rowStudy <- c(rowStudy, study) - rowContext <- c(rowContext, "joint") - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointContexts <- c(rowJointContexts, - paste(xy$perTraitContexts, collapse = ";")) - } - - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointContexts = rowJointContexts, - ldSketch = NULL) -} - - -# Cross-trait joint dispatcher for QtlDataset. Per (study, context), fits -# mvsusieR::mvsusie or fsusieR::susiF (when in `methods`) jointly across -# the scoped traits within that context. Emits ONE result row per -# (study, context, method) with trait = "joint" and jointTraits populated. -# @noRd -.fmDispatchCrossTraitQtlDataset <- function(spec, data, methods, - contexts, traitIds, - cisWindow, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose, - methodArgs = list(), - region = NULL, - twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { - jointMethods <- intersect(methods, c("mvsusie", "fsusie")) - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - scopedContexts <- scope$contexts[[study]] - scopedTraits <- scope$traits[[study]] - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointTraits <- character(0) - - for (cx in scopedContexts) { - xy <- .buildIndividualCrossTraitXY( - data, cx, scopedTraits, cisWindow, verbose, - label = "jointCrossTrait", study = study, region = region) - if (is.null(xy)) next - - for (mm in jointMethods) { - if (verbose >= 1) - message(sprintf( - "jointCrossTrait: fitting %s for (study='%s', context='%s') across traits (%s) ...", - mm, study, cx, paste(xy$traitsHere, collapse = ", "))) - if (mm == "mvsusie") { - # Reweighted prior from a cross-trait mr.mash joint twas run, keyed on - # (study, trait="joint", context=cx); conditions are the traits. - mvPrior <- .buildMvsusieReweightedPrior( - .fmLookupMrmashFit(twasWeights, study, "joint", context = cx), - colnames(xy$Y), dataDrivenPriorWeightsCutoff) - mvBaseArgs <- list( - X = xy$X, Y = xy$Y, - prior_variance = mvPrior$priorVariance, - coverage = coverage) - if (!is.null(mvPrior$residualVariance)) - mvBaseArgs$residual_variance <- mvPrior$residualVariance - fit <- do.call(fitMvsusie, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = xy$X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "X") - } else { - rr <- SummarizedExperiment::rowRanges(xy$se) - ord <- match(colnames(xy$Y), rownames(xy$se)) - rr <- rr[ord] - pos <- (GenomicRanges::start(rr) + GenomicRanges::end(rr)) / 2 - fit <- do.call(fitFsusie, - .fmMergeUserArgs(list(X = xy$X, Y = xy$Y, pos = pos), - "fsusie", methodArgs[["fsusie"]])) - fit <- .setFinemappingFitClass(fit, "fsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "fsusie", - dataX = xy$X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "fsusie") - } - rowStudy <- c(rowStudy, study) - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, "joint") - rowMethod <- c(rowMethod, mm) - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointTraits <- c(rowJointTraits, - paste(xy$traitsHere, collapse = ";")) - } - } - - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointTraits = rowJointTraits, - ldSketch = NULL) -} - - -# Cross-context joint dispatcher for QtlSumStats input. Groups the -# selected sumstats rows by (study, trait); each group with >= 2 contexts -# in scope produces one mvsusie_rss fit and one result row with context = -# "joint" and jointContexts populated. -# @noRd -.fmDispatchCrossContextQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose, - methodArgs = list(), - twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointContexts <- character(0) - - for (s in scope$studies) { - scopedContexts <- scope$contexts[[s]] - scopedTraits <- scope$traits[[s]] - if (length(scopedContexts) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (QtlSumStats): study '%s' has %d context(s) in scope; skipping.", - s, length(scopedContexts))) - next - } - for (tid in scopedTraits) { - tupleRows <- which(studyCol == s & traitCol == tid & - contextCol %in% scopedContexts) - if (length(tupleRows) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (QtlSumStats): (study='%s', trait='%s') has %d scoped context(s); skipping.", - s, tid, length(tupleRows))) - next - } - ctxNames <- contextCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, ctxNames, - errorLabel = "jointCrossContext (QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - if (verbose >= 1) - message(sprintf( - "jointCrossContext (QtlSumStats): fitting mvsusie_rss for (study='%s', trait='%s', %d contexts) ...", - s, tid, length(ctxNames))) - # Reweighted prior from a cross-context mr.mash joint twas run, keyed on - # (study, trait=tid, context="joint"); conditions are the contexts. - mvPrior <- .buildMvsusieReweightedPrior( - .fmLookupMrmashFit(twasWeights, s, tid, context = "joint"), - colnames(jz$Z), dataDrivenPriorWeightsCutoff) - mvBaseArgs <- list( - Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), - prior_variance = mvPrior$priorVariance, - coverage = coverage) - if (!is.null(mvPrior$residualVariance)) - mvBaseArgs$residual_variance <- mvPrior$residualVariance - fit <- do.call(fitMvsusieRss, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = ldMat, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "Xcorr") - rowStudy <- c(rowStudy, s) - rowContext <- c(rowContext, "joint") - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointContexts <- c(rowJointContexts, - paste(ctxNames, collapse = ";")) - } - } - - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointContexts = rowJointContexts, - ldSketch = ldSketch) -} - - -# Cross-trait joint dispatcher for QtlSumStats: groups by (study, context), -# requires >= 2 scoped traits per group. mvsusie_rss only -- no RSS fsusie. -# @noRd -.fmDispatchCrossTraitQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose, - methodArgs = list(), - twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { - if ("fsusie" %in% methods) - stop("jointCrossTrait (QtlSumStats): fsusie has no RSS variant; ", - "fsusie cannot participate in sumstats-based joint fits.") - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointTraits <- character(0) - - for (s in scope$studies) { - scopedContexts <- scope$contexts[[s]] - scopedTraits <- scope$traits[[s]] - for (cx in scopedContexts) { - tupleRows <- which(studyCol == s & contextCol == cx & - traitCol %in% scopedTraits) - if (length(tupleRows) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (QtlSumStats): (study='%s', context='%s') has %d scoped trait(s); skipping.", - s, cx, length(tupleRows))) - next - } - trNames <- traitCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, trNames, - errorLabel = "jointCrossTrait (QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (QtlSumStats): fitting mvsusie_rss for (study='%s', context='%s', %d traits) ...", - s, cx, length(trNames))) - # Reweighted prior from a cross-trait mr.mash joint twas run, keyed on - # (study, trait="joint", context=cx); conditions are the traits. - mvPrior <- .buildMvsusieReweightedPrior( - .fmLookupMrmashFit(twasWeights, s, "joint", context = cx), - colnames(jz$Z), dataDrivenPriorWeightsCutoff) - mvBaseArgs <- list( - Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), - prior_variance = mvPrior$priorVariance, - coverage = coverage) - if (!is.null(mvPrior$residualVariance)) - mvBaseArgs$residual_variance <- mvPrior$residualVariance - fit <- do.call(fitMvsusieRss, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = ldMat, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "Xcorr") - rowStudy <- c(rowStudy, s) - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, "joint") - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointTraits <- c(rowJointTraits, - paste(trNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointTraits = rowJointTraits, - ldSketch = ldSketch) -} - - -# Cross-study joint dispatcher for QtlSumStats: groups by (context, trait), -# requires >= 2 scoped studies per group. Sumstats-only by definition; -# individual-level studies are excluded with a message at the caller. -# mvsusie_rss only. -# @noRd -.fmDispatchCrossStudyQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose, - methodArgs = list(), - twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { - if ("fsusie" %in% methods) - stop("jointCrossStudy: fsusie cannot participate (no RSS variant).") - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - allCtxs <- unique(unlist(scope$contexts, use.names = FALSE)) - allTrs <- unique(unlist(scope$traits, use.names = FALSE)) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointStudies <- character(0) - - for (cx in allCtxs) { - for (tid in allTrs) { - tupleRows <- which(contextCol == cx & traitCol == tid & - studyCol %in% scope$studies) - keep <- logical(length(tupleRows)) - for (k in seq_along(tupleRows)) { - s <- studyCol[tupleRows[k]] - keep[k] <- (cx %in% scope$contexts[[s]]) && - (tid %in% scope$traits[[s]]) - } - tupleRows <- tupleRows[keep] - if (length(tupleRows) < 2L) { - if (length(tupleRows) > 0L && verbose >= 1) - message(sprintf( - "jointCrossStudy: (context='%s', trait='%s') has %d study(ies) in scope; skipping.", - cx, tid, length(tupleRows))) - next - } - stNames <- studyCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, stNames, - errorLabel = "jointCrossStudy") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - if (verbose >= 1) - message(sprintf( - "jointCrossStudy: fitting mvsusie_rss for (context='%s', trait='%s', %d studies) ...", - cx, tid, length(stNames))) - mvBaseArgs <- list( - Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), - # TODO(mvsusie-prior): cross-study lookup key undecided; canonical prior for now - prior_variance = mvsusieR::create_mixture_prior(R = ncol(jz$Z)), - coverage = coverage) - fit <- do.call(fitMvsusieRss, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = ldMat, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "Xcorr") - rowStudy <- c(rowStudy, "joint") - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointStudies <- c(rowJointStudies, - paste(stNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointStudies = rowJointStudies, - ldSketch = ldSketch) -} - - -# Composed multi-axis joint dispatcher for QtlDataset. Only axes = -# c("context", "trait") is meaningful for a single-study individual- -# level input. Iterates per (study) (just one), enumerates the -# (context, trait) tuples in scope where the trait exists in the -# context, and fits one mvsusie joint over those tuples. -# @noRd -.fmDispatchComposedQtlDataset <- function(spec, data, methods, - contexts, traitIds, cisWindow, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose, - methodArgs = list(), - region = NULL, - twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { - axes <- spec$axes - if ("study" %in% axes) - stop("composed jointSpecification (QtlDataset): axes including 'study' require sumstats input.") - if (!setequal(axes, c("context", "trait"))) - stop(sprintf( - "composed jointSpecification (QtlDataset): unsupported axes (%s) for individual-level input.", - paste(axes, collapse = ", "))) - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - xy <- .buildComposedIndividualXY(data, scope, study, cisWindow, - verbose, - label = "composed joint (QtlDataset)", - region = region) - if (is.null(xy)) return(NULL) - - if (verbose >= 1) - message(sprintf( - "composed joint (QtlDataset): fitting mvsusie for study='%s' over %d (context, trait) columns ...", - study, ncol(xy$Y))) - # Reweighted prior from a composed mr.mash joint twas run, keyed on - # (study, trait="joint", context="joint"); conditions are the (context,trait) - # columns of the composed design. - mvPrior <- .buildMvsusieReweightedPrior( - .fmLookupMrmashFit(twasWeights, study, "joint", context = "joint"), - colnames(xy$Y), dataDrivenPriorWeightsCutoff) - mvBaseArgs <- list( - X = xy$X, Y = xy$Y, - prior_variance = mvPrior$priorVariance, - coverage = coverage) - if (!is.null(mvPrior$residualVariance)) - mvBaseArgs$residual_variance <- mvPrior$residualVariance - fit <- do.call(fitMvsusie, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = xy$X, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "X") - QtlFineMappingResult( - study = study, - context = "joint", - trait = "joint", - method = "mvsusie", - entry = list(entry), - jointContexts = paste(vapply(xy$tuples, function(t) t$context, - character(1)), collapse = ";"), - jointTraits = paste(vapply(xy$tuples, function(t) t$trait, - character(1)), collapse = ";"), - ldSketch = NULL) -} - - -# Composed multi-axis joint dispatcher for QtlSumStats. Handles any -# `axes` subset of {study, context, trait} of size >= 2 by iterating the -# complement-axis Cartesian product and emitting one joint fit per -# iteration unit. -# @noRd -.fmDispatchComposedQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - coverage, secondaryCoverage, - signalCutoff, minAbsCorr, - verbose, - methodArgs = list(), - twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { - if ("fsusie" %in% methods) - stop("composed jointSpecification (QtlSumStats): fsusie has no RSS variant.") - jointMethods <- intersect(methods, "mvsusie") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - groupInfo <- .enumerateComposedSumstatGroups(spec, data, scope) - if (is.null(groupInfo)) return(NULL) - axes <- groupInfo$axes - studyCol <- groupInfo$studyCol - contextCol <- groupInfo$contextCol - traitCol <- groupInfo$traitCol - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list() - rowJointStudies <- character(0) - rowJointContexts <- character(0) - rowJointTraits <- character(0) - - for (gIdx in groupInfo$groups) { - if (length(gIdx) < 2L) { - if (verbose >= 1) - message(sprintf( - "composed joint (QtlSumStats): group has %d row(s); skipping.", - length(gIdx))) - next - } - colLabels <- vapply(gIdx, function(i) - paste(studyCol[i], contextCol[i], traitCol[i], sep = ":"), - character(1L)) - jz <- .buildJointSumstatZMatrix( - data, gIdx, colLabels, - errorLabel = "composed joint (QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - if (verbose >= 1) - message(sprintf( - "composed joint (QtlSumStats): fitting mvsusie_rss for axes=(%s), %d columns ...", - paste(axes, collapse = ", "), length(gIdx))) - # Reweighted prior from a composed mr.mash joint twas run, keyed on - # (study, trait="joint", context="joint"); conditions are the joint columns. - mvPrior <- .buildMvsusieReweightedPrior( - .fmLookupMrmashFit(twasWeights, studyCol[gIdx[[1L]]], "joint", - context = "joint"), - colnames(jz$Z), dataDrivenPriorWeightsCutoff) - mvBaseArgs <- list( - Z = jz$Z, R = ldMat, N = as.numeric(stats::median(jz$nVec)), - prior_variance = mvPrior$priorVariance, - coverage = coverage) - if (!is.null(mvPrior$residualVariance)) - mvBaseArgs$residual_variance <- mvPrior$residualVariance - fit <- do.call(fitMvsusieRss, - .fmMergeUserArgs(mvBaseArgs, "mvsusie", - methodArgs[["mvsusie"]])) - fit <- .setFinemappingFitClass(fit, "mvsusie") - entry <- .fmPostprocessOne( - fit = fit, method = "mvsusie", - dataX = ldMat, dataY = NULL, - coverage = coverage, - secondaryCoverage = secondaryCoverage, - signalCutoff = signalCutoff, - minAbsCorr = minAbsCorr, - csInput = "Xcorr") - - repStudy <- if ("study" %in% axes) "joint" else studyCol[gIdx[[1L]]] - repContext <- if ("context" %in% axes) "joint" else contextCol[gIdx[[1L]]] - repTrait <- if ("trait" %in% axes) "joint" else traitCol[gIdx[[1L]]] - rowStudy <- c(rowStudy, repStudy) - rowContext <- c(rowContext, repContext) - rowTrait <- c(rowTrait, repTrait) - rowMethod <- c(rowMethod, "mvsusie") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointStudies <- c(rowJointStudies, - if ("study" %in% axes) paste(studyCol[gIdx], collapse = ";") - else NA_character_) - rowJointContexts <- c(rowJointContexts, - if ("context" %in% axes) paste(contextCol[gIdx], collapse = ";") - else NA_character_) - rowJointTraits <- c(rowJointTraits, - if ("trait" %in% axes) paste(traitCol[gIdx], collapse = ";") - else NA_character_) - } - - if (length(rowStudy) == 0L) return(NULL) - jsArg <- if (all(is.na(rowJointStudies))) NULL else rowJointStudies - jcArg <- if (all(is.na(rowJointContexts))) NULL else rowJointContexts - jtArg <- if (all(is.na(rowJointTraits))) NULL else rowJointTraits - QtlFineMappingResult( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointStudies = jsArg, - jointContexts = jcArg, - jointTraits = jtArg, - ldSketch = ldSketch) -} - - # Top-level joint dispatcher for fineMappingPipeline(QtlDataset). # @noRd # Merge per-region QtlFineMappingResult collections (same keys across regions) @@ -1523,6 +863,23 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { ldSketch = NULL) } +# Synthesize a jointSpecification for the AUTO-DETECTION path (no explicit +# jointSpecification supplied): route mvsusie / fsusie over the data's natural +# multi-axis shape through the SAME engine as an explicit jointSpecification, +# matching the historical mvJobs / runMultivariate detection. >= 2 traits -> +# cross-trait (covers multi-trait single-context AND both-multi, since the +# cross-trait enumerator iterates contexts -> per-context multi-trait fits); +# else >= 2 contexts (single trait) -> cross-context. Single context & single +# trait -> no joint (the caller's multivariate guard already rejects mvsusie / +# fsusie there). Returns a list of parsed specs (full scope) or list(). +# @noRd +.fmSynthesizeJointSpec <- function(nCtx, nTraits) { + if (nTraits >= 2L) list(list(axes = "trait", scope = NULL)) + else if (nCtx >= 2L) list(list(axes = "context", scope = NULL)) + else list() +} + + .fmDispatchJointSpecsQtlDataset <- function(parsedJointSpec, data, methods, contexts, traitIds, cisWindow, @@ -1532,7 +889,10 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { methodArgs = list(), xRegions = list(NULL), twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { + dataDrivenPriorWeightsCutoff = 1e-10, + cvFolds = 0, samplePartition = NULL, + pipCutoffToSkip = 0, + fineMappingResult = NULL) { # Run the joint dispatch once per region block, then merge per # (study, context, trait, method) across regions. A single block (cis or # jointRegions=TRUE concatenated) returns its result directly. @@ -1542,7 +902,10 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, methodArgs = methodArgs, region = rg, twasWeights = twasWeights, - dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff) + dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff, + cvFolds = cvFolds, samplePartition = samplePartition, + pipCutoffToSkip = pipCutoffToSkip, + fineMappingResult = fineMappingResult) }) perRegion <- Filter(Negate(is.null), perRegion) if (length(perRegion) == 0L) return(NULL) @@ -1559,44 +922,27 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { methodArgs = list(), region = NULL, twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { - # Bundle the data-driven mvSuSiE prior pass-through args once; every leaf - # dispatcher accepts the same pair. - priorArgs <- list(twasWeights = twasWeights, - dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff) - out <- NULL - for (i in seq_along(parsedJointSpec)) { - spec <- parsedJointSpec[[i]] - axes <- spec$axes - if (length(axes) > 1L) { - res <- do.call(.fmDispatchComposedQtlDataset, c(list( - spec, data, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs, region = region), priorArgs)) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindFineMappingResult(out, res, ldSketch = NULL) - next - } - axis <- axes[[1L]] - res <- switch(axis, - context = do.call(.fmDispatchCrossContextQtlDataset, c(list( - spec, data, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs, region = region), priorArgs)), - trait = do.call(.fmDispatchCrossTraitQtlDataset, c(list( - spec, data, methods, contexts, traitIds, cisWindow, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs, region = region), priorArgs)), - study = stop( - "fineMappingPipeline(QtlDataset): jointSpecification with axes = 'study' requires sumstats input. ", - "QtlDataset represents a single individual-level study; cross-study joints operate on the sumstats slot of MultiStudyQtlDataset or on QtlSumStats directly."), - stop(sprintf("Unsupported axis: %s", axis))) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindFineMappingResult(out, res, ldSketch = NULL) - } - out + dataDrivenPriorWeightsCutoff = 1e-10, + cvFolds = 0, samplePartition = NULL, + pipCutoffToSkip = 0, + fineMappingResult = NULL) { + # Engine routing (jointEngine.R); one region block (the caller loops regions). + .jointRejectStudyOnIndividual(parsedJointSpec) + pipeline <- new("FmJointPipeline", config = list( + coverage = coverage, secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, + dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff, + cvFolds = cvFolds, samplePartition = samplePartition, + verbose = verbose, ldSketch = NULL)) + .runJointSpecs(parsedJointSpec, data, dataForm = "individual", pipeline = pipeline, + jointMethods = intersect(methods, c("mvsusie", "fsusie")), + contexts = contexts, traitIds = traitIds, + args = list(twasWeights = twasWeights, + dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff, + methodArgs = methodArgs, cisWindow = cisWindow, + region = region, verbose = verbose, + pipCutoffToSkip = pipCutoffToSkip, + cache = fineMappingResult)) } @@ -1609,46 +955,24 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { verbose, methodArgs = list(), twasWeights = NULL, - dataDrivenPriorWeightsCutoff = 1e-10) { - # Bundle the data-driven mvSuSiE prior pass-through args once; every leaf - # dispatcher accepts the same pair. - priorArgs <- list(twasWeights = twasWeights, - dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff) - out <- NULL - for (i in seq_along(parsedJointSpec)) { - spec <- parsedJointSpec[[i]] - axes <- spec$axes - if (length(axes) > 1L) { - res <- do.call(.fmDispatchComposedQtlSumStats, c(list( - spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs), priorArgs)) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindFineMappingResult(out, res, ldSketch = getLdSketch(data)) - next - } - axis <- axes[[1L]] - res <- switch(axis, - context = do.call(.fmDispatchCrossContextQtlSumStats, c(list( - spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs), priorArgs)), - trait = do.call(.fmDispatchCrossTraitQtlSumStats, c(list( - spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs), priorArgs)), - study = do.call(.fmDispatchCrossStudyQtlSumStats, c(list( - spec, data, methods, contexts, traitIds, - coverage, secondaryCoverage, signalCutoff, minAbsCorr, verbose, - methodArgs = methodArgs), priorArgs)), - stop(sprintf("Unsupported axis: %s", axis))) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindFineMappingResult(out, res, - ldSketch = getLdSketch(data)) - } - out + dataDrivenPriorWeightsCutoff = 1e-10, + fineMappingResult = NULL) { + # Engine routing (jointEngine.R): the marker carries the fine-mapping config + # and result type; the dispatch table + .runJointCell replace the per-axis + # switch + the cross-context/trait/study/composed leaf dispatchers. RSS has no + # sample folds (no cvFolds / samplePartition); SER pre-screen is individual-only. + pipeline <- new("FmJointPipeline", config = list( + coverage = coverage, secondaryCoverage = secondaryCoverage, + signalCutoff = signalCutoff, minAbsCorr = minAbsCorr, + dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff, + cvFolds = 0L, verbose = verbose, ldSketch = getLdSketch(data))) + .runJointSpecs(parsedJointSpec, data, dataForm = "sumstats", pipeline = pipeline, + jointMethods = intersect(methods, c("mvsusie", "fsusie")), + contexts = contexts, traitIds = traitIds, + args = list(twasWeights = twasWeights, + dataDrivenPriorWeightsCutoff = dataDrivenPriorWeightsCutoff, + methodArgs = methodArgs, verbose = verbose, + cache = fineMappingResult)) } @@ -1722,534 +1046,11 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { # TWAS-weights dispatchers # ============================================================================= -# Cross-context joint dispatcher for QtlDataset (twas). Mr.mash across -# scoped contexts per (study, trait). -# @noRd -.twasDispatchCrossContextQtlDataset <- function(spec, data, methods, - contexts, traitIds, - cisWindow, dataType, - verbose, region = NULL, - retainFit = TRUE, - retainFitDetail = "slim") { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - scopedContexts <- scope$contexts[[study]] - scopedTraits <- scope$traits[[study]] - if (length(scopedContexts) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlDataset): study '%s' has %d context(s) in scope; skipping.", - study, length(scopedContexts))) - return(NULL) - } - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointContexts <- character(0) - - for (tid in scopedTraits) { - xy <- .buildIndividualCrossContextXY( - data, tid, scopedContexts, cisWindow, verbose, - label = "jointCrossContext (twas QtlDataset)", region = region) - if (is.null(xy)) next - - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlDataset): fitting mr.mash for (study='%s', trait='%s') across contexts (%s) ...", - study, tid, paste(xy$perTraitContexts, collapse = ", "))) - weights <- mrmashWeights(X = xy$X, Y = xy$Y, - retainFit = retainFit, fitDetail = retainFitDetail) - if (is.null(rownames(weights))) rownames(weights) <- colnames(xy$X) - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - fits = attr(weights, "fit"), - standardized = FALSE, - dataType = dataType) - rowStudy <- c(rowStudy, study) - rowContext <- c(rowContext, "joint") - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointContexts <- c(rowJointContexts, - paste(xy$perTraitContexts, collapse = ";")) - } - - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointContexts = rowJointContexts, - ldSketch = NULL) -} - - -# Cross-trait joint dispatcher for QtlDataset (twas). Mr.mash per -# (study, context) across scoped traits. -# @noRd -.twasDispatchCrossTraitQtlDataset <- function(spec, data, methods, - contexts, traitIds, - cisWindow, dataType, - verbose, region = NULL, - retainFit = TRUE, - retainFitDetail = "slim") { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - scopedContexts <- scope$contexts[[study]] - scopedTraits <- scope$traits[[study]] - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointTraits <- character(0) - - for (cx in scopedContexts) { - xy <- .buildIndividualCrossTraitXY( - data, cx, scopedTraits, cisWindow, verbose, - label = "jointCrossTrait (twas)", study = study, region = region) - if (is.null(xy)) next - - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (twas): fitting mr.mash for (study='%s', context='%s') across traits (%s) ...", - study, cx, paste(xy$traitsHere, collapse = ", "))) - weights <- mrmashWeights(X = xy$X, Y = xy$Y, - retainFit = retainFit, fitDetail = retainFitDetail) - if (is.null(rownames(weights))) rownames(weights) <- colnames(xy$X) - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - fits = attr(weights, "fit"), - standardized = FALSE, - dataType = dataType) - rowStudy <- c(rowStudy, study) - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, "joint") - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointTraits <- c(rowJointTraits, - paste(xy$traitsHere, collapse = ";")) - } - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointTraits = rowJointTraits, - ldSketch = NULL) -} - - -# Cross-context joint dispatcher for QtlSumStats (twas). Mr.mash.rss per -# (study, trait). -# @noRd -.twasDispatchCrossContextQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - dataType, verbose, - retainFit = TRUE, - retainFitDetail = "slim") { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointContexts <- character(0) - - for (s in scope$studies) { - scopedContexts <- scope$contexts[[s]] - scopedTraits <- scope$traits[[s]] - if (length(scopedContexts) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlSumStats): study '%s' has %d context(s) in scope; skipping.", - s, length(scopedContexts))) - next - } - for (tid in scopedTraits) { - tupleRows <- which(studyCol == s & traitCol == tid & - contextCol %in% scopedContexts) - if (length(tupleRows) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlSumStats): (study='%s', trait='%s') has %d scoped context(s); skipping.", - s, tid, length(tupleRows))) - next - } - ctxNames <- contextCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, ctxNames, - errorLabel = "jointCrossContext (twas QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - stat <- list(z = jz$Z, N = jz$nVec) - if (verbose >= 1) - message(sprintf( - "jointCrossContext (twas QtlSumStats): fitting mr.mash.rss for (study='%s', trait='%s', %d contexts) ...", - s, tid, length(ctxNames))) - weights <- mrmashRssWeights(stat = stat, LD = ldMat, - retainFit = retainFit, - fitDetail = retainFitDetail) - if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - fits = attr(weights, "fit"), - standardized = TRUE, - dataType = dataType) - rowStudy <- c(rowStudy, s) - rowContext <- c(rowContext, "joint") - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointContexts <- c(rowJointContexts, - paste(ctxNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointContexts = rowJointContexts, - ldSketch = ldSketch) -} - - -# Cross-trait joint dispatcher for QtlSumStats (twas). Mr.mash.rss per -# (study, context). -# @noRd -.twasDispatchCrossTraitQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - dataType, verbose, - retainFit = TRUE, - retainFitDetail = "slim") { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointTraits <- character(0) - - for (s in scope$studies) { - scopedContexts <- scope$contexts[[s]] - scopedTraits <- scope$traits[[s]] - for (cx in scopedContexts) { - tupleRows <- which(studyCol == s & contextCol == cx & - traitCol %in% scopedTraits) - if (length(tupleRows) < 2L) { - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (twas QtlSumStats): (study='%s', context='%s') has %d scoped trait(s); skipping.", - s, cx, length(tupleRows))) - next - } - trNames <- traitCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, trNames, - errorLabel = "jointCrossTrait (twas QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - stat <- list(z = jz$Z, N = jz$nVec) - if (verbose >= 1) - message(sprintf( - "jointCrossTrait (twas QtlSumStats): fitting mr.mash.rss for (study='%s', context='%s', %d traits) ...", - s, cx, length(trNames))) - weights <- mrmashRssWeights(stat = stat, LD = ldMat, - retainFit = retainFit, - fitDetail = retainFitDetail) - if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - fits = attr(weights, "fit"), - standardized = TRUE, - dataType = dataType) - rowStudy <- c(rowStudy, s) - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, "joint") - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointTraits <- c(rowJointTraits, - paste(trNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointTraits = rowJointTraits, - ldSketch = ldSketch) -} - - -# Cross-study joint dispatcher for QtlSumStats (twas). Mr.mash.rss per -# (context, trait). -# @noRd -.twasDispatchCrossStudyQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - dataType, verbose, - retainFit = TRUE, - retainFitDetail = "slim") { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - studyCol <- as.character(data$study) - contextCol <- as.character(data$context) - traitCol <- as.character(data$trait) - - allCtxs <- unique(unlist(scope$contexts, use.names = FALSE)) - allTrs <- unique(unlist(scope$traits, use.names = FALSE)) - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list(); rowJointStudies <- character(0) - - for (cx in allCtxs) { - for (tid in allTrs) { - tupleRows <- which(contextCol == cx & traitCol == tid & - studyCol %in% scope$studies) - keep <- logical(length(tupleRows)) - for (k in seq_along(tupleRows)) { - s <- studyCol[tupleRows[k]] - keep[k] <- (cx %in% scope$contexts[[s]]) && - (tid %in% scope$traits[[s]]) - } - tupleRows <- tupleRows[keep] - if (length(tupleRows) < 2L) { - if (length(tupleRows) > 0L && verbose >= 1) - message(sprintf( - "jointCrossStudy (twas): (context='%s', trait='%s') has %d study(ies) in scope; skipping.", - cx, tid, length(tupleRows))) - next - } - stNames <- studyCol[tupleRows] - jz <- .buildJointSumstatZMatrix( - data, tupleRows, stNames, - errorLabel = "jointCrossStudy (twas)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - stat <- list(z = jz$Z, N = jz$nVec) - if (verbose >= 1) - message(sprintf( - "jointCrossStudy (twas): fitting mr.mash.rss for (context='%s', trait='%s', %d studies) ...", - cx, tid, length(stNames))) - weights <- mrmashRssWeights(stat = stat, LD = ldMat, - retainFit = retainFit, - fitDetail = retainFitDetail) - if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - fits = attr(weights, "fit"), - standardized = TRUE, - dataType = dataType) - rowStudy <- c(rowStudy, "joint") - rowContext <- c(rowContext, cx) - rowTrait <- c(rowTrait, tid) - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointStudies <- c(rowJointStudies, - paste(stNames, collapse = ";")) - } - } - if (length(rowStudy) == 0L) return(NULL) - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointStudies = rowJointStudies, - ldSketch = ldSketch) -} - - -# Composed multi-axis joint dispatcher for QtlSumStats (twas). -# @noRd -.twasDispatchComposedQtlSumStats <- function(spec, data, methods, - contexts, traitIds, - dataType, verbose, - retainFit = TRUE, - retainFitDetail = "slim") { - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - ldSketch <- getLdSketch(data) - groupInfo <- .enumerateComposedSumstatGroups(spec, data, scope) - if (is.null(groupInfo)) return(NULL) - axes <- groupInfo$axes - studyCol <- groupInfo$studyCol - contextCol <- groupInfo$contextCol - traitCol <- groupInfo$traitCol - - rowStudy <- character(0); rowContext <- character(0) - rowTrait <- character(0); rowMethod <- character(0) - rowEntries <- list() - rowJointStudies <- character(0) - rowJointContexts <- character(0) - rowJointTraits <- character(0) - - for (gIdx in groupInfo$groups) { - if (length(gIdx) < 2L) { - if (verbose >= 1) - message(sprintf( - "composed joint (twas QtlSumStats): group has %d row(s); skipping.", - length(gIdx))) - next - } - colLabels <- vapply(gIdx, function(i) - paste(studyCol[i], contextCol[i], traitCol[i], sep = ":"), - character(1L)) - jz <- .buildJointSumstatZMatrix( - data, gIdx, colLabels, - errorLabel = "composed joint (twas QtlSumStats)") - ldMat <- .fmLdFromSketch(ldSketch, jz$variantIds) - stat <- list(z = jz$Z, N = jz$nVec) - if (verbose >= 1) - message(sprintf( - "composed joint (twas QtlSumStats): fitting mr.mash.rss for axes=(%s), %d columns ...", - paste(axes, collapse = ", "), length(gIdx))) - weights <- mrmashRssWeights(stat = stat, LD = ldMat, - retainFit = retainFit, - fitDetail = retainFitDetail) - if (is.null(rownames(weights))) rownames(weights) <- jz$variantIds - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - fits = attr(weights, "fit"), - standardized = TRUE, - dataType = dataType) - - repStudy <- if ("study" %in% axes) "joint" else studyCol[gIdx[[1L]]] - repContext <- if ("context" %in% axes) "joint" else contextCol[gIdx[[1L]]] - repTrait <- if ("trait" %in% axes) "joint" else traitCol[gIdx[[1L]]] - rowStudy <- c(rowStudy, repStudy) - rowContext <- c(rowContext, repContext) - rowTrait <- c(rowTrait, repTrait) - rowMethod <- c(rowMethod, "mrmash") - rowEntries[[length(rowEntries) + 1L]] <- entry - rowJointStudies <- c(rowJointStudies, - if ("study" %in% axes) paste(studyCol[gIdx], collapse = ";") - else NA_character_) - rowJointContexts <- c(rowJointContexts, - if ("context" %in% axes) paste(contextCol[gIdx], collapse = ";") - else NA_character_) - rowJointTraits <- c(rowJointTraits, - if ("trait" %in% axes) paste(traitCol[gIdx], collapse = ";") - else NA_character_) - } - if (length(rowStudy) == 0L) return(NULL) - jsArg <- if (all(is.na(rowJointStudies))) NULL else rowJointStudies - jcArg <- if (all(is.na(rowJointContexts))) NULL else rowJointContexts - jtArg <- if (all(is.na(rowJointTraits))) NULL else rowJointTraits - TwasWeights( - study = rowStudy, - context = rowContext, - trait = rowTrait, - method = rowMethod, - entry = rowEntries, - jointStudies = jsArg, - jointContexts = jcArg, - jointTraits = jtArg, - ldSketch = ldSketch) -} - - -# Composed multi-axis joint dispatcher for QtlDataset (twas). axes = -# c("context", "trait") only. -# @noRd -.twasDispatchComposedQtlDataset <- function(spec, data, methods, - contexts, traitIds, cisWindow, - dataType, verbose, - region = NULL, - retainFit = TRUE, - retainFitDetail = "slim") { - axes <- spec$axes - if ("study" %in% axes) - stop("composed jointSpecification (twas QtlDataset): axes including 'study' require sumstats input.") - if (!setequal(axes, c("context", "trait"))) - stop(sprintf("composed jointSpecification (twas QtlDataset): unsupported axes (%s) for individual-level input.", - paste(axes, collapse = ", "))) - jointMethods <- intersect(methods, "mrmash") - if (length(jointMethods) == 0L) return(NULL) - - scope <- .fmResolveSpecScope(spec, data, contexts = contexts, - traitIds = traitIds) - study <- getStudy(data) - if (!(study %in% scope$studies)) return(NULL) - xy <- .buildComposedIndividualXY(data, scope, study, cisWindow, - verbose, - label = "composed joint (twas QtlDataset)", - region = region) - if (is.null(xy)) return(NULL) - - if (verbose >= 1) - message(sprintf( - "composed joint (twas QtlDataset): fitting mr.mash for study='%s' over %d (context, trait) columns ...", - study, ncol(xy$Y))) - weights <- mrmashWeights(X = xy$X, Y = xy$Y, - retainFit = retainFit, fitDetail = retainFitDetail) - if (is.null(rownames(weights))) rownames(weights) <- colnames(xy$X) - entry <- TwasWeightsEntry( - variantIds = rownames(weights), - weights = weights, - fits = attr(weights, "fit"), - standardized = FALSE, - dataType = dataType) - TwasWeights( - study = study, - context = "joint", - trait = "joint", - method = "mrmash", - entry = list(entry), - jointContexts = paste(vapply(xy$tuples, function(t) t$context, - character(1)), collapse = ";"), - jointTraits = paste(vapply(xy$tuples, function(t) t$trait, - character(1)), collapse = ";"), - ldSketch = NULL) -} - - # Top-level joint dispatcher for twasWeightsPipeline(QtlDataset). # @noRd # Merge per-region TwasWeights collections (same keys across regions) into one # by concatenating each (study, context, trait, method) row's entry via -# .twasMergeRegionEntries (stacked weights + flat per-region cvPerformance). +# .twasMergeRegionEntries (stacked weights + flat per-region cvResult). # @noRd .twasMergeResultsByKey <- function(results, regionLabels) { base <- results[[1L]] @@ -2302,38 +1103,16 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { verbose, region = NULL, retainFit = TRUE, retainFitDetail = "slim") { - out <- NULL - for (i in seq_along(parsedJointSpec)) { - spec <- parsedJointSpec[[i]] - axes <- spec$axes - if (length(axes) > 1L) { - res <- .twasDispatchComposedQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose, - region = region, - retainFit = retainFit, retainFitDetail = retainFitDetail) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindTwasWeights(out, res, ldSketch = NULL) - next - } - axis <- axes[[1L]] - res <- switch(axis, - context = .twasDispatchCrossContextQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose, - region = region, - retainFit = retainFit, retainFitDetail = retainFitDetail), - trait = .twasDispatchCrossTraitQtlDataset( - spec, data, methods, contexts, traitIds, cisWindow, dataType, verbose, - region = region, - retainFit = retainFit, retainFitDetail = retainFitDetail), - study = stop( - "twasWeightsPipeline(QtlDataset): jointSpecification with axes = 'study' requires sumstats input."), - stop(sprintf("Unsupported axis: %s", axis))) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindTwasWeights(out, res, ldSketch = NULL) - } - out + # Engine routing (jointEngine.R); one region block (the caller loops regions). + .jointRejectStudyOnIndividual(parsedJointSpec) + pipeline <- new("TwasJointPipeline", config = list( + retainFitDetail = retainFitDetail, dataType = dataType, + cvFolds = 0L, fitFullData = TRUE, standardized = FALSE, ldSketch = NULL)) + .runJointSpecs(parsedJointSpec, data, dataForm = "individual", pipeline = pipeline, + jointMethods = intersect(methods, "mrmash"), + contexts = contexts, traitIds = traitIds, + args = list(methodArgs = list(), cisWindow = cisWindow, + region = region, verbose = verbose)) } @@ -2344,36 +1123,15 @@ validateMethodsVsJointSpec <- function(methodsParsed, jointSpecParsed) { dataType, verbose, retainFit = TRUE, retainFitDetail = "slim") { - out <- NULL - for (i in seq_along(parsedJointSpec)) { - spec <- parsedJointSpec[[i]] - axes <- spec$axes - if (length(axes) > 1L) { - res <- .twasDispatchComposedQtlSumStats( - spec, data, methods, contexts, traitIds, dataType, verbose, - retainFit = retainFit, retainFitDetail = retainFitDetail) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindTwasWeights(out, res, ldSketch = getLdSketch(data)) - next - } - axis <- axes[[1L]] - res <- switch(axis, - context = .twasDispatchCrossContextQtlSumStats( - spec, data, methods, contexts, traitIds, dataType, verbose, - retainFit = retainFit, retainFitDetail = retainFitDetail), - trait = .twasDispatchCrossTraitQtlSumStats( - spec, data, methods, contexts, traitIds, dataType, verbose, - retainFit = retainFit, retainFitDetail = retainFitDetail), - study = .twasDispatchCrossStudyQtlSumStats( - spec, data, methods, contexts, traitIds, dataType, verbose, - retainFit = retainFit, retainFitDetail = retainFitDetail), - stop(sprintf("Unsupported axis: %s", axis))) - if (!is.null(res)) - out <- if (is.null(out)) res - else .rbindTwasWeights(out, res, ldSketch = getLdSketch(data)) - } - out + # Engine routing (jointEngine.R). + pipeline <- new("TwasJointPipeline", config = list( + retainFitDetail = retainFitDetail, dataType = dataType, + cvFolds = 0L, fitFullData = TRUE, standardized = TRUE, + ldSketch = getLdSketch(data))) + .runJointSpecs(parsedJointSpec, data, dataForm = "sumstats", pipeline = pipeline, + jointMethods = intersect(methods, "mrmash"), + contexts = contexts, traitIds = traitIds, + args = list(methodArgs = list(), verbose = verbose)) } diff --git a/R/ld.R b/R/ld.R index 46975d00..aedb815d 100644 --- a/R/ld.R +++ b/R/ld.R @@ -1243,7 +1243,9 @@ ldPruneByCorrelation <- function(X, corThres = 0.8, .ldPruneSnprelate <- function(X, corThres, verbose) { if (!requireNamespace("SNPRelate", quietly = TRUE) || !requireNamespace("gdsfmt", quietly = TRUE)) { + # nocov start stop("Packages 'SNPRelate' and 'gdsfmt' are required for backend='snprelate'.") + # nocov end } p <- ncol(X) snpNames <- colnames(X) %||% paste0("snp", seq_len(p)) @@ -1555,10 +1557,14 @@ enforceDesignFullRank <- function(X, C, ldClumpByScore <- function(X, score, chr, pos, r2 = 0.2, windowKb = 100 / r2, verbose = FALSE) { if (!requireNamespace("bigsnpr", quietly = TRUE)) { + # nocov start stop("Package 'bigsnpr' is required. Install from CRAN: install.packages('bigsnpr')") + # nocov end } if (!requireNamespace("bigstatsr", quietly = TRUE)) { + # nocov start stop("Package 'bigstatsr' is required. Install from CRAN: install.packages('bigstatsr')") + # nocov end } if (ncol(X) < 1L) stop("ldClumpByScore: X must have at least one column") @@ -1924,10 +1930,12 @@ computeLd <- function(X, method = c("sample", "population", "gcta"), #' @return Correlation matrix. #' @noRd .computeLdSnprelate <- function(X) { + # nocov start if (!requireNamespace("SNPRelate", quietly = TRUE)) stop("Package 'SNPRelate' is required for backend='snprelate'") if (!requireNamespace("gdsfmt", quietly = TRUE)) stop("Package 'gdsfmt' is required for backend='snprelate'") + # nocov end tmpGds <- tempfile(fileext = ".gds") on.exit(unlink(tmpGds), add = TRUE) @@ -1962,8 +1970,10 @@ computeLd <- function(X, method = c("sample", "population", "gcta"), #' @return Correlation matrix (r, not r²). #' @noRd .computeLdSnpstats <- function(X) { + # nocov start if (!requireNamespace("snpStats", quietly = TRUE)) stop("Package 'snpStats' is required for backend='snpstats'") + # nocov end # snpStats expects counts of the B allele as raw codes: 1=AA, 2=AB, 3=BB, 0=NA # pecotmr dosage is ALT count (0/1/2), so map: 0->1, 1->2, 2->3, NA->0 diff --git a/R/mashPipeline.R b/R/mashPipeline.R index 951193b9..0cf339a7 100644 --- a/R/mashPipeline.R +++ b/R/mashPipeline.R @@ -60,12 +60,16 @@ mashPipeline <- function(sumStatsList, alpha, setSeed = 999) { inputScale <- match.arg(inputScale) if (!requireNamespace("mashr", quietly = TRUE)) { + # nocov start stop("To use this function, please install mashr: ", "https://cran.r-project.org/web/packages/mashr/index.html") + # nocov end } if (!requireNamespace("flashier", quietly = TRUE)) { + # nocov start stop("To use this function, please install flashier: ", "https://github.com/willwerscheid/flashier") + # nocov end } # Accept either a base list or a S4Vectors::SimpleList. diff --git a/R/mashWrapper.R b/R/mashWrapper.R index 3137c5f6..8cd76a63 100644 --- a/R/mashWrapper.R +++ b/R/mashWrapper.R @@ -136,140 +136,6 @@ filterMixtureComponents <- function(conditionsToKeep, U, w = NULL, wCutoff = 1e- return(list(U = U, w = w)) } -#' @importFrom purrr map_dfr -#' @importFrom dplyr bind_rows -mergeSusieCs <- function(susieFit, coverage = "CS_95_susie", method = NULL) { - if (is.null(coverage)) coverage <- "CS_95_susie" - coverage <- .translateLegacyCsColumnName(coverage) - # Identify variant IDs that are associated with more than one credible set - identifyOverlapSets <- function(variantsSetsAndPipsList) { - overlapSets <- list() - for (variantId in names(variantsSetsAndPipsList)) { - sets <- variantsSetsAndPipsList[[variantId]][["sets"]] - if (length(sets) > 1) { - overlapSets[[variantId]] <- sets - } - } - return(overlapSets) - } - # Merge overlapping credible sets using connected components. - mergeAndUpdateOverlapSets <- function(variantsSetsAndPipsList, overlapSets) { - allSets <- unique(unlist(overlapSets)) - if (length(allSets) == 0) return(list()) - - parent <- setNames(allSets, allSets) - findRoot <- function(x) { - while (!identical(parent[[x]], x)) x <- parent[[x]] - x - } - unionSets <- function(a, b) { - rootA <- findRoot(a) - rootB <- findRoot(b) - if (!identical(rootA, rootB)) parent[[rootB]] <<- rootA - } - - for (sets in overlapSets) { - if (length(sets) > 1) { - for (s in sets[-1]) unionSets(sets[[1]], s) - } - } - - components <- split(names(parent), vapply(names(parent), findRoot, character(1))) - setNameMap <- list() - for (members in components) { - label <- paste(sort(members), collapse = ",") - for (s in members) { - setNameMap[[s]] <- label - } - } - - # Update each variant's credible set names - updatedCredibleSets <- lapply( - setNames(names(variantsSetsAndPipsList), names(variantsSetsAndPipsList)), - function(variantId) { - currentSets <- variantsSetsAndPipsList[[variantId]][["sets"]] - mapped <- intersect(currentSets, names(setNameMap)) - if (length(mapped) > 0) { - setNameMap[[mapped[1]]] - } else { - paste(sort(unique(currentSets)), collapse = ",") - } - } - ) - return(updatedCredibleSets) - } - # Loop through each condition and their credible sets - extractTopLoci <- function(susieFit, coverage) { - # Build a flat data frame of (variant_id, pip, set_name) across all conditions - condNames <- names(susieFit[[1]]) - rows <- map_dfr(seq_along(condNames), function(i) { - condData <- susieFit[[1]][[i]] - topLoci <- .translateLegacyTopLociCsColumns(condData[["top_loci"]]) - if (is.null(topLoci) || nrow(topLoci) == 0) return(NULL) - pipCol <- resolvePipColumn(topLoci, method) - if (is.null(pipCol)) return(NULL) - - setNum <- unique(topLoci[[coverage]]) - setNum <- setNum[!is.na(setNum) & setNum != 0] - if (length(setNum) == 0) return(NULL) - - map_dfr(setNum, function(sn) { - rows <- topLoci[topLoci[[coverage]] == sn & !is.na(topLoci[[coverage]]), - c("variant_id", pipCol), drop = FALSE] - names(rows)[names(rows) == pipCol] <- "pip" - rows$set_name <- paste0("cs_", i, "_", sn) - rows - }) - }) - - if (is.null(rows) || nrow(rows) == 0) return(list()) - - # Aggregate by variant_id preserving first-seen order - seenOrder <- unique(rows$variant_id) - splitRows <- split(rows, factor(rows$variant_id, levels = seenOrder)) - lapply(splitRows, function(df) { - list(sets = df$set_name, pips = df$pip) - }) - } - - combineTopLoci <- function(extractedResult) { - if (length(extractedResult) == 0) return(NULL) - - # Compute overlap sets once, outside the per-variant loop - overlapSets <- identifyOverlapSets(extractedResult) - hasOverlaps <- length(overlapSets) != 0 - mergedSets <- if (hasOverlaps) { - mergeAndUpdateOverlapSets(extractedResult, overlapSets = overlapSets) - } else { - NULL - } - - topLociDf <- do.call(rbind, lapply(names(extractedResult), function(variantId) { - maxPip <- max(unlist(extractedResult[[variantId]]$pips)) - medianPip <- median(unlist(extractedResult[[variantId]]$pips)) - credibleSetNames <- if (hasOverlaps) { - mergedSets[[variantId]] - } else { - paste(sort(unique(unlist(extractedResult[[variantId]]$sets))), collapse = ",") - } - data.frame( - variant_id = variantId, credibleSetNames = credibleSetNames, - maxPip = maxPip, medianPip = medianPip, stringsAsFactors = FALSE - ) - })) - return(topLociDf) - } - - extractedTopLoci <- extractTopLoci(susieFit, coverage = coverage) - if (length(extractedTopLoci) == 0) return(NULL) - combinedTopLociDf <- combineTopLoci(extractedTopLoci) - if (is.null(combinedTopLociDf) || nrow(combinedTopLociDf) == 0) return(NULL) - # Clean up row names and make sure variant_id is unique - combinedTopLociDf <- combinedTopLociDf[!duplicated(combinedTopLociDf$variant_id), ] - rownames(combinedTopLociDf) <- NULL # Clean up row names - return(combinedTopLociDf) -} - #' @export mashRandNullSample <- function(dat, nRandom, nNull, excludeCondition, seed = NULL) { diff --git a/R/pvalCombine.R b/R/pvalCombine.R index ce02d343..7445b12d 100644 --- a/R/pvalCombine.R +++ b/R/pvalCombine.R @@ -47,7 +47,9 @@ pvalAcat <- function(pvals, naRm = TRUE) { pvalHmp <- function(pvals) { # Make sure harmonicmeanp is installed if (!requireNamespace("harmonicmeanp", quietly = TRUE)) { + # nocov start stop("To use this function, please install harmonicmeanp: https://cran.r-project.org/web/packages/harmonicmeanp/index.html") + # nocov end } # https://search.r-project.org/CRAN/refmans/harmonicmeanp/html/pLandau.html L <- length(pvals) @@ -61,7 +63,9 @@ pvalHmp <- function(pvals) { pvalPoolr <- function(pvals, method, R) { if (!requireNamespace("poolr", quietly = TRUE)) { + # nocov start stop("To use this method, please install poolr: install.packages('poolr')") + # nocov end } fn <- switch(method, fisher = poolr::fisher, @@ -74,7 +78,9 @@ pvalPoolr <- function(pvals, method, R) { pvalGbj <- function(zScores, R, method) { if (!requireNamespace("GBJ", quietly = TRUE)) { + # nocov start stop("To use this method, please install GBJ: install.packages('GBJ')") + # nocov end } result <- switch(method, gbj = GBJ::GBJ(test_stats = zScores, cor_mat = R), @@ -98,7 +104,9 @@ pvalGbj <- function(zScores, R, method) { pvalAspu <- function(zScores = NULL, pvals = NULL, R, method) { if (!requireNamespace("aSPU", quietly = TRUE)) { + # nocov start stop("To use this method, please install aSPU: install.packages('aSPU')") + # nocov end } switch(method, aspu = { diff --git a/R/qtlSumStats.R b/R/qtlSumStats.R index 290df53a..2e844bca 100644 --- a/R/qtlSumStats.R +++ b/R/qtlSumStats.R @@ -149,9 +149,12 @@ QtlSumStats <- function(study, context, trait, entry, genome, ldSketch, study, context, trait)) } if (length(idx) > 1L) { + # Unreachable: the class validity enforces (study, context, trait) uniqueness. + # nocov start stop(sprintf( - "Multiple entries match (study='%s', context='%s', trait='%s'); ", - "tuple uniqueness violation.")) + "Multiple entries match (study='%s', context='%s', trait='%s'); tuple uniqueness violation.", + study, context, trait)) + # nocov end } idx } diff --git a/R/regularizedRegressionWrappers.R b/R/regularizedRegressionWrappers.R index b4fa0136..bcc51b1c 100644 --- a/R/regularizedRegressionWrappers.R +++ b/R/regularizedRegressionWrappers.R @@ -255,187 +255,6 @@ sdprWeights <- function(stat, LD, ...) { return(model$betaEst) } -# Shared helper for susie/susieAsh/susieInf weight extraction. -# @param fit A susie fit object (or NULL to fit from X, y). -# @param X Genotype matrix (optional). -# @param y Phenotype vector (optional). -# @param requiredFields Fields that must be present in the fit to extract weights. -# @param fitArgs Extra arguments passed to susieR::susie when fit is NULL. -# @param ... Additional arguments forwarded to susieR::susie. -#' @importFrom susieR coef.susie susie -#' @noRd -.susieExtractWeights <- function(fit, X, y, requiredFields, fitArgs = list(), retainFit = FALSE, ...) { - if (is.null(fit)) { - fit <- do.call(susie, c(list(X = X, y = y), fitArgs, list(...))) - } - if (!is.null(X) && length(fit$pip) != ncol(X)) { - stop(paste0( - "Dimension mismatch on number of variant in susie fit ", length(fit$pip), - " and TWAS weights ", ncol(X), ". " - )) - } - if (all(requiredFields %in% names(fit))) { - fit$intercept <- 0 - weights <- coef.susie(fit)[-1] - } else { - weights <- rep(0, length(fit$pip)) - } - if (retainFit) attr(weights, "fit") <- fit - return(weights) -} - -#' Compute SuSiE TWAS weights -#' -#' Extracts coefficients from an existing SuSiE fit or fits `susieR::susie()` -#' from `X` and `y` before extracting weights. -#' -#' @param X Genotype matrix. Required when `susieFit` is NULL. -#' @param y Phenotype vector. Required when `susieFit` is NULL. -#' @param susieFit Optional fitted SuSiE object. -#' @param retainFit If TRUE, stores the fitted object as an attribute on the returned weights. -#' @param ... Additional arguments passed to `susieR::susie()` when fitting. -#' @return Numeric vector of variant weights. -#' @export -susieWeights <- function(X = NULL, y = NULL, susieFit = NULL, retainFit = FALSE, ...) { - .susieExtractWeights(susieFit, X, y, - requiredFields = c("alpha", "mu", "X_column_scale_factors"), - retainFit = retainFit, ...) -} - -#' Compute SuSiE-ASH TWAS weights -#' -#' Extracts coefficients from an existing SuSiE-ASH fit or fits `susieR::susie()` -#' with `unmappable_effects = "ash"`. -#' -#' @param X Genotype matrix. Required when `susieAshFit` is NULL. -#' @param y Phenotype vector. Required when `susieAshFit` is NULL. -#' @param susieAshFit Optional fitted SuSiE-ASH object. -#' @param retainFit If TRUE, stores the fitted object as an attribute on the returned weights. -#' @param ... Additional arguments passed to `susieR::susie()` when fitting. -#' @return Numeric vector of variant weights. -#' @export -susieAshWeights <- function(X = NULL, y = NULL, susieAshFit = NULL, retainFit = FALSE, ...) { - .susieExtractWeights(susieAshFit, X, y, - requiredFields = c("alpha", "mu", "theta", "X_column_scale_factors"), - fitArgs = list(unmappable_effects = "ash", convergence_method = "pip"), - retainFit = retainFit, ...) -} - -#' Compute SuSiE-inf TWAS weights -#' -#' Extracts coefficients from an existing SuSiE-inf fit or fits `susieR::susie()` -#' with `unmappable_effects = "inf"`. -#' -#' @section Non-zero weights with zero PIPs: -#' SuSiE-inf decomposes effects into a mappable component (driven by `alpha * -#' mu`, reported as per-variant PIPs) and an infinitesimal component (driven by -#' `theta`). When the fit converges with no mappable effects -- all `V` and `mu` -#' zero, so every `pip == 0` -- the returned weights are still non-zero because -#' `susieR::coef.susie` adds `theta / X_column_scale_factors` to the mappable -#' coefficient. This is intentional: it captures diffuse polygenic signal that -#' the mappable component could not localize to any credible set. Consumers -#' that interpret per-variant PIPs as a gate on whether to use the weights -#' should be aware that low or zero PIPs do not imply zero TWAS weights here. -#' -#' @param X Genotype matrix. Required when `susieInfFit` is NULL. -#' @param y Phenotype vector. Required when `susieInfFit` is NULL. -#' @param susieInfFit Optional fitted SuSiE-inf object. -#' @param retainFit If TRUE, stores the fitted object as an attribute on the returned weights. -#' @param ... Additional arguments passed to `susieR::susie()` when fitting. -#' @return Numeric vector of variant weights. -#' @export -susieInfWeights <- function(X = NULL, y = NULL, susieInfFit = NULL, retainFit = FALSE, ...) { - .susieExtractWeights(susieInfFit, X, y, - requiredFields = c("alpha", "mu", "theta", "X_column_scale_factors"), - fitArgs = list(unmappable_effects = "inf", convergence_method = "pip"), - retainFit = retainFit, ...) -} - -# ============================================================================= -# SuSiE-RSS weight functions -# ============================================================================= - -# Internal helper: extract weights from a susieRss fit. -# Mirrors .susie_extract_weights but uses the RSS interface. -#' @importFrom susieR coef.susie susie_rss -#' @noRd -.susieRssExtractWeights <- function(fit, z, R, n, - requiredFields, fitArgs = list(), - retainFit = FALSE) { - if (is.null(fit)) { - fit <- do.call(susie_rss, c(list(z = z, R = R, n = n), fitArgs)) - } - if (length(fit$pip) != nrow(R)) { - stop(paste0( - "Dimension mismatch: susieRss fit has ", length(fit$pip), - " variants but R has ", nrow(R), " rows.")) - } - if (all(requiredFields %in% names(fit))) { - fit$intercept <- 0 - weights <- coef.susie(fit)[-1] - } else { - weights <- rep(0, length(fit$pip)) - } - if (retainFit) attr(weights, "fit") <- fit - return(weights) -} - -#' Compute SuSiE-RSS TWAS weights -#' -#' Extracts coefficients from an existing SuSiE-RSS fit or fits -#' \code{susieR::susie_rss()} from summary statistics and LD. -#' -#' @param stat List with components \code{z} (z-scores), \code{n} (sample sizes). -#' @param LD LD correlation matrix. -#' @param susieRssFit Optional pre-fitted SuSiE-RSS object. -#' @param retainFit If TRUE, stores the fitted object as an attribute. -#' @param methodArgs Named list of additional arguments passed to -#' \code{susieR::susie_rss()}. Use this instead of \code{...} to avoid -#' partial matching of short argument names (e.g. \code{L}) to the -#' \code{LD} parameter. -#' @return Numeric vector of variant weights. -#' @export -susieRssWeights <- function(stat, LD, susieRssFit = NULL, retainFit = TRUE, - methodArgs = list()) { - .susieRssExtractWeights(fit = susieRssFit, z = stat$z, R = LD, n = median(stat$n), - requiredFields = c("alpha", "mu", "X_column_scale_factors"), - fitArgs = methodArgs, - retainFit = retainFit) -} - -#' Compute SuSiE-inf-RSS TWAS weights -#' -#' Extracts coefficients from an existing SuSiE-inf-RSS fit or fits -#' \code{susieR::susie_rss()} with \code{unmappable_effects = "inf"}. -#' -#' @inheritParams susieRssWeights -#' @param susieInfRssFit Optional pre-fitted SuSiE-inf-RSS object. -#' @return Numeric vector of variant weights. -#' @export -susieInfRssWeights <- function(stat, LD, susieInfRssFit = NULL, retainFit = TRUE, - methodArgs = list()) { - .susieRssExtractWeights(fit = susieInfRssFit, z = stat$z, R = LD, n = median(stat$n), - requiredFields = c("alpha", "mu", "theta", "X_column_scale_factors"), - fitArgs = c(list(unmappable_effects = "inf", convergence_method = "pip"), methodArgs), - retainFit = retainFit) -} - -#' Compute SuSiE-ASH-RSS TWAS weights -#' -#' Extracts coefficients from an existing SuSiE-ASH-RSS fit or fits -#' \code{susieR::susie_rss()} with \code{unmappable_effects = "ash"}. -#' -#' @inheritParams susieRssWeights -#' @param susieAshRssFit Optional pre-fitted SuSiE-ASH-RSS object. -#' @return Numeric vector of variant weights. -#' @export -susieAshRssWeights <- function(stat, LD, susieAshRssFit = NULL, retainFit = TRUE, - methodArgs = list()) { - .susieRssExtractWeights(fit = susieAshRssFit, z = stat$z, R = LD, n = median(stat$n), - requiredFields = c("alpha", "mu", "theta", "X_column_scale_factors"), - fitArgs = c(list(unmappable_effects = "ash", convergence_method = "pip"), methodArgs), - retainFit = retainFit) -} #' Compute mr.mash TWAS weights #' @@ -462,7 +281,9 @@ mrmashWeights <- function(mrmashFit = NULL, X = NULL, Y = NULL, retainFit = FALSE, fitDetail = c("slim", "full"), ...) { if (!requireNamespace("mr.mashr", quietly = TRUE)) { + # nocov start stop("Package 'mr.mashr' is required. Install with: devtools::install_github('stephenslab/mr.mashr')") + # nocov end } dotArgs <- list(...) if (is.null(mrmashFit)) { @@ -490,192 +311,6 @@ mrmashWeights <- function(mrmashFit = NULL, X = NULL, Y = NULL, out } -#' Compute mvSuSiE TWAS weights -#' -#' Extracts coefficients from an existing mvSuSiE fit or fits `fitMvsusie()` -#' from `X` and `Y`. -#' -#' @param mvsusieFit Optional fitted mvSuSiE object. -#' @param X Genotype matrix. Required when `mvsusieFit` is NULL. -#' @param Y Phenotype matrix. Required when `mvsusieFit` is NULL. -#' @param priorVariance Optional mvSuSiE prior variance list. -#' @param residualVariance Optional residual variance matrix. -#' @param L Maximum number of components. -#' @param LGreedy Initial greedy number of components. -#' @param verbose If TRUE, prints mvSuSiE fitting progress. -#' @param ... Additional arguments passed to `fitMvsusie()` when fitting. -#' @return Matrix of variant weights. -#' @export -mvsusieWeights <- function(mvsusieFit = NULL, X = NULL, Y = NULL, - priorVariance = NULL, residualVariance = NULL, - L = 30, LGreedy = 5, verbose = FALSE, ...) { - if (!requireNamespace("mvsusieR", quietly = TRUE)) { - stop("Package 'mvsusieR' is required. Install with: devtools::install_github('stephenslab/mvsusieR')") - } - if (is.null(mvsusieFit)) { - message("mvsusieFit is not provided; fitting mvSuSiE now ...") - if (is.null(X) || is.null(Y)) { - stop("Both X and Y must be provided if mvsusieFit is NULL.") - } - if (is.null(priorVariance)) priorVariance <- mvsusieR::create_mixture_prior(R = ncol(Y)) - if (!is.null(LGreedy)) LGreedy <- min(LGreedy, L) - - mvsusieFit <- fitMvsusie( - X = X, Y = Y, L = L, L_greedy = LGreedy, prior_variance = priorVariance, - residual_variance = residualVariance, - estimate_residual_variance = TRUE, - verbose = verbose, ... - ) - } - return(mvsusieR::coef.mvsusie(mvsusieFit)[-1, ]) -} - -# Build the wavelet synthesis (inverse-DWT) matrix S (n_wac x nFeat) for the -# basis fSuSiE uses, by reconstructing each unit wavelet coefficient through the -# SAME $D / $C assignment as out_prep.susiF (detail columns -> $D, the coarsest -# scaling column -> last $C entry), then `wavethresh::wr`. A wavelet-coefficient -# row `c` then maps to the feature domain as `c %*% S`. `scaleCols` is the -# column index of the scaling coefficient(s) (per the prior family). fSuSiE's -# default basis (DaubLeAsymm, filter 10) matches `wavethresh::wd`'s default, the -# same one out_prep uses, so the plain `wd(rep(0, nWac))` template is consistent. -# @noRd -.fsusieSynthesisMatrix <- function(nWac, scaleCols) { - template <- wavethresh::wd(rep(0, nWac)) - reconstructUnit <- function(k) { - coeffRow <- numeric(nWac) - coeffRow[k] <- 1 - temp <- template - temp$D <- coeffRow[-scaleCols] - temp$C[length(temp$C)] <- sum(coeffRow[scaleCols]) - as.numeric(wavethresh::wr(temp)) - } - do.call(rbind, lapply(seq_len(nWac), reconstructUnit)) -} - -#' Compute fSuSiE feature-level TWAS weights -#' -#' Collapses a functional SuSiE (\code{fsusieR::susiF}) fit back to a -#' \code{variants x features} weight matrix usable for TWAS prediction of each -#' molecular feature. fSuSiE fits the regression in the wavelet domain, storing -#' per-SNP posterior-mean wavelet effects \code{fitted_wc[[l]]} -#' (\code{nSNP x n_wac}) and inclusion probabilities \code{alpha[[l]]}. Because -#' the inverse wavelet transform \code{wr()} is linear, the posterior-mean -#' prediction pushes through to a per-SNP, per-feature weight matrix: -#' \deqn{W[j, f] = \sum_l alpha[[l]][j] \cdot -#' \mathrm{wr}\!\left(fitted\_wc[[l]][j, ] / csd\_X[j]\right)[f].} -#' This is the exact analog of \code{coef.susie} for scalar SuSiE (all SNPs, -#' alpha-weighted), which spreads weight across the credible set — more robust -#' for out-of-sample TWAS than fSuSiE's in-sample lead-SNP summary -#' (\code{update_cal_indf}). -#' -#' The reconstruction uses the raw posterior wavelet coefficients -#' \code{fitted_wc}, so it is independent of the \code{post_processing} mode -#' (\code{"smash"}/\code{"TI"}/\code{"HMM"}/\code{"none"}) — that smoothing only -#' denoises the alpha-collapsed display curve \code{fitted_func}, never the -#' per-SNP predictive coefficients. The \code{$D}/\code{$C} coefficient layout -#' and wavelet basis mirror \code{out_prep.susiF}, so the feature-domain output -#' matches fSuSiE's own conventions. -#' -#' @param fsusieFit A fitted \code{fsusieR::susiF} object. Must retain -#' \code{fitted_wc}, \code{alpha}, \code{csd_X}, \code{n_wac}, and -#' \code{outing_grid} (i.e. an untrimmed fit). Required. -#' @param X,Y Accepted for call-compatibility with the multivariate -#' weight-method dispatch in \code{\link{learnTwasWeights}}, which invokes -#' every method as \code{fn(X = ., Y = ., ...)}. fSuSiE is a functional method -#' that cannot be refit from a bare \code{(X, Y)} pair (it needs feature -#' positions and the wavelet model), so these are ignored: a fitted -#' \code{fsusieFit} is always required. -#' @param variantIds Optional character vector of variant IDs (length = number -#' of SNPs in the fit) for the matrix row names. Defaults to -#' \code{names(fsusieFit$csd_X)} / \code{names(fsusieFit$pip)}. -#' @param featureNames Optional character vector of feature (outcome) names for -#' the matrix column names. Defaults to the fit's \code{outing_grid}. -#' @param retainFit If TRUE, stores the fit as an attribute on the result. -#' @return A numeric matrix of variant (rows) by feature (columns) weights. -#' @export -fsusieWeights <- function(fsusieFit = NULL, X = NULL, Y = NULL, - variantIds = NULL, featureNames = NULL, - retainFit = FALSE) { - if (is.null(fsusieFit)) { - stop("fsusieWeights: `fsusieFit` is required. fSuSiE is functional and ", - "cannot be refit from a bare (X, Y); fit it via fineMappingPipeline() ", - "and pass the fitted fsusieR::susiF object.") - } - # Fast path: a trimmed fit carries the precomputed variants x features weight - # matrix in `$coef` (fineMappingPipeline computes it eagerly while the full - # fit is in hand, because trimming drops fitted_wc/csd_X/...). Return it. - if (is.matrix(fsusieFit$coef) && - is.null(fsusieFit$fitted_wc)) { - W <- fsusieFit$coef - if (!is.null(variantIds) && length(variantIds) == nrow(W)) - rownames(W) <- variantIds - if (retainFit) attr(W, "fit") <- fsusieFit - return(W) - } - if (!requireNamespace("fsusieR", quietly = TRUE)) { - stop("Package 'fsusieR' is required for fsusieWeights().") - } - if (!requireNamespace("wavethresh", quietly = TRUE)) { - stop("Package 'wavethresh' is required for fsusieWeights().") - } - fit <- fsusieFit - missingSlots <- setdiff(c("fitted_wc", "alpha", "csd_X", "n_wac", - "outing_grid"), names(fit)) - if (length(missingSlots) > 0L) { - stop("fsusieWeights: the fSuSiE fit is missing required slot(s): ", - paste(missingSlots, collapse = ", "), - ". Pass an untrimmed fit (these are dropped when trimmed).") - } - - csdX <- as.numeric(fit$csd_X) - p <- length(csdX) - nWac <- fit$n_wac - - # alpha may be a list (one vector per effect, the fsusieR::susiF default) or - # a matrix/data.frame (L x nSNP) after fsusieWrapper reshaping. Normalize to - # a list of per-effect vectors. - alpha <- fit$alpha - alphaList <- if (is.list(alpha) && !is.data.frame(alpha)) { - lapply(alpha, as.numeric) - } else { - am <- as.matrix(alpha) - lapply(seq_len(nrow(am)), function(l) as.numeric(am[l, ])) - } - L <- length(fit$fitted_wc) - - # Scaling-coefficient column(s): the coarsest level for a per-scale prior, - # else the last column. Mirrors the two branches of out_prep.susiF. - perScale <- "mixture_normal_per_scale" %in% class(fsusieR::get_G_prior(fit)) - indxLst <- fsusieR::gen_wavelet_indx(log2(length(fit$outing_grid))) - scaleCols <- if (perScale) indxLst[[length(indxLst)]] - else ncol(as.matrix(fit$fitted_wc[[1L]])) - - # One inverse transform per wavelet coefficient (built once), then every SNP / - # effect is a matrix multiply: W = sum_l (alpha_l/csd_X-scaled fitted_wc_l) %*% S. - S <- .fsusieSynthesisMatrix(nWac, scaleCols) - nFeat <- ncol(S) - invCsd <- 1 / csdX - - W <- matrix(0, nrow = p, ncol = nFeat) - for (l in seq_len(L)) { - wc <- as.matrix(fit$fitted_wc[[l]]) - rowScale <- alphaList[[l]] * invCsd - W <- W + (rowScale * wc) %*% S - } - - rn <- variantIds - if (is.null(rn)) rn <- names(fit$csd_X) - if (is.null(rn)) rn <- names(fit$pip) - if (!is.null(rn) && length(rn) == p) rownames(W) <- rn - cn <- featureNames - if (is.null(cn) && !is.null(fit$outing_grid) && - length(fit$outing_grid) == nFeat) { - cn <- as.character(fit$outing_grid) - } - if (!is.null(cn) && length(cn) == nFeat) colnames(W) <- cn - if (retainFit) attr(W, "fit") <- fit - W -} #' Compute mr.mash-RSS TWAS weights from summary statistics #' @@ -741,8 +376,10 @@ mrmashRssWeights <- function(stat, LD, mrmashRssFit = NULL, retainFit = FALSE, fitDetail = c("slim", "full"), ...) { if (!requireNamespace("mr.mashr", quietly = TRUE)) { + # nocov start stop("Package 'mr.mashr' is required. ", "Install with: devtools::install_github('stephenslab/mr.mash.alpha')") + # nocov end } if (is.null(mrmashRssFit)) { Z <- if (is.matrix(stat$z)) stat$z else as.matrix(stat$z) @@ -794,63 +431,6 @@ mrmashRssWeights <- function(stat, LD, mrmashRssFit = NULL, weights } -#' Compute mvSuSiE-RSS TWAS weights from summary statistics -#' -#' Multi-context summary-statistics analog of \code{\link{mvsusieWeights}}: -#' extracts coefficients from an existing \code{mvsusieR::mvsusie_rss} fit, -#' or fits one from \code{stat$z} (variants x conditions) and \code{LD}. -#' -#' Follows the \code{*_rss_weights(stat, LD, ...)} contract. Expects -#' \code{stat$z} to be a numeric matrix (variants x conditions) and -#' \code{stat$n} a per-context vector or scalar. -#' -#' @param stat A list with \code{z} (matrix variants x conditions) and -#' \code{n} (numeric vector or scalar). -#' @param LD LD correlation matrix. -#' @param mvsusieRssFit Optional pre-fitted \code{mvsusieRss} object. -#' @param priorVariance Optional mvSuSiE prior variance specification. -#' When NULL, \code{mvsusieR::create_mixture_prior()} is used with -#' \code{R = ncol(stat$z)}. -#' @param residualVariance Optional residual covariance matrix. -#' @param L Maximum number of single effects (default 30). -#' @param LGreedy Initial greedy effect count (default 5). -#' @param retainFit If TRUE, attaches the fitted object as an attribute. -#' @param ... Additional arguments forwarded to \code{mvsusieR::mvsusie_rss}. -#' -#' @return A numeric matrix of per-variant per-context weights -#' (variants x conditions). -#' @export -mvsusieRssWeights <- function(stat, LD, mvsusieRssFit = NULL, - priorVariance = NULL, - residualVariance = NULL, - L = 30, LGreedy = 5, - retainFit = FALSE, ...) { - if (!requireNamespace("mvsusieR", quietly = TRUE)) { - stop("Package 'mvsusieR' is required. ", - "Install with: devtools::install_github('stephenslab/mvsusieR')") - } - if (is.null(mvsusieRssFit)) { - Z <- if (is.matrix(stat$z)) stat$z else as.matrix(stat$z) - if (ncol(Z) < 2) { - stop("mvsusieRssWeights expects stat$z to have >= 2 columns ", - "(one per context). For single-context use susieRssWeights().") - } - # mvsusieR::mvsusie_rss expects N to be a single scalar - nScalar <- as.numeric(stats::median(stat$n)) - if (is.null(priorVariance)) { - priorVariance <- mvsusieR::create_mixture_prior(R = ncol(Z)) - } - if (!is.null(LGreedy)) LGreedy <- min(LGreedy, L) - mvsusieRssFit <- fitMvsusieRss( - Z = Z, R = LD, N = nScalar, - prior_variance = priorVariance, - residual_variance = residualVariance, ... - ) - } - weights <- mvsusieR::coef.mvsusie(mvsusieRssFit)[-1, , drop = FALSE] - if (retainFit) attr(weights, "fit") <- mvsusieRssFit - weights -} # Get a reasonable setting for the standard deviations of the mixture # components in the mixture-of-normals prior based on the data (X, y). @@ -889,7 +469,9 @@ initPriorSd <- function(X, y, n = 30) { glmnetWeights <- function(X, y, alpha) { # Check if glmnet is installed if (!requireNamespace("glmnet", quietly = TRUE)) { + # nocov start stop("To use this function, please install glmnet: https://cran.r-project.org/web/packages/glmnet/index.html") + # nocov end } eff.wgt <- matrix(0, ncol = 1, nrow = ncol(X)) keep <- .dropZeroVariance(X, "glmnetWeights") @@ -958,7 +540,9 @@ mrashWeights <- function(X, y, initPriorSd = TRUE, retainFit = FALSE, ...) { bayesAlphabetWeights <- function(X, y, method, Z = NULL, h2 = NULL, nit = 5000, nburn = 1000, nthin = 5, ...) { # Make sure qgg is installed if (!requireNamespace("qgg", quietly = TRUE)) { + # nocov start stop("To use this function, please install qgg: https://cran.r-project.org/web/packages/qgg/index.html") + # nocov end } # check for identical row lengths of response and genotype if (!(length(y) == nrow(X))) { @@ -1823,7 +1407,9 @@ l0learnRssWeights <- function(stat, LD, #' @keywords internal ncvregWeights <- function(X, y, penalty, nfolds = 5, ...) { if (!requireNamespace("ncvreg", quietly = TRUE)) { + # nocov start stop("To use this function, please install ncvreg: https://cran.r-project.org/package=ncvreg") + # nocov end } eff.wgt <- matrix(0, ncol = 1, nrow = ncol(X)) keep <- .dropZeroVariance(X, "ncvregWeights") @@ -1880,7 +1466,9 @@ mcpWeights <- function(X, y, nfolds = 5, ...) { #' @export l0learnWeights <- function(X, y, penalty = "L0", nFolds = 5, ...) { if (!requireNamespace("L0Learn", quietly = TRUE)) { + # nocov start stop("To use this function, please install L0Learn: https://cran.r-project.org/package=L0Learn") + # nocov end } eff.wgt <- matrix(0, ncol = 1, nrow = ncol(X)) keep <- .dropZeroVariance(X, "l0learnWeights") @@ -1923,7 +1511,9 @@ l0learnWeights <- function(X, y, penalty = "L0", nFolds = 5, ...) { #' @keywords internal bglrWeights <- function(X, y, model, nIter, burnIn, thin, etaArgs = list(), ...) { if (!requireNamespace("BGLR", quietly = TRUE)) { + # nocov start stop("To use this function, please install BGLR: https://cran.r-project.org/package=BGLR") + # nocov end } eff.wgt <- rep(0, ncol(X)) keep <- .dropZeroVariance(X, "bglrWeights") @@ -2019,7 +1609,9 @@ bLassoWeights <- function(X, y, nIter = 10000, burnIn = 2000, thin = 5, ...) { #' @export dprWeights <- function(X, y, fittingMethod = "VB", retainFit = FALSE, ...) { if (!requireNamespace("RcppDPR", quietly = TRUE)) { + # nocov start stop("To use this function, please install RcppDPR: https://cran.r-project.org/package=RcppDPR") + # nocov end } eff.wgt <- rep(0, ncol(X)) keep <- .dropZeroVariance(X, "dprWeights") @@ -2141,11 +1733,15 @@ mrmashWrapper <- function(X, verbose = FALSE, ...) { # Make sure glmnet is installed if (!requireNamespace("glmnet", quietly = TRUE)) { + # nocov start stop("To use this function, please install glmnet: https://cran.r-project.org/web/packages/glmnet/index.html") + # nocov end } # Make sure mr.mashr is installed if (!requireNamespace("mr.mashr", quietly = TRUE)) { + # nocov start stop("To use this function, please install mr.mashr: https://github.com/stephenslab/mr.mashr") + # nocov end } # Check input data if (!exists(".Random.seed")) { @@ -2502,7 +2098,9 @@ buildMrmashPriorMatrices <- function(Bhat, Shat, K = NULL, hetgrid = c(0, 0.25, 0.5, 0.75, 1), singletons = TRUE) { if (!requireNamespace("mr.mashr", quietly = TRUE)) { + # nocov start stop("Package 'mr.mashr' is required.") + # nocov end } if (is.null(dataDrivenPriorMatrices) && !isTRUE(canonicalPriorMatrices)) { stop("Supply dataDrivenPriorMatrices or set canonicalPriorMatrices = TRUE.") diff --git a/R/relatednessQc.R b/R/relatednessQc.R index 1ad29bf5..b53afccd 100644 --- a/R/relatednessQc.R +++ b/R/relatednessQc.R @@ -59,10 +59,12 @@ filterRelatedness <- function( maxIterations = 20L, verbose = FALSE) { + # nocov start if (!requireNamespace("igraph", quietly = TRUE)) stop("Package 'igraph' is required for filterRelatedness") if (!requireNamespace("plinkQC", quietly = TRUE)) stop("Package 'plinkQC' is required for filterRelatedness") + # nocov end analysisType <- match.arg(analysisType) relatedness <- as.data.frame(relatedness) diff --git a/R/sldscPostprocessingPipeline.R b/R/sldscPostprocessingPipeline.R index fbe0a7b3..ceafdc36 100644 --- a/R/sldscPostprocessingPipeline.R +++ b/R/sldscPostprocessingPipeline.R @@ -1,87 +1,66 @@ #' @title sLDSC Postprocessing Pipeline -#' @description Postprocess polyfun's per-trait sLDSC outputs (one -#' single-target run per target annotation, plus an optional joint -#' run) into a single results object with per-trait tau*, EnrichStat -#' with back-solved jackknife SE, and a DerSimonian-Laird random- -#' effects meta-analysis across traits. -#' @param traitSinglePrefixes Named list of file prefixes for the -#' single-target polyfun runs (one entry per trait; each value is a -#' length-N character vector of `/` prefixes, one per -#' target annotation). -#' @param traitJointPrefix Named list of file prefixes for the joint -#' polyfun runs (one entry per trait; each value a `/` -#' prefix into the joint LD-score dir). Pass an empty list to skip -#' the joint branch. -#' @param targetAnnoDir Directory containing the target `.annot.gz` -#' files used for sd_C and binary detection (typically the joint dir). -#' @param frqfileDir Optional directory of `.frq` files for the MAF -#' cutoff. Pass \code{NULL} to skip MAF filtering. -#' @param plinkName File-name prefix of the PLINK reference panel -#' (default \code{"ADSP_chr"}; combined per-chromosome as -#' \code{paste0(plinkName, chrom)}). -#' @param mafCutoff Numeric MAF cutoff applied via the `.frq` files. -#' Default \code{0.05}. Set to \code{0} to opt out. -#' @param targetCategories Optional character vector of target -#' annotation names to retain. Auto-detected from the joint run when +#' @description Postprocess polyfun's per-trait sLDSC outputs (already loaded +#' into an \code{\link{SldscData}} object) into a single results object with +#' per-trait tau*, EnrichStat with back-solved jackknife SE, and a +#' DerSimonian-Laird random-effects meta-analysis across traits. All file I/O +#' is done up front by the reader functions (\code{\link{readSldscAnnot}}, +#' \code{\link{readSldscFrq}}, \code{\link{readSldscTrait}}); this pipeline is +#' pure computation over the in-memory \code{SldscData}. +#' @param sldscData An \code{\link{SldscData}} object bundling the annotation +#' table, the reference-panel allele frequencies, and the per-trait +#' single/joint polyfun runs. +#' @param mafCutoff Numeric MAF cutoff applied via the object's frq table. +#' Default \code{0.05}. Set to \code{0} to opt out (requires frq data when +#' \code{> 0}). +#' @param targetCategories Optional character vector of target annotation names +#' to retain. Auto-detected from the joint run (or first single run) when #' \code{NULL}. #' @param targetLabels Optional display names, same length / order as -#' \code{targetCategories}, applied to every output column / tau* -#' block colname. -#' @return A list with \code{per_trait} (per-trait standardised tables), -#' meta tables (\code{tau_star_meta}, \code{E_meta}, -#' \code{enrich_stat_meta}), and a \code{params} record of the call -#' options. +#' \code{targetCategories}, applied to every output column / tau* block +#' colname. +#' @return A list with \code{per_trait} (per-trait standardised tables), meta +#' tables (\code{tauStar}, \code{enrichment}, \code{enrichstat}), and a +#' \code{params} record of the call options. +#' @seealso \code{\link{SldscData}}, \code{\link{readSldscAnnot}}, +#' \code{\link{readSldscFrq}}, \code{\link{readSldscTrait}} +#' @importFrom stats median +#' @importFrom methods is +#' @include SldscData.R #' @export -sldscPostprocessingPipeline <- function(traitSinglePrefixes, - traitJointPrefix, - targetAnnoDir, - frqfileDir = NULL, - plinkName = "ADSP_chr", +sldscPostprocessingPipeline <- function(sldscData, mafCutoff = 0.05, targetCategories = NULL, targetLabels = NULL) { - traitNames <- names(traitSinglePrefixes) - if (is.null(traitNames)) - stop("sldscPostprocessingPipeline: traitSinglePrefixes must be a named list.") + if (!is(sldscData, "SldscData")) + stop("sldscPostprocessingPipeline: `sldscData` must be an SldscData object.") + traitNames <- getTraitNames(sldscData) + if (length(traitNames) == 0L) + stop("sldscPostprocessingPipeline: SldscData has no traits.") message("[sldsc] Computing M_ref...") - MRef <- computeSldscMRef(targetAnnoDir = targetAnnoDir, - frqfileDir = frqfileDir, - plinkName = plinkName, - mafCutoff = mafCutoff) + MRef <- computeSldscMRef(sldscData, mafCutoff = mafCutoff) message(sprintf("[sldsc] M_ref = %d (MAF cutoff %g)", MRef, mafCutoff)) message("[sldsc] Computing per-annotation sd...") - sdAnnotFull <- computeSldscAnnotSd(targetAnnoDir = targetAnnoDir, - frqfileDir = frqfileDir, - plinkName = plinkName, - mafCutoff = mafCutoff) + sdAnnotFull <- computeSldscAnnotSd(sldscData, mafCutoff = mafCutoff) message(sprintf("[sldsc] sd computed for %d annotation columns", length(sdAnnotFull))) message("[sldsc] Detecting binary vs continuous annotations...") - isBinaryFull <- isBinarySldscAnnot(targetAnnoDir = targetAnnoDir) + isBinaryFull <- isBinarySldscAnnot(sldscData) # Polyfun renames target columns to `_0` (file_idx=0 in --ref-ld-chr); - # mirror that suffix so intersect() with pivotRun$categories matches. + # mirror that suffix so intersect() with a run's categories matches. names(sdAnnotFull) <- paste0(names(sdAnnotFull), "_0") names(isBinaryFull) <- paste0(names(isBinaryFull), "_0") - # Auto-detect target categories from a representative run. + # A representative run for auto-detection: the first trait's joint run when + # present, otherwise its first single run. + pivotRun <- getTraitRun(sldscData, traitNames[1], "joint") + if (is.null(pivotRun)) + pivotRun <- getTraitRun(sldscData, traitNames[1], "single", 1L) + if (is.null(targetCategories)) { - pivotRun <- NULL - if (!is.null(traitJointPrefix) && length(traitJointPrefix) > 0) { - jp <- traitJointPrefix[[1]] - if (is.character(jp) && length(jp) == 1L && !is.na(jp) && nzchar(jp)) { - pivotRun <- tryCatch(readSldscTrait(jp), error = function(e) NULL) - } - } - if (is.null(pivotRun) && - length(traitSinglePrefixes) > 0L && - length(traitSinglePrefixes[[1]]) > 0L) { - pivotRun <- tryCatch(readSldscTrait(traitSinglePrefixes[[1]][1]), - error = function(e) NULL) - } if (is.null(pivotRun)) stop("sldscPostprocessingPipeline: cannot auto-detect targetCategories.") targetCategories <- intersect(pivotRun$categories, names(sdAnnotFull)) @@ -112,25 +91,21 @@ sldscPostprocessingPipeline <- function(traitSinglePrefixes, nBaseline, baselinePreview, if (nBaseline > 3L) ", ..." else "")) } - message(sprintf("[sldsc] Auto-detected %d target categories", length(targetCategories))) + message(sprintf("[sldsc] Auto-detected %d target categories", + length(targetCategories))) } baselineCategories <- character(0) - if (!is.null(traitJointPrefix) && length(traitJointPrefix) > 0L) { - jp <- traitJointPrefix[[1]] - if (is.character(jp) && length(jp) == 1L && !is.na(jp) && nzchar(jp)) { - pivot <- tryCatch(readSldscTrait(jp), error = function(e) NULL) - if (!is.null(pivot)) - baselineCategories <- setdiff(pivot$categories, targetCategories) - } - } + jointPivot <- getTraitRun(sldscData, traitNames[1], "joint") + if (!is.null(jointPivot)) + baselineCategories <- setdiff(jointPivot$categories, targetCategories) if (length(baselineCategories) > 0L) { msgHead <- paste(head(baselineCategories, 5), collapse = ", ") msgTail <- if (length(baselineCategories) > 5) ", ..." else "" message(sprintf("[sldsc] Detected %d baseline annotations: %s%s", length(baselineCategories), msgHead, msgTail)) } else { - message("[sldsc] No baseline annotations detected (joint-run prefix missing or unreadable).") + message("[sldsc] No baseline annotations detected (no joint run on the first trait).") } sdAnnot <- sdAnnotFull[targetCategories] @@ -145,19 +120,15 @@ sldscPostprocessingPipeline <- function(traitSinglePrefixes, singleSummaries <- list() singleBlocks <- list() singleH2gs <- numeric(0) - singPrefs <- traitSinglePrefixes[[trait]] + singleRuns <- getTraitRun(sldscData, trait, "single") + if (is.null(singleRuns)) singleRuns <- list() for (i in seq_along(targetCategories)) { catName <- targetCategories[i] - if (i > length(singPrefs)) break - pref <- singPrefs[i] - run <- tryCatch(readSldscTrait(pref), error = function(e) { - warning(sprintf("[sldsc] Failed to read single %s for %s: %s", - catName, trait, e$message)); NULL - }) - if (is.null(run)) next + if (i > length(singleRuns)) break std <- tryCatch( - standardizeSldscTrait(run, sdAnnot[catName], MRef, - targetCategories = catName, mode = "single"), + standardizeSldscTrait(sldscData, trait, mode = "single", idx = i, + sdAnnot = sdAnnot[catName], MRef = MRef, + targetCategories = catName), error = function(e) { warning(sprintf("[sldsc] Failed to standardize single %s for %s: %s", catName, trait, e$message)); NULL @@ -177,29 +148,20 @@ sldscPostprocessingPipeline <- function(traitSinglePrefixes, blocksJoint <- NULL jointH2g <- NA_real_ nBlocksTrait <- NA_integer_ - if (!is.null(traitJointPrefix) && trait %in% names(traitJointPrefix)) { - jp <- traitJointPrefix[[trait]] - if (is.character(jp) && length(jp) == 1L && !is.na(jp) && nzchar(jp)) { - run <- tryCatch(readSldscTrait(jp), error = function(e) { - warning(sprintf("[sldsc] Failed to read joint for %s: %s", + if (!is.null(getTraitRun(sldscData, trait, "joint"))) { + std <- tryCatch( + standardizeSldscTrait(sldscData, trait, mode = "joint", + sdAnnot = sdAnnot, MRef = MRef, + targetCategories = targetCategories), + error = function(e) { + warning(sprintf("[sldsc] Failed to standardize joint for %s: %s", trait, e$message)); NULL }) - if (!is.null(run)) { - std <- tryCatch( - standardizeSldscTrait(run, sdAnnot, MRef, - targetCategories = targetCategories, - mode = "joint"), - error = function(e) { - warning(sprintf("[sldsc] Failed to standardize joint for %s: %s", - trait, e$message)); NULL - }) - if (!is.null(std)) { - jointDf <- std$summary - blocksJoint <- std$tau_star_blocks - jointH2g <- std$h2g - nBlocksTrait <- std$nBlocks - } - } + if (!is.null(std)) { + jointDf <- std$summary + blocksJoint <- std$tau_star_blocks + jointH2g <- std$h2g + nBlocksTrait <- std$nBlocks } } diff --git a/R/sldscWrapper.R b/R/sldscWrapper.R index f0a7e7d8..4efe251f 100644 --- a/R/sldscWrapper.R +++ b/R/sldscWrapper.R @@ -99,6 +99,76 @@ readSldscTrait <- function(prefix) { } +#' @title Read target annotation files (.annot.gz) into one table +#' +#' @description Reads the per-chromosome polyfun `.annot.gz` files in a +#' directory and stacks them into a single \code{data.frame} of \code{CHR}, +#' \code{SNP}, and the annotation columns. This is the I/O step feeding the +#' \code{annot} slot of \code{\link{SldscData}}; the computation +#' (\code{\link{computeSldscAnnotSd}}, \code{\link{isBinarySldscAnnot}}) then +#' runs on the loaded table, not on paths. +#' +#' @param targetAnnoDir Character. Directory of `.annot.gz` files. +#' @param annotCols Character or integer vector, default NULL. Annotation +#' columns to keep. NULL keeps all non-standard columns (auto-detected). +#' @return A \code{data.frame}: \code{CHR}, \code{SNP}, and annotation columns. +#' @importFrom vroom vroom +#' @importFrom tidyselect all_of +#' @export +readSldscAnnot <- function(targetAnnoDir, annotCols = NULL) { + if (!dir.exists(targetAnnoDir)) + stop("readSldscAnnot: targetAnnoDir does not exist: ", targetAnnoDir) + annoFiles <- list.files(targetAnnoDir, pattern = "\\.annot\\.gz$", + full.names = TRUE) + if (length(annoFiles) == 0L) + stop("readSldscAnnot: no .annot.gz files in: ", targetAnnoDir) + + detected <- .sldscDetectAnnotCols(annoFiles[1]) + colsUse <- if (is.null(annotCols)) detected + else if (is.numeric(annotCols)) detected[annotCols] + else annotCols + if (length(colsUse) == 0L) + stop("readSldscAnnot: no annotation columns to read.") + + parts <- lapply(annoFiles, function(f) { + as.data.frame(vroom(f, col_select = all_of(c("CHR", "SNP", colsUse)), + show_col_types = FALSE)) + }) + do.call(rbind, parts) +} + + +#' @title Read PLINK allele-frequency files (.frq) into one table +#' +#' @description Reads the per-chromosome PLINK `.frq` files for the reference +#' panel and stacks them into a single \code{data.frame} of \code{CHR}, +#' \code{SNP}, \code{MAF}. Feeds the \code{frq} slot of \code{\link{SldscData}}. +#' +#' @param frqfileDir Character. Directory of `.frq` files. +#' @param plinkName Character. Filename prefix (files at `{plinkName}{chr}.frq`). +#' Falls back to all `*.frq` in the directory when the prefix matches nothing. +#' @return A \code{data.frame}: \code{CHR}, \code{SNP}, \code{MAF}. +#' @importFrom vroom vroom +#' @importFrom tidyselect all_of +#' @export +readSldscFrq <- function(frqfileDir, plinkName = "ADSP_chr") { + if (!dir.exists(frqfileDir)) + stop("readSldscFrq: frqfileDir does not exist: ", frqfileDir) + pat <- paste0("^", gsub("([.])", "\\\\\\1", plinkName), "[0-9]+\\.frq$") + frqFiles <- list.files(frqfileDir, pattern = pat, full.names = TRUE) + if (length(frqFiles) == 0L) + frqFiles <- list.files(frqfileDir, pattern = "\\.frq$", full.names = TRUE) + if (length(frqFiles) == 0L) + stop("readSldscFrq: no .frq files in: ", frqfileDir) + + parts <- lapply(frqFiles, function(f) { + as.data.frame(vroom(f, col_select = all_of(c("CHR", "SNP", "MAF")), + show_col_types = FALSE)) + }) + do.call(rbind, parts) +} + + #' @title Compute per-annotation standard deviation, MAF-restricted #' #' @description Computes the standard deviation of each annotation column in the @@ -106,61 +176,41 @@ readSldscTrait <- function(prefix) { #' `.frq` files. Required for internal consistency with polyfun's regression, #' which operates on MAF > cutoff SNPs by default. #' -#' @param targetAnnoDir Character. Directory containing target annotation files -#' (one per chromosome) in polyfun's `.annot.gz` format. -#' @param frqfileDir Character or NULL. Directory containing PLINK `.frq` files -#' for the reference panel. Required when `mafCutoff > 0`; the function -#' errors if missing. -#' @param plinkName Character. Filename prefix of the `.frq` files -#' (e.g. `"ADSP_chr"`). Files are expected at `{plinkName}{chr}.frq`. -#' @param mafCutoff Numeric, default `0.05`. +#' @param sldscData An \code{\link{SldscData}} object (its \code{annot} and +#' \code{frq} slots supply the annotation values and MAF, respectively). +#' @param mafCutoff Numeric, default `0.05`. Requires frq data when > 0. #' @param annotCols Character or integer vector, default NULL. Annotation columns #' to compute sd for. If NULL, all annotation columns are used. #' #' @return Named numeric vector of \eqn{sd_C} values, one per annotation. #' #' @importFrom stats setNames var +#' @importFrom methods is #' @export -computeSldscAnnotSd <- function(targetAnnoDir, frqfileDir = NULL, - plinkName = "ADSP_chr", - mafCutoff = 0.05, annotCols = NULL) { - if (mafCutoff > 0 && (is.null(frqfileDir) || !dir.exists(frqfileDir))) { +computeSldscAnnotSd <- function(sldscData, mafCutoff = 0.05, annotCols = NULL) { + if (!is(sldscData, "SldscData")) + stop("computeSldscAnnotSd: `sldscData` must be an SldscData object.") + annot <- getAnnotData(sldscData) + frq <- getFrqData(sldscData) + if (mafCutoff > 0 && nrow(frq) == 0L) stop("computeSldscAnnotSd: mafCutoff = ", mafCutoff, - " requires frqfileDir, but '", frqfileDir, "' is not a directory.") - } - if (!dir.exists(targetAnnoDir)) { - stop("computeSldscAnnotSd: targetAnnoDir does not exist: ", targetAnnoDir) - } - - annoFiles <- list.files(targetAnnoDir, pattern = "\\.annot\\.gz$", full.names = TRUE) - if (length(annoFiles) == 0L) - stop("computeSldscAnnotSd: no .annot.gz files in: ", targetAnnoDir) + " requires frq data (read via readSldscFrq); none present.") - detected <- .sldscDetectAnnotCols(annoFiles[1]) - if (is.null(annotCols)) { - colsUse <- detected - } else if (is.numeric(annotCols)) { - colsUse <- detected[annotCols] - } else { - colsUse <- annotCols - } + colsUse <- if (is.null(annotCols)) getAnnotCols(sldscData) + else if (is.numeric(annotCols)) getAnnotCols(sldscData)[annotCols] + else annotCols if (length(colsUse) == 0L) stop("computeSldscAnnotSd: no annotation columns to process.") num <- setNames(numeric(length(colsUse)), colsUse) den <- 0 - for (annoFile in annoFiles) { - dat <- vroom(annoFile, show_col_types = FALSE) + # Pool within-chromosome variance (matches polyfun's per-file accumulation). + for (chrom in unique(annot$CHR)) { + dat <- annot[annot$CHR == chrom, , drop = FALSE] if (mafCutoff > 0) { - chrom <- .sldscChromFromFilename(annoFile) - if (is.na(chrom)) - stop("computeSldscAnnotSd: could not parse chromosome from: ", annoFile) - frqFile <- file.path(frqfileDir, paste0(plinkName, chrom, ".frq")) - if (!file.exists(frqFile)) - stop("computeSldscAnnotSd: .frq file not found: ", frqFile) - frq <- vroom(frqFile, col_select = c("SNP", "MAF"), show_col_types = FALSE) - dat <- merge(dat, frq, by = "SNP", all.x = FALSE, all.y = FALSE) + dat <- merge(dat, frq[, c("SNP", "MAF")], by = "SNP", + all.x = FALSE, all.y = FALSE) dat <- dat[!is.na(dat$MAF) & dat$MAF > mafCutoff, ] } if (nrow(dat) <= 1L) next @@ -192,64 +242,29 @@ computeSldscAnnotSd <- function(targetAnnoDir, frqfileDir = NULL, #' \item `mafCutoff == 0` (all-M variant): count ALL SNPs across all #' `.frq` files (the same set polyfun's `.l2.M` sums). #' } -#' `targetAnnoDir` is a fallback only, used when no `.frq` directory is -#' given; that fallback counts `.l2.ldscore` rows and is WRONG when the target -#' was HM3-subsetted (it then yields the regression SNP count, not M_ref). +#' When no frq data is present, `mafCutoff == 0` falls back to the number of +#' annotation rows; `mafCutoff > 0` errors (a MAF-restricted count needs frq). #' -#' @param targetAnnoDir Character or NULL. Fallback only - directory of -#' `.l2.ldscore` files. Used only when `frqfileDir` is unavailable. -#' @param frqfileDir Character or NULL. Directory of PLINK `.frq` files; the -#' preferred (recommended) source of M_ref. -#' @param plinkName Character. Filename prefix of `.frq` files. +#' @param sldscData An \code{\link{SldscData}} object (its \code{frq} slot is +#' the reference-panel SNP set). #' @param mafCutoff Numeric, default `0.05`. #' #' @return Scalar integer. #' +#' @importFrom methods is #' @export -computeSldscMRef <- function(targetAnnoDir = NULL, frqfileDir = NULL, - plinkName = "ADSP_chr", mafCutoff = 0.05) { - ## --- preferred path: count reference-panel SNPs from the .frq files --- - if (!is.null(frqfileDir) && dir.exists(frqfileDir)) { - pat <- paste0("^", gsub("([.])", "\\\\\\1", plinkName), "[0-9]+\\.frq$") - frqFiles <- list.files(frqfileDir, pattern = pat, full.names = TRUE) - if (length(frqFiles) == 0L) - frqFiles <- list.files(frqfileDir, pattern = "\\.frq$", full.names = TRUE) - if (length(frqFiles) > 0L) { - total <- 0L - for (f in frqFiles) { - frq <- vroom(f, col_select = "MAF", show_col_types = FALSE) - total <- total + if (mafCutoff > 0) - sum(!is.na(frq$MAF) & frq$MAF > mafCutoff) else nrow(frq) - } - return(as.integer(total)) - } +computeSldscMRef <- function(sldscData, mafCutoff = 0.05) { + if (!is(sldscData, "SldscData")) + stop("computeSldscMRef: `sldscData` must be an SldscData object.") + frq <- getFrqData(sldscData) + if (nrow(frq) > 0L) { + return(as.integer(if (mafCutoff > 0) + sum(!is.na(frq$MAF) & frq$MAF > mafCutoff) else nrow(frq))) } - - ## --- fallback only (no .frq dir): count target .l2.ldscore rows --- if (mafCutoff > 0) stop("computeSldscMRef: mafCutoff = ", mafCutoff, - " requires frqfileDir (to count MAF>cutoff reference-panel SNPs).") - if (is.null(targetAnnoDir) || !dir.exists(targetAnnoDir)) - stop("computeSldscMRef: need frqfileDir, or targetAnnoDir as fallback.") - files <- list.files(targetAnnoDir, - pattern = "\\.l2\\.ldscore\\.(gz|parquet)$", - full.names = TRUE) - if (length(files) == 0L) - stop("computeSldscMRef: no .frq files and no .l2.ldscore files found.") - warning("computeSldscMRef: no .frq dir given; counting target .l2.ldscore ", - "rows as M_ref. If the target was HM3-subsetted this UNDERCOUNTS the ", - "reference panel and shrinks tau*. Pass frqfileDir instead.") - total <- 0L - for (f in files) { - if (endsWith(f, ".parquet")) { - if (!requireNamespace("arrow", quietly = TRUE)) - stop("computeSldscMRef: install 'arrow' to read .parquet files.") - total <- total + nrow(arrow::read_parquet(f)) - } else { - total <- total + nrow(vroom(f, show_col_types = FALSE)) - } - } - as.integer(total) + " requires frq data (read via readSldscFrq); none present.") + as.integer(nrow(getAnnotData(sldscData))) } @@ -258,40 +273,27 @@ computeSldscMRef <- function(targetAnnoDir = NULL, frqfileDir = NULL, #' @description Inspects each annotation column and returns whether its values #' lie in \{0, 1\} (binary) or take other values (continuous). #' -#' @param targetAnnoDir Character. Directory containing the target `.annot.gz` -#' files (one per chromosome). +#' @param sldscData An \code{\link{SldscData}} object. #' @param annotCols Character or integer vector, default NULL. #' #' @return Named logical vector: TRUE for binary, FALSE for continuous. #' -#' @importFrom stats setNames +#' @importFrom stats setNames na.omit +#' @importFrom methods is #' @export -isBinarySldscAnnot <- function(targetAnnoDir, annotCols = NULL) { - annoFiles <- list.files(targetAnnoDir, pattern = "\\.annot\\.gz$", full.names = TRUE) - if (length(annoFiles) == 0L) - stop("isBinarySldscAnnot: no .annot.gz files in: ", targetAnnoDir) - - detected <- .sldscDetectAnnotCols(annoFiles[1]) - if (is.null(annotCols)) { - colsUse <- detected - } else if (is.numeric(annotCols)) { - colsUse <- detected[annotCols] - } else { - colsUse <- annotCols - } +isBinarySldscAnnot <- function(sldscData, annotCols = NULL) { + if (!is(sldscData, "SldscData")) + stop("isBinarySldscAnnot: `sldscData` must be an SldscData object.") + annot <- getAnnotData(sldscData) + colsUse <- if (is.null(annotCols)) getAnnotCols(sldscData) + else if (is.numeric(annotCols)) getAnnotCols(sldscData)[annotCols] + else annotCols isBinary <- setNames(rep(TRUE, length(colsUse)), colsUse) - - for (f in annoFiles) { - dat <- vroom(f, col_select = all_of(colsUse), show_col_types = FALSE) - for (col in colsUse) { - if (!isBinary[[col]]) next - vals <- unique(na.omit(as.numeric(dat[[col]]))) - if (any(!(vals %in% c(0, 1)))) isBinary[[col]] <- FALSE - } - if (!any(isBinary)) break + for (col in colsUse) { + vals <- unique(na.omit(as.numeric(annot[[col]]))) + if (any(!(vals %in% c(0, 1)))) isBinary[[col]] <- FALSE } - isBinary } @@ -304,22 +306,33 @@ isBinarySldscAnnot <- function(targetAnnoDir, annotCols = NULL) { #' EnrichStat and back-solves its standard error from polyfun's reported #' `Enrichment_p` using \eqn{|Z| = \Phi^{-1}(1 - p/2)}. #' -#' @param traitData List from \code{\link{readSldscTrait}}. +#' @param sldscData An \code{\link{SldscData}} object (the run is pulled from it +#' via \code{getTraitRun}). +#' @param trait Character. Trait name (a key of the SldscData traits list). +#' @param mode Character: `"single"` or `"joint"`. +#' @param idx Integer or NULL. For `mode = "single"`, which of the trait's +#' single-target runs to standardize. #' @param sdAnnot Named numeric vector from \code{\link{computeSldscAnnotSd}}. #' @param MRef Scalar from \code{\link{computeSldscMRef}}. -#' @param targetCategories Character vector or NULL. If NULL, intersects -#' `traitData$categories` with `names(sdAnnot)`. -#' @param mode Character: `"single"` or `"joint"`. +#' @param targetCategories Character vector or NULL. If NULL, intersects the +#' run's `categories` with `names(sdAnnot)`. #' #' @return A list with `summary` (data frame), `tau_star_blocks` (matrix), #' `h2g`, `nBlocks`, `mode`. #' #' @importFrom stats qnorm var +#' @importFrom methods is #' @export -standardizeSldscTrait <- function(traitData, sdAnnot, MRef, - targetCategories = NULL, - mode = c("single", "joint")) { +standardizeSldscTrait <- function(sldscData, trait, mode = c("single", "joint"), + idx = NULL, sdAnnot, MRef, + targetCategories = NULL) { + if (!is(sldscData, "SldscData")) + stop("standardizeSldscTrait: `sldscData` must be an SldscData object.") mode <- match.arg(mode) + traitData <- getTraitRun(sldscData, trait, mode, idx) + if (is.null(traitData)) + stop("standardizeSldscTrait: no ", mode, " run for trait '", trait, "'", + if (!is.null(idx)) paste0(" (idx=", idx, ")") else "", ".") if (is.null(targetCategories)) targetCategories <- intersect(traitData$categories, names(sdAnnot)) if (length(targetCategories) == 0L) @@ -503,36 +516,3 @@ metaSldscRandom <- function(perTraitEstimates, category, list(summary = newDf) }) } - - -#' @title End-to-end S-LDSC post-processing across traits, single + joint in one pass -#' -#' @description Top-level orchestration. Reads polyfun outputs (one single-target -#' run per target plus, when available, one joint run per trait), standardizes -#' both modes, and runs the default random-effects meta across all traits. -#' -#' @param traitSinglePrefixes Named list. For each trait, a character vector -#' of length \eqn{N} giving the polyfun output prefixes for the \eqn{N} -#' single-target runs (order must match `targetCategories`). -#' @param traitJointPrefix Named character. For each trait, the polyfun output -#' prefix for the joint run. Pass `NA` (or `""`) for a trait without a joint run. -#' @param targetAnnoDir Character. Directory of target `.annot.gz` files used -#' for `sd_C` and binary detection (typically the joint-mode dir). -#' @param frqfileDir Character or NULL. -#' @param plinkName Character. Default `"ADSP_chr"`. -#' @param mafCutoff Numeric, default `0.05`. -#' @param targetCategories Character vector or NULL. Auto-detected from the -#' first available run if NULL. -#' @param targetLabels Character vector or NULL. Optional user-friendly display -#' names for the target annotations, same length and order as the resolved -#' `targetCategories` (e.g. `c("quantile_eQTL", "eQTL")` to replace the -#' polyfun `.results` names `c("ANNOT_1_0", "ANNOT_2_0")`). When given, every -#' `target` column and `tau*`-block column name in the output is renamed; -#' `params$target_categories` then holds the labels and -#' `params$target_categories_orig` keeps the original polyfun names. When NULL -#' (default), nothing is renamed - the original `.results` category names are -#' used as before. -#' -#' @return List with `per_trait`, `meta` (three frames), `params`. -#' -#' @export diff --git a/R/sumstatsQc.R b/R/sumstatsQc.R index 2b6cdb68..70cf9e1f 100644 --- a/R/sumstatsQc.R +++ b/R/sumstatsQc.R @@ -405,9 +405,10 @@ mergeVariantInfo <- function(variants1, variants2, all = TRUE) { matchIdx <- match(key2, key1) hasMatch <- !is.na(matchIdx) - flip <- hasMatch & - df2$alt[hasMatch] == df1$ref[matchIdx[hasMatch]] & - df2$ref[hasMatch] == df1$alt[matchIdx[hasMatch]] + flip <- rep(FALSE, nrow(df2)) + mi <- matchIdx[hasMatch] + flip[hasMatch] <- df2$alt[hasMatch] == df1$ref[mi] & + df2$ref[hasMatch] == df1$alt[mi] # Apply flips to df2 flipRows <- which(hasMatch)[flip[hasMatch]] @@ -2365,9 +2366,11 @@ krigingOutlierQc <- function(zScore, R, n, variantIds = NULL, } if (!requireNamespace("susieR", quietly = TRUE) || !all(c("estimate_s_rss", "kriging_rss") %in% getNamespaceExports("susieR"))) { + # nocov start stop("krigingOutlierQc requires a susieR that provides estimate_s_rss() and ", "kriging_rss(); the installed susieR does not. Install a susieR with the ", "kriging RSS diagnostic, or disable alleleFlipKriging.") + # nocov end } if (is.null(variantIds)) variantIds <- rownames(R) # susieR's kriging RSS diagnostic: estimate the LD-mismatch scale, then take diff --git a/R/twasWeights.R b/R/twasWeights.R index 3ba87a29..77d3a7a8 100644 --- a/R/twasWeights.R +++ b/R/twasWeights.R @@ -201,13 +201,13 @@ setMethod("getWeights", "TwasWeights", getWeights(entry) }) -#' @rdname getCvPerformance +#' @rdname getCvResult #' @export -setMethod("getCvPerformance", "TwasWeights", +setMethod("getCvResult", "TwasWeights", function(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) { entry <- getTwasWeights(x, study, context, trait, method) - getCvPerformance(entry) + getCvResult(entry) }) #' @rdname getFits @@ -351,7 +351,7 @@ setMethod("show", "TwasWeights", function(object) { mcp = list(fn = "mcp_weights", impl = "mcpWeights", args = list()), l0learn = list(fn = "l0learn_weights", impl = "l0learnWeights", args = list()), mvsusie = list(fn = "mvsusie_weights", impl = "mvsusieWeights", args = list(L = 30, L_greedy = 5)), - mrmash = list(fn = "mrmash_weights", impl = "mrmashWeights", args = list()), + mrmash = list(fn = "mrmash_weights", impl = "mrmashWeights", args = list(canonicalPriorMatrices = TRUE)), fsusie = list(fn = "fsusie_weights", impl = "fsusieWeights", args = list()) ) @@ -422,6 +422,82 @@ setMethod("show", "TwasWeights", function(object) { methodKey } +# Normalize a cross-validation fold specification into the canonical +# samplePartition data.frame(Sample, Fold) used throughout the CV machinery, so +# callers can pass folds in any of three forms and downstream code has a single +# source of truth. Accepts: +# * cvFolds an integer k: k-fold auto-partition (returned as NULL so the +# partition is generated per (study, context, trait) downstream). +# * cvFolds a list of vectors: each element defines one fold's SAMPLES, as +# numeric column indices into `sampleNames` or character sample names. +# * samplePartition a data.frame(Sample, Fold): used as-is. +# A list-form `cvFolds` and an explicit `samplePartition` are mutually exclusive. +# When `sampleNames` is supplied, the resolved partition is validated to +# reference only known samples, assign each sample to one fold, and cover every +# sample (a proper partition). Returns list(samplePartition, nFolds). +.normalizeCvFolds <- function(cvFolds = 0, samplePartition = NULL, + sampleNames = NULL) { + isListFolds <- is.list(cvFolds) && !is.data.frame(cvFolds) + if (isListFolds && !is.null(samplePartition)) + stop("Provide either a list-form `cvFolds` or an explicit ", + "`samplePartition`, not both.") + + validatePartition <- function(df) { + if (!all(c("Sample", "Fold") %in% names(df))) + stop("samplePartition must have columns `Sample` and `Fold`.") + df$Sample <- as.character(df$Sample) + dup <- unique(df$Sample[duplicated(df$Sample)]) + if (length(dup) > 0L) + stop("Fold partition assigns sample(s) to more than one fold: ", + paste(dup, collapse = ", ")) + if (!is.null(sampleNames)) { + unknown <- setdiff(df$Sample, sampleNames) + if (length(unknown) > 0L) + stop("Fold partition references unknown sample(s): ", + paste(unknown, collapse = ", ")) + uncovered <- setdiff(sampleNames, df$Sample) + if (length(uncovered) > 0L) + stop("Fold partition does not cover ", length(uncovered), + " sample(s) (folds must partition all samples), e.g. ", + paste(utils::head(uncovered, 5L), collapse = ", ")) + } + df + } + + if (!is.null(samplePartition)) { + df <- validatePartition(as.data.frame(samplePartition, + stringsAsFactors = FALSE)) + return(list(samplePartition = df, nFolds = length(unique(df$Fold)))) + } + + if (isListFolds) { + if (length(cvFolds) < 2L) + stop("A list-form `cvFolds` must define at least 2 folds.") + rows <- lapply(seq_along(cvFolds), function(k) { + ids <- cvFolds[[k]] + if (is.numeric(ids)) { + if (is.null(sampleNames)) + stop("Numeric fold vectors require `sampleNames` to resolve ", + "column indices.") + if (any(ids < 1L | ids > length(sampleNames))) + stop("Fold ", k, " has out-of-range sample column index/indices.") + ids <- sampleNames[as.integer(ids)] + } else { + ids <- as.character(ids) + } + data.frame(Sample = ids, Fold = k, stringsAsFactors = FALSE) + }) + df <- validatePartition(do.call(rbind, rows)) + return(list(samplePartition = df, nFolds = length(cvFolds))) + } + + k <- suppressWarnings(as.integer(cvFolds)) + if (length(k) != 1L || is.na(k)) + stop("`cvFolds` must be a single integer, a list of fold vectors, or ", + "paired with `samplePartition`.") + list(samplePartition = NULL, nFolds = k) +} + # Identify non-zero-variance columns of X. Returns a logical vector. #' @importFrom matrixStats colSds #' @noRd @@ -445,46 +521,6 @@ setMethod("show", "TwasWeights", function(object) { full } -# Filter weight methods that produced all-zero weights from CV. -# Returns filtered weightMethods list and warns about removed methods. -# @noRd -.filterZeroWeightMethods <- function(weightMethods, twasWeightsRes) { - if (is(twasWeightsRes, "TwasWeights")) { - methodTokens <- as.character(twasWeightsRes$method) - perMethodAllZero <- vapply(seq_len(nrow(twasWeightsRes)), function(i) { - w <- getWeights(twasWeightsRes$entry[[i]]) - all(w == 0, na.rm = TRUE) - }, logical(1)) - methodToZero <- tapply(perMethodAllZero, methodTokens, all) - methodKeys <- names(weightMethods) - methodBase <- sub("(_weights|Weights)$", "", methodKeys) - isAllZero <- vapply(methodBase, function(mb) { - if (mb %in% names(methodToZero)) isTRUE(methodToZero[[mb]]) else FALSE - }, logical(1)) - } else { - wl <- twasWeightsRes - isAllZero <- vapply(wl, function(w) all(w == 0, na.rm = TRUE), logical(1)) - } - removed <- names(weightMethods)[isAllZero] - if (length(removed) > 0) { - warning(sprintf( - "Methods %s are removed from CV because all their weights are zeros.", - paste(removed, collapse = ", ") - )) - } - weightMethods[!isAllZero] -} - -.susieWeightIntermediate <- function(fit, X) { - keep <- intersect(c("mu", "lbf_variable", "X_column_scale_factors", "pip", "theta"), names(fit)) - intermediate <- fit[keep] - if (!is.null(fit$sets$cs)) { - intermediate$csVariants <- setNames(lapply(fit$sets$cs, function(L) colnames(X)[L]), names(fit$sets$cs)) - intermediate$csPurity <- .translateSusiePurity(fit$sets$purity) - } - intermediate -} - .prepareSusieWeightMethods <- function(X, Y, weightMethods, fittedModels = NULL) { if (is.vector(Y)) Y <- matrix(Y, ncol = 1) if (is.null(fittedModels)) fittedModels <- list() @@ -574,7 +610,7 @@ setMethod("show", "TwasWeights", function(object) { #' @importFrom BiocParallel bplapply bpworkers MulticoreParam #' @importFrom quadprog solve.QP #' @export -twasWeightsCv <- function(X, Y, fold = NULL, samplePartitions = NULL, weightMethods = NULL, maxNumVariants = NULL, variantsToKeep = NULL, numThreads = 1, verbose = 1, ...) { +twasWeightsCv <- function(X, Y, fold = NULL, samplePartitions = NULL, weightMethods = NULL, maxNumVariants = NULL, variantsToKeep = NULL, numThreads = 1, verbose = 1, retainFits = FALSE, ...) { splitData <- function(X, Y, samplePartition, fold) { testIds <- samplePartition[which(samplePartition$Fold == fold), "Sample"] Xtrain <- X[!(rownames(X) %in% testIds), , drop = FALSE] @@ -731,30 +767,40 @@ twasWeightsCv <- function(X, Y, fold = NULL, samplePartitions = NULL, weightMeth # Xtest <- Xtest[, validColumns, drop=FALSE] foldWeightMethods <- .prepareSusieWeightMethods(Xtrain, Ytrain, weightMethods) - foldPreds <- setNames(lapply(names(foldWeightMethods), function(method) { + foldOut <- setNames(lapply(names(foldWeightMethods), function(method) { args <- foldWeightMethods[[method]] fnName <- .resolveMethodFunction(method, args) + capturedFit <- NULL if (method %in% multivariateWeightMethods) { - # Apply multivariate method to entire Y for this fold - if (!is.null(cvArgs$data_driven_prior_matrices_cv)) { - if (method %in% c("mrmash_weights", "mrmashWeights")) { - args$data_driven_prior_matrices <- cvArgs$data_driven_prior_matrices_cv[[j]] - } - if (method %in% c("mvsusie_weights", "mvsusieWeights")) { - args$prior_variance <- cvArgs$reweightedMixturePriorCv[[j]] - } + # Apply multivariate method to entire Y for this fold. Per-fold priors + # bind to the fitter's camelCase args: the fold's mr.mash data-driven + # prior, and the fold's reweighted mvSuSiE prior. + if (!is.null(cvArgs$data_driven_prior_matrices_cv) && + method %in% c("mrmash_weights", "mrmashWeights")) { + args$dataDrivenPriorMatrices <- cvArgs$data_driven_prior_matrices_cv[[j]] + } + if (!is.null(cvArgs$reweightedMixturePriorCv) && + method %in% c("mvsusie_weights", "mvsusieWeights")) { + args$prior_variance <- cvArgs$reweightedMixturePriorCv[[j]] + } + # Retain the per-fold fitted model (e.g. the fold's mr.mash fit) when + # asked and supported, so a caller can reuse it as that fold's prior + # (full-CV mvSuSiE). No extra fitting -- it is the model fit here. + if (isTRUE(retainFits) && "retainFit" %in% names(formals(fnName))) { + args$retainFit <- TRUE } weightsMatrix <- if (verbose < 2) { .quietEval(do.call(fnName, c(list(X = Xtrain, Y = Ytrain), args))) } else { do.call(fnName, c(list(X = Xtrain, Y = Ytrain), args)) } + capturedFit <- attr(weightsMatrix, "fit") + attr(weightsMatrix, "fit") <- NULL rownames(weightsMatrix) <- colnames(Xtrain) fullWeightsMatrix <- .embedWeights(weightsMatrix[validColumns, , drop = FALSE], validColumns, ncol(X), ncol(Y), colnames(X), colnames(Y)) Ypred <- Xtest %*% fullWeightsMatrix rownames(Ypred) <- rownames(Xtest) - return(Ypred) } else { Ypred <- sapply(1:ncol(Ytrain), function(k) { weights <- if (verbose < 2) { @@ -770,14 +816,15 @@ twasWeightsCv <- function(X, Y, fold = NULL, samplePartitions = NULL, weightMeth Xtest %*% fullWeights }) rownames(Ypred) <- rownames(Xtest) - return(Ypred) } + list(pred = Ypred, fit = capturedFit) }), names(foldWeightMethods)) if (verbose >= 1) { elapsed <- toc(quiet = TRUE) message(sprintf(" CV fold %d/%d done in %.1fs", j, fold, elapsed$toc - elapsed$tic)) } - foldPreds + list(preds = lapply(foldOut, `[[`, "pred"), + fits = lapply(foldOut, `[[`, "fit")) } if (numCores >= 2) { @@ -796,7 +843,7 @@ twasWeightsCv <- function(X, Y, fold = NULL, samplePartitions = NULL, weightMeth Ypred <- setNames(lapply(weightMethods, function(x) `dimnames<-`(matrix(NA, nrow(Y), ncol(Y)), dimnames(Y))), names(weightMethods)) for (j in seq_along(foldResults)) { for (method in names(weightMethods)) { - Ypred[[method]][rownames(foldResults[[j]][[method]]), ] <- foldResults[[j]][[method]] + Ypred[[method]][rownames(foldResults[[j]]$preds[[method]]), ] <- foldResults[[j]]$preds[[method]] } } @@ -847,7 +894,14 @@ twasWeightsCv <- function(X, Y, fold = NULL, samplePartitions = NULL, weightMeth } } names(metricsTable) <- .renameSuffix(names(metricsTable), "performance") - return(list(samplePartition = samplePartition, prediction = Ypred, performance = metricsTable, timeElapsed = proc.time() - st)) + # Per-fold retained fits (e.g. mr.mash), keyed [[fold]][[method]] with NULL + # entries dropped; empty per fold when retainFits = FALSE. Lets full-CV + # callers reuse each fold's fit as that fold's prior. + foldFits <- setNames(lapply(seq_along(foldResults), function(j) { + ff <- foldResults[[j]]$fits + ff[!vapply(ff, is.null, logical(1))] + }), paste0("fold_", seq_along(foldResults))) + return(list(samplePartition = samplePartition, prediction = Ypred, performance = metricsTable, foldFits = foldFits, timeElapsed = proc.time() - st)) } } @@ -1043,7 +1097,7 @@ learnTwasWeights <- function(X, Y, weightMethods, variantIds = variantIds, weights = wMat[, k], fits = if (retainFits) fitVal else NULL, - cvPerformance = NULL, + cvResult = NULL, standardized = isTRUE(standardized), dataType = dataType) } @@ -1057,7 +1111,7 @@ learnTwasWeights <- function(X, Y, weightMethods, variantIds = variantIds, weights = wPayload, fits = if (retainFits) fitVal else NULL, - cvPerformance = NULL, + cvResult = NULL, standardized = isTRUE(standardized), dataType = dataType) } diff --git a/R/twasWeightsPipeline.R b/R/twasWeightsPipeline.R index a34ced24..f34c8ba1 100644 --- a/R/twasWeightsPipeline.R +++ b/R/twasWeightsPipeline.R @@ -53,12 +53,12 @@ out[!vapply(out, is.null, logical(1))] } -# Flat per-region cvPerformance reporting table: one row per region carrying the +# Flat per-region cvResult reporting table: one row per region carrying the # region label plus that region's CV metric columns. Per-sample predictions are # intentionally omitted — this is a summary-reporting structure. .twasRegionCvDf <- function(entries, regionLabels) { rows <- Map(function(e, lab) { - cv <- getCvPerformance(e) + cv <- getCvResult(e) if (is.null(cv) || is.null(cv$metrics)) return(NULL) cbind(data.frame(region = lab, stringsAsFactors = FALSE), as.data.frame(as.list(cv$metrics), check.names = FALSE)) @@ -70,7 +70,7 @@ # Concatenate one method's per-region TwasWeightsEntry payloads into a single # entry. Variants/weights are stacked (regions are disjoint), the per-region -# fits are kept as a named list, and cvPerformance becomes the flat per-region +# fits are kept as a named list, and cvResult becomes the flat per-region # reporting data.frame. .twasMergeRegionEntries <- function(entries, regionLabels) { keep <- !vapply(entries, is.null, logical(1)) @@ -84,7 +84,7 @@ variantIds = unlist(lapply(entries, getVariantIds), use.names = FALSE), weights = weights, fits = setNames(lapply(entries, getFits), regionLabels), - cvPerformance = .twasRegionCvDf(entries, regionLabels), + cvResult = .twasRegionCvDf(entries, regionLabels), standardized = getStandardized(entries[[1L]]), dataType = getDataType(entries[[1L]])) } @@ -117,83 +117,31 @@ entry = mergedEntries) } -# Splice per-(method, outcome) cross-validated predictions and the 6-metric -# performance row from a `twasWeightsCv()` result into the matching -# `TwasWeightsEntry$cvPerformance` slot of every row in a TwasWeights -# collection. Rebuilds the collection because TwasWeightsEntry is treated -# as immutable. Rows for which no CV result is available (method not in -# the CV run, or trait not in the CV prediction matrix's columns) are -# emitted unchanged. -# -# The CV result keys carry a method suffix (`_predicted`, -# `_performance` in snake form, or `Predicted`, `Performance` in -# camel form); the TwasWeights `method` column carries the bare token -# (e.g. "lasso"). The trait column carries the outcome name, which must -# match the column name of the CV prediction matrix. +# Unpack a MashPrior input into the internal twasWeightsPipeline arguments: +# $fullPrior -> mr.mash full-data dataDrivenPriorMatrices +# $dataDrivenPriorMatricesCv -> per-fold priors for twasWeightsCv +# $samplePartition -> the CV folds (an explicit `samplePartition` +# arg wins; otherwise the partition the per-fold +# priors were computed on) +# NULL input returns all-NULL, preserving the supplied samplePartition. # @noRd -.spliceCvIntoTwasWeights <- function(twasWeights, twasCvResult, - ldSketch = NULL) { - if (is.null(twasCvResult) || is.null(twasCvResult$prediction) || - is.null(twasCvResult$performance)) { - return(twasWeights) +.unpackMashPrior <- function(mashPrior, samplePartition = NULL) { + if (is.null(mashPrior)) { + return(list(fullPrior = NULL, dataDrivenPriorMatricesCv = NULL, + samplePartition = samplePartition)) } - predKeyBase <- sub("(_predicted|Predicted)$", "", - names(twasCvResult$prediction)) - perfKeyBase <- sub("(_performance|Performance)$", "", - names(twasCvResult$performance)) - - pickKey <- function(bare, keys, base) { - hit <- which(base == bare) - if (length(hit) == 0L) NA_character_ else keys[[hit[[1L]]]] + if (!is(mashPrior, "MashPrior")) { + stop("`mashPrior` must be a MashPrior object (see ?MashPrior).") } - - studies <- as.character(twasWeights$study) - contexts <- as.character(twasWeights$context) - traits <- as.character(twasWeights$trait) - methodsV <- as.character(twasWeights$method) - newEntries <- as.list(twasWeights$entry) - - for (i in seq_along(newEntries)) { - bare <- methodsV[[i]] - pKey <- pickKey(bare, names(twasCvResult$prediction), predKeyBase) - mKey <- pickKey(bare, names(twasCvResult$performance), perfKeyBase) - if (is.na(pKey) || is.na(mKey)) next - predMat <- twasCvResult$prediction[[pKey]] - perfMat <- twasCvResult$performance[[mKey]] - if (is.null(predMat) || is.null(perfMat)) next - - tr <- traits[[i]] - predCols <- colnames(predMat) - perfRows <- rownames(perfMat) - colHit <- if (!is.null(predCols) && tr %in% predCols) tr - else if (ncol(predMat) == 1L) 1L else NA_integer_ - rowHit <- if (!is.null(perfRows) && tr %in% perfRows) tr - else if (nrow(perfMat) == 1L) 1L else NA_integer_ - if (is.na(colHit) || is.na(rowHit)) next - - predVec <- predMat[, colHit, drop = TRUE] - metRow <- perfMat[rowHit, , drop = TRUE] - cv <- list( - samplePartition = twasCvResult$samplePartition, - predictions = predVec, - metrics = metRow) - entry <- newEntries[[i]] - newEntries[[i]] <- TwasWeightsEntry( - variantIds = getVariantIds(entry), - weights = getWeights(entry), - fits = getFits(entry), - cvPerformance = cv, - standardized = getStandardized(entry), - dataType = getDataType(entry)) + cvFits <- getCvFits(mashPrior) + perFold <- if (!is.null(cvFits)) cvFits$perFoldFits else NULL + sp <- samplePartition + if (is.null(sp) && !is.null(cvFits) && !is.null(cvFits$samplePartition)) { + sp <- cvFits$samplePartition } - - TwasWeights( - study = studies, - context = contexts, - trait = traits, - method = methodsV, - entry = newEntries, - ldSketch = ldSketch) + list(fullPrior = getFullFit(mashPrior), + dataDrivenPriorMatricesCv = perFold, + samplePartition = sp) } # Mapping from short / canonical TWAS weight-method name to dispatch @@ -325,7 +273,27 @@ # keys in methodList are an internal detail of learnTwasWeights. tokens <- unique(methods) } else if (is.list(methods)) { - methodList <- methods + # Normalize a user-supplied named list the same way the character branch + # does: re-key each entry to its canonical `_weights` name, merge the + # caller's kwargs over the method's defaults, and carry the token->impl map + # as an "impl" attribute. Without this, named-list entries keep their bare + # token key with no "impl" attribute, and downstream function resolution in + # `.resolveMethodFunction` falls back to the bare token (e.g. "mrmash"), + # which is not a function (the implementation is `mrmashWeights`). + methodList <- list() + for (tk in names(methods)) { + base <- tryCatch(.twasMethodLookup(tk), error = function(e) NULL) + if (is.null(base)) { + # Fine-mapping-only / unknown token with no learner default: keep as-is + # (the downstream capability gate produces a method-specific message). + methodList[[tk]] <- methods[[tk]] + next + } + snake <- names(base)[[1L]] + merged <- modifyList(base[[snake]], methods[[tk]]) + attr(merged, "impl") <- attr(base[[snake]], "impl") + methodList[[snake]] <- merged + } tokens <- .twasTokensFromMethodList(methodList) } else { stop("`methods` must be a character vector, preset string, or named list.") @@ -448,12 +416,12 @@ rssFitArg = NULL, methodKey = "fsusie_weights")) -# Canonical list of fine-mapping tokens recognised by twasWeightsPipeline. -# Sourced from fineMappingPipeline's registry minus mrmash (which -# fineMappingPipeline hard-rejects as a TWAS-only method). +# Canonical list of fine-mapping tokens recognised by twasWeightsPipeline: +# fineMappingPipeline's registry, which now contains only fine-mapping methods +# (mr.mash is a TWAS method, kept out of that registry). # @noRd .twasFineMappingTokens <- function() { - setdiff(names(.fineMappingMethodCapabilities), "mrmash") + names(.fineMappingMethodCapabilities) } # Reject fine-mapping methods (susie / susieInf / susieAsh / mvsusie / @@ -738,7 +706,7 @@ #' \code{FALSE} (default) learns weights for each range independently and #' concatenates them into one entry per (study, context, trait, method); #' the per-region fits are kept as a named list and per-region CV is -#' recorded as a flat \code{cvPerformance} data frame (one row per region). +#' recorded as a flat \code{cvResult} data frame (one row per region). #' \code{TRUE} concatenates the ranges' genotypes into one joint fit. #' Ignored for a single-range / cis request. #' @param jointSpecification Optional joint-fit specification (NULL by @@ -825,8 +793,10 @@ setMethod("twasWeightsPipeline", "QtlDataset", jointSpecification = NULL, fineMappingResult = NULL, twasWeights = NULL, + mashPrior = NULL, cvFolds = 5, samplePartition = NULL, + fitFullData = TRUE, maxCvVariants = -1, cvThreads = 1, cvWeightMethods = NULL, @@ -868,6 +838,24 @@ setMethod("twasWeightsPipeline", "QtlDataset", .twasCheckMethodCapabilities(norm$tokens, "QtlDataset") .twasCheckFineMappingMethods(norm$tokens, fineMappingResult, "QtlDataset") + # fitFullData = FALSE (CV-only) is meaningful only with cross-validation. + if (!isTRUE(fitFullData) && cvFolds <= 1L) { + stop("twasWeightsPipeline: fitFullData = FALSE requires cross-validation ", + "(cvFolds > 1).") + } + # Unpack the MashPrior bundle: route the full-data prior into the mr.mash + # method args, the per-fold priors + fold partition into the CV machinery. + mp <- .unpackMashPrior(mashPrior, samplePartition) + samplePartition <- mp$samplePartition + dataDrivenPriorMatricesCv <- mp$dataDrivenPriorMatricesCv + if (!is.null(mashPrior) && !"mrmash" %in% norm$tokens) { + warning("`mashPrior` was supplied but 'mrmash' is not among `methods`; ", + "the data-driven prior is ignored.") + } + if (!is.null(mp$fullPrior) && "mrmash_weights" %in% names(norm$methodList)) { + norm$methodList$mrmash_weights$dataDrivenPriorMatrices <- mp$fullPrior + } + # Explicit jointSpecification path: run the per-spec axis dispatcher for # mr.mash. Other (univariate) methods continue through the existing # per-(context, trait) iteration below. @@ -934,208 +922,38 @@ setMethod("twasWeightsPipeline", "QtlDataset", multivariate <- any(vapply(norm$tokens, .twasIsMultivariateToken, logical(1))) - runOne <- function(ctx, tid) { - # Resume cache: per-method check against the supplied `twasWeights` - # collection. Methods present in the cache for (study, ctx, tid) - # are pulled directly; the remaining methods (if any) are fit via - # .twasWeightsPipelineMatrix with a subset weightMethods list. - cachedRows <- list() - remaining <- norm$methodList - for (mName in names(norm$methodList)) { - shortMethod <- sub("(_weights|Weights)$", "", mName) - cached <- .twasCacheLookup(twasWeights, study, ctx, tid, shortMethod) - if (!is.null(cached)) { - cachedRows[[shortMethod]] <- cached - remaining[[mName]] <- NULL - } - } - cachedTw <- .twasBuildFromCachedRows(cachedRows, study, ctx, tid) - if (length(remaining) == 0L) return(cachedTw) - - Y <- .fmResidPheno( - data, contexts = ctx, traitId = tid, - phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, - genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, - naAction = naAction) - - # Fine-mapping fits for this (study, ctx, trait); a multi-region fit is a - # per-region list and is selected blockwise inside the loop. - allFits <- .twasFineMappingFits(fineMappingResult, - study = study, context = ctx, trait = tid) - # Fine-mapping's own cross-validated predictions (shared fold partition), - # reused by the ensemble instead of re-fitting the fine-mapping methods. - fmCv <- .twasCvResultFor(fineMappingResult, study, ctx, tid) - nBlocks <- length(xRegions) - perBlockTw <- lapply(seq_len(nBlocks), function(bi) { - rg <- xRegions[[bi]] - X <- if (is.null(rg)) { - .fmResidGeno( - data, contexts = ctx, traitId = tid, cisWindow = cisWindow, - phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, - genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, - samples = rownames(Y)) - } else { - .fmResidGeno( - data, contexts = ctx, region = rg, - phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, - genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, - samples = rownames(Y)) - } - common <- intersect(rownames(X), rownames(Y)) - if (length(common) < 2L) { - stop(sprintf( - "twasWeightsPipeline: too few shared samples between residualized X and Y for (context='%s', trait='%s').", - ctx, tid)) - } - .twasWeightsPipelineMatrix( - X = X[common, , drop = FALSE], y = Y[common, , drop = FALSE], - study = study, context = ctx, trait = tid, - fittedModels = .twasFitsForRegion(allFits, bi, nBlocks), - cvFolds = cvFolds, - samplePartition = samplePartition, - fineMappingCv = fmCv, - weightMethods = remaining, - maxCvVariants = maxCvVariants, - cvThreads = cvThreads, - cvWeightMethods = cvWeightMethods, - ensemble = ensemble, - ensembleR2Threshold = ensembleR2Threshold, - ensembleSolver = ensembleSolver, - ensembleAlpha = ensembleAlpha, - estimatePi = estimatePi, - standardized = FALSE, - dataType = dataType, - ldSketch = NULL, - verbose = verbose)$twasWeights - }) - # Single block (cis or jointRegions=TRUE) returns unchanged; multiple - # blocks (jointRegions=FALSE) concatenate per method into one entry. - freshTw <- .twasMergeRegions( - perBlockTw, vapply(xRegions, .twasRegionLabel, character(1))) - if (is.null(cachedTw)) freshTw - else .rbindTwasWeights(freshTw, cachedTw, ldSketch = NULL) - } - + # Multivariate (mr.mash / mvsusie / ... + any univariate methods) joint fit + # over the (context, trait) grid, ROUTED THROUGH THE JOINT ENGINE: one + # composed group per region -> per-method fit (the engine twas fitter) + + # SR-TWAS ensemble layer, merged across regions. The SAME engine + fitter + + # ensemble as every other multivariate path -- no separate fitting code. runMultivariate <- function(traits) { - # Joint over selected (contexts, traits): residualize, intersect - # samples across contexts, drop subjects with any-NA in Y. - # Sample basis for Y construction (residualized genotypes are - # region-independent in their sample set): use the cis window when no - # explicit region is given, otherwise the first range. - Xlist <- lapply(useCtx, function(ctx) { - if (is.null(region)) { - .fmResidGeno( - data, contexts = ctx, traitId = traits, cisWindow = cisWindow, - phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, - genotypeCovariatesToResidualize = genotypeCovariatesToResidualize) - } else { - .fmResidGeno( - data, contexts = ctx, region = region[1L], - phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, - genotypeCovariatesToResidualize = genotypeCovariatesToResidualize) - } + marker <- new("TwasJointPipeline", config = list( + cvFolds = cvFolds, samplePartition = samplePartition, + fitFullData = fitFullData, dataType = dataType, + retainFitDetail = retainFitDetail, standardized = FALSE, + ensemble = ensemble, ensembleR2Threshold = ensembleR2Threshold, + ensembleSolver = ensembleSolver, ensembleAlpha = ensembleAlpha, + maxCvVariants = maxCvVariants, cvThreads = cvThreads, + estimatePi = estimatePi, verbose = verbose, ldSketch = NULL)) + synthSpec <- list(list(axes = c("context", "trait"), scope = NULL)) + labs <- vapply(xRegions, .twasRegionLabel, character(1)) + perRegion <- lapply(seq_along(xRegions), function(bi) { + .runJointSpecs(synthSpec, data, dataForm = "individual", pipeline = marker, + jointMethods = norm$tokens, contexts = useCtx, + traitIds = traits, + args = list(methodList = norm$methodList, + fineMappingResult = fineMappingResult, + dataDrivenPriorMatricesCv = dataDrivenPriorMatricesCv, + cisWindow = cisWindow, region = xRegions[[bi]], + regionIndex = bi, nRegions = length(xRegions), + verbose = verbose)) }) - # Intersect samples across contexts. - commonSamples <- Reduce(intersect, lapply(Xlist, rownames)) - if (length(commonSamples) < 2L) { - stop("twasWeightsPipeline(QtlDataset, multivariate): insufficient samples shared across selected contexts.") - } - - Yres <- .fmResidPheno( - data, contexts = useCtx, traitId = traits, - phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, - genotypeCovariatesToResidualize = genotypeCovariatesToResidualize, - naAction = naAction) - if (length(useCtx) == 1L) Yres <- setNames(list(Yres), useCtx) - # Concatenate per-context residualized phenotypes column-wise, - # restricting to commonSamples. Column names become - # "__". - Ymats <- list() - colMeta <- list() - for (ctx in names(Yres)) { - rn <- intersect(commonSamples, rownames(Yres[[ctx]])) - Ym <- Yres[[ctx]][rn, , drop = FALSE] - # Pad missing rows so columns line up across contexts. - if (length(rn) < length(commonSamples)) { - full <- matrix(NA_real_, nrow = length(commonSamples), - ncol = ncol(Ym), - dimnames = list(commonSamples, colnames(Ym))) - full[rn, ] <- Ym - Ym <- full - } else { - Ym <- Ym[commonSamples, , drop = FALSE] - } - colnames(Ym) <- paste(ctx, colnames(Ym), sep = "__") - Ymats[[ctx]] <- Ym - colMeta[[ctx]] <- data.frame( - context = ctx, trait = colnames(Yres[[ctx]]), - stringsAsFactors = FALSE) - } - Y <- do.call(cbind, Ymats) - meta <- do.call(rbind, colMeta) - # Drop subjects with any NA across Y columns (joint over contexts). - keep <- complete.cases(Y) - if (sum(keep) < 2L) { - stop("twasWeightsPipeline(QtlDataset, multivariate): too few subjects with complete Y across selected (context, trait) columns.") - } - Y <- Y[keep, , drop = FALSE] - - # Per-region fit + merge. The cis block reuses the already-extracted - # genotypes; an explicit region re-extracts that window (genotype - # residualization is context-independent, so one context suffices). - # mvsusie/mr.mash joint fits are stored once per (context, trait) row in - # the FineMappingResult; pull via the first (context, trait) of the group - # and (for a multi-region fit) select the per-region element. - perBlockTw <- lapply(seq_along(xRegions), function(bi) { - rg <- xRegions[[bi]] - Xr <- if (is.null(rg)) { - Xlist[[1L]] - } else { - .fmResidGeno( - data, contexts = useCtx[[1L]], region = rg, - phenotypeCovariatesToResidualize = phenotypeCovariatesToResidualize, - genotypeCovariatesToResidualize = genotypeCovariatesToResidualize) - } - Xr <- Xr[rownames(Y), , drop = FALSE] - jointFits <- .twasFitsForRegion( - .twasFineMappingFits(fineMappingResult, study = study, - context = meta$context[[1L]], - trait = meta$trait[[1L]]), - bi, length(xRegions)) - fmCv <- .twasCvResultFor(fineMappingResult, study, - meta$context[[1L]], meta$trait[[1L]]) - .twasWeightsPipelineMatrix( - X = Xr, y = Y, - study = study, - context = meta$context, - trait = meta$trait, - # Retain the mr.mash fit parts ({dataDrivenPriorMatrices, w0, V}) on - # the entry's `fits` slot so fineMappingPipeline can rebuild the - # mvSuSiE reweighted prior + residual variance from this shared fit. - # `retainFitDetail` selects the slim payload (default) or the full - # mr.mash fit. - retainFits = TRUE, - retainFitDetail = retainFitDetail, - fittedModels = jointFits, - cvFolds = cvFolds, - samplePartition = samplePartition, - fineMappingCv = fmCv, - weightMethods = norm$methodList, - maxCvVariants = maxCvVariants, - cvThreads = cvThreads, - cvWeightMethods = cvWeightMethods, - ensemble = ensemble, - ensembleR2Threshold = ensembleR2Threshold, - ensembleSolver = ensembleSolver, - ensembleAlpha = ensembleAlpha, - estimatePi = estimatePi, - standardized = FALSE, - dataType = dataType, - ldSketch = NULL, - verbose = verbose)$twasWeights - }) - .twasMergeRegions( - perBlockTw, vapply(xRegions, .twasRegionLabel, character(1))) + keep <- !vapply(perRegion, is.null, logical(1)) + perRegion <- perRegion[keep]; labs <- labs[keep] + if (length(perRegion) == 0L) return(NULL) + if (length(perRegion) == 1L) return(perRegion[[1L]]) + .twasMergeResultsByKey(perRegion, labs) } # Top-level dispatch within the QtlDataset method body. @@ -1144,15 +962,38 @@ setMethod("twasWeightsPipeline", "QtlDataset", # we already rejected above via .twasCheckMultivariateY. tw <- runMultivariate(allTraits) } else { - # Univariate methods: sequential over (context, trait). - out <- NULL - for (ctx in useCtx) { - for (tid in perCtxTraits[[ctx]]) { - twi <- runOne(ctx, tid) - out <- if (is.null(out)) twi else .rbindTwasWeights(out, twi, ldSketch = NULL) - } - } - tw <- out + # Univariate methods ROUTED THROUGH THE ENGINE: one 1-condition group per + # (context, trait), per region -> the SAME per-method fitter (+ ensemble + # layer for >= 2 methods + resume cache) as the joint paths, merged across + # regions. No separate per-(context, trait) fitting loop. + marker <- new("TwasJointPipeline", config = list( + cvFolds = cvFolds, samplePartition = samplePartition, + fitFullData = fitFullData, dataType = dataType, + retainFitDetail = retainFitDetail, standardized = FALSE, + ensemble = ensemble, ensembleR2Threshold = ensembleR2Threshold, + ensembleSolver = ensembleSolver, ensembleAlpha = ensembleAlpha, + maxCvVariants = maxCvVariants, cvThreads = cvThreads, + estimatePi = estimatePi, verbose = verbose, ldSketch = NULL)) + univCell <- .lookupJointCell("univariate", "individual") + scope <- list(studies = study, + contexts = setNames(list(useCtx), study), + traits = setNames(list(allTraits), study)) + labs <- vapply(xRegions, .twasRegionLabel, character(1)) + perRegion <- lapply(seq_along(xRegions), function(bi) { + .runJointCell(univCell, marker, data, scope, norm$tokens, + args = list(methodList = norm$methodList, + fineMappingResult = fineMappingResult, + cache = twasWeights, + dataDrivenPriorMatricesCv = dataDrivenPriorMatricesCv, + cisWindow = cisWindow, region = xRegions[[bi]], + regionIndex = bi, nRegions = length(xRegions), + naAction = naAction, verbose = verbose)) + }) + keep <- !vapply(perRegion, is.null, logical(1)) + perRegion <- perRegion[keep]; labs <- labs[keep] + tw <- if (length(perRegion) == 0L) NULL + else if (length(perRegion) == 1L) perRegion[[1L]] + else .twasMergeResultsByKey(perRegion, labs) } if (is.null(tw) && is.null(jointResult)) { stop("twasWeightsPipeline(QtlDataset): no (context, trait) pair produced any weights.") @@ -1333,7 +1174,7 @@ setMethod("twasWeightsPipeline", "QtlSumStats", variantIds = variantIds, weights = as.numeric(weights), fits = fitAttr, - cvPerformance = NULL, # Q5: no CV on the sumstat path + cvResult = NULL, # Q5: no CV on the sumstat path standardized = TRUE, # Q4: sumstat-derived weights are standardized dataType = dataType) } @@ -1434,7 +1275,7 @@ setMethod("twasWeightsPipeline", "QtlSumStats", # Share the underlying joint fit on the first row only; # remaining rows reference the same fit by leaving fits NULL. fits = if (kk == 1L) fitAttr else NULL, - cvPerformance = NULL, + cvResult = NULL, standardized = TRUE, dataType = dataType) } @@ -1618,397 +1459,9 @@ setMethod("twasWeightsPipeline", "ANY", }) # ============================================================================= -# Internal matrix-driven TWAS weights pipeline +# SR-TWAS ensemble stacking solvers (used by ensembleWeights, the primitive the +# engine's .twasEnsembleLayer calls per context) # ============================================================================= -# -# This is the legacy matrix-based pipeline retained as an internal worker. -# The exported, S4-dispatched `twasWeightsPipeline` defined above extracts -# (X, Y) blocks from QtlDataset / QtlSumStats / GwasSumStats and calls this -# function per (study, context, trait) tuple. It returns a single-tuple -# `TwasWeights` collection (one row per method, plus an optional ensemble -# row) along with auxiliary state used during stacking. -# -# Method restrictions imposed at the dispatch layer: -# - PRS-CS, lassosumRss, sdpr, susieRss, susieInfRss, susieAshRss, -# mrAshRss, mrmashRss, mvsusieRss: RSS-only (refuse QtlDataset). -# - bglrWeights / qgg methods (bayesA/B/C/L/N/R, bLasso, dpr*): individual -# level only (refuse QtlSumStats / GwasSumStats). -# - mr.mash / mvsusie: multi-trait / multi-context (same rule family as -# the fine-mapping mvSuSiE family in the design doc). -# -# @noRd -.twasWeightsPipelineMatrix <- function(X, - y, - study = "", - context = "", - trait = "", - susieFit = NULL, - fittedModels = NULL, - cvFolds = 5, - samplePartition = NULL, - fineMappingCv = NULL, - weightMethods = "default", - maxCvVariants = -1, - cvThreads = 1, - cvWeightMethods = NULL, - ensemble = TRUE, - ensembleR2Threshold = 0.01, - ensembleSolver = "quadprog", - ensembleAlpha = 1, - estimatePi = TRUE, - standardized = FALSE, - dataType = NULL, - ldSketch = NULL, - retainFits = FALSE, - retainFitDetail = c("slim", "full"), - verbose = 1) { - retainFitDetail <- match.arg(retainFitDetail) - if (is.character(weightMethods)) { - weightMethods <- .twasMethodLookup(weightMethods) - } - if (is.null(fittedModels)) fittedModels <- list() - if (!is.null(susieFit)) fittedModels[["susie"]] <- susieFit - - # Inject precomputed fine-mapping fits into the per-method args so the - # corresponding *Weights wrapper extracts coefficients from the fit - # rather than refitting. The adapter table (.twasFineMappingMethodAdapters) - # gives the snake_case methodList key and the *Fit argument name for - # each fine-mapping method. - for (canonical in names(.twasFineMappingMethodAdapters)) { - adapter <- .twasFineMappingMethodAdapters[[canonical]] - if (!is.null(fittedModels[[canonical]]) && - !is.null(weightMethods[[adapter$methodKey]]) && - is.null(weightMethods[[adapter$methodKey]][[adapter$fitArg]])) { - weightMethods[[adapter$methodKey]][[adapter$fitArg]] <- - fittedModels[[canonical]] - } - } - - res <- list() - st <- proc.time() - if (verbose >= 1) { - message("Performing TWAS weights computation for univariate analysis methods ...") - tic() - } - - if (!is.null(fittedModels[["susie"]]) && !is.null(weightMethods$susie_weights)) { - res$susieWeightsIntermediate <- .susieWeightIntermediate(fittedModels[["susie"]], X) - } - - # Check if empirical pi estimation is needed for spike-and-slab methods - bayesCneedsPi <- "bayes_c_weights" %in% names(weightMethods) && - !"pi" %in% names(weightMethods$bayes_c_weights) - bayesBneedsPi <- "bayes_b_weights" %in% names(weightMethods) && - !"probIn" %in% names(weightMethods$bayes_b_weights) - needsPiEstimation <- (bayesCneedsPi || bayesBneedsPi) && estimatePi - - learnArgs <- list( - study = study, context = context, trait = trait, - standardized = standardized, dataType = dataType, - ldSketch = ldSketch, retainFitDetail = retainFitDetail) - - if (needsPiEstimation) { - # Run mr.ash first to estimate sparsity - mrashMethods <- list(mrash_weights = weightMethods[["mrash_weights"]] %||% list()) - - if (verbose >= 1) message(" Estimating sparsity from mr.ash ...") - mrashWeights <- do.call(learnTwasWeights, c( - list(X = X, Y = y, weightMethods = mrashMethods, - retainFits = TRUE, verbose = verbose), - learnArgs)) - - empiricalPi <- estimateSparsity(mrashWeights) - if (verbose >= 1) message(sprintf(" Empirical sparsity estimate: %.4f", empiricalPi)) - res$empiricalPi <- empiricalPi - - # Inject into spike-and-slab methods that need it - if (bayesCneedsPi) weightMethods$bayes_c_weights$pi <- as.numeric(empiricalPi) - if (bayesBneedsPi) weightMethods$bayes_b_weights$probIn <- as.numeric(empiricalPi) - - # Run remaining methods (those not already computed) - remainingFnNames <- setdiff(names(weightMethods), "mrash_weights") - - if (length(remainingFnNames) > 0) { - remainingMethods <- weightMethods[remainingFnNames] - remainingTw <- do.call(learnTwasWeights, c( - list(X = X, Y = y, weightMethods = remainingMethods, - fittedModels = fittedModels, retainFits = retainFits, - verbose = verbose), - learnArgs)) - res$twasWeights <- .rbindTwasWeights(mrashWeights, remainingTw, - ldSketch = ldSketch) - } else { - res$twasWeights <- mrashWeights - } - - # Remove mr.ash if it was not in the original weightMethods - if (!"mrash_weights" %in% names(weightMethods)) { - tw <- res$twasWeights - keep <- as.character(tw$method) != "mrash" - res$twasWeights <- TwasWeights( - study = as.character(tw$study)[keep], - context = as.character(tw$context)[keep], - trait = as.character(tw$trait)[keep], - method = as.character(tw$method)[keep], - entry = as.list(tw$entry)[keep], - ldSketch = ldSketch) - } - } else { - # Run all methods at once - res$twasWeights <- do.call(learnTwasWeights, c( - list(X = X, Y = y, weightMethods = weightMethods, - fittedModels = fittedModels, retainFits = retainFits, - verbose = verbose), - learnArgs)) - } - if (verbose >= 1) { - elapsed <- toc(quiet = TRUE) - message(sprintf("TWAS weights fitting done in %.1fs", elapsed$toc - elapsed$tic)) - } - res$twasPredictions <- twasPredict(X, res$twasWeights) - - if (cvFolds > 1) { - # A few cutting corners to run CV faster at the disadvantage of SuSiE and mr.ash: - # 1. reset SuSiE to not using refine or adaptive L but to use L from previous analysis - # 2. at most 100 iterations for mr.ash allowed - # 3. only use a subset of variants randomly selected to avoid bias - if (!is.null(fittedModels[["susieInf"]]) && !is.null(weightMethods$susie_inf_weights)) { - weightMethods$susie_inf_weights$L <- length(fittedModels[["susieInf"]]$V) - weightMethods$susie_inf_weights$refine <- FALSE - } - if (!is.null(weightMethods$susie_weights)) { - susieCvFit <- fittedModels[["susie"]] - if (is.null(susieCvFit)) susieCvFit <- fittedModels[["susieInf"]] - if (!is.null(susieCvFit)) { - weightMethods$susie_weights$L <- length(susieCvFit$V) - weightMethods$susie_weights$refine <- FALSE - } - } - if (is.null(cvWeightMethods)) { - cvWeightMethods <- .filterZeroWeightMethods(weightMethods, res$twasWeights) - } - - # Fine-mapping handoff: when fineMappingPipeline supplied cross-validated - # predictions for some methods (shared fold partition + per-fold out-of- - # fold predictions), reuse them rather than refitting those methods here. - # Drop them from the CV refit set, adopt the shared partition (unless the - # caller passed one explicitly), and merge their predictions/metrics into - # the CV result below so the SR-TWAS ensemble consumes fine-mapping's own - # cross-validation. - fmCvPrediction <- NULL; fmCvPerformance <- NULL - if (!is.null(fineMappingCv) && length(fineMappingCv$prediction) > 0L) { - if (is.null(samplePartition) && !is.null(fineMappingCv$samplePartition)) { - samplePartition <- fineMappingCv$samplePartition - } - fmBase <- sub("(_predicted|Predicted)$", "", names(fineMappingCv$prediction)) - cvWeightMethods <- cvWeightMethods[ - setdiff(names(cvWeightMethods), paste0(fmBase, "_weights"))] - yMat <- if (is.matrix(y)) y - else matrix(y, ncol = 1L, dimnames = list(names(y), NULL)) - sampleNames <- rownames(X) - outcomeNames <- colnames(yMat) - alignFmPred <- function(mat) { - out <- matrix(NA_real_, length(sampleNames), - max(1L, length(outcomeNames)), - dimnames = list(sampleNames, outcomeNames)) - rs <- intersect(rownames(mat), sampleNames) - cs <- if (!is.null(colnames(mat)) && !is.null(outcomeNames)) - intersect(colnames(mat), outcomeNames) else character(0) - if (length(cs) > 0L) { - out[rs, cs] <- mat[rs, cs, drop = FALSE] - } else if (ncol(mat) == ncol(out)) { - out[rs, ] <- mat[rs, , drop = FALSE] - } - out - } - fmCvPrediction <- setNames(lapply(fineMappingCv$prediction, alignFmPred), - names(fineMappingCv$prediction)) - fmCvPerformance <- fineMappingCv$performance - } - - variantsForCv <- c() - if (maxCvVariants <= 0) { - maxCvVariants <- Inf - } - if (ncol(X) > maxCvVariants) { - variantsForCv <- sample(colnames(X), maxCvVariants, replace = FALSE) - } - - if (length(cvWeightMethods) > 0L) { - if (verbose >= 1) { - message("Performing cross-validation to assess TWAS weights ...") - tic() - } - res$twasCvResult <- twasWeightsCv( - X, - y, - fold = cvFolds, - samplePartitions = samplePartition, - weightMethods = cvWeightMethods, - maxNumVariants = maxCvVariants, - numThreads = cvThreads, - verbose = verbose, - variantsToKeep = if (length(variantsForCv) > 0) variantsForCv else NULL - ) - if (verbose >= 1) { - elapsed <- toc(quiet = TRUE) - message(sprintf("Cross-validation done in %.1fs", elapsed$toc - elapsed$tic)) - } - } else { - # Every CV method came from fine-mapping; no refit needed here. - res$twasCvResult <- list(samplePartition = samplePartition, - prediction = list(), performance = list()) - } - - # Merge fine-mapping's cross-validated predictions/metrics into the CV - # result so downstream splicing + ensemble treat them as first-class. - if (!is.null(fmCvPrediction)) { - res$twasCvResult$prediction <- c(res$twasCvResult$prediction, - fmCvPrediction) - res$twasCvResult$performance <- c(res$twasCvResult$performance, - fmCvPerformance) - if (is.null(res$twasCvResult$samplePartition)) { - res$twasCvResult$samplePartition <- samplePartition - } - } - - # Number of methods participating in cross-validation / ensemble (refit - # here plus those handed over by fine-mapping). - nCvMethods <- length(res$twasCvResult$prediction) - - # Splice per-(method, outcome) CV predictions + metrics into the - # corresponding TwasWeightsEntry$cvPerformance slot. - res$twasWeights <- .spliceCvIntoTwasWeights(res$twasWeights, - res$twasCvResult, - ldSketch = ldSketch) - - # Ensemble learning: learn optimal method combination via stacked regression - if (isTRUE(ensemble) && nCvMethods <= 1) { - if (verbose >= 1) message("Ensemble model skipped: only ", nCvMethods, - " weight method provided (need >= 2 for ensemble learning).") - } - if (isTRUE(ensemble) && nCvMethods > 1) { - if (!is.null(res$twasCvResult$performance)) { - # Extract R-squared for each method from CV performance table - methodRsq <- vapply(res$twasCvResult$performance, function(perf) { - perf[1, "rsq"] - }, numeric(1)) - names(methodRsq) <- sub("(_performance|Performance)$", "", names(methodRsq)) - - # NA R-squared already implies the method is unusable for the ensemble: a - # method whose CV predictions are degenerate (zero variance across all - # held-out folds) yields cor(predictions, y) = NA and therefore rsq = NA. - # So !is.na(methodRsq) is sufficient to drop both NA-rsq and degenerate - # methods - no separate variance check needed. - passing <- !is.na(methodRsq) & methodRsq >= ensembleR2Threshold - nPassing <- sum(passing) - - if (nPassing < 2) { - # Ensemble (stacked regression) requires at least 2 base learners. - # Build a per-method status line so the user can see which methods - # dropped out and why (NA R-squared from degenerate CV predictions, - # or simply R-squared below the cutoff). - reason <- ifelse(passing, "(passed)", - ifelse(is.na(methodRsq), - "(dropped: NA R-squared - likely degenerate CV predictions)", - "(dropped: R-squared below cutoff)")) - passedInfo <- paste0(" ", names(methodRsq), ": R-squared = ", - round(methodRsq, 4), " ", reason) - surviving <- if (nPassing == 1) { - paste0(" Use the surviving method's weights directly: ", - names(methodRsq)[passing], ".") - } else "" - if (verbose >= 1) message("Ensemble TWAS skipped: ", nPassing, " of ", length(methodRsq), - " methods passed the R-squared cutoff of ", ensembleR2Threshold, - " (need >= 2).", surviving, "\n", - "Method R-squared values:\n", - paste(passedInfo, collapse = "\n")) - } else { - passingBase <- names(methodRsq)[passing] - - # Subset cvResults predictions to passing methods, matching on the - # base name regardless of whether the prediction key uses snake - # ("lasso_predicted") or camel ("lassoPredicted") form. - filteredCv <- res$twasCvResult - predBaseNames <- sub("(_predicted|Predicted)$", "", names(filteredCv$prediction)) - filteredCv$prediction <- filteredCv$prediction[match(passingBase, predBaseNames)] - - # Subset twas_weights to passing methods. - # Method names on a TwasWeights collection are stored as bare - # tokens (e.g. "lasso") in the `method` column; the ensemble - # helper wants snake_case "_weights" keys. - tw <- res$twasWeights - twMethodNames <- as.character(tw$method) - filteredWeights <- setNames( - lapply(passingBase, function(bn) { - idx <- which(twMethodNames == bn) - if (length(idx) == 0L) return(NULL) - w <- getWeights(tw$entry[[idx[[1L]]]]) - if (!is.matrix(w)) w <- matrix(w, ncol = 1) - w - }), - paste0(passingBase, "_weights")) - filteredWeights <- Filter(Negate(is.null), filteredWeights) - - if (verbose >= 1) { - message("Computing ensemble TWAS weights via stacked regression ", - "using ", nPassing, " methods: ", - paste(passingBase, collapse = ", "), " ...") - tic() - } - ensResult <- ensembleWeights( - cvResults = filteredCv, - Y = y, - twasWeightList = filteredWeights, - solver = ensembleSolver, - alpha = ensembleAlpha - ) - if (verbose >= 1) { - elapsed <- toc(quiet = TRUE) - message(sprintf("Ensemble learning done in %.1fs", elapsed$toc - elapsed$tic)) - } - - # Add ensemble weights alongside individual method weights as a - # new row in the TwasWeights collection. - if (!is.null(ensResult$ensembleTwasWeights)) { - ensWt <- ensResult$ensembleTwasWeights - if (!is.matrix(ensWt)) ensWt <- matrix(ensWt, ncol = 1) - tw <- res$twasWeights - # Use the first existing row's (study, context, trait) as the - # identity tuple for the ensemble row. - existingStudy <- as.character(tw$study)[1L] - existingContext <- as.character(tw$context)[1L] - existingTrait <- as.character(tw$trait)[1L] - existingStd <- getStandardized(tw$entry[[1L]]) - ensWtVec <- if (ncol(ensWt) == 1L) drop(ensWt) else ensWt - ensVarIds <- if (!is.null(rownames(ensWt))) rownames(ensWt) - else colnames(X) - ensEntry <- TwasWeightsEntry( - variantIds = ensVarIds, - weights = ensWtVec, - cvPerformance = list( - methodCoef = ensResult$methodCoef, - methodPerformance = ensResult$methodPerformance), - standardized = existingStd) - ensRow <- TwasWeights( - study = existingStudy, - context = existingContext, - trait = existingTrait, - method = "ensemble", - entry = list(ensEntry), - ldSketch = ldSketch) - res$twasWeights <- .rbindTwasWeights(tw, ensRow, ldSketch = ldSketch) - res$twasPredictions$ensemble_predicted <- X %*% ensWt - } - res$ensemble <- ensResult - } - } - } - } - res$totalTimeElapsed <- proc.time() - st - - return(res) -} # Solve ensemble stacking via quadprog (constrained QP with sum-to-1 and non-negativity). # @param Pvalid Matrix of CV predictions for valid methods (n x Kvalid). @@ -2018,8 +1471,10 @@ setMethod("twasWeightsPipeline", "ANY", # @noRd .solveEnsembleQuadprog <- function(Pvalid, yObs, Kvalid) { if (!requireNamespace("quadprog", quietly = TRUE)) { + # nocov start stop("Package 'quadprog' is required for solver='quadprog'. ", "Install with: install.packages('quadprog')") + # nocov end } Dmat <- crossprod(Pvalid) @@ -2064,8 +1519,10 @@ setMethod("twasWeightsPipeline", "ANY", # @noRd .solveEnsembleNnls <- function(Pvalid, yObs, Kvalid) { if (!requireNamespace("nnls", quietly = TRUE)) { + # nocov start stop("Package 'nnls' is required for solver='nnls'. ", "Install with: install.packages('nnls')") + # nocov end } fit <- tryCatch( @@ -2142,8 +1599,10 @@ setMethod("twasWeightsPipeline", "ANY", # @noRd .solveEnsembleGlmnet <- function(Pvalid, yObs, Kvalid, alpha = 1) { if (!requireNamespace("glmnet", quietly = TRUE)) { + # nocov start stop("Package 'glmnet' is required for solver='glmnet'. ", "Install with: install.packages('glmnet')") + # nocov end } fit <- tryCatch( diff --git a/R/vcfWriter.R b/R/vcfWriter.R index 31f55c18..f2d43d2a 100644 --- a/R/vcfWriter.R +++ b/R/vcfWriter.R @@ -11,8 +11,10 @@ NULL #' @export setMethod("writeSumstatsVcf", signature("GwasSumStats"), function(x, outputPath, sampleName = NULL, study = NULL, ...) { + # nocov start if (!requireNamespace("VariantAnnotation", quietly = TRUE)) stop("Package 'VariantAnnotation' is required for writeSumstatsVcf") + # nocov end # Select which study to write (the new GwasSumStats can hold many). if (is.null(study)) { @@ -63,8 +65,10 @@ setMethod("writeSumstatsVcf", signature("FineMappingResultBase"), study = NULL, context = NULL, trait = NULL, method = NULL, splitByContext = FALSE, splitByTrait = FALSE, ...) { + # nocov start if (!requireNamespace("VariantAnnotation", quietly = TRUE)) stop("Package 'VariantAnnotation' is required for writeSumstatsVcf") + # nocov end # Resolve the set of rows to write. With both selectors NULL and no # split flags, the collection must have exactly one row. Splitting @@ -171,8 +175,8 @@ setMethod("writeSumstatsVcf", signature("FineMappingResultBase"), } if (any(!is.na(marginal$N))) addGeno("SS", as.integer(marginal$N), "Integer", "Sample size") - if (any(!is.na(marginal$MAF))) - addGeno("AF", marginal$MAF, "Float", "Minor allele frequency") + if (any(!is.na(marginal$af))) + addGeno("AF", marginal$af, "Float", "Allele frequency (effect allele)") genoHeader <- DataFrame( Number = hdrNum, Type = hdrType, Description = hdrDesc, diff --git a/man/MashPrior-class.Rd b/man/MashPrior-class.Rd new file mode 100644 index 00000000..5d0c3bd9 --- /dev/null +++ b/man/MashPrior-class.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MashPrior.R +\docType{class} +\name{MashPrior-class} +\alias{MashPrior-class} +\title{Data-Driven (mash) Prior Bundle} +\description{ +Input container packaging a full-data data-driven prior with + optional per-fold (cross-validated) priors and the fold partition they were + computed on. Produced (eventually) by \code{mashPipeline()} and consumed by + \code{twasWeightsPipeline()} (mr.mash); the per-fold fits then flow to + \code{fineMappingPipeline()} (mvSuSiE) via the resulting + \code{\link{TwasWeights}}. +} +\section{Slots}{ + +\describe{ +\item{\code{fullFit}}{The full-data data-driven prior payload — the +\code{mashPipeline()} output \code{list(U, w)}; fed to mr.mash as +\code{dataDrivenPriorMatrices} for the full-data fit. \code{NULL} when only +per-fold priors are supplied (a CV-only run).} + +\item{\code{cvFits}}{\code{NULL}, or a list with \code{samplePartition} +(\code{data.frame(Sample, Fold)}) and \code{perFoldFits} (a list of per-fold +prior payloads, \code{perFoldFits[[j]]} for fold \code{j}).} +}} + diff --git a/man/MashPrior.Rd b/man/MashPrior.Rd new file mode 100644 index 00000000..33a2c988 --- /dev/null +++ b/man/MashPrior.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MashPrior.R +\name{MashPrior} +\alias{MashPrior} +\title{Create a MashPrior Object} +\usage{ +MashPrior(fullFit = NULL, cvFits = NULL) +} +\arguments{ +\item{fullFit}{Full-data data-driven prior payload (the +\code{mashPipeline()} \code{list(U, w)} output), or \code{NULL} for a +CV-only bundle.} + +\item{cvFits}{\code{NULL}, or a list with \code{perFoldFits} (a non-empty +list of per-fold prior payloads) and optionally \code{samplePartition} +(\code{data.frame(Sample, Fold)}).} +} +\value{ +A \code{MashPrior} object. +} +\description{ +Construct a \code{\link{MashPrior}} bundling a full-data + data-driven prior with optional per-fold (cross-validated) priors. +} diff --git a/man/SldscData-class.Rd b/man/SldscData-class.Rd new file mode 100644 index 00000000..a6984da6 --- /dev/null +++ b/man/SldscData-class.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SldscData.R +\name{SldscData-class} +\alias{SldscData-class} +\title{S-LDSC input data container} +\description{ +An in-memory bundle of the loaded S-LDSC inputs, produced from + the reader functions (\code{\link{readSldscAnnot}}, \code{\link{readSldscFrq}}, + \code{\link{readSldscTrait}}) and consumed by + \code{\link{sldscPostprocessingPipeline}}. The class itself performs no + file I/O: the user runs the readers, then constructs an \code{SldscData} + from those in-memory objects, and the pipeline does all computation on it. +} +\section{Slots}{ + +\describe{ +\item{\code{annot}}{A \code{data.frame} of target annotations with at least +\code{CHR} and \code{SNP} columns plus one or more annotation columns +(\code{BP}/\code{CM} optional).} + +\item{\code{frq}}{A \code{data.frame} of reference-panel allele frequencies with +\code{SNP} and \code{MAF} columns (a 0-row frame when no \code{.frq} data +was supplied).} + +\item{\code{traits}}{A named list, one entry per trait, each a list with a +\code{single} element (list of per-target \code{\link{readSldscTrait}} +runs) and an optional \code{joint} element (a single run, or \code{NULL}).} +}} + diff --git a/man/SldscData.Rd b/man/SldscData.Rd new file mode 100644 index 00000000..802eecfe --- /dev/null +++ b/man/SldscData.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SldscData.R +\name{SldscData} +\alias{SldscData} +\alias{show,SldscData-method} +\title{Construct an SldscData object} +\usage{ +SldscData(annot, frq = NULL, traits = list()) + +\S4method{show}{SldscData}(object) +} +\arguments{ +\item{annot}{A target-annotation \code{data.frame} (e.g. from +\code{\link{readSldscAnnot}}): \code{CHR}, \code{SNP}, and one or more +annotation columns.} + +\item{frq}{Optional reference-panel allele-frequency \code{data.frame} (e.g. +from \code{\link{readSldscFrq}}): \code{SNP}, \code{MAF}. \code{NULL} (the +default) stores an empty frame, which disables MAF-based filtering.} + +\item{traits}{A named list of per-trait runs; each entry a list with a +\code{single} list (per-target \code{\link{readSldscTrait}} outputs) and an +optional \code{joint} run.} +} +\value{ +An \code{SldscData} object. +} +\description{ +Bundles the in-memory outputs of the S-LDSC readers into a single object for +\code{\link{sldscPostprocessingPipeline}}. Performs no file I/O. +} +\seealso{ +\code{\link{readSldscAnnot}}, \code{\link{readSldscFrq}}, + \code{\link{readSldscTrait}}, \code{\link{sldscPostprocessingPipeline}} +} diff --git a/man/TwasWeightsEntry.Rd b/man/TwasWeightsEntry.Rd index ef244ee7..6a1abbf4 100644 --- a/man/TwasWeightsEntry.Rd +++ b/man/TwasWeightsEntry.Rd @@ -8,7 +8,7 @@ TwasWeightsEntry( variantIds, weights, fits = NULL, - cvPerformance = NULL, + cvResult = NULL, standardized = FALSE, dataType = NULL ) @@ -18,9 +18,14 @@ TwasWeightsEntry( \item{weights}{Numeric vector or matrix.} -\item{fits}{Optional method-specific fit object.} +\item{fits}{Optional method-specific fit object (the full-data fit; e.g. the +mr.mash fit's \code{{dataDrivenPriorMatrices, w0, V}}).} -\item{cvPerformance}{Optional list of CV metrics.} +\item{cvResult}{Optional cross-validation payload: a list mirroring +\code{FineMappingEntry@cvResult} with \code{samplePartition}, +\code{predictions}, \code{performance}, and (mr.mash only) \code{foldFits} +— the per-fold fits that \code{fineMappingPipeline}'s mvSuSiE path consumes +as per-fold priors.} \item{standardized}{Logical (length 1).} diff --git a/man/buildTopLoci.Rd b/man/buildTopLoci.Rd index 91fcfdb4..a04e3f8d 100644 --- a/man/buildTopLoci.Rd +++ b/man/buildTopLoci.Rd @@ -15,7 +15,8 @@ buildTopLoci( dataX = NULL, dataY = NULL, otherQuantities = NULL, - region = NULL + region = NULL, + conditionIdx = NULL ) } \arguments{ diff --git a/man/causalInferencePipeline.Rd b/man/causalInferencePipeline.Rd index b48bef0b..5438531a 100644 --- a/man/causalInferencePipeline.Rd +++ b/man/causalInferencePipeline.Rd @@ -36,27 +36,27 @@ coefficients on each entry's \code{topLoci}.} \item{rsqCutoff}{Numeric (length 1). When \code{> 0}, performs CV weight selection (ports the legacy \code{twas_pipeline} \code{pick_best_model} + \code{update_twas_method}): per \code{(study, context, trait, gwasStudy)} -keep only the method whose \code{cvPerformance} \code{rsqOption} metric is +keep only the method whose \code{cvResult} \code{rsqOption} metric is highest among methods that clear both \code{rsqCutoff} and the \code{rsqPvalCutoff} gate AND that produced a finite TWAS Z (the NA/Inf re-selection); groups where no method clears the cutoffs are dropped. A -group whose methods carry no usable \code{cvPerformance} (the SS-TWAS -path) keeps all methods. Needs the \code{twasWeights} \code{cvPerformance}, +group whose methods carry no usable \code{cvResult} (the SS-TWAS +path) keeps all methods. Needs the \code{twasWeights} \code{cvResult}, so selection is a no-op on the fineMappingResult-only path. Default \code{0} (no selection; score every method).} \item{rsqPvalCutoff}{Numeric (length 1). CV-p-value gate for weight selection (ports legacy \code{rsq_pval_cutoff}): a method is eligible only -when its \code{cvPerformance} \code{rsqPvalOption} metric is +when its \code{cvResult} \code{rsqPvalOption} metric is \code{< rsqPvalCutoff}. Default \code{Inf} (no p-value gate). A finite value activates selection even when \code{rsqCutoff = 0}.} -\item{rsqOption}{Character. Which \code{cvPerformance} metric is the +\item{rsqOption}{Character. Which \code{cvResult} metric is the "r-squared" used for the cutoff and ranking (ports legacy \code{rsq_option}); typically \code{"rsq"} or \code{"adj_rsq"}. Default \code{"rsq"}.} -\item{rsqPvalOption}{Character vector of candidate \code{cvPerformance} +\item{rsqPvalOption}{Character vector of candidate \code{cvResult} metric names for the p-value gate (ports legacy \code{rsq_pval_option}); the first one present in a tuple's metrics is used. Default \code{c("adj_rsq_pval", "pval")}.} diff --git a/man/computeSldscAnnotSd.Rd b/man/computeSldscAnnotSd.Rd index 6241187f..c9541c9c 100644 --- a/man/computeSldscAnnotSd.Rd +++ b/man/computeSldscAnnotSd.Rd @@ -4,26 +4,13 @@ \alias{computeSldscAnnotSd} \title{Compute per-annotation standard deviation, MAF-restricted} \usage{ -computeSldscAnnotSd( - targetAnnoDir, - frqfileDir = NULL, - plinkName = "ADSP_chr", - mafCutoff = 0.05, - annotCols = NULL -) +computeSldscAnnotSd(sldscData, mafCutoff = 0.05, annotCols = NULL) } \arguments{ -\item{targetAnnoDir}{Character. Directory containing target annotation files -(one per chromosome) in polyfun's `.annot.gz` format.} +\item{sldscData}{An \code{\link{SldscData}} object (its \code{annot} and +\code{frq} slots supply the annotation values and MAF, respectively).} -\item{frqfileDir}{Character or NULL. Directory containing PLINK `.frq` files -for the reference panel. Required when `mafCutoff > 0`; the function -errors if missing.} - -\item{plinkName}{Character. Filename prefix of the `.frq` files -(e.g. `"ADSP_chr"`). Files are expected at `{plinkName}{chr}.frq`.} - -\item{mafCutoff}{Numeric, default `0.05`.} +\item{mafCutoff}{Numeric, default `0.05`. Requires frq data when > 0.} \item{annotCols}{Character or integer vector, default NULL. Annotation columns to compute sd for. If NULL, all annotation columns are used.} diff --git a/man/computeSldscMRef.Rd b/man/computeSldscMRef.Rd index 496e1c10..4e7f68db 100644 --- a/man/computeSldscMRef.Rd +++ b/man/computeSldscMRef.Rd @@ -4,21 +4,11 @@ \alias{computeSldscMRef} \title{Reference-panel SNP count (the M_ref used to standardise tau*)} \usage{ -computeSldscMRef( - targetAnnoDir = NULL, - frqfileDir = NULL, - plinkName = "ADSP_chr", - mafCutoff = 0.05 -) +computeSldscMRef(sldscData, mafCutoff = 0.05) } \arguments{ -\item{targetAnnoDir}{Character or NULL. Fallback only - directory of -`.l2.ldscore` files. Used only when `frqfileDir` is unavailable.} - -\item{frqfileDir}{Character or NULL. Directory of PLINK `.frq` files; the -preferred (recommended) source of M_ref.} - -\item{plinkName}{Character. Filename prefix of `.frq` files.} +\item{sldscData}{An \code{\link{SldscData}} object (its \code{frq} slot is +the reference-panel SNP set).} \item{mafCutoff}{Numeric, default `0.05`.} } @@ -37,7 +27,6 @@ Scalar integer. \item `mafCutoff == 0` (all-M variant): count ALL SNPs across all `.frq` files (the same set polyfun's `.l2.M` sums). } - `targetAnnoDir` is a fallback only, used when no `.frq` directory is - given; that fallback counts `.l2.ldscore` rows and is WRONG when the target - was HM3-subsetted (it then yields the regression SNP count, not M_ref). + When no frq data is present, `mafCutoff == 0` falls back to the number of + annotation rows; `mafCutoff > 0` errors (a MAF-restricted count needs frq). } diff --git a/man/fineMappingPipeline.Rd b/man/fineMappingPipeline.Rd index dc1f1a77..70a0906b 100644 --- a/man/fineMappingPipeline.Rd +++ b/man/fineMappingPipeline.Rd @@ -21,6 +21,8 @@ fineMappingPipeline(data, ...) jointRegions = FALSE, jointSpecification = NULL, addSusieInf = TRUE, + L = 20L, + Lgreedy = 5L, coverage = 0.95, secondaryCoverage = c(0.7, 0.5), signalCutoff = 0.025, @@ -101,6 +103,8 @@ fineMappingPipeline(data, ...) data, methods, addSusieInf = TRUE, + L = 20L, + Lgreedy = 5L, coverage = 0.95, secondaryCoverage = c(0.7, 0.5), signalCutoff = 0.025, diff --git a/man/fsusieWeights.Rd b/man/fsusieWeights.Rd index fad49604..da5efe11 100644 --- a/man/fsusieWeights.Rd +++ b/man/fsusieWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/regularizedRegressionWrappers.R +% Please edit documentation in R/fineMappingWrappers.R \name{fsusieWeights} \alias{fsusieWeights} \title{Compute fSuSiE feature-level TWAS weights} diff --git a/man/getAnnotCols.Rd b/man/getAnnotCols.Rd new file mode 100644 index 00000000..9b146ab2 --- /dev/null +++ b/man/getAnnotCols.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/SldscData.R +\name{getAnnotCols} +\alias{getAnnotCols} +\alias{getAnnotCols,SldscData-method} +\title{Get the annotation column names from an SldscData} +\usage{ +getAnnotCols(x) + +\S4method{getAnnotCols}{SldscData}(x) +} +\arguments{ +\item{x}{An \code{\link{SldscData}} object.} +} +\value{ +A character vector of annotation column names. +} +\description{ +Get the annotation column names from an SldscData +} diff --git a/man/getAnnotData.Rd b/man/getAnnotData.Rd new file mode 100644 index 00000000..def2ec55 --- /dev/null +++ b/man/getAnnotData.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/SldscData.R +\name{getAnnotData} +\alias{getAnnotData} +\alias{getAnnotData,SldscData-method} +\title{Get the annotation table from an SldscData} +\usage{ +getAnnotData(x) + +\S4method{getAnnotData}{SldscData}(x) +} +\arguments{ +\item{x}{An \code{\link{SldscData}} object.} +} +\value{ +A \code{data.frame} of annotations (CHR, SNP, annotation columns). +} +\description{ +Get the annotation table from an SldscData +} diff --git a/man/getCvFits.Rd b/man/getCvFits.Rd new file mode 100644 index 00000000..98d440be --- /dev/null +++ b/man/getCvFits.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/MashPrior.R +\name{getCvFits} +\alias{getCvFits} +\alias{getCvFits,MashPrior-method} +\title{Get the Per-Fold Priors from a MashPrior} +\usage{ +getCvFits(x, ...) + +\S4method{getCvFits}{MashPrior}(x, ...) +} +\arguments{ +\item{x}{A \code{MashPrior} object.} + +\item{...}{Unused.} +} +\value{ +The \code{cvFits} list, or \code{NULL}. +} +\description{ +Accessor for the \code{cvFits} slot (per-fold priors + + \code{samplePartition}). +} diff --git a/man/getCvPerformance.Rd b/man/getCvPerformance.Rd deleted file mode 100644 index 551aaf9b..00000000 --- a/man/getCvPerformance.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/AllGenerics.R, R/TwasWeightsEntry.R, -% R/twasWeights.R -\name{getCvPerformance} -\alias{getCvPerformance} -\alias{getCvPerformance,TwasWeightsEntry-method} -\alias{getCvPerformance,TwasWeights-method} -\title{Get CV Performance} -\usage{ -getCvPerformance(x, ...) - -\S4method{getCvPerformance}{TwasWeightsEntry}(x, ...) - -\S4method{getCvPerformance}{TwasWeights}( - x, - study = NULL, - context = NULL, - trait = NULL, - method = NULL, - ... -) -} -\arguments{ -\item{x}{A \code{TwasWeightsEntry} or \code{TwasWeights}.} - -\item{...}{Class-specific selection arguments.} -} -\value{ -Method-specific (typically a list). -} -\description{ -Extract cross-validation performance metrics. -} diff --git a/man/getCvResult.Rd b/man/getCvResult.Rd index 1cf22c96..77df7930 100644 --- a/man/getCvResult.Rd +++ b/man/getCvResult.Rd @@ -1,10 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/FineMappingEntry.R, -% R/QtlFineMappingResult.R +% R/QtlFineMappingResult.R, R/TwasWeightsEntry.R, R/twasWeights.R \name{getCvResult} \alias{getCvResult} \alias{getCvResult,FineMappingEntry-method} \alias{getCvResult,QtlFineMappingResult-method} +\alias{getCvResult,TwasWeightsEntry-method} +\alias{getCvResult,TwasWeights-method} \title{Get Cross-Validation Result} \usage{ getCvResult(x, ...) @@ -12,6 +14,10 @@ getCvResult(x, ...) \S4method{getCvResult}{FineMappingEntry}(x, ...) \S4method{getCvResult}{QtlFineMappingResult}(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) + +\S4method{getCvResult}{TwasWeightsEntry}(x, ...) + +\S4method{getCvResult}{TwasWeights}(x, study = NULL, context = NULL, trait = NULL, method = NULL, ...) } \arguments{ \item{x}{A \code{FineMappingEntry} or \code{FineMappingResult}.} diff --git a/man/getFrqData.Rd b/man/getFrqData.Rd new file mode 100644 index 00000000..4a0a9c2c --- /dev/null +++ b/man/getFrqData.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/SldscData.R +\name{getFrqData} +\alias{getFrqData} +\alias{getFrqData,SldscData-method} +\title{Get the allele-frequency table from an SldscData} +\usage{ +getFrqData(x) + +\S4method{getFrqData}{SldscData}(x) +} +\arguments{ +\item{x}{An \code{\link{SldscData}} object.} +} +\value{ +A \code{data.frame} of reference-panel frequencies (SNP, MAF). +} +\description{ +Get the allele-frequency table from an SldscData +} diff --git a/man/getFullFit.Rd b/man/getFullFit.Rd new file mode 100644 index 00000000..0435db40 --- /dev/null +++ b/man/getFullFit.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/MashPrior.R +\name{getFullFit} +\alias{getFullFit} +\alias{getFullFit,MashPrior-method} +\title{Get the Full-Data Prior from a MashPrior} +\usage{ +getFullFit(x, ...) + +\S4method{getFullFit}{MashPrior}(x, ...) +} +\arguments{ +\item{x}{A \code{MashPrior} object.} + +\item{...}{Unused.} +} +\value{ +The full-data prior payload, or \code{NULL}. +} +\description{ +Accessor for the \code{fullFit} slot (the full-data data-driven + prior payload). +} diff --git a/man/getTraitNames.Rd b/man/getTraitNames.Rd new file mode 100644 index 00000000..fe6be06f --- /dev/null +++ b/man/getTraitNames.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/SldscData.R +\name{getTraitNames} +\alias{getTraitNames} +\alias{getTraitNames,SldscData-method} +\title{Get the trait names from an SldscData} +\usage{ +getTraitNames(x) + +\S4method{getTraitNames}{SldscData}(x) +} +\arguments{ +\item{x}{An \code{\link{SldscData}} object.} +} +\value{ +A character vector of trait names. +} +\description{ +Get the trait names from an SldscData +} diff --git a/man/getTraitRun.Rd b/man/getTraitRun.Rd new file mode 100644 index 00000000..5d18484b --- /dev/null +++ b/man/getTraitRun.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/SldscData.R +\name{getTraitRun} +\alias{getTraitRun} +\alias{getTraitRun,SldscData-method} +\title{Get one trait's run from an SldscData} +\usage{ +getTraitRun(x, trait, ...) + +\S4method{getTraitRun}{SldscData}(x, trait, mode = c("single", "joint"), idx = NULL) +} +\arguments{ +\item{x}{An \code{\link{SldscData}} object.} + +\item{trait}{Character. Trait name.} + +\item{...}{Further arguments: \code{mode} (\code{"single"}/\code{"joint"}) +and \code{idx} (which single run).} +} +\value{ +A single run list, the list of single runs, or \code{NULL}. +} +\description{ +Get one trait's run from an SldscData +} diff --git a/man/getTraitRuns.Rd b/man/getTraitRuns.Rd new file mode 100644 index 00000000..74c8fad1 --- /dev/null +++ b/man/getTraitRuns.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/SldscData.R +\name{getTraitRuns} +\alias{getTraitRuns} +\alias{getTraitRuns,SldscData-method} +\title{Get the per-trait runs list from an SldscData} +\usage{ +getTraitRuns(x) + +\S4method{getTraitRuns}{SldscData}(x) +} +\arguments{ +\item{x}{An \code{\link{SldscData}} object.} +} +\value{ +The named list of per-trait \code{single}/\code{joint} runs. +} +\description{ +Get the per-trait runs list from an SldscData +} diff --git a/man/isBinarySldscAnnot.Rd b/man/isBinarySldscAnnot.Rd index 572b58a0..f5fb9888 100644 --- a/man/isBinarySldscAnnot.Rd +++ b/man/isBinarySldscAnnot.Rd @@ -4,11 +4,10 @@ \alias{isBinarySldscAnnot} \title{Detect whether each annotation is binary or continuous} \usage{ -isBinarySldscAnnot(targetAnnoDir, annotCols = NULL) +isBinarySldscAnnot(sldscData, annotCols = NULL) } \arguments{ -\item{targetAnnoDir}{Character. Directory containing the target `.annot.gz` -files (one per chromosome).} +\item{sldscData}{An \code{\link{SldscData}} object.} \item{annotCols}{Character or integer vector, default NULL.} } diff --git a/man/mergeSusieCs.Rd b/man/mergeSusieCs.Rd new file mode 100644 index 00000000..103da20c --- /dev/null +++ b/man/mergeSusieCs.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fineMappingWrappers.R +\name{mergeSusieCs} +\alias{mergeSusieCs} +\title{Merge SuSiE credible sets across conditions} +\usage{ +mergeSusieCs(fineMappingResult, coverage = 0.95) +} +\arguments{ +\item{fineMappingResult}{A \code{\link{QtlFineMappingResult}} (or any +\code{FineMappingResult}) produced by per-condition SuSiE fine-mapping. Each +entry's \code{topLoci} must carry a credible-set column +(\code{cs_}, e.g. \code{cs_95}, with values such as +\code{"susie_1"} where the trailing integer is the set index and \code{_0} +means "not in a credible set") and a PIP column.} + +\item{coverage}{Credible-set coverage level selecting the \code{cs_*} column +(default \code{0.95} -> \code{cs_95}).} +} +\value{ +A \code{data.frame} with one row per variant: \code{variant_id}, + \code{credibleSetNames} (the merged credible-set label), \code{maxPip} and + \code{medianPip}; or \code{NULL} when no credible sets are present. +} +\description{ +Reconciles per-condition (univariate) SuSiE fine-mapping into a single set of +merged credible sets. Each row of the supplied +\code{\link{QtlFineMappingResult}} is treated as one condition (its +\code{topLoci} carrying that condition's credible sets); credible sets that +share variants across conditions are unioned via connected components, and +every variant is reported with its merged credible-set label plus the maximum +and median PIP across the conditions it appears in. A typical use is selecting +a representative lead variant per merged credible set to assemble the +\code{"strong"} input for \code{\link{mashPipeline}}. +} +\seealso{ +\code{\link{fineMappingPipeline}}, \code{\link{mashPipeline}} +} diff --git a/man/mvsusieRssWeights.Rd b/man/mvsusieRssWeights.Rd index 72abc414..a0a7295a 100644 --- a/man/mvsusieRssWeights.Rd +++ b/man/mvsusieRssWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/regularizedRegressionWrappers.R +% Please edit documentation in R/fineMappingWrappers.R \name{mvsusieRssWeights} \alias{mvsusieRssWeights} \title{Compute mvSuSiE-RSS TWAS weights from summary statistics} diff --git a/man/mvsusieWeights.Rd b/man/mvsusieWeights.Rd index 2d4acd14..162853ae 100644 --- a/man/mvsusieWeights.Rd +++ b/man/mvsusieWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/regularizedRegressionWrappers.R +% Please edit documentation in R/fineMappingWrappers.R \name{mvsusieWeights} \alias{mvsusieWeights} \title{Compute mvSuSiE TWAS weights} diff --git a/man/postprocessFinemappingFits.Rd b/man/postprocessFinemappingFits.Rd index 78ae6c85..8aa69cd5 100644 --- a/man/postprocessFinemappingFits.Rd +++ b/man/postprocessFinemappingFits.Rd @@ -20,6 +20,7 @@ postprocessFinemappingFits( minAbsCorr = 0.8, medianAbsCorr = NULL, csInput = NULL, + conditionIdx = NULL, trim = TRUE ) } diff --git a/man/readSldscAnnot.Rd b/man/readSldscAnnot.Rd new file mode 100644 index 00000000..a29cd6ea --- /dev/null +++ b/man/readSldscAnnot.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldscWrapper.R +\name{readSldscAnnot} +\alias{readSldscAnnot} +\title{Read target annotation files (.annot.gz) into one table} +\usage{ +readSldscAnnot(targetAnnoDir, annotCols = NULL) +} +\arguments{ +\item{targetAnnoDir}{Character. Directory of `.annot.gz` files.} + +\item{annotCols}{Character or integer vector, default NULL. Annotation +columns to keep. NULL keeps all non-standard columns (auto-detected).} +} +\value{ +A \code{data.frame}: \code{CHR}, \code{SNP}, and annotation columns. +} +\description{ +Reads the per-chromosome polyfun `.annot.gz` files in a + directory and stacks them into a single \code{data.frame} of \code{CHR}, + \code{SNP}, and the annotation columns. This is the I/O step feeding the + \code{annot} slot of \code{\link{SldscData}}; the computation + (\code{\link{computeSldscAnnotSd}}, \code{\link{isBinarySldscAnnot}}) then + runs on the loaded table, not on paths. +} diff --git a/man/readSldscFrq.Rd b/man/readSldscFrq.Rd new file mode 100644 index 00000000..97e663df --- /dev/null +++ b/man/readSldscFrq.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sldscWrapper.R +\name{readSldscFrq} +\alias{readSldscFrq} +\title{Read PLINK allele-frequency files (.frq) into one table} +\usage{ +readSldscFrq(frqfileDir, plinkName = "ADSP_chr") +} +\arguments{ +\item{frqfileDir}{Character. Directory of `.frq` files.} + +\item{plinkName}{Character. Filename prefix (files at `{plinkName}{chr}.frq`). +Falls back to all `*.frq` in the directory when the prefix matches nothing.} +} +\value{ +A \code{data.frame}: \code{CHR}, \code{SNP}, \code{MAF}. +} +\description{ +Reads the per-chromosome PLINK `.frq` files for the reference + panel and stacks them into a single \code{data.frame} of \code{CHR}, + \code{SNP}, \code{MAF}. Feeds the \code{frq} slot of \code{\link{SldscData}}. +} diff --git a/man/sldscPostprocessingPipeline.Rd b/man/sldscPostprocessingPipeline.Rd index 90aa5919..cb33828b 100644 --- a/man/sldscPostprocessingPipeline.Rd +++ b/man/sldscPostprocessingPipeline.Rd @@ -5,58 +5,44 @@ \title{sLDSC Postprocessing Pipeline} \usage{ sldscPostprocessingPipeline( - traitSinglePrefixes, - traitJointPrefix, - targetAnnoDir, - frqfileDir = NULL, - plinkName = "ADSP_chr", + sldscData, mafCutoff = 0.05, targetCategories = NULL, targetLabels = NULL ) } \arguments{ -\item{traitSinglePrefixes}{Named list of file prefixes for the -single-target polyfun runs (one entry per trait; each value is a -length-N character vector of `/` prefixes, one per -target annotation).} +\item{sldscData}{An \code{\link{SldscData}} object bundling the annotation +table, the reference-panel allele frequencies, and the per-trait +single/joint polyfun runs.} -\item{traitJointPrefix}{Named list of file prefixes for the joint -polyfun runs (one entry per trait; each value a `/` -prefix into the joint LD-score dir). Pass an empty list to skip -the joint branch.} +\item{mafCutoff}{Numeric MAF cutoff applied via the object's frq table. +Default \code{0.05}. Set to \code{0} to opt out (requires frq data when +\code{> 0}).} -\item{targetAnnoDir}{Directory containing the target `.annot.gz` -files used for sd_C and binary detection (typically the joint dir).} - -\item{frqfileDir}{Optional directory of `.frq` files for the MAF -cutoff. Pass \code{NULL} to skip MAF filtering.} - -\item{plinkName}{File-name prefix of the PLINK reference panel -(default \code{"ADSP_chr"}; combined per-chromosome as -\code{paste0(plinkName, chrom)}).} - -\item{mafCutoff}{Numeric MAF cutoff applied via the `.frq` files. -Default \code{0.05}. Set to \code{0} to opt out.} - -\item{targetCategories}{Optional character vector of target -annotation names to retain. Auto-detected from the joint run when +\item{targetCategories}{Optional character vector of target annotation names +to retain. Auto-detected from the joint run (or first single run) when \code{NULL}.} \item{targetLabels}{Optional display names, same length / order as -\code{targetCategories}, applied to every output column / tau* -block colname.} +\code{targetCategories}, applied to every output column / tau* block +colname.} } \value{ -A list with \code{per_trait} (per-trait standardised tables), - meta tables (\code{tau_star_meta}, \code{E_meta}, - \code{enrich_stat_meta}), and a \code{params} record of the call - options. +A list with \code{per_trait} (per-trait standardised tables), meta + tables (\code{tauStar}, \code{enrichment}, \code{enrichstat}), and a + \code{params} record of the call options. } \description{ -Postprocess polyfun's per-trait sLDSC outputs (one - single-target run per target annotation, plus an optional joint - run) into a single results object with per-trait tau*, EnrichStat - with back-solved jackknife SE, and a DerSimonian-Laird random- - effects meta-analysis across traits. +Postprocess polyfun's per-trait sLDSC outputs (already loaded + into an \code{\link{SldscData}} object) into a single results object with + per-trait tau*, EnrichStat with back-solved jackknife SE, and a + DerSimonian-Laird random-effects meta-analysis across traits. All file I/O + is done up front by the reader functions (\code{\link{readSldscAnnot}}, + \code{\link{readSldscFrq}}, \code{\link{readSldscTrait}}); this pipeline is + pure computation over the in-memory \code{SldscData}. +} +\seealso{ +\code{\link{SldscData}}, \code{\link{readSldscAnnot}}, + \code{\link{readSldscFrq}}, \code{\link{readSldscTrait}} } diff --git a/man/standardizeSldscTrait.Rd b/man/standardizeSldscTrait.Rd index aab7cf11..61d83a86 100644 --- a/man/standardizeSldscTrait.Rd +++ b/man/standardizeSldscTrait.Rd @@ -5,24 +5,32 @@ \title{Standardize tau and compute EnrichStat for one polyfun run} \usage{ standardizeSldscTrait( - traitData, + sldscData, + trait, + mode = c("single", "joint"), + idx = NULL, sdAnnot, MRef, - targetCategories = NULL, - mode = c("single", "joint") + targetCategories = NULL ) } \arguments{ -\item{traitData}{List from \code{\link{readSldscTrait}}.} +\item{sldscData}{An \code{\link{SldscData}} object (the run is pulled from it +via \code{getTraitRun}).} + +\item{trait}{Character. Trait name (a key of the SldscData traits list).} + +\item{mode}{Character: `"single"` or `"joint"`.} + +\item{idx}{Integer or NULL. For `mode = "single"`, which of the trait's +single-target runs to standardize.} \item{sdAnnot}{Named numeric vector from \code{\link{computeSldscAnnotSd}}.} \item{MRef}{Scalar from \code{\link{computeSldscMRef}}.} -\item{targetCategories}{Character vector or NULL. If NULL, intersects -`traitData$categories` with `names(sdAnnot)`.} - -\item{mode}{Character: `"single"` or `"joint"`.} +\item{targetCategories}{Character vector or NULL. If NULL, intersects the +run's `categories` with `names(sdAnnot)`.} } \value{ A list with `summary` (data frame), `tau_star_blocks` (matrix), diff --git a/man/susieAshRssWeights.Rd b/man/susieAshRssWeights.Rd index a3a4a0f0..21d9e85a 100644 --- a/man/susieAshRssWeights.Rd +++ b/man/susieAshRssWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/regularizedRegressionWrappers.R +% Please edit documentation in R/fineMappingWrappers.R \name{susieAshRssWeights} \alias{susieAshRssWeights} \title{Compute SuSiE-ASH-RSS TWAS weights} diff --git a/man/susieAshWeights.Rd b/man/susieAshWeights.Rd index 9c21302a..20f78799 100644 --- a/man/susieAshWeights.Rd +++ b/man/susieAshWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/regularizedRegressionWrappers.R +% Please edit documentation in R/fineMappingWrappers.R \name{susieAshWeights} \alias{susieAshWeights} \title{Compute SuSiE-ASH TWAS weights} diff --git a/man/susieInfRssWeights.Rd b/man/susieInfRssWeights.Rd index c1504bf9..4ad4a4a6 100644 --- a/man/susieInfRssWeights.Rd +++ b/man/susieInfRssWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/regularizedRegressionWrappers.R +% Please edit documentation in R/fineMappingWrappers.R \name{susieInfRssWeights} \alias{susieInfRssWeights} \title{Compute SuSiE-inf-RSS TWAS weights} diff --git a/man/susieInfWeights.Rd b/man/susieInfWeights.Rd index a66d7859..38b02710 100644 --- a/man/susieInfWeights.Rd +++ b/man/susieInfWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/regularizedRegressionWrappers.R +% Please edit documentation in R/fineMappingWrappers.R \name{susieInfWeights} \alias{susieInfWeights} \title{Compute SuSiE-inf TWAS weights} diff --git a/man/susieRssWeights.Rd b/man/susieRssWeights.Rd index a792c21b..e3bdde88 100644 --- a/man/susieRssWeights.Rd +++ b/man/susieRssWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/regularizedRegressionWrappers.R +% Please edit documentation in R/fineMappingWrappers.R \name{susieRssWeights} \alias{susieRssWeights} \title{Compute SuSiE-RSS TWAS weights} diff --git a/man/susieWeights.Rd b/man/susieWeights.Rd index a26f76ed..dffc9ea2 100644 --- a/man/susieWeights.Rd +++ b/man/susieWeights.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/regularizedRegressionWrappers.R +% Please edit documentation in R/fineMappingWrappers.R \name{susieWeights} \alias{susieWeights} \title{Compute SuSiE TWAS weights} diff --git a/man/twasWeightsCv.Rd b/man/twasWeightsCv.Rd index 32a41cdc..c5baae04 100644 --- a/man/twasWeightsCv.Rd +++ b/man/twasWeightsCv.Rd @@ -14,6 +14,7 @@ twasWeightsCv( variantsToKeep = NULL, numThreads = 1, verbose = 1, + retainFits = FALSE, ... ) } diff --git a/man/twasWeightsPipeline.Rd b/man/twasWeightsPipeline.Rd index 4155ea34..b7895800 100644 --- a/man/twasWeightsPipeline.Rd +++ b/man/twasWeightsPipeline.Rd @@ -23,8 +23,10 @@ twasWeightsPipeline(data, ...) jointSpecification = NULL, fineMappingResult = NULL, twasWeights = NULL, + mashPrior = NULL, cvFolds = 5, samplePartition = NULL, + fitFullData = TRUE, maxCvVariants = -1, cvThreads = 1, cvWeightMethods = NULL, @@ -128,7 +130,7 @@ construct-time cutoff in place.} \code{FALSE} (default) learns weights for each range independently and concatenates them into one entry per (study, context, trait, method); the per-region fits are kept as a named list and per-region CV is -recorded as a flat \code{cvPerformance} data frame (one row per region). +recorded as a flat \code{cvResult} data frame (one row per region). \code{TRUE} concatenates the ranges' genotypes into one joint fit. Ignored for a single-range / cis request.} diff --git a/tests/testthat/helper-rrwFixtures.R b/tests/testthat/helper-rrwFixtures.R new file mode 100644 index 00000000..621c8069 --- /dev/null +++ b/tests/testthat/helper-rrwFixtures.R @@ -0,0 +1,41 @@ +# Shared fixtures for the TWAS weight-learner wrappers. Used by both +# test_regularizedRegressionWrappers.R (lasso/enet/scad/... + RSS solvers) and +# test_fineMappingWrappers.R (the SuSiE/mvSuSiE/fSuSiE weight extractors), so +# they live in a helper that testthat auto-loads for every test file. + +# Individual-level (X, y) fixture: n samples x p variants with a sparse signal. +.rrwXy <- function(n = 50, p = 6, seed = 1) { + set.seed(seed) + X <- matrix(rnorm(n * p), n, p, dimnames = list(NULL, paste0("v", seq_len(p)))) + b <- rnorm(p); b[-(1:2)] <- 0 + y <- as.numeric(X %*% b + rnorm(n)) + list(X = X, y = y, n = n, p = p) +} + +# Single-context summary statistics + LD for the *_rss_weights(stat, LD) contract. +.rrwStatLd <- function(n = 50, p = 6, seed = 1) { + d <- .rrwXy(n, p, seed) + bhat <- vapply(seq_len(p), + function(j) summary(lm(d$y ~ d$X[, j]))$coefficients[2, 1], numeric(1)) + sehat <- vapply(seq_len(p), + function(j) summary(lm(d$y ~ d$X[, j]))$coefficients[2, 2], numeric(1)) + zhat <- bhat / sehat + LD <- cor(d$X) + stat <- list(b = bhat, seb = sehat, z = zhat, cor = bhat, + n = rep(n, p), var_y = 1, variantNames = colnames(LD)) + list(stat = stat, LD = LD, p = p, n = n) +} + +# Multi-context (variants x conditions) fixture for mr.mash / mvSuSiE wrappers. +.rrwMulti <- function(n = 60, p = 6, K = 3, seed = 2) { + set.seed(seed) + X <- matrix(rnorm(n * p), n, p, dimnames = list(NULL, paste0("v", seq_len(p)))) + B <- matrix(0, p, K); B[1, ] <- rnorm(K, sd = 2); B[2, ] <- rnorm(K, sd = 2) + Y <- X %*% B + matrix(rnorm(n * K), n, K) + colnames(Y) <- paste0("ctx", seq_len(K)) + Z <- vapply(seq_len(K), function(k) + vapply(seq_len(p), function(j) + summary(lm(Y[, k] ~ X[, j]))$coefficients[2, 3], numeric(1)), numeric(p)) + colnames(Z) <- colnames(Y) + list(X = X, Y = Y, LD = cor(X), stat = list(z = Z, n = n), p = p, K = K) +} diff --git a/tests/testthat/helper-showMethods.R b/tests/testthat/helper-showMethods.R index a5c3e8b7..d7c31ff2 100644 --- a/tests/testthat/helper-showMethods.R +++ b/tests/testthat/helper-showMethods.R @@ -48,7 +48,7 @@ context("show methods") TwasWeightsEntry( variantIds = paste0("v", seq_len(p)), weights = rep(0.1, p), - cvPerformance = list(rsq = 0.5), + cvResult = list(rsq = 0.5), standardized = standardized) } diff --git a/tests/testthat/helper-sldsc.R b/tests/testthat/helper-sldsc.R new file mode 100644 index 00000000..166e35b5 --- /dev/null +++ b/tests/testthat/helper-sldsc.R @@ -0,0 +1,222 @@ +# Shared S-LDSC fixture generators for test_sldscWrapper.R (helper unit tests) +# and test_sldscPostprocessingPipeline.R (integration tests). +# +# Fixture convention: +# - 2 chromosomes (1, 2), 50 SNPs each -> 100 total +# - 2 target annotations: "annot_A" (binary), "annot_B" (continuous) +# - baseline annotations (baselineLD_0 ..) in joint run +# - 10 jackknife blocks +# - Polyfun appends "_0" to target annotation names in .results + +# Create a single .annot.gz file for one chromosome. +# Real polyfun .annot.gz files have CHR, SNP, BP, CM + annotation columns only +# (no MAF/A1/A2 -- those come from the .frq / PLINK files). +.make_annot_gz <- function(dir, chrom, nSnps = 50) { + df <- data.frame( + CHR = chrom, + SNP = paste0("rs", (chrom - 1L) * 100L + seq_len(nSnps)), + BP = seq_len(nSnps) * 1000L, + CM = seq_len(nSnps) * 0.01, + annot_A = sample(c(0L, 1L), nSnps, replace = TRUE), + annot_B = rnorm(nSnps, 2, 0.5), + stringsAsFactors = FALSE + ) + path <- file.path(dir, sprintf("target.%d.annot.gz", chrom)) + gz <- gzfile(path, "wb") + vroom::vroom_write(df, gz, delim = "\t") + close(gz) + invisible(df) +} + +# Create a PLINK .frq file for one chromosome +.make_frq <- function(dir, chrom, plinkName ="ref_chr", nSnps = 50) { + df <- data.frame( + CHR = chrom, + SNP = paste0("rs", (chrom - 1L) * 100L + seq_len(nSnps)), + A1 = "A", + A2 = "G", + MAF = runif(nSnps, 0.01, 0.49), + NCHROBS = 200L, + stringsAsFactors = FALSE + ) + path <- file.path(dir, sprintf("%s%d.frq", plinkName, chrom)) + vroom::vroom_write(df, path, delim = "\t") + invisible(df) +} + +# Create the three polyfun output files (.results, .log, .part_delete) +# for a single-target run. Real polyfun output includes baseline categories +# even in single-target mode, so we add 2 dummy baseline categories. +.make_polyfun_single <- function(dir, prefix, target_name, nBlocks = 10, + h2g = 0.3, tau = 1e-7, enrichment = 2.5, + n_baseline = 2) { + target_cat <- paste0(target_name, "_0") + baseline_cats <- paste0("baselineLD_", seq_len(n_baseline) - 1L) + all_cats <- c(target_cat, baseline_cats) + n_cats <- length(all_cats) + + taus_all <- c(tau, rep(1e-8, n_baseline)) + enrichments_all <- c(enrichment, rep(1.0, n_baseline)) + + results <- data.frame( + Category = all_cats, + Coefficient = taus_all, + Coefficient_std_error = abs(taus_all) * 0.3, + Enrichment = enrichments_all, + Enrichment_std_error = enrichments_all * 0.2, + Enrichment_p = rep(0.01, n_cats), + `Prop._h2` = c(0.15, rep(0.425, n_baseline)), + `Prop._SNPs` = c(0.06, rep(0.47, n_baseline)), + check.names = FALSE, + stringsAsFactors = FALSE + ) + vroom::vroom_write(results, paste0(prefix, ".results"), delim = "\t") + + writeLines(c( + "Analysis started at 2024-01-01", + sprintf("Total Observed scale h2: %g (0.05)", h2g), + "Analysis finished" + ), paste0(prefix, ".log")) + + blocks <- matrix(rnorm(nBlocks * n_cats, + mean = rep(taus_all, each = nBlocks), + sd = abs(rep(taus_all, each = nBlocks)) * 0.5), + nrow = nBlocks, ncol = n_cats) + colnames(blocks) <- all_cats + vroom::vroom_write(as.data.frame(blocks), paste0(prefix, ".part_delete"), delim = "\t") + invisible(NULL) +} + +# Create polyfun output files for a joint run (target + baseline annotations) +.make_polyfun_joint <- function(dir, prefix, target_names, + n_baseline = 3, nBlocks = 10, h2g = 0.3) { + target_cats <- paste0(target_names, "_0") + baseline_cats <- paste0("baselineLD_", seq_len(n_baseline) - 1L) + all_cats <- c(target_cats, baseline_cats) + n_cats <- length(all_cats) + + taus <- c(rep(1e-7, length(target_cats)), rep(1e-8, n_baseline)) + enrichments <- c(rep(2.0, length(target_cats)), rep(1.0, n_baseline)) + + results <- data.frame( + Category = all_cats, + Coefficient = taus, + Coefficient_std_error = abs(taus) * 0.3, + Enrichment = enrichments, + Enrichment_std_error = enrichments * 0.2, + Enrichment_p = rep(0.05, n_cats), + `Prop._h2` = rep(1 / n_cats, n_cats), + `Prop._SNPs` = rep(1 / n_cats, n_cats), + check.names = FALSE, + stringsAsFactors = FALSE + ) + vroom::vroom_write(results, paste0(prefix, ".results"), delim = "\t") + + writeLines(c( + "Analysis started at 2024-01-01", + sprintf("Total Observed scale h2: %g (0.05)", h2g), + "Analysis finished" + ), paste0(prefix, ".log")) + + blocks <- matrix(rnorm(nBlocks * n_cats, mean = rep(taus, each = nBlocks), + sd = abs(rep(taus, each = nBlocks)) * 0.5), + nrow = nBlocks, ncol = n_cats) + colnames(blocks) <- all_cats + vroom::vroom_write(as.data.frame(blocks), paste0(prefix, ".part_delete"), delim = "\t") + invisible(NULL) +} + +# Build a complete fixture directory for the full pipeline +.make_sldsc_fixtures <- function(envir = parent.frame()) { + base_dir <- withr::local_tempdir(.local_envir = envir) + + anno_dir <- file.path(base_dir, "annot") + frq_dir <- file.path(base_dir, "frq") + out_dir <- file.path(base_dir, "output") + dir.create(anno_dir) + dir.create(frq_dir) + dir.create(out_dir) + + plink_name <- "ref_chr" + + # Annotation + freq files for 2 chromosomes + for (chr in 1:2) { + .make_annot_gz(anno_dir, chr) + .make_frq(frq_dir, chr, plinkName =plink_name) + } + + targets <- c("annot_A", "annot_B") + + # Single-target runs: 2 targets x 2 traits + for (trait in c("traitX", "traitY")) { + for (i in seq_along(targets)) { + pref <- file.path(out_dir, sprintf("%s_single_%s", trait, targets[i])) + .make_polyfun_single(out_dir, pref, targets[i], h2g = 0.3 + (i - 1) * 0.05) + } + } + + # Joint runs: 1 per trait + for (trait in c("traitX", "traitY")) { + pref <- file.path(out_dir, sprintf("%s_joint", trait)) + .make_polyfun_joint(out_dir, pref, targets, h2g = 0.3) + } + + list( + base_dir = base_dir, + anno_dir = anno_dir, + frq_dir = frq_dir, + out_dir = out_dir, + plinkName =plink_name, + targets = targets, + trait_names = c("traitX", "traitY") + ) +} + +# ============================================================================= +# In-memory builders for SldscData / compute / pipeline tests (no file I/O) +# ============================================================================= + +# Build one readSldscTrait-shaped run list (the in-memory shape the pipeline +# and standardizeSldscTrait consume). +.sldscMkRun <- function(cats, h2g = 0.3, nBlocks = 10L, + tau = 1e-7, enrichment = 2.0, enrichmentP = 0.01) { + n <- length(cats) + list( + categories = cats, + tau = setNames(rep(tau, n), cats), + tauSe = setNames(rep(abs(tau) * 0.3, n), cats), + enrichment = setNames(rep(enrichment, n), cats), + enrichmentSe = setNames(rep(enrichment * 0.2, n), cats), + enrichmentP = setNames(rep(enrichmentP, n), cats), + propH2 = setNames(rep(0.2, n), cats), + propSnps = setNames(rep(0.1, n), cats), + h2g = h2g, + tauBlocks = matrix(rep(tau, nBlocks * n), nBlocks, n, + dimnames = list(NULL, cats)), + nBlocks = nBlocks) +} + +# Build a small, valid in-memory SldscData: 2 traits, 2 target annotations +# (annot_A binary, annot_B continuous), single + optional joint runs. +.sldscMkData <- function(withJoint = TRUE, withFrq = TRUE, + traitNames = c("traitX", "traitY")) { + annot <- data.frame( + CHR = c(1, 1, 1, 2, 2, 2), + SNP = paste0("rs", 1:6), + annot_A = c(1, 0, 1, 0, 1, 0), + annot_B = c(2.1, 1.8, 2.5, 1.9, 2.3, 2.0), + stringsAsFactors = FALSE) + frq <- if (withFrq) + data.frame(CHR = c(1, 1, 1, 2, 2, 2), SNP = paste0("rs", 1:6), + MAF = rep(0.2, 6), stringsAsFactors = FALSE) + else NULL + mkTrait <- function() { + tr <- list(single = list(.sldscMkRun(c("annot_A_0", "baselineLD_0")), + .sldscMkRun(c("annot_B_0", "baselineLD_0")))) + tr$joint <- if (withJoint) + .sldscMkRun(c("annot_A_0", "annot_B_0", "baselineLD_0")) else NULL + tr + } + traits <- setNames(lapply(traitNames, function(.) mkTrait()), traitNames) + SldscData(annot = annot, frq = frq, traits = traits) +} diff --git a/tests/testthat/test_FineMappingEntry.R b/tests/testthat/test_FineMappingEntry.R index 6243ed4f..328dd0c9 100644 --- a/tests/testthat/test_FineMappingEntry.R +++ b/tests/testthat/test_FineMappingEntry.R @@ -254,3 +254,88 @@ test_that("show.FineMappingEntry reports variant count and CS count", { }) +# === getMarginalEffects maxPval filter === + +test_that("FineMappingEntry: getMarginalEffects applies the maxPval filter", { + tl <- data.frame( + variant_id = c("v1", "v2", "v3"), + pip = c(0.9, 0.5, 0.1), + marginal_p = c(0.001, 0.5, NA_real_), + stringsAsFactors = FALSE) + entry <- FineMappingEntry(variantIds = tl$variant_id, + susieFit = list(), topLoci = tl) + out <- getMarginalEffects(entry, maxPval = 0.01) + # Drops the p = 0.5 row and the NA-p row; keeps only v1. + expect_equal(nrow(out), 1L) + expect_equal(out$variant_id, "v1") +}) + + +# === getCs empty / cs-less topLoci projections === + +test_that("FineMappingEntry: getCs returns empty posterior view when topLoci is empty", { + entry <- FineMappingEntry( + variantIds = character(0), + susieFit = list(), + topLoci = data.frame(variant_id = character(0), pip = numeric(0), + stringsAsFactors = FALSE)) + res <- getCs(entry) + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 0L) + expect_true(all(c("variant_id", "pip") %in% names(res))) +}) + + +test_that("FineMappingEntry: getCs returns empty posterior view when the cs column is absent", { + tl <- data.frame(variant_id = c("a", "b"), pip = c(0.1, 0.2), + stringsAsFactors = FALSE) + entry <- FineMappingEntry(variantIds = c("a", "b"), + susieFit = list(), topLoci = tl) + res <- getCs(entry) # no cs_95 column -> empty view + expect_s3_class(res, "data.frame") + expect_equal(nrow(res), 0L) +}) + + +# === adjustPips: missing lbf_variable + mu2-driven posterior recompute === + +test_that("FineMappingEntry: adjustPips errors when susieFit lacks lbf_variable", { + tl <- data.frame(variant_id = c("v1", "v2"), pip = c(0.6, 0.4), + stringsAsFactors = FALSE) + entry <- FineMappingEntry(variantIds = c("v1", "v2"), + susieFit = list(), topLoci = tl) + expect_error(adjustPips(entry, c("v1", "v2")), + "no `lbf_variable` matrix") +}) + + +test_that("FineMappingEntry: adjustPips subsets mu2 and recomputes posterior_sd", { + vids <- paste0("chr1:", 1:5, ":A:G") + p <- length(vids); L <- 2L + set.seed(101L) + lbf <- matrix(rnorm(L * p), nrow = L, ncol = p) + colnames(lbf) <- vids + alpha <- lbfToAlpha(lbf) + pip <- as.numeric(1 - apply(1 - alpha, 2, prod)) + mu <- matrix(rnorm(L * p), L, p) + mu2 <- mu^2 + 1 # plausible second moment (>= mean^2) + entry <- FineMappingEntry( + variantIds = vids, + susieFit = list(pip = pip, alpha = alpha, lbf_variable = lbf, + mu = mu, mu2 = mu2, + X_column_scale_factors = rep(1, p)), + topLoci = data.frame(variant_id = vids, pip = pip, + stringsAsFactors = FALSE)) + keep <- vids[2:4] + adj <- adjustPips(entry, keep) + expect_s4_class(adj, "FineMappingEntry") + # mu2 carried through the variant subsetting alongside lbf/mu. + expect_equal(ncol(adj@susieFit$mu2), 3L) + # posterior_mean / posterior_sd recomputed from the subset alpha/mu/mu2. + expect_equal(nrow(adj@topLoci), 3L) + expect_true("posterior_sd" %in% names(adj@topLoci)) + expect_equal(length(adj@topLoci$posterior_sd), 3L) + expect_true(all(adj@topLoci$posterior_sd >= 0)) +}) + + diff --git a/tests/testthat/test_GwasFineMappingResult.R b/tests/testthat/test_GwasFineMappingResult.R index 52c8f86f..557e7405 100644 --- a/tests/testthat/test_GwasFineMappingResult.R +++ b/tests/testthat/test_GwasFineMappingResult.R @@ -173,3 +173,19 @@ test_that("GwasFineMappingResult: getStudy/getMethodNames inherit from base", { expect_setequal(getMethodNames(res), c("susie", "susieRss")) }) + +test_that("GwasFineMappingResult: getMarginalEffects with study/method selectors", { + e1 <- .ca_makeFmEntry(3) + e2 <- .ca_makeFmEntry(4) + res <- GwasFineMappingResult( + study = c("g1", "g2"), + method = c("susie", "susie"), + entry = list(e1, e2)) + # Collection-level selection picks the g2 entry, then delegates to the + # entry-level getMarginalEffects. + me <- getMarginalEffects(res, study = "g2", method = "susie") + expect_s3_class(me, "data.frame") + expect_equal(nrow(me), 4L) + expect_true(all(c("variant_id", "beta", "se", "z", "p") %in% names(me))) +}) + diff --git a/tests/testthat/test_GwasSumStats.R b/tests/testthat/test_GwasSumStats.R index 5d78c53b..813d733d 100644 --- a/tests/testthat/test_GwasSumStats.R +++ b/tests/testthat/test_GwasSumStats.R @@ -175,3 +175,73 @@ test_that("show.GwasSumStats prints nrow and genome build", { # === Tests migrated from test_h2ClassesSumstats.R (showMethods) === +# ============================================================================= +# GwasSumStats() constructor validation + accessor error branches +# (.sh_makeQtlSumstatsGr / .sh_makeGenotypeHandle come from +# helper-showMethods.R) +# ============================================================================= + +test_that("GwasSumStats() errors when required args are missing", { + expect_error(GwasSumStats(study = "g1"), "are all required") +}) + +test_that("GwasSumStats() errors when genome is not a single string", { + expect_error( + GwasSumStats(study = "g1", entry = list(.sh_makeQtlSumstatsGr()), + genome = c("hg19", "hg38"), + ldSketch = .sh_makeGenotypeHandle()), + "single character string") +}) + +test_that("GwasSumStats() errors when entry is not a list", { + expect_error( + GwasSumStats(study = "g1", entry = "not_a_list", + genome = "hg19", ldSketch = .sh_makeGenotypeHandle()), + "must be a list") +}) + +test_that("GwasSumStats() errors when length(entry) != length(study)", { + expect_error( + GwasSumStats(study = c("g1", "g2"), + entry = list(.sh_makeQtlSumstatsGr()), + genome = "hg19", ldSketch = .sh_makeGenotypeHandle()), + "must equal length") +}) + +test_that("GwasSumStats() errors when a per-study column has a bad length", { + expect_error( + GwasSumStats(study = c("g1", "g2"), + entry = list(.sh_makeQtlSumstatsGr(), + .sh_makeQtlSumstatsGr()), + genome = "hg19", ldSketch = .sh_makeGenotypeHandle(), + nCase = c(1, 2, 3)), + "must have length 1 or length") +}) + +test_that("GwasSumStats() attaches extra per-study columns via ...", { + obj <- GwasSumStats( + study = c("g1", "g2"), + entry = list(.sh_makeQtlSumstatsGr(), .sh_makeQtlSumstatsGr()), + genome = "hg19", ldSketch = .sh_makeGenotypeHandle(), + cohort = c("UKB", "FinnGen")) + expect_equal(as.character(obj$cohort), c("UKB", "FinnGen")) +}) + +test_that("getSumStats() errors on an empty GwasSumStats", { + empty <- GwasSumStats( + study = character(0), entry = list(), + genome = "hg19", ldSketch = .sh_makeGenotypeHandle(), + varY = numeric(0)) + expect_equal(nrow(empty), 0L) + expect_error(getSumStats(empty), "has no rows") +}) + +test_that("getSumStats() on a multi-study GwasSumStats needs a study selector", { + two <- GwasSumStats( + study = c("g1", "g2"), + entry = list(.sh_makeQtlSumstatsGr(), .sh_makeQtlSumstatsGr()), + genome = "hg19", ldSketch = .sh_makeGenotypeHandle()) + expect_error(getSumStats(two), "studies. Pass") + expect_error(getSumStats(two, study = "ghost"), "Unknown study") +}) + diff --git a/tests/testthat/test_JointGroup.R b/tests/testthat/test_JointGroup.R new file mode 100644 index 00000000..d1cac6ae --- /dev/null +++ b/tests/testthat/test_JointGroup.R @@ -0,0 +1,71 @@ +# Tests for R/JointGroup.R — the uniform contract for the joint-analysis engine: +# the JointGroup hierarchy (conditions-table identity model), the +# JointDispatchCell wiring row, and the pipeline marker classes. Construction is +# validated, so a malformed group / mistyped cell fails loudly at the source. + +.jg_cond <- function(study = "S", context = c("c1", "c2"), trait = "G") { + data.frame(study = study, context = context, trait = trait, + stringsAsFactors = FALSE) +} + +test_that("JointGroup subclasses construct from a conditions table", { + X <- matrix(0, 10, 2, dimnames = list(paste0("s", 1:10), c("v1", "v2"))) + Y <- matrix(0, 10, 2, dimnames = list(paste0("s", 1:10), c("c1", "c2"))) + g <- new("IndividualJointGroup", conditions = .jg_cond(), X = X, Y = Y) + expect_s4_class(g, "JointGroup") + expect_s4_class(g, "IndividualJointGroup") + expect_equal(nrow(g@conditions), 2L) + + Z <- matrix(0, 3, 2); R <- diag(3) + sg <- new("SumStatsJointGroup", conditions = .jg_cond(), Z = Z, R = R, + N = c(100, 120)) + expect_s4_class(sg, "JointGroup") + expect_s4_class(sg, "SumStatsJointGroup") +}) + +test_that("JointGroup validity rejects malformed groups", { + X <- matrix(0, 10, 2, dimnames = list(paste0("s", 1:10), c("v1", "v2"))) + Y <- matrix(0, 10, 2, dimnames = list(paste0("s", 1:10), c("c1", "c2"))) + # 1 condition is valid (the univariate cell); 0 conditions is not. + expect_s4_class( + new("IndividualJointGroup", conditions = .jg_cond(context = "c1"), + X = X[, 1, drop = FALSE], Y = Y[, 1, drop = FALSE]), + "IndividualJointGroup") + expect_error(new("IndividualJointGroup", + conditions = data.frame(study = character(0), + context = character(0), + trait = character(0)), + X = X, Y = Y[, 0, drop = FALSE]), + ">= 1 condition") + # Missing identity column. + expect_error(new("IndividualJointGroup", + conditions = data.frame(study = "S", context = c("c1", "c2")), + X = X, Y = Y), "must have columns") + # X/Y row mismatch. + expect_error(new("IndividualJointGroup", conditions = .jg_cond(), + X = X, Y = Y[1:5, , drop = FALSE]), "dimension") + # ncol(Y) must equal nrow(conditions). + expect_error(new("IndividualJointGroup", + conditions = .jg_cond(context = c("c1", "c2", "c3")), + X = X, Y = Y), "ncol\\(Y\\)") + # Non-square LD. + expect_error(new("SumStatsJointGroup", conditions = .jg_cond(), + Z = matrix(0, 3, 2), R = matrix(0, 3, 2), N = 1), "square") +}) + +test_that("JointDispatchCell + pipeline markers validate at construction", { + cell <- new("JointDispatchCell", pattern = "context", dataForm = "individual", + enumerate = function(data, scope, args) list(), minGroup = 2L) + expect_s4_class(cell, "JointDispatchCell") + expect_error(new("JointDispatchCell", pattern = "context", dataForm = "bogus", + enumerate = function() NULL, minGroup = 2L), "dataForm") + expect_error(new("JointDispatchCell", pattern = "context", + dataForm = "individual", enumerate = function() NULL, + minGroup = 0L), "minGroup") + + fm <- new("FmJointPipeline", config = list(coverage = 0.95)) + tw <- new("TwasJointPipeline", config = list(retainFit = TRUE)) + expect_s4_class(fm, "JointPipeline") + expect_s4_class(tw, "JointPipeline") + expect_equal(fm@config$coverage, 0.95) +}) diff --git a/tests/testthat/test_LdStatistic.R b/tests/testthat/test_LdStatistic.R new file mode 100644 index 00000000..bf1ed2b8 --- /dev/null +++ b/tests/testthat/test_LdStatistic.R @@ -0,0 +1,19 @@ +# Tests for R/LdStatistic.R (virtual base class) +# getGenome() is defined on the virtual LdStatistic and inherited by its +# concrete subclasses (LdEigen / LdScore); exercise it through a concrete +# LdScore instance. Fixtures (make_test_ldblocks / make_test_snp_info) come +# from helper-h2Classes.R. + +test_that("getGenome returns the genome build string (via an LdScore subclass)", { + n <- 10 + obj <- new("LdScore", + ldBlocks = make_test_ldblocks(), + snpInfo = make_test_snp_info(n), + nRef = 500L, + inSample = FALSE, + genome = "hg19", + ldScores = matrix(runif(n), nrow = n, ncol = 1), + ldScoreWeights = runif(n), + ldMatrixList = list()) + expect_equal(getGenome(obj), "hg19") +}) diff --git a/tests/testthat/test_MashPrior.R b/tests/testthat/test_MashPrior.R new file mode 100644 index 00000000..c1510d35 --- /dev/null +++ b/tests/testthat/test_MashPrior.R @@ -0,0 +1,57 @@ +# Tests for R/MashPrior.R — the data-driven (mash) prior bundle: full-data prior +# (fullFit) + per-fold cross-validated priors (cvFits) consumed by +# twasWeightsPipeline (mr.mash) and, transitively, fineMappingPipeline (mvSuSiE). + +test_that("MashPrior: construct + accessors (full + cv)", { + U <- list(U1 = diag(2)) + sp <- data.frame(Sample = paste0("s", 1:6), Fold = rep(1:3, each = 2), + stringsAsFactors = FALSE) + perFold <- list(list(U = U, w = c(0.5, 0.5)), + list(U = U, w = c(0.5, 0.5)), + list(U = U, w = c(0.5, 0.5))) + mp <- MashPrior(fullFit = list(U = U, w = c(0.5, 0.5)), + cvFits = list(samplePartition = sp, perFoldFits = perFold)) + expect_s4_class(mp, "MashPrior") + expect_identical(getFullFit(mp)$U, U) + expect_length(getCvFits(mp)$perFoldFits, 3L) + expect_identical(getCvFits(mp)$samplePartition, sp) + expect_output(show(mp), "MashPrior") +}) + +test_that("MashPrior: full-only and cv-only bundles", { + U <- list(U1 = diag(2)) + perFold <- list(list(U = U), list(U = U), list(U = U)) + mpFull <- MashPrior(fullFit = list(U = U)) + expect_null(getCvFits(mpFull)) + mpCv <- MashPrior(cvFits = list(perFoldFits = perFold)) + expect_null(getFullFit(mpCv)) + expect_length(getCvFits(mpCv)$perFoldFits, 3L) +}) + +test_that("MashPrior: validity rejects malformed bundles", { + U <- list(U1 = diag(2)) + sp <- data.frame(Sample = paste0("s", 1:6), Fold = rep(1:3, each = 2), + stringsAsFactors = FALSE) + # Empty bundle. + expect_error(MashPrior(), "at least one") + # perFoldFits count must match the partition's fold count. + expect_error( + MashPrior(cvFits = list(samplePartition = sp, + perFoldFits = list(list(U = U)))), + "fold") + # perFoldFits must be a non-empty list. + expect_error(MashPrior(cvFits = list(perFoldFits = "nope"))) + # samplePartition must carry Sample + Fold columns. + expect_error( + MashPrior(cvFits = list(samplePartition = data.frame(x = 1), + perFoldFits = list(list(U = U)))), + "Sample") +}) + +test_that("MashPrior: show reports 'cvFits: none' for a full-only bundle", { + U <- list(U1 = diag(2)) + mp <- MashPrior(fullFit = list(U = U, w = c(0.5, 0.5))) # cvFits NULL + out <- capture.output(show(mp)) + expect_true(any(grepl("cvFits: none", out))) + expect_true(any(grepl("fullFit: present", out))) +}) diff --git a/tests/testthat/test_MultiStudyQtlDataset.R b/tests/testthat/test_MultiStudyQtlDataset.R index 2034475b..7d8be277 100644 --- a/tests/testthat/test_MultiStudyQtlDataset.R +++ b/tests/testthat/test_MultiStudyQtlDataset.R @@ -57,3 +57,23 @@ test_that("MultiStudyQtlDataset: rejects trait/position conflicts across studies ) }) + +test_that("getSumStats(MultiStudyQtlDataset) rejects selection arguments", { + # Compose one individual-level QtlDataset with a QtlSumStats of + # summary-statistic-only studies (1 + 1 = 2 studies total). + gr <- GenomicRanges::GRanges("chr1", IRanges::IRanges(100L, width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame( + SNP = "rs1", A1 = "A", A2 = "G", Z = 1.0, N = 1000L) + ss <- QtlSumStats(study = "s3", context = "c1", trait = "t1", + entry = list(gr), genome = "hg19", + ldSketch = .sc_makeGenotypeHandle()) + qd1 <- QtlDataset(study = "s1", genotypes = .sc_makeGenotypeHandle(), + phenotypes = list(brain = .sc_makeSe())) + mt <- MultiStudyQtlDataset(qtlDatasets = list(s1 = qd1), sumStats = ss) + + # Bare call returns the embedded QtlSumStats collection ... + expect_s4_class(getSumStats(mt), "QtlSumStats") + # ... but any selection argument is rejected. + expect_error(getSumStats(mt, study = "s1"), "does not accept selection") +}) + diff --git a/tests/testthat/test_QtlDataset.R b/tests/testthat/test_QtlDataset.R index 44b5346e..e5fbeade 100644 --- a/tests/testthat/test_QtlDataset.R +++ b/tests/testthat/test_QtlDataset.R @@ -1678,4 +1678,343 @@ test_that(".qtlResolveVariantRegion rejects a non-GRanges / empty region", { "at least one range") }) +# =========================================================================== +# Accessors: getGenotypeCovariates / getScaleResiduals / getPhenotypeCovariates +# =========================================================================== + +test_that("getGenotypeCovariates / getScaleResiduals return their slots", { + gc <- matrix(rnorm(12 * 2), nrow = 12, ncol = 2, + dimnames = list(paste0("s", 1:12), c("pc1", "pc2"))) + qd <- .qr_makeDataset(contexts = "brain", geno_cov = gc, + scaleResiduals = FALSE) + expect_identical(getGenotypeCovariates(qd), qd@genotypeCovariates) + expect_equal(unname(getGenotypeCovariates(qd)), unname(gc)) + expect_false(getScaleResiduals(qd)) + qd2 <- .qr_makeDataset(contexts = "brain", scaleResiduals = TRUE) + expect_true(getScaleResiduals(qd2)) +}) + +test_that("getPhenotypeCovariates returns per-context colData matrices", { + qd <- .qr_makeDataset(contexts = c("brain", "liver")) + out <- getPhenotypeCovariates(qd, contexts = c("brain", "liver")) + expect_equal(names(out), c("brain", "liver")) + expect_true(is.matrix(out$brain)) + expect_setequal(colnames(out$brain), c("sex", "age")) + expect_equal(nrow(out$brain), 12L) + # Single-context request still returns a named list of length 1. + one <- getPhenotypeCovariates(qd, contexts = "liver") + expect_equal(names(one), "liver") +}) + +test_that("getPhenotypeCovariates: requires contexts and rejects unknown ones", { + qd <- .qr_makeDataset(contexts = "brain") + expect_error(getPhenotypeCovariates(qd), "`contexts` is required") + expect_error(getPhenotypeCovariates(qd, contexts = character(0)), + "`contexts` is required") + expect_error(getPhenotypeCovariates(qd, contexts = "ghost"), + "Unknown context") +}) + +# =========================================================================== +# .qtlResolveVariantRegion: region-path cisWindow validation (line ~247) +# =========================================================================== + +test_that(".qtlResolveVariantRegion: region path rejects a non-scalar/negative cisWindow", { + qd <- .qh_makeDataset() + region <- GenomicRanges::GRanges("chr1", IRanges::IRanges(100, 200)) + expect_error( + pecotmr:::.qtlResolveVariantRegion(qd, region = region, cisWindow = -5), + "must be a single non-negative value") + expect_error( + pecotmr:::.qtlResolveVariantRegion(qd, region = region, cisWindow = c(1, 2)), + "must be a single non-negative value") +}) + +# =========================================================================== +# .qtlExtractBlock: indel-empty / sample-empty / imiss / xvar / impute paths +# =========================================================================== + +test_that(".qtlExtractBlock: keepIndel = FALSE with an all-indel panel returns an empty block", { + qd <- .qh_makeDataset() + # Make every variant a multi-base allele so the indel filter drops them all. + qd@genotypes@snpInfo$A1 <- rep("AT", nrow(qd@genotypes@snpInfo)) + qd@keepIndel <- FALSE + local_mocked_bindings(extractBlockGenotypes = .qh_mockExtractor(), + .package = "pecotmr") + blk <- pecotmr:::.qtlExtractBlock(qd) + expect_equal(ncol(blk$geno), 0L) + expect_equal(nrow(blk$geno), 0L) + expect_equal(blk$variantIds, character(0)) + expect_equal(blk$maf, numeric(0)) +}) + +test_that(".qtlExtractBlock: keepSamples disjoint from the panel returns a zero-sample block", { + qd <- .qh_makeDataset() + qd@keepSamples <- c("zzz1", "zzz2") # none are panel samples + local_mocked_bindings(extractBlockGenotypes = .qh_mockExtractor(), + .package = "pecotmr") + blk <- pecotmr:::.qtlExtractBlock(qd) + expect_equal(nrow(blk$geno), 0L) + expect_equal(ncol(blk$geno), 6L) # columns retained, samples gone + expect_equal(blk$sampleIds, character(0)) + expect_true(all(is.na(blk$maf))) + expect_true(all(is.na(blk$af))) +}) + +# Extractor returning dosages with NAs: s1 is fully missing (driving the +# per-sample imiss filter), and s2/rs2 carries a single scattered NA that +# survives the filter (exercising the mean-impute loop). +.qh_naExtractor <- function() { + function(handle, snpIdx, meanImpute = TRUE) { + ns <- length(handle@sampleIds); nv <- nrow(handle@snpInfo) + set.seed(123L) + panel <- matrix(rbinom(ns * nv, 2, 0.4), nrow = ns, ncol = nv, + dimnames = list(handle@sampleIds, handle@snpInfo$SNP)) + panel["s1", ] <- NA_real_ # fully missing sample -> dropped by imiss + panel["s2", "rs2"] <- NA_real_ # scattered NA -> kept then mean-imputed + sub <- panel[, snpIdx, drop = FALSE] + rr <- GenomicRanges::GRanges( + seqnames = paste0("chr", handle@snpInfo$CHR[snpIdx]), + ranges = IRanges::IRanges(start = handle@snpInfo$BP[snpIdx], width = 1L)) + S4Vectors::mcols(rr) <- S4Vectors::DataFrame( + SNP = handle@snpInfo$SNP[snpIdx], A1 = handle@snpInfo$A1[snpIdx], + A2 = handle@snpInfo$A2[snpIdx]) + dosage <- t(sub) + rownames(dosage) <- handle@snpInfo$SNP[snpIdx] + colnames(dosage) <- handle@sampleIds + SummarizedExperiment::SummarizedExperiment( + assays = list(dosage = dosage), rowRanges = rr, + colData = S4Vectors::DataFrame(sampleId = handle@sampleIds, + row.names = handle@sampleIds)) + } +} + +test_that(".qtlExtractBlock: imissCutoff drops high-missingness samples and mean-imputes the rest", { + qd <- .qh_makeDataset() + qd@imissCutoff <- 0.5 # s1 (100% NA) dropped; s2 (1 NA) kept then imputed + local_mocked_bindings(extractBlockGenotypes = .qh_naExtractor(), + .package = "pecotmr") + blk <- pecotmr:::.qtlExtractBlock(qd) + expect_false("s1" %in% blk$sampleIds) + expect_true("s2" %in% blk$sampleIds) + expect_false(anyNA(blk$geno)) # scattered NA was mean-imputed + expect_equal(nrow(blk$geno), 11L) # 12 samples minus s1 +}) + +# Extractor with one constant (zero-variance) column to drive the xvar filter. +.qh_lowVarExtractor <- function() { + function(handle, snpIdx, meanImpute = TRUE) { + ns <- length(handle@sampleIds); nv <- nrow(handle@snpInfo) + set.seed(99L) + panel <- matrix(rbinom(ns * nv, 2, 0.4), nrow = ns, ncol = nv, + dimnames = list(handle@sampleIds, handle@snpInfo$SNP)) + panel[, "rs1"] <- 1L # constant column -> variance 0 + sub <- panel[, snpIdx, drop = FALSE] + rr <- GenomicRanges::GRanges( + seqnames = paste0("chr", handle@snpInfo$CHR[snpIdx]), + ranges = IRanges::IRanges(start = handle@snpInfo$BP[snpIdx], width = 1L)) + S4Vectors::mcols(rr) <- S4Vectors::DataFrame( + SNP = handle@snpInfo$SNP[snpIdx], A1 = handle@snpInfo$A1[snpIdx], + A2 = handle@snpInfo$A2[snpIdx]) + dosage <- t(sub) + rownames(dosage) <- handle@snpInfo$SNP[snpIdx] + colnames(dosage) <- handle@sampleIds + SummarizedExperiment::SummarizedExperiment( + assays = list(dosage = dosage), rowRanges = rr, + colData = S4Vectors::DataFrame(sampleId = handle@sampleIds, + row.names = handle@sampleIds)) + } +} + +test_that(".qtlExtractBlock: xvarCutoff drops near-constant (low-variance) variants", { + qd <- .qh_makeDataset() + qd@xvarCutoff <- 0.01 + local_mocked_bindings(extractBlockGenotypes = .qh_lowVarExtractor(), + .package = "pecotmr") + blk <- pecotmr:::.qtlExtractBlock(qd) + expect_false("rs1" %in% blk$variantIds) # constant column dropped on variance + expect_true(ncol(blk$geno) >= 1L) +}) + +# =========================================================================== +# getPhenotypes: contexts validation, region overlap filtering, empty-Y +# =========================================================================== + +test_that("getPhenotypes: requires contexts", { + qd <- .qr_makeDataset() + expect_error(getPhenotypes(qd), "`contexts` is required") + expect_error(getPhenotypes(qd, contexts = character(0)), + "`contexts` is required") +}) + +test_that("getPhenotypes: unknown context errors", { + qd <- .qr_makeDataset() + expect_error(getPhenotypes(qd, contexts = "ghost"), "Unknown context") +}) + +test_that("getPhenotypes: region keeps only overlapping traits", { + qd <- .qr_makeDataset(contexts = "brain") + # brain SE: ENSG1 @ chr1:1000-1499, ENSG2 @ chr1:2000-2499. + region <- GenomicRanges::GRanges("chr1", IRanges::IRanges(900, 1600)) + out <- getPhenotypes(qd, contexts = "brain", region = region) + expect_s4_class(out, "SummarizedExperiment") + expect_equal(rownames(out), "ENSG1") +}) + +test_that("getPhenotypes: non-overlapping region + naAction='drop' hits the empty-Y short-circuit", { + qd <- .qr_makeDataset(contexts = "brain") + region <- GenomicRanges::GRanges("chr2", IRanges::IRanges(1, 100)) # no overlap + out <- getPhenotypes(qd, contexts = "brain", region = region, + naAction = "drop") + expect_s4_class(out, "SummarizedExperiment") + expect_equal(nrow(out), 0L) +}) + +# =========================================================================== +# .qtlOutlierKeepMask: degenerate + robustbase-present-failure + robustbase-absent +# =========================================================================== + +test_that(".qtlOutlierKeepMask: degenerate (n==0 or p==0) short-circuits to all-TRUE", { + expect_equal(pecotmr:::.qtlOutlierKeepMask(matrix(numeric(0), 0L, 0L), 1e-3), + logical(0)) + expect_equal(pecotmr:::.qtlOutlierKeepMask(matrix(numeric(0), 5L, 0L), 1e-3), + rep(TRUE, 5L)) +}) + +test_that(".qtlOutlierKeepMask: robustbase covMcd failure falls back to colMeans/cov", { + skip_if_not_installed("robustbase") + local_mocked_bindings(covMcd = function(...) stop("forced covMcd failure"), + .package = "robustbase") + set.seed(8L) + Y <- matrix(c(rnorm(29), 60), ncol = 1L) + keep <- pecotmr:::.qtlOutlierKeepMask(Y, pvalThreshold = 1e-3) + expect_length(keep, 30L) + expect_false(keep[[30L]]) # the planted 60 is still flagged +}) + +test_that(".qtlOutlierKeepMask: falls back with a message when robustbase is absent", { + # Force the robustbase-absent branch by mocking base::requireNamespace. + local_mocked_bindings( + requireNamespace = function(package, ...) + if (identical(package, "robustbase")) FALSE else TRUE, + .package = "base") + set.seed(5L) + Y <- matrix(c(rnorm(29), 50), ncol = 1L) + expect_message( + keep <- pecotmr:::.qtlOutlierKeepMask(Y, pvalThreshold = 1e-3), + "install 'robustbase'") + expect_length(keep, 30L) +}) + +# =========================================================================== +# .qtlApplyPhenoOutliers: action=="keep" and the all-kept short-circuit +# =========================================================================== + +test_that(".qtlApplyPhenoOutliers: action='keep' returns the SE unchanged", { + se <- .qr_makeSe(n_samples = 12L) + out <- pecotmr:::.qtlApplyPhenoOutliers(se, "keep", 1e-3) + expect_identical(out, se) +}) + +test_that(".qtlApplyPhenoOutliers: action='drop' with clean data keeps every sample", { + set.seed(101L) + traits <- c("ENSG1", "ENSG2") + n <- 40L + rng <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(start = c(1000L, 2000L), width = 500L)) + names(rng) <- traits + expr <- matrix(rnorm(length(traits) * n), + nrow = length(traits), ncol = n, + dimnames = list(traits, paste0("s", seq_len(n)))) + cd <- S4Vectors::DataFrame(row.names = paste0("s", seq_len(n))) + se <- SummarizedExperiment::SummarizedExperiment( + assays = list(expression = expr), rowRanges = rng, colData = cd) + # A very strict threshold so clean Gaussian data flags nothing (all kept). + out <- pecotmr:::.qtlApplyPhenoOutliers(se, "drop", 1e-30) + expect_equal(ncol(out), n) +}) + +# =========================================================================== +# .qtlBuildResidualizationDesign: empty-selection skip, rowname restore, NULL +# =========================================================================== + +test_that(".qtlBuildResidualizationDesign: skips contexts with an empty selection", { + qd <- .qr_makeDataset(contexts = c("brain", "liver")) + D <- pecotmr:::.qtlBuildResidualizationDesign( + qd, contexts = c("brain", "liver"), + phenoSelection = list(brain = "age", liver = character(0)), + genoSelection = character(0), + includePheno = TRUE, includeGeno = FALSE) + expect_equal(ncol(D), 1L) + expect_setequal(colnames(D), "brain.age") +}) + +test_that(".qtlBuildResidualizationDesign: restores rownames when colData coercion drops them", { + # An SE whose assay has no colnames and whose colData carries no row.names: + # as.matrix(as.data.frame(colData)) then yields a rowname-less matrix, which + # drives the rowname-restore branch. colData rownames are also NULL, so the + # restore is a no-op and (with no other blocks) the design resolves to NULL. + expr <- matrix(rnorm(2 * 6), nrow = 2) + rng <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(start = c(1000L, 2000L), width = 500L)) + names(rng) <- c("ENSG1", "ENSG2") + cd <- S4Vectors::DataFrame(age = 1:6) # no row.names + se <- SummarizedExperiment::SummarizedExperiment( + assays = list(expression = expr), rowRanges = rng, colData = cd) + qd <- QtlDataset(study = "s1", genotypes = .qr_makeHandle(n_samples = 6L), + phenotypes = list(brain = se), + genotypeCovariates = matrix(numeric(0), 0L, 0L)) + D <- pecotmr:::.qtlBuildResidualizationDesign( + qd, contexts = "brain", + phenoSelection = list(brain = "age"), + genoSelection = character(0), + includePheno = TRUE, includeGeno = FALSE) + expect_null(D) +}) + +test_that(".qtlBuildResidualizationDesign: disjoint sample sets across blocks return NULL", { + # Phenotype colData covers s1..s6; genotype covariates cover s7..s12. + gh <- .qr_makeHandle(n_samples = 6L) + se <- .qr_makeSe(n_samples = 6L) + gc <- matrix(rnorm(6), nrow = 6, ncol = 1, + dimnames = list(paste0("s", 7:12), "pc1")) + qd <- QtlDataset(study = "s1", genotypes = gh, + phenotypes = list(brain = se), + genotypeCovariates = gc) + D <- pecotmr:::.qtlBuildResidualizationDesign( + qd, contexts = "brain", + phenoSelection = list(brain = "age"), + genoSelection = "pc1", + includePheno = TRUE, includeGeno = TRUE) + expect_null(D) +}) + +# =========================================================================== +# getResidualized{Genotypes,Phenotypes}: disjoint sample-set errors +# =========================================================================== + +test_that("getResidualizedGenotypes: errors when genotypes and covariates share no samples", { + gc <- matrix(rnorm(12 * 2), nrow = 12, ncol = 2, + dimnames = list(paste0("z", 1:12), c("pc1", "pc2"))) + qd <- .qr_makeDataset(contexts = "brain", geno_cov = gc) + local_mocked_bindings(extractBlockGenotypes = .qr_mockExtractor(), + .package = "pecotmr") + expect_error( + getResidualizedGenotypes(qd, contexts = "brain", + residualizePhenotypeCovariates = FALSE, + genotypeCovariatesToResidualize = c("pc1", "pc2")), + "No samples in common") +}) + +test_that("getResidualizedPhenotypes: errors when phenotypes and covariates share no samples", { + gc <- matrix(rnorm(12 * 2), nrow = 12, ncol = 2, + dimnames = list(paste0("z", 1:12), c("pc1", "pc2"))) + qd <- .qr_makeDataset(contexts = "brain", geno_cov = gc) + expect_error( + getResidualizedPhenotypes(qd, contexts = "brain", + residualizePhenotypeCovariates = FALSE, + genotypeCovariatesToResidualize = c("pc1", "pc2")), + "no samples shared") +}) + diff --git a/tests/testthat/test_QtlFineMappingResult.R b/tests/testthat/test_QtlFineMappingResult.R index 5daf89cd..569795dc 100644 --- a/tests/testthat/test_QtlFineMappingResult.R +++ b/tests/testthat/test_QtlFineMappingResult.R @@ -116,9 +116,11 @@ test_that("QtlFineMappingResult: joint columns absent by default", { test_that("QtlFineMappingResult: accepts jointContexts column", { e <- .sc_makeFineMappingEntry(3) + # Univariate susie at c1 + the c1 slice of an mvsusie joint over (c1, c2): + # both real context c1, distinguished by method and the jointContexts tag. res <- QtlFineMappingResult( study = c("s1", "s1"), - context = c("c1", "joint"), + context = c("c1", "c1"), trait = c("t1", "t1"), method = c("susie", "mvsusie"), entry = list(e, e), @@ -130,10 +132,13 @@ test_that("QtlFineMappingResult: accepts jointContexts column", { test_that("QtlFineMappingResult: jointStudies + jointTraits combine cleanly", { e <- .sc_makeFineMappingEntry(3) + # Three per-context rows sharing the real (s1, c1, t1) tuple, each a slice of + # a different joint fit: univariate; a cross-study+trait mvsusieRss; a + # cross-context mvsusie. The joint* tags carry each fit's co-fit membership. res <- QtlFineMappingResult( - study = c("s1", "joint", "joint"), - context = c("c1", "c1", "joint"), - trait = c("t1", "joint", "t1"), + study = c("s1", "s1", "s1"), + context = c("c1", "c1", "c1"), + trait = c("t1", "t1", "t1"), method = c("susie", "mvsusieRss", "mvsusie"), entry = list(e, e, e), jointStudies = c(NA_character_, "s1;s2", NA_character_), @@ -149,10 +154,12 @@ test_that("QtlFineMappingResult: jointStudies + jointTraits combine cleanly", { test_that("QtlFineMappingResult: uniqueness distinguishes joint members", { e <- .sc_makeFineMappingEntry(3) - # same 4-tuple but different jointContexts -> distinct + # Real scenario: context c1 participates in two different mvsusie joint fits + # -- one over (c1, c2), one over (c1, c3) -- producing two c1 rows with the + # same 4-tuple, kept distinct only by their jointContexts membership. res <- QtlFineMappingResult( study = c("s1", "s1"), - context = c("joint", "joint"), + context = c("c1", "c1"), trait = c("t1", "t1"), method = c("mvsusie", "mvsusie"), entry = list(e, e), @@ -162,7 +169,7 @@ test_that("QtlFineMappingResult: uniqueness distinguishes joint members", { expect_error( QtlFineMappingResult( study = c("s1", "s1"), - context = c("joint", "joint"), + context = c("c1", "c1"), trait = c("t1", "t1"), method = c("mvsusie", "mvsusie"), entry = list(e, e), @@ -310,6 +317,25 @@ test_that("getCvResult works at the QtlFineMappingResult collection level", { cv) }) + +test_that("QtlFineMappingResult: getMarginalEffects with tuple selectors", { + e1 <- .ca_makeFmEntry(3) + e2 <- .ca_makeFmEntry(4) + res <- QtlFineMappingResult( + study = c("s1", "s1"), + context = c("c1", "c2"), + trait = c("t1", "t1"), + method = c("susie", "susie"), + entry = list(e1, e2)) + # Collection-level selection picks the (s1, c2, t1, susie) entry, then + # delegates to the entry-level getMarginalEffects. + me <- getMarginalEffects(res, study = "s1", context = "c2", + trait = "t1", method = "susie") + expect_s3_class(me, "data.frame") + expect_equal(nrow(me), 4L) + expect_true(all(c("variant_id", "beta", "se", "z", "p") %in% names(me))) +}) + # =========================================================================== # GwasFineMappingResult collection accessors # =========================================================================== diff --git a/tests/testthat/test_SldscData.R b/tests/testthat/test_SldscData.R new file mode 100644 index 00000000..49ea4ad7 --- /dev/null +++ b/tests/testthat/test_SldscData.R @@ -0,0 +1,101 @@ +# Tests for R/SldscData.R (S4 container + accessors) + +test_that("SldscData constructs from in-memory objects", { + sd <- .sldscMkData() + expect_s4_class(sd, "SldscData") + expect_true(methods::validObject(sd)) +}) + +test_that("SldscData defaults frq to a 0-row data.frame when NULL", { + sd <- .sldscMkData(withFrq = FALSE) + expect_s4_class(sd, "SldscData") + expect_equal(nrow(getFrqData(sd)), 0L) +}) + +test_that("SldscData errors when `annot` is missing", { + expect_error(SldscData(), "`annot` is required") +}) + +# ---- validity ---- + +test_that("validity rejects annot without CHR/SNP", { + expect_error( + SldscData(annot = data.frame(SNP = "rs1", annot_A = 1)), + "must have columns CHR and SNP") +}) + +test_that("validity rejects annot with no annotation column", { + expect_error( + SldscData(annot = data.frame(CHR = 1, SNP = "rs1")), + "at least one annotation column") +}) + +test_that("validity rejects a non-empty frq without SNP/MAF", { + expect_error( + SldscData(annot = data.frame(CHR = 1, SNP = "rs1", a = 1), + frq = data.frame(CHR = 1, foo = 0.2)), + "non-empty `frq` must have columns SNP and MAF") +}) + +test_that("validity rejects unnamed traits", { + run <- .sldscMkRun(c("annot_A_0")) + expect_error( + SldscData(annot = data.frame(CHR = 1, SNP = "rs1", a = 1), + traits = list(list(single = list(run)))), + "must be a named list") +}) + +test_that("validity rejects a trait without a `single` element", { + run <- .sldscMkRun(c("annot_A_0")) + expect_error( + SldscData(annot = data.frame(CHR = 1, SNP = "rs1", a = 1), + traits = list(traitX = list(joint = run))), + "must be a list with a `single` element") +}) + +# ---- accessors ---- + +test_that("getAnnotData / getFrqData return the stored frames", { + sd <- .sldscMkData() + expect_s3_class(getAnnotData(sd), "data.frame") + expect_equal(nrow(getAnnotData(sd)), 6L) + expect_equal(nrow(getFrqData(sd)), 6L) +}) + +test_that("getAnnotCols returns the annotation columns only", { + sd <- .sldscMkData() + expect_equal(getAnnotCols(sd), c("annot_A", "annot_B")) +}) + +test_that("getTraitRuns / getTraitNames expose the traits list", { + sd <- .sldscMkData() + expect_equal(getTraitNames(sd), c("traitX", "traitY")) + expect_named(getTraitRuns(sd), c("traitX", "traitY")) +}) + +test_that("getTraitRun retrieves single (by idx), joint, and NULL cases", { + sd <- .sldscMkData() + single1 <- getTraitRun(sd, "traitX", "single", 1L) + expect_equal(single1$categories, c("annot_A_0", "baselineLD_0")) + expect_equal(getTraitRun(sd, "traitX", "joint")$categories, + c("annot_A_0", "annot_B_0", "baselineLD_0")) + # whole single list when idx is NULL + expect_length(getTraitRun(sd, "traitX", "single"), 2L) + # out-of-range idx -> NULL + expect_null(getTraitRun(sd, "traitX", "single", 5L)) + # unknown trait -> NULL + expect_null(getTraitRun(sd, "no_such_trait", "joint")) +}) + +test_that("getTraitRun returns NULL joint when a trait has none", { + sd <- .sldscMkData(withJoint = FALSE) + expect_null(getTraitRun(sd, "traitX", "joint")) +}) + +test_that("show prints a compact summary", { + sd <- .sldscMkData() + out <- capture.output(show(sd)) + expect_true(any(grepl("SldscData", out))) + expect_true(any(grepl("annot_A, annot_B", out))) + expect_true(any(grepl("traitX, traitY", out))) +}) diff --git a/tests/testthat/test_TwasWeightsEntry.R b/tests/testthat/test_TwasWeightsEntry.R index ecaa1075..ffdfa884 100644 --- a/tests/testthat/test_TwasWeightsEntry.R +++ b/tests/testthat/test_TwasWeightsEntry.R @@ -7,14 +7,14 @@ test_that("TwasWeightsEntry: constructor and accessors round-trip", { variantIds = c("v1", "v2", "v3"), weights = c(0.1, -0.2, 0.05), fits = list(model = "lasso"), - cvPerformance = list(rsq = 0.4), + cvResult = list(rsq = 0.4), standardized = TRUE, dataType = "expression") expect_s4_class(e, "TwasWeightsEntry") expect_equal(getVariantIds(e), c("v1", "v2", "v3")) expect_equal(getWeights(e), c(0.1, -0.2, 0.05)) expect_equal(getFits(e), list(model = "lasso")) - expect_equal(getCvPerformance(e), list(rsq = 0.4)) + expect_equal(getCvResult(e), list(rsq = 0.4)) expect_true(getStandardized(e)) expect_equal(getDataType(e), "expression") }) diff --git a/tests/testthat/test_causalInferencePipeline.R b/tests/testthat/test_causalInferencePipeline.R index 14a85a80..51abbca3 100644 --- a/tests/testthat/test_causalInferencePipeline.R +++ b/tests/testthat/test_causalInferencePipeline.R @@ -276,7 +276,7 @@ test_that(".cipZToSe: falls back to vector of 1 when maf/n are NA", { test_that(".cipFilterEligibleMethods: rsq+pval gating, drop sub-cutoff groups, SS-TWAS keeps all", { mkEntry <- function(rsq, pval = 0.01) TwasWeightsEntry( variantIds = paste0("v", 1:3), weights = rep(0.1, 3), - cvPerformance = list(metrics = c(corr = 0.1, rsq = rsq, pval = pval))) + cvResult = list(metrics = c(corr = 0.1, rsq = rsq, pval = pval))) tw <- TwasWeights( study = rep("S", 4), context = rep("c1", 4), @@ -302,7 +302,7 @@ test_that(".cipFilterEligibleMethods: rsq+pval gating, drop sub-cutoff groups, S f2 <- pecotmr:::.cipFilterEligibleMethods(q2, m2, rsqCutoff = 0.1, rsqPvalCutoff = 0.05) expect_equal(f2$method, "lasso") - # SS-TWAS: no usable cvPerformance -> keep all methods in the group. + # SS-TWAS: no usable cvResult -> keep all methods in the group. twss <- TwasWeights( study = rep("S", 2), context = rep("c1", 2), trait = rep("G", 2), method = c("susie", "lasso"), @@ -340,10 +340,10 @@ test_that("causalInferencePipeline: rsqCutoff selects the max-rsq method per gro entry = list( TwasWeightsEntry(variantIds = paste0("v", 1:5), weights = c(0.1, 0.05, -0.2, 0.3, 0.0), - cvPerformance = list(metrics = c(rsq = 0.2, pval = 0.001))), + cvResult = list(metrics = c(rsq = 0.2, pval = 0.001))), TwasWeightsEntry(variantIds = paste0("v", 1:5), weights = c(0.2, 0.1, -0.1, 0.2, 0.1), - cvPerformance = list(metrics = c(rsq = 0.5, pval = 0.001)))), + cvResult = list(metrics = c(rsq = 0.5, pval = 0.001)))), ldSketch = .cip_makeHandle()) local_mocked_bindings(extractBlockGenotypes = .cip_mockExtractor(), .package = "pecotmr") @@ -359,10 +359,10 @@ test_that("causalInferencePipeline: NA/Inf TWAS-Z triggers method re-selection", entry = list( # top rsq but all-zero weights -> wᵀRw = 0 -> twasZ NaN TwasWeightsEntry(variantIds = paste0("v", 1:5), weights = rep(0, 5), - cvPerformance = list(metrics = c(rsq = 0.9, pval = 0.001))), + cvResult = list(metrics = c(rsq = 0.9, pval = 0.001))), TwasWeightsEntry(variantIds = paste0("v", 1:5), weights = c(0.2, 0.1, -0.1, 0.2, 0.1), - cvPerformance = list(metrics = c(rsq = 0.5, pval = 0.001)))), + cvResult = list(metrics = c(rsq = 0.5, pval = 0.001)))), ldSketch = .cip_makeHandle()) local_mocked_bindings(extractBlockGenotypes = .cip_mockExtractor(), .package = "pecotmr") @@ -379,10 +379,10 @@ test_that("causalInferencePipeline: rsqPvalCutoff gates out high-CV-pval methods entry = list( TwasWeightsEntry(variantIds = paste0("v", 1:5), weights = c(0.2, 0.1, -0.1, 0.2, 0.1), - cvPerformance = list(metrics = c(rsq = 0.9, pval = 0.5))), + cvResult = list(metrics = c(rsq = 0.9, pval = 0.5))), TwasWeightsEntry(variantIds = paste0("v", 1:5), weights = c(0.2, 0.1, -0.1, 0.2, 0.1), - cvPerformance = list(metrics = c(rsq = 0.5, pval = 0.001)))), + cvResult = list(metrics = c(rsq = 0.5, pval = 0.001)))), ldSketch = .cip_makeHandle()) local_mocked_bindings(extractBlockGenotypes = .cip_mockExtractor(), .package = "pecotmr") @@ -929,3 +929,142 @@ test_that("twasZ: error when weights and z have different lengths", { # Phase 2: loadLdSketch() and standardize_genotype_hwe() + +# =========================================================================== +# Direct unit tests for the MR / metric helpers (mock getTopLoci to feed +# controlled topLoci frames; gwasDf is a plain data.frame as produced by +# getSumstatDf upstream). +# =========================================================================== + +.cip_gwasDf <- function(vids = paste0("v", 1:5), + z = rep_len(c(2, -1.5, 1.8, 0.5, -2.2), length(vids))) { + data.frame(variant_id = vids, z = z, N = rep(1000, length(vids)), + maf = rep(0.3, length(vids)), chrom = "chr1", + pos = seq(100L, by = 100L, length.out = length(vids)), + stringsAsFactors = FALSE) +} + +test_that(".cipCalcI2: degenerate Q / single group -> 0; normal Q -> clamped", { + expect_equal(pecotmr:::.cipCalcI2(0, 3L), 0) # Q <= 1e-3 + expect_equal(pecotmr:::.cipCalcI2(NA_real_, 3L), 0) # non-finite Q + expect_equal(pecotmr:::.cipCalcI2(10, 1L), 0) # nGroups <= 1 + expect_equal(pecotmr:::.cipCalcI2(10, 3L), (10 - 2) / 10) + expect_equal(pecotmr:::.cipCalcI2(1.5, 5L), 0) # negative -> clamp to 0 +}) + +test_that(".cipComputeMr: IVW Wald-ratio over PIP-passing instruments", { + tl <- data.frame(variant_id = paste0("v", 1:4), + pip = c(0.9, 0.8, 0.1, 0.7), + beta = c(0.3, -0.2, 0.5, 0.25), se = rep(0.05, 4), + stringsAsFactors = FALSE) + local_mocked_bindings(getTopLoci = function(x) tl, .package = "pecotmr") + res <- pecotmr:::.cipComputeMr(NULL, + .cip_gwasDf(paste0("v", 1:4), c(2, -1.5, 1, 1.8)), pipCutoff = 0.5) + expect_true(is.finite(res$waldRatio)) + expect_true(is.finite(res$mrPval)) + expect_equal(res$nIV, 3L) # v1, v2, v4 pass + overlap +}) + +test_that(".cipComputeMr: NA on empty / missing-col / no-IV / no-overlap / zero-beta", { + g <- .cip_gwasDf(paste0("v", 1:4)) + isNa <- function(r) { expect_true(is.na(r$waldRatio)); expect_equal(r$nIV, 0L) } + local_mocked_bindings(getTopLoci = function(x) data.frame(), .package = "pecotmr") + isNa(pecotmr:::.cipComputeMr(NULL, g, 0.5)) # empty + local_mocked_bindings(getTopLoci = function(x) # no beta/se + data.frame(variant_id = paste0("v", 1:4), pip = rep(0.9, 4)), .package = "pecotmr") + isNa(pecotmr:::.cipComputeMr(NULL, g, 0.5)) + local_mocked_bindings(getTopLoci = function(x) # none pass + data.frame(variant_id = paste0("v", 1:4), pip = rep(0.1, 4), + beta = rep(0.2, 4), se = rep(0.05, 4)), .package = "pecotmr") + isNa(pecotmr:::.cipComputeMr(NULL, g, 0.5)) + local_mocked_bindings(getTopLoci = function(x) # no overlap + data.frame(variant_id = paste0("z", 1:4), pip = rep(0.9, 4), + beta = rep(0.2, 4), se = rep(0.05, 4)), .package = "pecotmr") + isNa(pecotmr:::.cipComputeMr(NULL, g, 0.5)) + local_mocked_bindings(getTopLoci = function(x) # zero beta + data.frame(variant_id = paste0("v", 1:4), pip = rep(0.9, 4), + beta = rep(0, 4), se = rep(0.05, 4)), .package = "pecotmr") + isNa(pecotmr:::.cipComputeMr(NULL, g, 0.5)) +}) + +test_that(".cipComputeMrCsAware: CS-aware composite Wald + heterogeneity", { + tl <- data.frame(variant_id = paste0("v", 1:4), + cs = c(1L, 1L, 2L, 2L), pip = c(0.5, 0.4, 0.6, 0.3), + beta = c(0.3, 0.2, -0.25, -0.15), se = rep(0.05, 4), + stringsAsFactors = FALSE) + local_mocked_bindings(getTopLoci = function(x) tl, .package = "pecotmr") + res <- pecotmr:::.cipComputeMrCsAware(NULL, + .cip_gwasDf(paste0("v", 1:4), c(2, -1.5, 1.8, -1.2)), cpipCutoff = 0.5) + expect_true(is.finite(res$waldRatio)) + expect_true(is.finite(res$I2)) + expect_equal(res$nCs, 2L) # both CSs clear cpip +}) + +test_that(".cipComputeMrCsAware: credible-set column found via the ^cs fallback", { + tl <- data.frame(variant_id = paste0("v", 1:2), cs_0.95 = c(1L, 1L), + pip = c(0.5, 0.4), beta = c(0.3, 0.2), se = rep(0.05, 2), + stringsAsFactors = FALSE) + local_mocked_bindings(getTopLoci = function(x) tl, .package = "pecotmr") + res <- pecotmr:::.cipComputeMrCsAware(NULL, + .cip_gwasDf(paste0("v", 1:2), c(2, -1.5)), cpipCutoff = 0.5) + expect_true(is.finite(res$waldRatio)) # cs_0.95 via grep("^cs") +}) + +test_that(".cipComputeMrCsAware: naResult on empty / missing-col / no-CS / no-overlap / low-cpip", { + g <- .cip_gwasDf(paste0("v", 1:4)) + isNa <- function(r) expect_true(is.na(r$waldRatio)) + local_mocked_bindings(getTopLoci = function(x) data.frame(), .package = "pecotmr") + isNa(pecotmr:::.cipComputeMrCsAware(NULL, g, 0.5)) # empty + local_mocked_bindings(getTopLoci = function(x) # no pip col + data.frame(variant_id = paste0("v", 1:4), cs = rep(1L, 4), + beta = rep(0.2, 4), se = rep(0.05, 4)), .package = "pecotmr") + isNa(pecotmr:::.cipComputeMrCsAware(NULL, g, 0.5)) + local_mocked_bindings(getTopLoci = function(x) # cs all 0 + data.frame(variant_id = paste0("v", 1:4), cs = rep(0L, 4), + pip = rep(0.5, 4), beta = rep(0.2, 4), se = rep(0.05, 4)), + .package = "pecotmr") + isNa(pecotmr:::.cipComputeMrCsAware(NULL, g, 0.5)) + local_mocked_bindings(getTopLoci = function(x) # no overlap + data.frame(variant_id = paste0("z", 1:4), cs = rep(1L, 4), + pip = rep(0.5, 4), beta = rep(0.2, 4), se = rep(0.05, 4)), + .package = "pecotmr") + isNa(pecotmr:::.cipComputeMrCsAware(NULL, g, 0.5)) + local_mocked_bindings(getTopLoci = function(x) # cpip < cutoff + data.frame(variant_id = paste0("v", 1:4), cs = 1:4, + pip = rep(0.1, 4), beta = rep(0.2, 4), se = rep(0.05, 4)), + .package = "pecotmr") + isNa(pecotmr:::.cipComputeMrCsAware(NULL, g, cpipCutoff = 0.5)) +}) + +test_that(".cipExtractWeights: NULL on missing tuples and bad topLoci", { + tw <- .cip_makeTwasWeights(method = "susie") + fmr <- .cip_makeQtlFmr() + # TwasWeights / FMR: tuple absent. + expect_null(pecotmr:::.cipExtractWeights(tw, NULL, "Q1", "c1", "t1", + "absent", useFmr = FALSE)) + expect_null(pecotmr:::.cipExtractWeights(NULL, fmr, "Q1", "c1", "t1", + "absent", useFmr = TRUE)) + # FMR path: tuple present but topLoci empty / no beta column / all-NA. + local_mocked_bindings(getTopLoci = function(x) data.frame(), .package = "pecotmr") + expect_null(pecotmr:::.cipExtractWeights(NULL, fmr, "Q1", "c1", "t1", + "susie", useFmr = TRUE)) + local_mocked_bindings(getTopLoci = function(x) data.frame(variant_id = "v1"), + .package = "pecotmr") + expect_null(pecotmr:::.cipExtractWeights(NULL, fmr, "Q1", "c1", "t1", + "susie", useFmr = TRUE)) + local_mocked_bindings( + getTopLoci = function(x) data.frame(variant_id = NA_character_, beta = NA_real_), + .package = "pecotmr") + expect_null(pecotmr:::.cipExtractWeights(NULL, fmr, "Q1", "c1", "t1", + "susie", useFmr = TRUE)) +}) + +test_that(".cipCombineAcrossMethods: a single-method group yields no combined rows", { + gr <- pecotmr:::.cipDfToGranges(data.frame( + qtlStudy = "Q1", context = "c1", trait = "t1", method = "susie", + gwasStudy = "G1", twasZ = 2.0, twasPval = 0.05, waldRatio = NA_real_, + waldRatioSe = NA_real_, mrPval = NA_real_, nIV = NA_integer_, + Q = NA_real_, I2 = NA_real_, nCs = NA_integer_, + chrom = "chr1", startPos = 100L, endPos = 200L, stringsAsFactors = FALSE)) + expect_identical(pecotmr:::.cipCombineAcrossMethods(gr, c("acat")), gr) +}) diff --git a/tests/testthat/test_colocPipeline.R b/tests/testthat/test_colocPipeline.R index e57ce2c3..6f883a18 100644 --- a/tests/testthat/test_colocPipeline.R +++ b/tests/testthat/test_colocPipeline.R @@ -370,3 +370,256 @@ test_that(".colocEmptyResult(enriched=TRUE): includes enrichment + p12Used schem expect_true(all(c("enrichment", "p12Used") %in% colnames(out))) }) + +# =========================================================================== +# Additional coverage (appended) +# =========================================================================== + +# Minimal canonical topLoci (variant_id + pip) for skeletal entries. +.cp_tl <- function(vids, pip = rep(0.1, length(vids))) { + data.frame(variant_id = as.character(vids), + pip = pip, + stringsAsFactors = FALSE) +} + +# --- enrichment= argument validation --------------------------------------- + +test_that("colocPipeline: rejects a non-data.frame enrichment", { + qfmr <- .cp_makeQtlFmr() + gfmr <- .cp_makeGwasFmr() + expect_error( + colocPipeline(qtlFineMappingResult = qfmr, + gwasInput = gfmr, + enrichment = "not a data frame"), + "must be a data.frame") +}) + +test_that("colocPipeline: rejects enrichment missing required columns", { + qfmr <- .cp_makeQtlFmr() + gfmr <- .cp_makeGwasFmr() + bad <- data.frame(gwasStudy = "G1", qtlStudy = "Q1", + stringsAsFactors = FALSE) # missing qtlContext + enrichment + expect_error( + colocPipeline(qtlFineMappingResult = qfmr, + gwasInput = gfmr, + enrichment = bad), + "is missing column") +}) + +# --- enrichment end-to-end (lookup + p12 scaling + output columns) ---------- + +test_that("colocPipeline: enrichment hit scales p12 and emits enrichment/p12Used", { + qfmr <- .cp_makeQtlFmr() # Q1 / c1 / t1 / susie + gfmr <- .cp_makeGwasFmr() # G1 / susie + enr <- data.frame(gwasStudy = "G1", + qtlStudy = "Q1", + qtlContext = "c1", + enrichment = 2.0, + stringsAsFactors = FALSE) + local_mocked_bindings(coloc.bf_bf = .cp_mockColocBfBf(), .package = "coloc") + out <- suppressWarnings( + colocPipeline(qtlFineMappingResult = qfmr, + gwasInput = gfmr, + enrichment = enr)) + expect_equal(nrow(out), 1L) + expect_true(all(c("enrichment", "p12Used") %in% colnames(out))) + expect_equal(unique(out$enrichment), 2.0) + # min(p12 * (1 + enrichment), p12Max) = min(5e-6 * 3, 1e-3) = 1.5e-5 + expect_equal(unique(out$p12Used), min(5e-6 * 3, 1e-3)) +}) + +test_that("colocPipeline: enrichment miss warns and falls back to baseline p12", { + qfmr <- .cp_makeQtlFmr() + gfmr <- .cp_makeGwasFmr() + enr <- data.frame(gwasStudy = "OTHER", # no row matches gwasStudy 'G1' + qtlStudy = "Q1", + qtlContext = "c1", + enrichment = 2.0, + stringsAsFactors = FALSE) + local_mocked_bindings(coloc.bf_bf = .cp_mockColocBfBf(), .package = "coloc") + w <- testthat::capture_warnings( + out <- colocPipeline(qtlFineMappingResult = qfmr, + gwasInput = gfmr, + enrichment = enr)) + expect_match(w, "no enrichment entry", all = FALSE) + expect_equal(nrow(out), 1L) + expect_equal(unique(out$enrichment), 0) # enRow reset to 0 on miss + expect_equal(unique(out$p12Used), 5e-6) # baseline p12 unchanged +}) + +# --- returnGwasFineMapping attached on the empty-pair early return (~205) ---- + +test_that("colocPipeline: attaches gwasFineMapping when no pairs survive", { + qfmr <- .cp_makeQtlFmr() + gss <- .cp_makeGwasSumstats() + # Resolved GWAS FMR whose only entry has no usable LBF (V filtered to 0 + # rows) -> .colocPreextractGwasLbf returns an empty list -> early return. + emptyFit <- list(alpha = matrix(0, 1, 1), pip = c(v1 = 0), V = 0, + lbf_variable = matrix(NA_real_, 1, 1)) + e <- FineMappingEntry(variantIds = "v1", susieFit = emptyFit, + topLoci = .cp_tl("v1", pip = 0)) + resolved <- GwasFineMappingResult(study = "G1", method = "susie", + entry = list(e), + ldSketch = .cp_makeHandle()) + local_mocked_bindings(coloc.bf_bf = .cp_mockColocBfBf(), .package = "coloc") + local_mocked_bindings( + fineMappingPipeline = function(data, methods, ...) resolved, + .package = "pecotmr") + out <- suppressWarnings( + colocPipeline(qtlFineMappingResult = qfmr, + gwasInput = gss, + returnGwasFineMapping = TRUE, + adjustPips = FALSE)) + expect_equal(nrow(out), 0L) + expect_identical(attr(out, "gwasFineMapping"), resolved) +}) + +# --- per-pair loop skips: no usable QTL LBF (~223) and no overlap (~233) ----- + +test_that("colocPipeline: skips a QTL entry with no usable LBF", { + badFit <- list(alpha = matrix(0, 1, 1), pip = c(v1 = 0), V = 0, + lbf_variable = matrix(NA_real_, 1, 1)) + badEntry <- FineMappingEntry(variantIds = "v1", susieFit = badFit, + topLoci = .cp_tl("v1", pip = 0)) + goodEntry <- .cp_makeFmEntry() + qfmr <- .cp_makeQtlFmr(tuples = list(c("Q1", "c1", "t1", "susie"), + c("Q1", "c2", "t1", "susie")), + entries = list(badEntry, goodEntry)) + gfmr <- .cp_makeGwasFmr() + local_mocked_bindings(coloc.bf_bf = .cp_mockColocBfBf(), .package = "coloc") + out <- suppressWarnings( + colocPipeline(qtlFineMappingResult = qfmr, + gwasInput = gfmr, + adjustPips = FALSE)) + # bad entry (c1) skipped; only the good entry (c2) yields a row. + expect_equal(nrow(out), 1L) + expect_equal(unique(out$context), "c2") +}) + +test_that("colocPipeline: skips a pair with no shared variants", { + qEntry <- .cp_makeFmEntry(variant_ids = paste0("chr1:", 100 * (1:5), ":A:G")) + gEntry <- .cp_makeFmEntry(variant_ids = paste0("chr2:", 100 * (1:5), ":A:G")) + qfmr <- .cp_makeQtlFmr(entries = list(qEntry)) + gfmr <- .cp_makeGwasFmr(entries = list(gEntry)) + local_mocked_bindings(coloc.bf_bf = .cp_mockColocBfBf(), .package = "coloc") + out <- suppressWarnings( + colocPipeline(qtlFineMappingResult = qfmr, + gwasInput = gfmr, + adjustPips = FALSE)) + expect_equal(nrow(out), 0L) +}) + +# --- .colocFilterCsByConcentration (~324-331) ------------------------------- + +test_that(".colocFilterCsByConcentration: keeps narrow CS, drops diffuse ones", { + fit <- list(alpha = matrix(0.1, nrow = 3, ncol = 10)) # 10 variants + # maxSize = ncol * coverage * concentration = 10 * 0.5 * 0.5 = 2.5 + local_mocked_bindings( + susie_get_cs = function(s, coverage = 0.5, dedup = TRUE, ...) + list(cs = list(L1 = c(1L, 2L), # size 2 -> keep + L2 = 1:8, # size 8 -> drop + L3 = 3L)), # size 1 -> keep + .package = "pecotmr") + keep <- pecotmr:::.colocFilterCsByConcentration(fit, coverage = 0.5, + concentration = 0.5) + expect_setequal(keep, c(1, 3)) +}) + +# --- .colocExtractLbfFromEntry branches ------------------------------------- + +test_that(".colocExtractLbfFromEntry: stacks fSuSiE lBF list into a matrix", { + m1 <- matrix(rnorm(8), 2, 4, dimnames = list(NULL, paste0("v", 1:4))) + m2 <- matrix(rnorm(8), 2, 4, dimnames = list(NULL, paste0("v", 1:4))) + fit <- list(fsusie_result = list(lBF = list(m1, m2))) + e <- FineMappingEntry(variantIds = paste0("v", 1:4), susieFit = fit, + topLoci = .cp_tl(paste0("v", 1:4))) + out <- pecotmr:::.colocExtractLbfFromEntry( + e, filterLbfCs = FALSE, filterLbfCsSecondary = NULL, + filterLbfCsConcentration = 0.5, priorTol = 1e-9) + expect_equal(nrow(out$lbf), 4L) # 2 + 2 stacked + expect_setequal(colnames(out$lbf), paste0("v", 1:4)) +}) + +test_that(".colocExtractLbfFromEntry: stacks nested fSuSiE lBF (fit[[1]] path)", { + m1 <- matrix(rnorm(8), 2, 4, dimnames = list(NULL, paste0("v", 1:4))) + fit <- list(list(fsusie_result = list(lBF = list(m1)))) + e <- FineMappingEntry(variantIds = paste0("v", 1:4), susieFit = fit, + topLoci = .cp_tl(paste0("v", 1:4))) + out <- pecotmr:::.colocExtractLbfFromEntry( + e, filterLbfCs = FALSE, filterLbfCsSecondary = NULL, + filterLbfCsConcentration = 0.5, priorTol = 1e-9) + expect_equal(nrow(out$lbf), 2L) +}) + +test_that(".colocExtractLbfFromEntry: warns + NULL when fit carries no LBF slot", { + fit <- list(notLbf = list(a = 1)) # fit[[1]] is a list -> $ access is safe + e <- FineMappingEntry(variantIds = "v1", susieFit = fit, + topLoci = .cp_tl("v1")) + expect_warning( + out <- pecotmr:::.colocExtractLbfFromEntry( + e, filterLbfCs = FALSE, filterLbfCsSecondary = NULL, + filterLbfCsConcentration = 0.5, priorTol = 1e-9), + "no lbf_variable") + expect_null(out) +}) + +test_that(".colocExtractLbfFromEntry: warns + NULL on an empty LBF matrix", { + fit <- list(lbf_variable = matrix(numeric(0), nrow = 0, ncol = 3, + dimnames = list(NULL, paste0("v", 1:3)))) + e <- FineMappingEntry(variantIds = paste0("v", 1:3), susieFit = fit, + topLoci = .cp_tl(paste0("v", 1:3))) + expect_warning( + out <- pecotmr:::.colocExtractLbfFromEntry( + e, filterLbfCs = FALSE, filterLbfCsSecondary = NULL, + filterLbfCsConcentration = 0.5, priorTol = 1e-9), + "LBF matrix is empty") + expect_null(out) +}) + +test_that(".colocExtractLbfFromEntry: secondary CS filter subsets rows", { + fit <- list(lbf_variable = matrix(1:12, 3, 4, + dimnames = list(NULL, paste0("v", 1:4))), + alpha = matrix(0.25, 3, 4)) + e <- FineMappingEntry(variantIds = paste0("v", 1:4), susieFit = fit, + topLoci = .cp_tl(paste0("v", 1:4))) + local_mocked_bindings( + .colocFilterCsByConcentration = function(fit, coverage, concentration) 2L, + .package = "pecotmr") + out <- pecotmr:::.colocExtractLbfFromEntry( + e, filterLbfCs = FALSE, filterLbfCsSecondary = 0.95, + filterLbfCsConcentration = 0.5, priorTol = 1e-9) + expect_equal(nrow(out$lbf), 1L) + expect_equal(as.numeric(out$lbf), c(2, 5, 8, 11)) # row 2 of matrix(1:12, 3, 4) +}) + +test_that(".colocExtractLbfFromEntry: assigns colnames from variantIds when fit lacks them", { + fit <- list(lbf_variable = matrix(1:8, 2, 4)) # no dimnames + e <- FineMappingEntry(variantIds = paste0("v", 1:4), susieFit = fit, + topLoci = .cp_tl(paste0("v", 1:4))) + out <- pecotmr:::.colocExtractLbfFromEntry( + e, filterLbfCs = FALSE, filterLbfCsSecondary = NULL, + filterLbfCsConcentration = 0.5, priorTol = 1e-9) + expect_equal(colnames(out$lbf), paste0("v", 1:4)) +}) + +test_that(".colocExtractLbfFromEntry: NULL when every variant column is NA-named", { + fit <- list(lbf_variable = matrix(1:8, 2, 4, + dimnames = list(NULL, rep(NA_character_, 4)))) + # variantIds length (1) != ncol (4) so colnames are NOT back-filled, and the + # NA-named columns are dropped, leaving a 0-column matrix. + e <- FineMappingEntry(variantIds = "v1", susieFit = fit, + topLoci = .cp_tl("v1")) + out <- pecotmr:::.colocExtractLbfFromEntry( + e, filterLbfCs = FALSE, filterLbfCsSecondary = NULL, + filterLbfCsConcentration = 0.5, priorTol = 1e-9) + expect_null(out) +}) + +# --- .colocAlignLbf no-overlap branch (~479) -------------------------------- + +test_that(".colocAlignLbf: returns NULL when there are no shared variants", { + q <- matrix(0, 2, 3, dimnames = list(NULL, paste0("chr1:", 100 * (1:3), ":A:G"))) + g <- matrix(0, 2, 3, dimnames = list(NULL, paste0("chr2:", 100 * (1:3), ":A:G"))) + expect_null(pecotmr:::.colocAlignLbf(q, g)) +}) + diff --git a/tests/testthat/test_colocboostPipeline.R b/tests/testthat/test_colocboostPipeline.R index d0cc7469..edabd365 100644 --- a/tests/testthat/test_colocboostPipeline.R +++ b/tests/testthat/test_colocboostPipeline.R @@ -492,3 +492,125 @@ test_that(".cbResolveCutoff: scalar applies to all; named vector is per-context" expect_equal(pecotmr:::.cbResolveCutoff(c(brain = 0.3), "missing"), 0) expect_equal(pecotmr:::.cbResolveCutoff(NULL, "brain"), 0) }) + +# =========================================================================== +# Additional coverage: MultiStudy method, engine-failure path, multi-context +# bundle building, and sumstat-bundle helper early returns. +# =========================================================================== + +.cbp_makeMultiStudy <- function() MultiStudyQtlDataset( + qtlDatasets = list(study1 = .cbp_makeQtlDataset(contexts = "brain", + traits = "ENSG_A")), + sumStats = .cbp_makeQtlSumStats()) + +test_that("colocboostPipeline(MultiStudyQtlDataset): combines per-study bundles + embedded sumstats", { + mt <- .cbp_makeMultiStudy() + capturedArgs <- NULL + local_mocked_bindings(extractBlockGenotypes = .cbp_mockExtractor(), + .package = "pecotmr") + local_mocked_bindings( + colocboost = function(...) { capturedArgs <<- list(...); list(stub = TRUE) }, + .package = "colocboost") + out <- suppressMessages(colocboostPipeline(mt, xqtlColoc = TRUE, + jointGwas = FALSE, separateGwas = FALSE)) + expect_type(out, "list") + expect_true(!is.null(out$xqtl_coloc)) + # The individual study's outcome is prefixed "study1:" in the combined bundle. + expect_true(any(grepl("study1:", names(capturedArgs$Y)))) +}) + +test_that(".cbRun: an engine failure is caught -> message + NULL", { + qd <- .cbp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + local_mocked_bindings(extractBlockGenotypes = .cbp_mockExtractor(), + .package = "pecotmr") + local_mocked_bindings(colocboost = function(...) stop("engine boom"), + .package = "colocboost") + out <- suppressMessages(colocboostPipeline(qd, xqtlColoc = TRUE)) + expect_null(out$xqtl_coloc) # .cbRun caught (123-124) +}) + +test_that(".cbIndividualBundle: multi-context bundle names + prefixes outcomes", { + qd <- .cbp_makeQtlDataset(contexts = c("brain", "liver"), traits = "ENSG_A") + capturedArgs <- NULL + local_mocked_bindings(extractBlockGenotypes = .cbp_mockExtractor(), + .package = "pecotmr") + local_mocked_bindings( + colocboost = function(...) { capturedArgs <<- list(...); list(stub = TRUE) }, + .package = "colocboost") + out <- suppressMessages(colocboostPipeline(qd, xqtlColoc = TRUE)) + expect_true(!is.null(out$xqtl_coloc)) + # Two contexts -> two context-prefixed outcomes (covers the xMatch + naming). + expect_gte(length(capturedArgs$Y), 2L) +}) + +test_that("colocboostPipeline(QtlDataset): unknown context errors", { + qd <- .cbp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + local_mocked_bindings(extractBlockGenotypes = .cbp_mockExtractor(), + .package = "pecotmr") + expect_error(colocboostPipeline(qd, contexts = "ghost", xqtlColoc = TRUE), + "Unknown context") # 209 +}) + +test_that("colocboostPipeline(QtlDataset): pipCutoffToSkip dropping every outcome -> empty", { + qd <- .cbp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + local_mocked_bindings(extractBlockGenotypes = .cbp_mockExtractor(), + .package = "pecotmr") + out <- suppressMessages(suppressWarnings( + colocboostPipeline(qd, xqtlColoc = TRUE, pipCutoffToSkip = 0.9999))) + expect_null(out$xqtl_coloc) # 249-253 skip -> empty +}) + +test_that(".cbPipSkipOutcomes: an outcome with < 2 observations is skipped", { + set.seed(4) + X <- matrix(rnorm(60), 30, 2, dimnames = list(paste0("s", 1:30), c("v1", "v2"))) + Y <- cbind(a = c(1, rep(NA, 29)), b = rnorm(30)) # col a: 1 obs (< 2) + res <- pecotmr:::.cbPipSkipOutcomes(X, Y, cutoff = 0.5) + expect_true(is.null(res) || is.matrix(res)) # col a -> next (180) +}) + +test_that(".cbSumstatPair: NULL / empty df -> NULL", { + expect_null(pecotmr:::.cbSumstatPair(NULL, .cbp_makeHandle())) # 317 + expect_null(pecotmr:::.cbSumstatPair(data.frame(), .cbp_makeHandle())) +}) + +test_that(".cbQtlSumStatsBundle: NULL / context / trait filters and empty result", { + ss <- .cbp_makeQtlSumStats() + local_mocked_bindings(extractBlockGenotypes = .cbp_mockExtractor(), + .package = "pecotmr") + expect_equal(pecotmr:::.cbQtlSumStatsBundle(NULL), list()) # 359 + expect_length(pecotmr:::.cbQtlSumStatsBundle(ss, contexts = "c1"), 1L) # 363 + expect_length(pecotmr:::.cbQtlSumStatsBundle(ss, traitId = "t1"), 1L) # 366 + expect_equal(pecotmr:::.cbQtlSumStatsBundle(ss, contexts = "ghost"), list()) # 368 +}) + +test_that(".cbGwasSumStatsBundle: NULL -> empty list", { + expect_equal(pecotmr:::.cbGwasSumStatsBundle(NULL), list()) # 390 +}) + +test_that(".cbSumstatPair: varY attaches var_y; NA variant ids fall back to chr:pos", { + local_mocked_bindings(extractBlockGenotypes = .cbp_mockExtractor(), + .package = "pecotmr") + h <- .cbp_makeHandle() # panel SNPs v1..v6 + df <- data.frame(variant_id = c("v1", "v2", "v3"), z = c(1, -1, 0.5), + N = rep(1000, 3), stringsAsFactors = FALSE) + pair <- pecotmr:::.cbSumstatPair(df, h, varY = 0.7) + expect_true("var_y" %in% names(pair$sumstat)) # 350 + expect_equal(unique(pair$sumstat$var_y), 0.7) + # NA variant_id -> formatVariantId fallback (322-323); no panel overlap -> NULL (330) + dfNA <- data.frame(variant_id = NA_character_, chrom = "chr1", pos = 999999L, + A2 = "G", A1 = "A", z = 1, N = 1000, stringsAsFactors = FALSE) + expect_null(pecotmr:::.cbSumstatPair(dfNA, h)) +}) + +test_that("colocboostPipeline(MultiStudyQtlDataset): a study with no usable bundle is skipped", { + mt <- .cbp_makeMultiStudy() # qd (study1, ENSG_A) + ss (Q1, t1) + local_mocked_bindings(extractBlockGenotypes = .cbp_mockExtractor(), + .package = "pecotmr") + local_mocked_bindings(colocboost = function(...) list(stub = TRUE), + .package = "colocboost") + # traitId="t1" matches the embedded sumstats but not the QtlDataset -> its + # per-study bundle is NULL and skipped (684); the sumstat side still runs. + out <- suppressMessages(suppressWarnings( + colocboostPipeline(mt, traitId = "t1", xqtlColoc = TRUE))) + expect_type(out, "list") +}) diff --git a/tests/testthat/test_ctwasPipeline.R b/tests/testthat/test_ctwasPipeline.R index ba36f65a..8d6a4492 100644 --- a/tests/testthat/test_ctwasPipeline.R +++ b/tests/testthat/test_ctwasPipeline.R @@ -1119,3 +1119,296 @@ test_that("mergeCtwasBoundaryRegions: no-LD path used when LD loaders are absent out <- mergeCtwasBoundaryRegions(fmr) expect_equal(called, "noLD") }) + +# =========================================================================== +# assembleCtwasInputs: remaining input-validation branches +# --------------------------------------------------------------------------- +# These fire before any heavy panel work; they go through the ctwas-gated +# entry point, so skip when ctwas is absent (otherwise the requireNamespace +# guard would surface a different error). +# =========================================================================== + +test_that("assembleCtwasInputs: rejects an unnamed gwasSumStats list", { + skip_if_not_installed("ctwas") + ss <- .ctp_makeGwasSumstats() + tw <- .ctp_makeTwasWeights() + expect_error( + assembleCtwasInputs(gwasSumStats = list(ss, ss), # unnamed + twasWeights = list(block1 = tw)), + "named list keyed by region_id") +}) + +test_that("assembleCtwasInputs: rejects an unnamed twasWeights list", { + skip_if_not_installed("ctwas") + ss <- .ctp_makeGwasSumstats() + tw <- .ctp_makeTwasWeights() + expect_error( + assembleCtwasInputs(gwasSumStats = list(block1 = ss, block2 = ss), + twasWeights = list(tw)), # unnamed + "named list keyed by region_id") +}) + +test_that("assembleCtwasInputs: rejects a non-GwasSumStats list entry", { + skip_if_not_installed("ctwas") + ss <- .ctp_makeGwasSumstats() + tw <- .ctp_makeTwasWeights() + expect_error( + assembleCtwasInputs( + gwasSumStats = list(block1 = ss, block2 = "not a GwasSumStats"), + twasWeights = list(block1 = tw)), + "is not a GwasSumStats") +}) + +test_that("assembleCtwasInputs: rejects a non-TwasWeights list entry", { + skip_if_not_installed("ctwas") + ss <- .ctp_makeGwasSumstats() + expect_error( + assembleCtwasInputs( + gwasSumStats = list(block1 = ss, block2 = ss), + twasWeights = list(block1 = "not a TwasWeights")), + "is not a TwasWeights") +}) + +test_that("assembleCtwasInputs: rejects a non-FineMappingResultBase fineMappingResult", { + skip_if_not_installed("ctwas") + ss <- .ctp_makeGwasSumstats() + tw <- .ctp_makeTwasWeights() + expect_error( + assembleCtwasInputs( + gwasSumStats = list(block1 = ss, block2 = ss), + twasWeights = list(block1 = tw), + fineMappingResult = "not a FineMappingResult"), + "must be a FineMappingResultBase") +}) + +test_that("assembleCtwasInputs: skips a block whose TwasWeights lacks the resolved method", { + skip_if_not_installed("ctwas") + ss <- .ctp_makeGwasSumstats() + ids5 <- vapply(1:5, .ctp_snpId, character(1)) + mkTw <- function(m) TwasWeights( + study = "Q1", context = "c1", trait = "t1", method = m, + entry = list(TwasWeightsEntry(variantIds = ids5, + weights = c(0.1, 0.2, 0.3, 0.4, 0.5))), + ldSketch = .ctp_makeHandle()) + local_mocked_bindings(extractBlockGenotypes = .ctp_mockExtractor(), + .package = "pecotmr") + # Resolved method is "ensemble" (present in block1); block2 carries only + # "mrash", so .ctwasFilterMethod returns NULL and the block is skipped + # in the second pass (the `if (is.null(twMethod)) next` branch). + inputs <- assembleCtwasInputs( + gwasSumStats = list(block1 = ss, block2 = ss), + twasWeights = list(block1 = mkTw("ensemble"), block2 = mkTw("mrash"))) + expect_length(inputs$weights, 1L) + expect_true(all(grepl("^block1\\|", names(inputs$weights)))) + expect_false(any(grepl("^block2\\|", names(inputs$weights)))) +}) + +# =========================================================================== +# .ctwasResolveMethod / .ctwasFilterMethod edge branches +# =========================================================================== + +test_that(".ctwasResolveMethod: errors when no method entries exist", { + expect_error(pecotmr:::.ctwasResolveMethod(list()), + "no method entries") +}) + +test_that(".ctwasFilterMethod: returns NULL when no row matches the method", { + tw <- .ctp_makeTwasWeights() # method == "susie" + expect_null(pecotmr:::.ctwasFilterMethod(tw, "mrash")) +}) + +# =========================================================================== +# .ctwasHarmonizeWeights / .ctwasRenormalizeSusieWeights early NULL returns +# =========================================================================== + +test_that(".ctwasHarmonizeWeights: returns NULL when there is nothing to parse", { + panel <- .ctp_makeAllelePanel() + refVariants <- data.frame( + chrom = panel$snpInfo$chrom, pos = panel$snpInfo$pos, + A2 = panel$snpInfo$ref, A1 = panel$snpInfo$alt, + variant_id = panel$snpInfo$id, stringsAsFactors = FALSE) + expect_null(pecotmr:::.ctwasHarmonizeWeights( + origVids = character(0), origW = numeric(0), refVariants = refVariants)) +}) + +test_that(".ctwasHarmonizeWeights: returns NULL when .matchRefPanel fails", { + local_mocked_bindings(.matchRefPanel = function(...) NULL, + .package = "pecotmr") + panel <- .ctp_makeAllelePanel() + refVariants <- data.frame( + chrom = panel$snpInfo$chrom, pos = panel$snpInfo$pos, + A2 = panel$snpInfo$ref, A1 = panel$snpInfo$alt, + variant_id = panel$snpInfo$id, stringsAsFactors = FALSE) + expect_null(pecotmr:::.ctwasHarmonizeWeights( + origVids = "1:100:C:T", origW = 0.5, refVariants = refVariants)) +}) + +test_that(".ctwasRenormalizeSusieWeights: returns NULL when fit components are NULL", { + fits <- list(lbf_variable = NULL, + mu = matrix(0, 2, 3), + X_column_scale_factors = rep(1, 3)) + expect_null(pecotmr:::.ctwasRenormalizeSusieWeights( + fits, origVids = paste0("v", 1:3), origW = rep(0.1, 3), + keptIdx = 1:3, harmonizedW = rep(0.1, 3))) +}) + +# =========================================================================== +# .ctwasBuildWeights: per-gene skip branches + variance / renorm paths +# =========================================================================== + +test_that(".ctwasBuildWeights: skips a gene with mismatched variantIds/weights lengths", { + ent <- TwasWeightsEntry( + variantIds = vapply(1:5, .ctp_snpId, character(1)), + weights = c(0.1, 0.2, 0.3)) # vector length 3 != 5 ids + tw <- TwasWeights(study = "Q1", context = "c1", trait = "t1", + method = "susie", entry = list(ent), + ldSketch = .ctp_makeHandle()) + expect_length(pecotmr:::.ctwasBuildWeights(tw, .ctp_makeLdPanel()), 0L) +}) + +test_that(".ctwasBuildWeights: skips a gene when harmonization yields nothing", { + local_mocked_bindings(.ctwasHarmonizeWeights = function(...) NULL, + .package = "pecotmr") + expect_length( + pecotmr:::.ctwasBuildWeights(.ctp_makeTwasWeights(), .ctp_makeLdPanel()), + 0L) +}) + +test_that(".ctwasBuildWeights: skips a gene when no variant survives gwasSnpIds intersect", { + expect_length( + pecotmr:::.ctwasBuildWeights( + .ctp_makeTwasWeights(), .ctp_makeLdPanel(), + gwasSnpIds = .ctp_snpId(6)), # gene covers ids 1..5 only + 0L) +}) + +test_that(".ctwasBuildWeights: SuSiE renormalization fires when variants are dropped", { + panel <- .ctp_makeLdPanel() + ids4 <- vapply(1:4, .ctp_snpId, character(1)) + bogus <- "chr1:99900:G:A" # absent from the 6-SNP panel + # Fit dims line up with the 5 original variants. Two single effects, + # each concentrated on one of the first two variants; lbfToAlpha + # softmaxes to ~identity rows, so the renormalized weight over the 4 + # kept columns mirrors mu: w[v1]=mu[1,1]=1, w[v2]=mu[2,2]=7, rest ~0. + fits <- list( + lbf_variable = rbind(c( 100, -100, -100, -100, -100), + c(-100, 100, -100, -100, -100)), + mu = rbind(c(1, 2, 3, 4, 5), + c(6, 7, 8, 9, 10)), + X_column_scale_factors = rep(1, 5L)) + ent <- TwasWeightsEntry( + variantIds = c(ids4, bogus), + weights = c(0.1, 0.2, 0.3, 0.4, 0.5), + fits = fits) + tw <- TwasWeights(study = "Q1", context = "c1", trait = "t1", + method = "susie", entry = list(ent), + ldSketch = .ctp_makeHandle()) + wl <- pecotmr:::.ctwasBuildWeights(tw, panel) + expect_equal(wl[[1L]]$n_wgt, 4L) + expect_equal(unname(wl[[1L]]$wgt[ids4, 1L]), c(1, 7, 0, 0), + tolerance = 1e-6) +}) + +test_that(".ctwasBuildWeights: errors when the LD panel lacks a kept variant's variance", { + panel <- .ctp_makeLdPanel() + panel$variance <- panel$variance[1:4] # drop variance for ids 5, 6 + expect_error( + pecotmr:::.ctwasBuildWeights(.ctp_makeTwasWeights(), panel), + "missing genotype variance") +}) + +test_that(".ctwasBuildWeights: skips a gene when the filter removes every variant", { + expect_length( + pecotmr:::.ctwasBuildWeights( + .ctp_makeTwasWeights(), .ctp_makeLdPanel(), + twasWeightCutoff = 1.0), # |w| max 0.3 -> all dropped + 0L) +}) + +# =========================================================================== +# .ctwasGetFinemapAux — PIP + credible-set membership / purity extraction +# =========================================================================== + +test_that(".ctwasGetFinemapAux: parses pip + cs_95 membership + purity", { + tl <- data.frame( + variant_id = c("chr1:100:G:A", "chr1:200:G:A", + "chr1:300:G:A", "chr1:400:G:A"), + pip = c(0.3, 0.6, 0.8, 0.02), + cs_95 = c("susie_1", "susie_1", "susie_2", "susie_0"), + cs_95_purity = c(0.95, 0.95, 0.7, NA), + stringsAsFactors = FALSE) + fmr <- QtlFineMappingResult( + study = "Q1", context = "c1", trait = "t1", method = "susie", + entry = list(FineMappingEntry(tl$variant_id, NULL, tl))) + aux <- pecotmr:::.ctwasGetFinemapAux(fmr, "Q1", "c1", "t1", "susie") + expect_equal(aux$pip[["chr1:200:G:A"]], 0.6) + expect_length(aux$csMembers, 2L) + expect_setequal(aux$csMembers[[1L]], c("chr1:100:G:A", "chr1:200:G:A")) + expect_setequal(aux$csMembers[[2L]], "chr1:300:G:A") + expect_equal(aux$csPurity, c(0.95, 0.7)) +}) + +test_that(".ctwasGetFinemapAux: cs_95 without a purity column yields NA purity", { + tl <- data.frame( + variant_id = c("v1", "v2", "v3"), + pip = c(0.1, 0.5, 0.9), + cs_95 = c("susie_1", "susie_1", "susie_0"), + stringsAsFactors = FALSE) + fmr <- QtlFineMappingResult( + study = "Q1", context = "c1", trait = "t1", method = "susie", + entry = list(FineMappingEntry(tl$variant_id, NULL, tl))) + aux <- pecotmr:::.ctwasGetFinemapAux(fmr, "Q1", "c1", "t1", "susie") + expect_length(aux$csMembers, 1L) + expect_setequal(aux$csMembers[[1L]], c("v1", "v2")) + expect_true(all(is.na(aux$csPurity))) +}) + +test_that(".ctwasGetFinemapAux: no cs_95 column yields empty CS membership", { + tl <- data.frame(variant_id = c("v1", "v2"), pip = c(0.2, 0.8), + stringsAsFactors = FALSE) + fmr <- QtlFineMappingResult( + study = "Q1", context = "c1", trait = "t1", method = "susie", + entry = list(FineMappingEntry(tl$variant_id, NULL, tl))) + aux <- pecotmr:::.ctwasGetFinemapAux(fmr, "Q1", "c1", "t1", "susie") + expect_length(aux$csMembers, 0L) + expect_length(aux$csPurity, 0L) + expect_equal(aux$pip[["v2"]], 0.8) +}) + +test_that(".ctwasGetFinemapAux: NULL input, no-match tuple, and empty topLoci all return NULL", { + expect_null(pecotmr:::.ctwasGetFinemapAux(NULL, "Q1", "c1", "t1", "susie")) + tl <- data.frame(variant_id = "v1", pip = 0.5, stringsAsFactors = FALSE) + fmr <- QtlFineMappingResult( + study = "Q1", context = "c1", trait = "t1", method = "susie", + entry = list(FineMappingEntry(tl$variant_id, NULL, tl))) + # No matching (study, context, trait, method) tuple -> NULL. + expect_null(pecotmr:::.ctwasGetFinemapAux(fmr, "NOPE", "c1", "t1", "susie")) + # Matching tuple but an empty topLoci -> NULL. + fmrEmpty <- QtlFineMappingResult( + study = "Q1", context = "c1", trait = "t1", method = "susie", + entry = list(FineMappingEntry(character(0), NULL, data.frame()))) + expect_null(pecotmr:::.ctwasGetFinemapAux(fmrEmpty, "Q1", "c1", "t1", "susie")) +}) + +# =========================================================================== +# .ctwasFilterVariants / .ctwasSnpInfoForGwasBlock early returns +# =========================================================================== + +test_that(".ctwasFilterVariants: returns NULL for an empty variant set", { + expect_null(pecotmr:::.ctwasFilterVariants( + vids = character(0), w = numeric(0), finemapAux = NULL, + twasWeightCutoff = 0, csMinCor = 0.8, + minPipCutoff = 0, maxNumVariants = Inf)) +}) + +test_that(".ctwasSnpInfoForGwasBlock: returns an empty frame when the block has no SNP ids", { + # GRanges entry with no mcols -> no SNP column -> blockIds is empty. + gr <- GenomicRanges::GRanges("chr1", IRanges::IRanges(100L, width = 1L)) + gss <- GwasSumStats(study = "G1", entry = list(gr), genome = "hg19", + ldSketch = .ctp_makeHandle(), qcInfo = list(step1 = "ok")) + panelInfo <- data.frame(chrom = 1L, id = "chr1:100:G:A", pos = 100L, + alt = "A", ref = "G", stringsAsFactors = FALSE) + out <- pecotmr:::.ctwasSnpInfoForGwasBlock(gss, panelInfo) + expect_equal(nrow(out), 0L) + expect_setequal(colnames(out), colnames(panelInfo)) +}) diff --git a/tests/testthat/test_fineMappingPipeline.R b/tests/testthat/test_fineMappingPipeline.R index dfce0a2d..8772114e 100644 --- a/tests/testthat/test_fineMappingPipeline.R +++ b/tests/testthat/test_fineMappingPipeline.R @@ -138,7 +138,7 @@ context("fineMappingPipeline") .fmp_mockPostprocess <- function() { function(fit, method, dataX, dataY, coverage, secondaryCoverage, signalCutoff, minAbsCorr, csInput = NULL, af = NULL, - region = NULL) { + region = NULL, conditionIdx = NULL) { # Capture the requesting method on the FineMappingEntry so the test can # verify the right dispatch happened. if (is.matrix(dataX)) { @@ -172,20 +172,34 @@ test_that(".fmNormalizeMethods: rejects NULL / empty / non-character/list", { "character vector or") }) -test_that(".fmNormalizeMethods: char-vector form deduplicates + empty methodArgs", { +test_that(".fmNormalizeMethods: char-vector form deduplicates + seeds susie L defaults", { res <- pecotmr:::.fmNormalizeMethods(c("susie", "susie", "susieInf")) expect_equal(res$tokens, c("susie", "susieInf")) expect_equal(names(res$methodArgs), c("susie", "susieInf")) - expect_true(all(vapply(res$methodArgs, length, integer(1)) == 0L)) + # SuSiE-family tokens get the pipeline L / L_greedy defaults (pecotmr owns + # these, not the CLI wrappers). + expect_equal(res$methodArgs$susie$L, 20L) + expect_equal(res$methodArgs$susie$L_greedy, 5L) + expect_equal(res$methodArgs$susieInf$L, 20L) + # Non-susie-family tokens are left untouched. + expect_length(pecotmr:::.fmNormalizeMethods("mvsusie")$methodArgs$mvsusie, 0L) }) -test_that(".fmNormalizeMethods: named-list form carries per-method kwargs", { +test_that(".fmNormalizeMethods: named-list keeps kwargs + fills missing susie L", { res <- pecotmr:::.fmNormalizeMethods( list(susie = list(L = 1, refine = FALSE), susieInf = list())) expect_equal(res$tokens, c("susie", "susieInf")) - expect_equal(res$methodArgs$susie, list(L = 1, refine = FALSE)) - expect_equal(res$methodArgs$susieInf, list()) + expect_equal(res$methodArgs$susie$L, 1) # explicit kwarg wins + expect_false(res$methodArgs$susie$refine) + expect_equal(res$methodArgs$susie$L_greedy, 5L) # filled-in default + expect_equal(res$methodArgs$susieInf$L, 20L) # both filled +}) + +test_that(".fmNormalizeMethods: L / Lgreedy args override the susie defaults", { + res <- pecotmr:::.fmNormalizeMethods(c("susie"), L = 30L, Lgreedy = 7L) + expect_equal(res$methodArgs$susie$L, 30L) + expect_equal(res$methodArgs$susie$L_greedy, 7L) }) test_that(".fmNormalizeMethods: list without names errors", { @@ -242,6 +256,20 @@ test_that(".fmCheckMethodCapabilities: mvsusie on GwasSumStats rejected", { ) }) +test_that(".fmTraitsInRegion: keeps genes overlapping the region; NULL keeps all", { + # g1: 1000-1500, g2: 2000-2500, g3: 3000-3500 (width 500). + se <- .fmp_makeSe(traits = c("g1", "g2", "g3"), + starts = c(1000L, 2000L, 3000L)) + r1 <- GenomicRanges::GRanges("chr1", IRanges::IRanges(900L, 1600L)) # g1 only + r12 <- GenomicRanges::GRanges("chr1", IRanges::IRanges(900L, 2600L)) # g1 + g2 + expect_equal(pecotmr:::.fmTraitsInRegion(se, c("g1", "g2", "g3"), r1), "g1") + expect_setequal(pecotmr:::.fmTraitsInRegion(se, c("g1", "g2", "g3"), r12), + c("g1", "g2")) + # NULL region (gene/cisWindow mode) leaves the set unchanged. + expect_setequal(pecotmr:::.fmTraitsInRegion(se, c("g1", "g2", "g3"), NULL), + c("g1", "g2", "g3")) +}) + # =========================================================================== # .fmResolveSusieChain # =========================================================================== @@ -488,7 +516,8 @@ test_that("fineMappingPipeline(QtlDataset): threads directional af into postproc captured$cols <- NULL recordingPostprocess <- function(fit, method, dataX, dataY, coverage, secondaryCoverage, signalCutoff, minAbsCorr, - csInput = NULL, af = NULL, region = NULL) { + csInput = NULL, af = NULL, region = NULL, + conditionIdx = NULL) { captured$af <- af captured$cols <- colnames(dataX) vids <- colnames(dataX) @@ -656,6 +685,86 @@ test_that(".fmLookupMrmashFit: finds the mr.mash fit by (study, trait)", { expect_null(lk(NULL, "S", "G")) # no TwasWeights supplied }) +test_that(".fmLookupMrmashCv: finds the per-fold CV payload by (study, trait)", { + mkEntry <- function(cv) TwasWeightsEntry( + variantIds = c("v1", "v2"), weights = c(0.1, 0.2), cvResult = cv) + cv <- list(samplePartition = data.frame(Sample = "s1", Fold = 1L), + foldFits = list(fold_1 = list(w0 = 1))) + tw <- TwasWeights( + study = c("S", "S"), context = c("c1", "c2"), trait = c("G", "G"), + method = c("mrmash", "mrmash"), entry = list(mkEntry(cv), mkEntry(NULL))) + lk <- function(...) pecotmr:::.fmLookupMrmashCv(...) + expect_identical(lk(tw, "S", "G"), cv) + expect_null(lk(tw, "S", "OTHER")) + expect_null(lk(NULL, "S", "G")) + # A cvResult without foldFits is not a per-fold prior payload -> NULL. + tw2 <- TwasWeights(study = "S", context = "c1", trait = "G", method = "mrmash", + entry = list(mkEntry(list(predictions = 1)))) + expect_null(lk(tw2, "S", "G")) +}) + +test_that(".buildMvsusieReweightedPrior: overrideU swaps matrices, keeps fit w0/V", { + fit <- list(dataDrivenPriorMatrices = list(U = list(K = diag(2)), w = c(K = 1)), + w0 = c(K_grid1 = 1), V = diag(2) * 7) + override <- list(U = list(K = diag(2) * 5)) + captured <- NULL + local_mocked_bindings(rescaleCovW0 = function(w0) c(K = 1), .package = "pecotmr") + local_mocked_bindings( + create_mixture_prior = function(...) { captured <<- list(...); "PRIOR" }, + .package = "mvsusieR") + res <- pecotmr:::.buildMvsusieReweightedPrior(fit, c("c1", "c2"), + overrideU = override) + expect_equal(captured$mixture_prior$matrices$K, diag(2) * 5) # the override U + expect_equal(res$residualVariance, diag(2) * 7) # the fit's own V +}) + +test_that(".fmBuildMvsusiePriorCv: mode B reweights each fold's own fit", { + sp <- data.frame(Sample = paste0("s", 1:6), Fold = rep(1:3, each = 2), + stringsAsFactors = FALSE) + mkFold <- function(uname, v) list( + dataDrivenPriorMatrices = list(U = setNames(list(diag(2)), uname), + w = setNames(0.5, uname)), + w0 = setNames(0.5, paste0(uname, "_grid1")), + V = diag(2) * v) + mvCv <- list(samplePartition = sp, foldFits = list( + fold_1 = mkFold("A", 10), fold_2 = mkFold("B", 20), fold_3 = mkFold("C", 30))) + local_mocked_bindings( + rescaleCovW0 = function(w0) setNames(1, sub("_grid1$", "", names(w0))), + .package = "pecotmr") + local_mocked_bindings( + create_mixture_prior = function(...) "PRIOR", .package = "mvsusieR") + out <- pecotmr:::.fmBuildMvsusiePriorCv(mvCv, fullFitParts = NULL, + conditionNames = c("c1", "c2")) + expect_equal(names(out), c("1", "2", "3")) + expect_equal(out[["1"]]$residualVariance, diag(2) * 10) # fold 1's own V + expect_equal(out[["3"]]$residualVariance, diag(2) * 30) # fold 3's own V +}) + +test_that(".fmBuildMvsusiePriorCv: mode C reuses full-fit w0/V with per-fold U", { + sp <- data.frame(Sample = paste0("s", 1:4), Fold = rep(1:2, each = 2), + stringsAsFactors = FALSE) + full <- list(dataDrivenPriorMatrices = list(U = list(Z = diag(2)), w = c(Z = 1)), + w0 = c(Z_grid1 = 1), V = diag(2) * 99) + # Fold stubs carry only U (no w0) -> mode C: override the full fit's U. + mvCv <- list(samplePartition = sp, foldFits = list( + fold_1 = list(dataDrivenPriorMatrices = list(U = list(Z = diag(2) * 2))), + fold_2 = list(dataDrivenPriorMatrices = list(U = list(Z = diag(2) * 3))))) + captured <- list() + local_mocked_bindings( + rescaleCovW0 = function(w0) setNames(1, sub("_grid1$", "", names(w0))), + .package = "pecotmr") + local_mocked_bindings( + create_mixture_prior = function(...) { + captured[[length(captured) + 1L]] <<- list(...); "PRIOR" }, + .package = "mvsusieR") + out <- pecotmr:::.fmBuildMvsusiePriorCv(mvCv, fullFitParts = full, + conditionNames = c("c1", "c2")) + expect_equal(names(out), c("1", "2")) + expect_equal(out[["1"]]$residualVariance, diag(2) * 99) # full fit's V + expect_equal(captured[[1]]$mixture_prior$matrices$Z, diag(2) * 2) # fold 1's U + expect_equal(captured[[2]]$mixture_prior$matrices$Z, diag(2) * 3) # fold 2's U +}) + test_that("fineMappingPipeline(QtlDataset): pipCutoffToSkip skips no-signal univariate traits", { qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) # Stateful screen: reject the first block (ENSG_A), keep the rest (ENSG_B). @@ -950,6 +1059,99 @@ test_that("fineMappingPipeline(QtlDataset): mvsusie both multi falls back to per fineMappingPipeline(qd, methods = "mvsusie", cisWindow = 1000L)) # 2 contexts * 2 traits = 4 rows (joint fit reused per context). expect_equal(nrow(res), 4L) + # Auto-detection now routes through the joint engine: each per-context group + # is cross-trait, so every row tags its co-fit trait membership. + expect_true("jointTraits" %in% names(res)) + expect_true(all(grepl("ENSG_A;ENSG_B|ENSG_B;ENSG_A", + as.character(res$jointTraits)))) +}) + +test_that("fineMappingPipeline(QtlDataset): multi-trait auto-detection USES the data-driven mr.mash prior", { + # Regression for the original bug: the old multi-trait path hardcoded the + # canonical create_mixture_prior(R=ncol). Routed through the engine, it now + # looks up a prior cross-trait mr.mash fit (study, context fixed; trait + # match-any) and reweights from it. + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + fitParts <- list(dataDrivenPriorMatrices = list(U = list(K = diag(2)), + w = c(K = 1)), + w0 = c(K_grid1 = 1), V = diag(2)) + mkE <- function() TwasWeightsEntry(variantIds = c("v1", "v2"), + weights = c(0.1, 0.2), fits = fitParts) + # What twasWeightsPipeline(jointSpecification='trait') emits: per-trait rows + # each carrying the SHARED joint fit for (study1, brain). + tw <- TwasWeights(study = c("study1", "study1"), context = c("brain", "brain"), + trait = c("ENSG_A", "ENSG_B"), method = c("mrmash", "mrmash"), + entry = list(mkE(), mkE())) + sawMixturePrior <- FALSE + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmPostprocessOne = .fmp_mockPostprocess(), + rescaleCovW0 = function(w0) c(K = 1), + .package = "pecotmr") + local_mocked_bindings( + mvsusie = function(X, Y, prior_variance, coverage, ...) + list(token = "mvsusie"), + create_mixture_prior = function(...) { + if (!is.null(list(...)$mixture_prior)) sawMixturePrior <<- TRUE + "PRIOR" + }, + .package = "mvsusieR") + res <- suppressMessages( + fineMappingPipeline(qd, methods = "mvsusie", cisWindow = 1000L, + twasWeights = tw)) + expect_equal(nrow(res), 2L) + expect_true(sawMixturePrior) # data-driven prior built, not canonical +}) + +test_that("fineMappingPipeline(QtlDataset): multi-trait without twasWeights keeps the canonical prior", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + sawMixturePrior <- FALSE; sawCanonical <- FALSE + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie = .fmp_mockMvsusie(), + create_mixture_prior = function(...) { + a <- list(...) + if (!is.null(a$mixture_prior)) sawMixturePrior <<- TRUE + if (!is.null(a$R)) sawCanonical <<- TRUE + "PRIOR" + }, + .package = "mvsusieR") + res <- suppressMessages( + fineMappingPipeline(qd, methods = "mvsusie", cisWindow = 1000L)) + expect_false(sawMixturePrior) + expect_true(sawCanonical) +}) + +test_that("fineMappingPipeline(QtlDataset): mvsusie resume cache short-circuits the joint fitter", { + # All conditions of the cross-trait group are already in the prior partial + # result -> the engine reuses the cached entries and never calls mvsusie. + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + cachedEntry <- function() FineMappingEntry( + variantIds = paste0("v", 1:3), + susieFit = list(token = "mvsusie_cached"), + topLoci = data.frame(variant_id = paste0("v", 1:3), + pip = c(0.9, 0.5, 0.1), stringsAsFactors = FALSE)) + cache <- QtlFineMappingResult( + study = c("study1", "study1"), context = c("brain", "brain"), + trait = c("ENSG_A", "ENSG_B"), method = c("mvsusie", "mvsusie"), + entry = list(cachedEntry(), cachedEntry())) + mv_calls <- 0 + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie = function(...) { mv_calls <<- mv_calls + 1L; list() }, + create_mixture_prior = .fmp_mockMixturePrior(), + .package = "mvsusieR") + res <- suppressMessages( + fineMappingPipeline(qd, methods = "mvsusie", cisWindow = 1000L, + fineMappingResult = cache)) + expect_equal(mv_calls, 0L) # cache hit -> fitter never called + expect_equal(nrow(res), 2L) }) test_that("fineMappingPipeline(QtlDataset): jointSpec='context' produces one joint row per trait", { @@ -967,10 +1169,11 @@ test_that("fineMappingPipeline(QtlDataset): jointSpec='context' produces one joi fineMappingPipeline(qd, methods = "mvsusie", cisWindow = 1000L, jointSpecification = "context")) expect_s4_class(res, "QtlFineMappingResult") - # One joint row per trait (context column collapses to "joint") - expect_equal(nrow(res), 2L) + # Per-context rows: each trait's 2-context joint emits 2 rows (4 total), + # sharing the joint fit; jointContexts tags each with the co-fit membership. + expect_equal(nrow(res), 4L) expect_true("jointContexts" %in% names(res)) - expect_true(all(as.character(res$context) == "joint")) + expect_setequal(as.character(res$context), c("brain", "liver")) expect_setequal(getTraits(res), c("ENSG_A", "ENSG_B")) expect_true(all(grepl("brain;liver|liver;brain", as.character(res$jointContexts)))) @@ -992,14 +1195,15 @@ test_that("fineMappingPipeline(QtlDataset): jointSpec='context' + univariate com fineMappingPipeline(qd, methods = c("susie", "mvsusie"), cisWindow = 1000L, jointSpecification = "context", addSusieInf = FALSE)) - # susie -> 2 univariate rows (one per context); mvsusie -> 1 joint row. - expect_equal(nrow(res), 3L) - expect_equal(sum(as.character(res$method) == "mvsusie"), 1L) + # susie -> 2 univariate rows (one per context); mvsusie joint over 2 contexts + # -> 2 per-context rows sharing the joint fit. 4 rows total. + expect_equal(nrow(res), 4L) + expect_equal(sum(as.character(res$method) == "mvsusie"), 2L) expect_equal(sum(as.character(res$method) == "susie"), 2L) - # Univariate rows have NA in jointContexts; joint row has the membership. + # Univariate rows have NA in jointContexts; both mvsusie rows carry membership. jc <- as.character(res$jointContexts) expect_equal(sum(is.na(jc)), 2L) - expect_equal(sum(!is.na(jc)), 1L) + expect_equal(sum(!is.na(jc)), 2L) }) test_that("fineMappingPipeline(QtlDataset): jointSpec='context' with only one context skips with message", { @@ -1035,10 +1239,10 @@ test_that("fineMappingPipeline(QtlDataset): jointSpec='trait' produces one joint fineMappingPipeline(qd, methods = "mvsusie", cisWindow = 1000L, jointSpecification = "trait")) expect_s4_class(res, "QtlFineMappingResult") - # One joint row per context (trait collapses to "joint") - expect_equal(nrow(res), 2L) + # Per-trait rows: each context's 2-trait joint emits 2 rows (4 total). + expect_equal(nrow(res), 4L) expect_true("jointTraits" %in% names(res)) - expect_true(all(as.character(res$trait) == "joint")) + expect_setequal(as.character(res$trait), c("ENSG_A", "ENSG_B")) expect_setequal(as.character(res$context), c("brain", "liver")) }) @@ -1056,9 +1260,10 @@ test_that("fineMappingPipeline(QtlDataset): jointSpec='trait' with fsusie wires fineMappingPipeline(qd, methods = "fsusie", cisWindow = 1000L, jointSpecification = "trait")) expect_s4_class(res, "QtlFineMappingResult") - expect_equal(nrow(res), 1L) - expect_equal(as.character(res$method), "fsusie") - expect_equal(as.character(res$trait), "joint") + # fsusie over 2 traits emits one per-trait row each (shared functional fit). + expect_equal(nrow(res), 2L) + expect_true(all(as.character(res$method) == "fsusie")) + expect_setequal(as.character(res$trait), c("ENSG_A", "ENSG_B")) expect_true("jointTraits" %in% names(res)) }) @@ -1424,6 +1629,28 @@ test_that("fineMappingPipeline(QtlSumStats): mvsusie rejected when every (study, ) }) +test_that("fineMappingPipeline(QtlSumStats): mvsusie auto-detection fits cross-context per (study, trait)", { + # Multi-context (c1, c2) single-trait collection: mvsusie without an explicit + # jointSpecification routes through the joint engine -> per-context rows that + # share the cross-context RSS fit, each tagged with the co-fit membership. + ss <- .fmp_makeMultiCtxQtlSumStats() + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + local_mocked_bindings( + mvsusie_rss = function(Z, R, N, prior_variance, coverage, ...) + list(token = "mvsusie_rss"), + create_mixture_prior = .fmp_mockMixturePrior(), + .package = "mvsusieR") + res <- suppressMessages(fineMappingPipeline(ss, methods = "mvsusie")) + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 2L) + expect_setequal(as.character(res$context), c("c1", "c2")) + expect_true("jointContexts" %in% names(res)) + expect_true(all(grepl("c1;c2|c2;c1", as.character(res$jointContexts)))) +}) + test_that("fineMappingPipeline(QtlSumStats): cache hit short-circuits the RSS fitter", { ss <- .fmp_makeQtlSumStats() cachedEntry <- FineMappingEntry( @@ -1903,10 +2130,566 @@ test_that("fineMappingPipeline(QtlDataset): jointSpec + jointRegions=FALSE merge qd, methods = "mvsusie", traitId = c("ENSG_A", "ENSG_B"), region = regions, jointRegions = FALSE, jointSpecification = "context")) expect_s4_class(res, "QtlFineMappingResult") - # cross-context joint -> one row per trait, context collapses to "joint". - expect_equal(nrow(res), 2L) - expect_true(all(as.character(res$context) == "joint")) - fit <- getSusieFit(res, study = "study1", context = "joint", + # cross-context joint -> per-context rows (2 contexts x 2 traits = 4), each + # merged across the 2 regions. + expect_equal(nrow(res), 4L) + expect_setequal(as.character(res$context), c("brain", "liver")) + fit <- getSusieFit(res, study = "study1", context = "brain", trait = "ENSG_A", method = "mvsusie") expect_equal(names(fit), c("region1", "region2")) # merged across regions }) + +# =========================================================================== +# Additional coverage: tractable pure helpers. (The real-fit internals -- +# .fmPostprocessOne, mvsusie/fsusie fold weights, susieInf chaining, the +# top-PC cross-trait path -- are driven by the mocked-fit pipeline tests and +# the actual fitting is left to the external solvers.) +# =========================================================================== + +test_that(".fmCheckMethodCapabilities: empty token list is a no-op", { + expect_null(pecotmr:::.fmCheckMethodCapabilities(character(0), "QtlDataset")) +}) + +test_that(".fmCacheLookupGwas: NULL / non-GwasFineMappingResult -> NULL", { + expect_null(pecotmr:::.fmCacheLookupGwas(NULL, "G1", "susie", "chr1:1-100")) + fmr <- QtlFineMappingResult(study = "S", context = "c1", trait = "t1", + method = "susie", entry = list(FineMappingEntry("v1", list(), + data.frame(variant_id = "v1", pip = 0.9)))) + expect_null(pecotmr:::.fmCacheLookupGwas(fmr, "S", "susie", "chr1:1-100")) +}) + +test_that(".buildMvsusieReweightedPrior: empty reweighted w0 -> canonical(V)", { + local_mocked_bindings(rescaleCovW0 = function(w0) c(zzz = 1), .package = "pecotmr") + local_mocked_bindings(create_mixture_prior = function(...) "PV", + .package = "mvsusieR") + fp <- list(dataDrivenPriorMatrices = list(U = list(compA = diag(2))), + w0 = c(compA_grid1 = 1), V = diag(2)) + res <- pecotmr:::.buildMvsusieReweightedPrior(fp, c("c1", "c2")) + expect_equal(res$residualVariance, diag(2)) # w0 names disjoint from U (749) +}) + +test_that(".fmBuildMvsusiePriorCv: NULL CV -> NULL; NULL fold fits are skipped", { + expect_null(pecotmr:::.fmBuildMvsusiePriorCv(NULL, NULL, c("c1", "c2"))) + local_mocked_bindings(.buildMvsusieReweightedPrior = function(...) "PRIOR", + .package = "pecotmr") + mvCv <- list(samplePartition = data.frame(Sample = paste0("s", 1:4), + Fold = c(1, 1, 2, 2)), + foldFits = list(fold_1 = list(w0 = 1), fold_2 = NULL)) + out <- pecotmr:::.fmBuildMvsusiePriorCv(mvCv, list(w0 = 1, V = diag(2)), + c("c1", "c2")) + expect_equal(out[["1"]], "PRIOR") + expect_null(out[["2"]]) # NULL fold -> next (831) +}) + +test_that(".fmTopPcScores: PCA scores for multi-trait Y; degenerate inputs -> NULL", { + expect_null(pecotmr:::.fmTopPcScores(matrix(1, 5, 1), 2L)) # < 2 traits + expect_null(pecotmr:::.fmTopPcScores(matrix(c(1, NA), 2, 2), 2L)) # < 2 complete + expect_null(pecotmr:::.fmTopPcScores( + cbind(a = rep(1, 4), b = rnorm(4)), 2L)) # < 2 nonzero-var + set.seed(1) + Y <- matrix(rnorm(20), 10, 2, dimnames = list(paste0("s", 1:10), c("t1", "t2"))) + sc <- pecotmr:::.fmTopPcScores(Y, 2L) + expect_equal(dim(sc), c(10L, 2L)) + expect_equal(colnames(sc), c("topPC1", "topPC2")) +}) + +test_that(".fmSerScreen / .fmScreenActive / .fmSerScreenColumns", { + set.seed(2) + X <- matrix(rnorm(40), 20, 2, dimnames = list(paste0("s", 1:20), c("v1", "v2"))) + y <- rnorm(20) + expect_true(pecotmr:::.fmSerScreen(X, y, cutoff = 0)) # disabled + expect_true(pecotmr:::.fmSerScreen(X, c(1, rep(NA, 19)), 0.5)) # < 2 obs (880) + expect_type(pecotmr:::.fmSerScreen(X, y, 0.5), "logical") # real susie fit + local_mocked_bindings(susie = function(...) stop("boom"), .package = "susieR") + expect_true(pecotmr:::.fmSerScreen(X, y, 0.5)) # fit fails -> keep (886) + expect_false(pecotmr:::.fmScreenActive(0)) + expect_true(pecotmr:::.fmScreenActive(0.5)) + expect_length(pecotmr:::.fmSerScreenColumns(X, matrix(rnorm(40), 20, 2), 0), 2L) +}) + +test_that(".fmMergeEntries: empty -> NULL; merges per-region entries + relabels CS", { + expect_null(pecotmr:::.fmMergeEntries(list(NULL, NULL))) + e1 <- FineMappingEntry("v1", list(a = 1), + data.frame(variant_id = "v1", pip = 0.9, cs_95 = "susie_1")) + e2 <- FineMappingEntry("v2", list(b = 2), + data.frame(variant_id = "v2", pip = 0.8, cs_95 = "susie_1")) + m <- pecotmr:::.fmMergeEntries(list(e1, e2)) + expect_s4_class(m, "FineMappingEntry") + expect_equal(m@variantIds, c("v1", "v2")) + expect_equal(m@topLoci$cs_95, c("susie_1", "susie_2")) # region2 CS relabelled + expect_equal(names(m@susieFit), c("region1", "region2")) +}) + +test_that(".fmJointBlocks: all-NULL -> NULL; single -> unchanged; many -> merged", { + mkE <- function(v) FineMappingEntry(v, list(), + data.frame(variant_id = v, pip = 0.9)) + expect_null(pecotmr:::.fmJointBlocks(list(1, 2), function(rg) NULL)) + expect_equal(pecotmr:::.fmJointBlocks(list(1), + function(rg) mkE("v1"))@variantIds, "v1") + expect_equal(pecotmr:::.fmJointBlocks(list(1, 2), + function(rg) mkE(paste0("v", rg)))@variantIds, c("v1", "v2")) +}) + +test_that(".fmTwasMethodKey: bare token without adapter returned unchanged", { + expect_equal(pecotmr:::.fmTwasMethodKey("lasso"), "lasso") # no adapter (1170) + expect_equal(pecotmr:::.fmTwasMethodKey("susie"), "susie") # adapter -> stripped +}) + +test_that(".fmCvMetricRow: < 3 usable predictions -> all-NA row", { + expect_true(all(is.na(pecotmr:::.fmCvMetricRow(c(1, 2), c(1, 2))))) # < 3 (1182) + ok <- pecotmr:::.fmCvMetricRow(c(1, 2, 3, 4, 5), c(1.1, 2, 2.9, 4, 5)) + expect_false(is.na(ok[["rsq"]])) +}) + +test_that(".fmSliceCv: NULL cv or missing predicted key -> NULL", { + expect_null(pecotmr:::.fmSliceCv(NULL, "susie")) # 1323 + cv <- list(samplePartition = NULL, + prediction = list(enet_predicted = matrix(0, 1, 1)), + performance = list(enet_performance = matrix(0, 1, 6))) + expect_null(pecotmr:::.fmSliceCv(cv, "susie")) # pk absent (1327) + expect_true("enet_predicted" %in% names(pecotmr:::.fmSliceCv(cv, "enet")$prediction)) +}) + +test_that(".fmAttachCv: NULL entry or NULL cvResult returns the entry unchanged", { + e <- FineMappingEntry("v1", list(), data.frame(variant_id = "v1", pip = 0.9)) + expect_identical(pecotmr:::.fmAttachCv(e, NULL), e) # 1336 + expect_null(pecotmr:::.fmAttachCv(NULL, list(x = 1))) + expect_equal(getCvResult(pecotmr:::.fmAttachCv(e, list(samplePartition = 1))), + list(samplePartition = 1)) +}) + +# ---- jointSpec dispatch branches in the QtlSumStats / MultiStudy methods ----- +# (dispatchers mocked; the real joint fitting is covered in test_jointSpecification.R) + +test_that(".fmTopPcScores: nPCs = 0 -> k < 1 -> NULL", { + set.seed(3) + Y <- matrix(rnorm(20), 10, 2, dimnames = list(paste0("s", 1:10), c("t1", "t2"))) + expect_null(pecotmr:::.fmTopPcScores(Y, 0L)) # k < 1 (858) +}) + +test_that("fineMappingPipeline(QtlSumStats): mvsusie-only jointSpec returns the joint result", { + ss <- .fmp_makeQtlSumStats() + jr <- QtlFineMappingResult(study = "Q1", context = "c1", trait = "t1", + method = "mvsusie", entry = list(FineMappingEntry("v1", list(), + data.frame(variant_id = "v1", pip = 0.9)))) + local_mocked_bindings(.fmDispatchJointSpecsQtlSumStats = function(...) jr, + .package = "pecotmr") + res <- suppressMessages( + fineMappingPipeline(ss, methods = "mvsusie", jointSpecification = "context")) + expect_s4_class(res, "QtlFineMappingResult") + expect_setequal(as.character(res$method), "mvsusie") # 1843-1857 +}) + +test_that("fineMappingPipeline(QtlSumStats): mvsusie-only jointSpec with no fits -> error", { + ss <- .fmp_makeQtlSumStats() + local_mocked_bindings(.fmDispatchJointSpecsQtlSumStats = function(...) NULL, + .package = "pecotmr") + expect_error( + suppressMessages(fineMappingPipeline(ss, methods = "mvsusie", + jointSpecification = "context")), + "no joint fits produced") +}) + +.fmp_makeMultiStudy <- function() MultiStudyQtlDataset( + qtlDatasets = list(study1 = .fmp_makeQtlDataset(contexts = "brain", + traits = "ENSG_A")), + sumStats = .fmp_makeQtlSumStats()) + +test_that("fineMappingPipeline(MultiStudyQtlDataset): region + cisWindow is rejected", { + expect_error( + fineMappingPipeline(.fmp_makeMultiStudy(), methods = "mvsusie", + region = GenomicRanges::GRanges("chr1", + IRanges::IRanges(1, 100)), + cisWindow = 1000L), + "specify either") # 1694 +}) + +test_that("fineMappingPipeline(MultiStudyQtlDataset): mvsusie-only jointSpec returns the joint result", { + mt <- .fmp_makeMultiStudy() + jr <- QtlFineMappingResult(study = "study1", context = "brain", trait = "ENSG_A", + method = "mvsusie", entry = list(FineMappingEntry("v1", list(), + data.frame(variant_id = "v1", pip = 0.9)))) + local_mocked_bindings(.fmDispatchJointSpecsMultiStudy = function(...) jr, + .package = "pecotmr") + res <- suppressMessages( + fineMappingPipeline(mt, methods = "mvsusie", jointSpecification = "context")) + expect_s4_class(res, "QtlFineMappingResult") + expect_setequal(as.character(res$method), "mvsusie") # 1709-1726 +}) + +# ============================================================================= +# Coverage top-ups (mock the fitters, exercise the orchestration) +# ============================================================================= + +test_that(".fmRelabelCs returns non-matching labels unchanged", { + fn <- pecotmr:::.fmRelabelCs + # "nomatch" doesn't match ^(.*)_([0-9]+)$ (length(parts) != 3) -> returned + # as-is; "susie_0" keeps the not-in-CS sentinel; "susie_1" shifts by offset. + out <- fn(c("susie_1", "nomatch", "susie_0"), offset = 2L) + expect_equal(out, c("susie_3", "nomatch", "susie_0")) +}) + +test_that(".fmCrossValidate + .fmFoldWeights cover the mvSuSiE CV path (mocked fitter)", { + # Mock one level below the orchestration: fitMvsusie/mvsusieWeights return + # canned outputs (sized to the per-fold training columns), so the real + # .fmFoldWeights mvsusie branch + .fmCrossValidate fold loop run at ~no cost. + local_mocked_bindings( + fitMvsusie = function(X, Y, ...) list(vn = colnames(X), R = ncol(as.matrix(Y))), + mvsusieWeights = function(mvsusieFit = NULL, ...) + matrix(0.01, length(mvsusieFit$vn), mvsusieFit$R, + dimnames = list(mvsusieFit$vn, NULL)), + .package = "pecotmr") + set.seed(1) + n <- 30L; p <- 5L; R <- 2L + X <- matrix(rbinom(n * p, 2, 0.4), n, p, + dimnames = list(paste0("s", 1:n), paste0("v", 1:p))) + Y <- matrix(rnorm(n * R), n, R, + dimnames = list(rownames(X), c("c1", "c2"))) + cv <- pecotmr:::.fmCrossValidate( + X, Y, tokens = "mvsusie", methodArgs = list(mvsusie = list()), + fold = 3L, coverage = 0.95, verbose = 0) + expect_named(cv, c("samplePartition", "prediction", "performance")) + expect_true("mvsusie_performance" %in% names(cv$performance)) + expect_equal(dim(cv$prediction[["mvsusie_predicted"]]), c(n, R)) +}) + +test_that(".fmFoldWeights covers the fSuSiE branch (mocked fitter)", { + local_mocked_bindings( + fitFsusie = function(...) list(), + fsusieWeights = function(fsusieFit = NULL, variantIds = NULL, ...) + matrix(0.02, length(variantIds), 1L, dimnames = list(variantIds, NULL)), + .package = "pecotmr") + set.seed(2) + n <- 30L; p <- 5L + X <- matrix(rbinom(n * p, 2, 0.4), n, p, + dimnames = list(paste0("s", 1:n), paste0("v", 1:p))) + Y <- matrix(rnorm(n * 4L), n, 4L, dimnames = list(rownames(X), NULL)) + W <- pecotmr:::.fmFoldWeights("fsusie", X, Y, coverage = 0.95, + userArgs = list(), pos = seq_len(p)) + expect_true(is.matrix(W)) + expect_equal(rownames(W), colnames(X)) +}) + +test_that(".fmFitXBlock fits the susieInf indiv chain + cross-validates (mocked)", { + local_mocked_bindings( + .fmFitSusieIndiv = function(...) list(), + .fmPostprocessOne = function(fit, method, dataX, dataY, ...) + FineMappingEntry(colnames(dataX), list(), + data.frame(variant_id = colnames(dataX), pip = 0.5)), + .fmFoldWeights = function(token, Xtr, Ytr, ...) + matrix(0.01, ncol(Xtr), 1L, dimnames = list(colnames(Xtr), NULL)), + .package = "pecotmr") + set.seed(1) + X <- matrix(rbinom(60, 2, 0.4), 20, 3, + dimnames = list(paste0("s", 1:20), c("v1", "v2", "v3"))) + y <- rnorm(20) + out <- pecotmr:::.fmFitXBlock( + X, y, toRun = "susieInf", addSusieInf = FALSE, coverage = 0.95, + secondaryCoverage = 0.7, signalCutoff = 0.1, minAbsCorr = 0.5, + methodArgs = list(susieInf = list()), verbose = 1, + ctx = "brain", tid = "ENSG_A", cvFolds = 3L) + expect_named(out, "susieInf") + expect_s4_class(out$susieInf, "FineMappingEntry") +}) + +test_that(".fmPostprocessOne wraps a fit into a FineMappingEntry", { + local_mocked_bindings( + postprocessFinemappingFits = function(...) list(x = 1), + formatFinemappingOutput = function(post, primaryMethod, ...) + list(finemappingEntry = FineMappingEntry("v1", list(), + data.frame(variant_id = "v1", pip = 0.5))), + .package = "pecotmr") + ent <- pecotmr:::.fmPostprocessOne( + fit = list(), method = "susie", + dataX = matrix(0, 2, 1, dimnames = list(NULL, "v1")), dataY = c(1, 2), + coverage = 0.95, secondaryCoverage = 0.7, signalCutoff = 0.1, + minAbsCorr = 0.5) + expect_s4_class(ent, "FineMappingEntry") +}) + +test_that(".fmPostprocessOne errors when output carries no FineMappingEntry", { + local_mocked_bindings( + postprocessFinemappingFits = function(...) list(), + formatFinemappingOutput = function(...) list(finemappingEntry = "nope"), + .package = "pecotmr") + expect_error( + pecotmr:::.fmPostprocessOne(list(), "susie", + matrix(0, 2, 1), c(1, 2), 0.95, 0.7, 0.1, 0.5), + "FineMappingEntry payload") +}) + +test_that(".fmCrossValidate covers per-fold prior, NULL-weights, and no-overlap branches", { + # mvPriorCv supplies a prior for fold "1" only: fold 1 takes the per-fold + # prior (else-branch) and returns weights whose rownames don't overlap the + # test columns (no-common `next`); fold 2 has no prior, so .fmFoldWeights + # returns NULL (NULL-weights `next`). + local_mocked_bindings( + .fmFoldWeights = function(token, Xtr, Ytr, coverage, userArgs, pos, mvPrior) { + if (is.null(mvPrior)) return(NULL) + matrix(0.5, 1L, 1L, dimnames = list("not_a_variant", NULL)) + }, + .package = "pecotmr") + set.seed(3) + X <- matrix(rbinom(40, 2, 0.4), 20, 2, + dimnames = list(paste0("s", 1:20), c("v1", "v2"))) + Y <- matrix(rnorm(40), 20, 2, dimnames = list(rownames(X), c("c1", "c2"))) + cv <- pecotmr:::.fmCrossValidate( + X, Y, tokens = "mvsusie", methodArgs = list(mvsusie = list()), + fold = 2L, coverage = 0.95, verbose = 0, + mvPriorCv = list("1" = list(priorVariance = diag(2)))) + expect_named(cv, c("samplePartition", "prediction", "performance")) +}) + +test_that("fineMappingPipeline(QtlSumStats): susieInf RSS chain (mocked)", { + ss <- .fmp_makeQtlSumStats() + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieRss = .fmp_mockFitRss(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + res <- suppressMessages( + fineMappingPipeline(ss, methods = "susieInf", addSusieInf = FALSE, + verbose = 1)) + expect_s4_class(res, "QtlFineMappingResult") + expect_setequal(as.character(res$method), "susieInf") +}) + +test_that("fineMappingPipeline(GwasSumStats): susieInf RSS chain (mocked)", { + gss <- .fmp_makeGwasSumStats() + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieRss = .fmp_mockFitRss(), + .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + res <- suppressMessages( + fineMappingPipeline(gss, methods = "susieInf", addSusieInf = FALSE, + verbose = 1)) + expect_s4_class(res, "GwasFineMappingResult") + expect_setequal(getMethodNames(res), "susieInf") +}) + +test_that(".fmFoldWeights covers mvPrior residual var, missing rownames, unknown token", { + local_mocked_bindings( + fitMvsusie = function(X, Y, ...) list(vn = colnames(X), R = ncol(as.matrix(Y))), + # weights WITHOUT rownames -> .fmFoldWeights sets them from Xtr (1231) + mvsusieWeights = function(mvsusieFit = NULL, ...) + matrix(0.01, length(mvsusieFit$vn), mvsusieFit$R), + .package = "pecotmr") + X <- matrix(rbinom(40, 2, 0.4), 20, 2, + dimnames = list(paste0("s", 1:20), c("v1", "v2"))) + Y <- matrix(rnorm(40), 20, 2) + # mvPrior carrying a residualVariance exercises line 1227 + W <- pecotmr:::.fmFoldWeights( + "mvsusie", X, Y, coverage = 0.95, userArgs = list(), pos = NULL, + mvPrior = list(priorVariance = diag(2), residualVariance = diag(2))) + expect_equal(rownames(W), colnames(X)) + # an unknown token falls through to NULL (1241) + expect_null(pecotmr:::.fmFoldWeights("bogus", X, Y, 0.95, list(), NULL)) +}) + +test_that(".fmCrossValidate returns NULL for empty tokens", { + X <- matrix(0, 10, 2, dimnames = list(paste0("s", 1:10), c("v1", "v2"))) + expect_null(pecotmr:::.fmCrossValidate( + X, matrix(0, 10, 1), tokens = character(0), methodArgs = list(), fold = 2L)) +}) + +test_that(".fmCrossValidate fills Y rownames and reports per-fold fit failures", { + local_mocked_bindings(.fmFoldWeights = function(...) stop("boom"), + .package = "pecotmr") + X <- matrix(rbinom(40, 2, 0.4), 20, 2, + dimnames = list(paste0("s", 1:20), c("v1", "v2"))) + Y <- matrix(rnorm(20), 20, 1) # no rownames -> filled from X (1258) + expect_message( + cv <- suppressWarnings(pecotmr:::.fmCrossValidate( + X, Y, tokens = "susie", methodArgs = list(susie = list()), + fold = 2L, verbose = 1)), + "CV fold .* failed") # 1291-1294 +}) + +test_that(".fmCrossValidate skips a fold that holds out every sample", { + X <- matrix(rbinom(40, 2, 0.4), 20, 2, + dimnames = list(paste0("s", 1:20), c("v1", "v2"))) + Y <- matrix(rnorm(20), 20, 1, dimnames = list(rownames(X), NULL)) + sp <- data.frame(Sample = rownames(X), Fold = 1L) # single fold = all test -> 1273 + cv <- pecotmr:::.fmCrossValidate( + X, Y, tokens = "susie", methodArgs = list(susie = list()), fold = 1L, + samplePartition = sp, verbose = 0) + expect_true(all(is.na(cv$prediction[["susie_predicted"]]))) +}) + +# --- method-level branches (drive the pipeline methods with mocked fitters) --- + +test_that(".fmAfForX returns NULL when getAf yields nothing", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + local_mocked_bindings(getAf = function(...) NULL, .package = "pecotmr") + X <- matrix(0, 3, 2, dimnames = list(paste0("s", 1:3), c("v1", "v2"))) + expect_null(pecotmr:::.fmAfForX(qd, X, traitId = "ENSG_A")) +}) + +test_that("fineMappingPipeline(QtlDataset): explicit valid contexts arg is honored", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieIndiv = .fmp_mockFitIndiv(), .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + res <- suppressMessages( + fineMappingPipeline(qd, methods = "susie", contexts = "brain", + cisWindow = 1000L, addSusieInf = FALSE)) + expect_s4_class(res, "QtlFineMappingResult") +}) + +test_that("fineMappingPipeline(QtlDataset): region selects traits by rowRanges overlap", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieIndiv = .fmp_mockFitIndiv(), .fmPostprocessOne = .fmp_mockPostprocess(), + .package = "pecotmr") + region <- GenomicRanges::GRanges("chr1", IRanges::IRanges(1L, 3000L)) + res <- suppressMessages( + fineMappingPipeline(qd, methods = "susie", region = region, + addSusieInf = FALSE)) + expect_s4_class(res, "QtlFineMappingResult") +}) + +test_that("fineMappingPipeline(QtlDataset): too few shared samples errors", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + # residualized X carries a sample absent from Y -> < 2 shared samples + .fmResidGeno = function(x, ...) + matrix(0, 1L, 2L, dimnames = list("ghost_sample", c("v1", "v2"))), + .package = "pecotmr") + expect_error( + suppressMessages(fineMappingPipeline(qd, methods = "susie", + cisWindow = 1000L, addSusieInf = FALSE)), + "too few shared samples") +}) + +test_that("fineMappingPipeline(QtlDataset): errors when no tuple produces a result", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmSerScreen = function(...) FALSE, # screen out every block + .package = "pecotmr") + expect_error( + suppressMessages(fineMappingPipeline(qd, methods = "susie", + cisWindow = 1000L, addSusieInf = FALSE)), + "no .*tuples") +}) + +test_that("fineMappingPipeline(QtlDataset): usePCA fine-maps top PCs of a multi-trait context", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieIndiv = .fmp_mockFitIndiv(), .fmPostprocessOne = .fmp_mockPostprocess(), + .fmSerScreen = function(...) TRUE, + .package = "pecotmr") + res <- suppressMessages( + fineMappingPipeline(qd, methods = "susie", usePCA = TRUE, nPCs = 1L, + cisWindow = 1000L, addSusieInf = FALSE)) + expect_s4_class(res, "QtlFineMappingResult") + expect_true(any(grepl("PC", as.character(res$trait)))) +}) + +test_that("fineMappingPipeline(QtlDataset): usePCA skips single-trait contexts", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieIndiv = .fmp_mockFitIndiv(), .fmPostprocessOne = .fmp_mockPostprocess(), + .fmSerScreen = function(...) TRUE, + .package = "pecotmr") + res <- suppressMessages( + fineMappingPipeline(qd, methods = "susie", usePCA = TRUE, nPCs = 1L, + cisWindow = 1000L, addSusieInf = FALSE)) + # single-trait context -> PC loop hits `length(traits) < 2L` next; only the + # univariate susie row survives, no topPC pseudo-trait. + expect_false(any(grepl("PC", as.character(res$trait)))) +}) + +test_that("fineMappingPipeline(MultiStudyQtlDataset): jointSpec with no intersecting scope errors", { + mt <- .fmp_makeMultiStudy() + # The joint engine yields nothing for this scope -> the no-joint-fits stop. + local_mocked_bindings(.fmDispatchJointSpecsMultiStudy = function(...) NULL, + .package = "pecotmr") + expect_error( + suppressMessages(fineMappingPipeline(mt, methods = "mvsusie", + jointSpecification = "context")), + "no joint fits produced") +}) + +# --- usePCA sub-branches. Use methods="mvsusie" so the univariate dispatch is +# skipped (no 1531 stop / SER entanglement); the PCA path still runs susie, and +# the mvsusie joint dispatch is mocked to keep a non-empty result (no 1647). +.fmp_jr <- function() + QtlFineMappingResult(study = "study1", context = "brain", trait = "ENSG_A", + method = "mvsusie", + entry = list(FineMappingEntry("v1", list(), + data.frame(variant_id = "v1", pip = 0.9)))) + +test_that("fineMappingPipeline(QtlDataset): usePCA skips a context whose PCA yields no scores", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieIndiv = .fmp_mockFitIndiv(), .fmPostprocessOne = .fmp_mockPostprocess(), + .fmTopPcScores = function(...) NULL, # 1577 next + .package = "pecotmr") + res <- suppressMessages(fineMappingPipeline(qd, methods = "susie", usePCA = TRUE, + nPCs = 1L, cisWindow = 1000L, addSusieInf = FALSE)) + expect_false(any(grepl("PC", as.character(res$trait)))) +}) + +test_that("fineMappingPipeline(QtlDataset): usePCA reuses a cached PC entry", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + cachedFMR <- QtlFineMappingResult(study = "study1", context = "brain", + trait = "topPC1", method = "susie", + entry = list(FineMappingEntry("v1", list(), + data.frame(variant_id = "v1", pip = 0.9)))) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieIndiv = .fmp_mockFitIndiv(), .fmPostprocessOne = .fmp_mockPostprocess(), + .fmTopPcScores = function(Y, nPCs) matrix(rnorm(nrow(Y)), nrow(Y), 1L, + dimnames = list(rownames(Y), "topPC1")), + .package = "pecotmr") + res <- suppressMessages(fineMappingPipeline(qd, methods = "susie", usePCA = TRUE, + nPCs = 1L, cisWindow = 1000L, addSusieInf = FALSE, fineMappingResult = cachedFMR)) + expect_true(any(as.character(res$trait) == "topPC1")) # 1584 cache hit +}) + +test_that("fineMappingPipeline(QtlDataset): usePCA + region uses the region genotype block", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmFitSusieIndiv = .fmp_mockFitIndiv(), .fmPostprocessOne = .fmp_mockPostprocess(), + .fmSerScreen = function(...) TRUE, + .fmDispatchJointSpecsQtlDataset = function(...) NULL, + .package = "pecotmr") + region <- GenomicRanges::GRanges("chr1", IRanges::IRanges(1L, 3000L)) + res <- suppressMessages(fineMappingPipeline(qd, methods = "mvsusie", usePCA = TRUE, + nPCs = 1L, region = region)) + expect_true(any(grepl("PC", as.character(res$trait)))) # 1591-1592 region block +}) + +test_that("fineMappingPipeline(QtlDataset): usePCA skips a PC block with too few shared samples", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmResidGeno = function(x, ...) matrix(0, 1L, 2L, + dimnames = list("ghost_sample", c("v1", "v2"))), # PC common < 2 -> 1595 + .fmDispatchJointSpecsQtlDataset = function(...) .fmp_jr(), + .package = "pecotmr") + res <- suppressMessages(fineMappingPipeline(qd, methods = "mvsusie", usePCA = TRUE, + nPCs = 1L, cisWindow = 1000L)) + expect_false(any(grepl("PC", as.character(res$trait)))) # 1595 + 1607 +}) + +test_that("fineMappingPipeline(QtlDataset): usePCA skips a PC block screened out by SER", { + qd <- .fmp_makeQtlDataset(contexts = "brain", traits = c("ENSG_A", "ENSG_B")) + local_mocked_bindings( + extractBlockGenotypes = .fmp_mockExtractor(), + .fmSerScreen = function(...) FALSE, # PC screened -> 1597 + .fmDispatchJointSpecsQtlDataset = function(...) .fmp_jr(), + .package = "pecotmr") + res <- suppressMessages(fineMappingPipeline(qd, methods = "mvsusie", usePCA = TRUE, + nPCs = 1L, cisWindow = 1000L)) + expect_false(any(grepl("PC", as.character(res$trait)))) # 1597 + 1607 +}) diff --git a/tests/testthat/test_fineMappingWrappers.R b/tests/testthat/test_fineMappingWrappers.R index 7c3e6560..08e1f84e 100644 --- a/tests/testthat/test_fineMappingWrappers.R +++ b/tests/testthat/test_fineMappingWrappers.R @@ -629,6 +629,34 @@ test_that("buildTopLoci returns the exact 22-column schema in order with stable expect_true(is.integer(out$grange_end)) }) +test_that("buildTopLoci: conditionIdx slices the 3-D mvsusie posterior per condition", { + set.seed(1); L <- 2L; p <- 3L; R <- 2L + alpha <- matrix(c(0.7, 0.2, 0.1, 0.6, 0.2, 0.2), nrow = L, ncol = p) + mu <- array(rnorm(L * p * R), dim = c(L, p, R)) + mu2 <- mu^2 + 0.1 + fit <- list(alpha = alpha, mu = mu, mu2 = mu2, + pip = 1 - apply(1 - alpha, 2, prod)) + vids <- c("1:100:A:G", "1:200:C:T", "1:300:G:A") + cst <- list(list(sets = list(cs = list())), + list(sets = list(cs = list())), + list(sets = list(cs = list()))) + attr(cst, "coverage") <- c(0.95, 0.70, 0.50) + tl1 <- buildTopLoci(fit, cst, variantNames = vids, method = "mvsusie", + conditionIdx = 1L) + tl2 <- buildTopLoci(fit, cst, variantNames = vids, method = "mvsusie", + conditionIdx = 2L) + tl0 <- buildTopLoci(fit, cst, variantNames = vids, method = "mvsusie") + # Each conditionIdx yields THAT condition's posterior (colSums(alpha*mu[,,r])). + expect_equal(tl1$posterior_mean, colSums(alpha * mu[, , 1])) + expect_equal(tl2$posterior_mean, colSums(alpha * mu[, , 2])) + expect_false(isTRUE(all.equal(tl1$posterior_mean, tl2$posterior_mean))) + expect_true(all(is.finite(tl1$posterior_sd))) + # PIP is shared across conditions (mvSuSiE inclusion is joint). + expect_equal(tl1$pip, tl2$pip) + # A 3-D fit without a conditionIdx leaves the per-variant posterior NA. + expect_true(all(is.na(tl0$posterior_mean))) +}) + test_that("buildTopLoci emits 22 columns in the fixed order on a non-empty fit", { variant_ids <- c("chr1:100:A:G", "chr1:200:C:T") inp <- .fake_fit_and_cs(variant_ids, @@ -1120,3 +1148,779 @@ test_that("fsusieGetCs creates susie-like sets", { # cs_index should identify which effects had credible sets expect_length(result$cs_index, 2) }) + + +# ============================================================================= +# APPENDED COVERAGE TESTS +# Pure / mostly-pure helpers, S3 post-processing methods, two-stage fit, and +# the thin mvSuSiE / fSuSiE fit wrappers. Internal helpers are called via +# pecotmr:::; exported functions are called bare to match the file's style. +# ============================================================================= +context("fineMappingWrappers coverage") + +# ---- formatPipColumn / resolvePipColumn ---- +test_that("formatPipColumn prefixes the method", { + expect_equal(pecotmr:::formatPipColumn("susie"), "pip_susie") + expect_equal(pecotmr:::formatPipColumn("susieInf"), "pip_susieInf") +}) + +test_that("resolvePipColumn covers NULL/empty/method/pip/single/multi branches", { + expect_null(pecotmr:::resolvePipColumn(NULL)) + expect_null(pecotmr:::resolvePipColumn(data.frame(pip = numeric(0)))) + # method-specific column present -> returned directly + expect_equal(pecotmr:::resolvePipColumn(data.frame(pip_susie = 0.1, pip_x = 0.2), + method = "susie"), "pip_susie") + # method given but absent -> fall through to "pip" + expect_equal(pecotmr:::resolvePipColumn(data.frame(pip = 0.1, pip_x = 0.2), + method = "susie"), "pip") + # plain "pip" present + expect_equal(pecotmr:::resolvePipColumn(data.frame(pip = 0.1, pip_susie = 0.2)), "pip") + # single pip_ column, no "pip" + expect_equal(pecotmr:::resolvePipColumn(data.frame(pip_susie = 0.1)), "pip_susie") + # multiple pip_ columns, no "pip", no method -> NULL (ambiguous) + expect_null(pecotmr:::resolvePipColumn(data.frame(pip_a = 0.1, pip_b = 0.2))) +}) + +# ---- formatCsColumn / legacy column translation ---- +test_that("formatCsColumn formats integer and fractional coverage, errors on non-numeric", { + expect_equal(pecotmr:::formatCsColumn(0.95, "susie"), "CS_95_susie") + expect_equal(pecotmr:::formatCsColumn(0.7, "susie"), "CS_70_susie") + expect_equal(pecotmr:::formatCsColumn(0.999, "susieInf"), "CS_99_9_susieInf") + expect_error(pecotmr:::formatCsColumn(NA, "susie"), "coverage must be numeric") +}) + +test_that(".translateLegacyCsColumnName converts cs_coverage_* and passes others through", { + expect_null(pecotmr:::.translateLegacyCsColumnName(NULL)) + expect_equal( + pecotmr:::.translateLegacyCsColumnName(c("cs_coverage_0.95", "variant_id", "cs_coverage_0.7")), + c("CS_95_susie", "variant_id", "CS_70_susie") + ) +}) + +test_that(".translateLegacyTopLociCsColumns returns non-data.frame inputs unchanged", { + x <- list(a = 1) + expect_identical(pecotmr:::.translateLegacyTopLociCsColumns(x), x) + expect_null(pecotmr:::.translateLegacyTopLociCsColumns(NULL)) +}) + +# ---- .camelToSnakeMethod ---- +test_that(".camelToSnakeMethod handles NULL, empty, and vectors of method ids", { + expect_null(pecotmr:::.camelToSnakeMethod(NULL)) + expect_equal(pecotmr:::.camelToSnakeMethod(character(0)), character(0)) + expect_equal( + pecotmr:::.camelToSnakeMethod(c("susieInfRss", "mvsusie", "susie", "susieAsh", "singleEffect")), + c("susie_inf_rss", "mvsusie", "susie", "susie_ash", "single_effect") + ) +}) + +# ---- .setFinemappingFitClass ---- +test_that(".setFinemappingFitClass assigns method classes and handles NULL/unknown", { + expect_null(pecotmr:::.setFinemappingFitClass(NULL, "susie")) + expect_true("susiF" %in% class(pecotmr:::.setFinemappingFitClass(list(a = 1), "fsusie"))) + expect_true("mvsusie" %in% class(pecotmr:::.setFinemappingFitClass(list(a = 1), "mvsusie"))) + expect_true("susieRss" %in% class(pecotmr:::.setFinemappingFitClass(list(a = 1), "singleEffect"))) + expect_true("susieRss" %in% + class(pecotmr:::.setFinemappingFitClass(list(a = 1), "bayesianConditionalRegression"))) + # unknown method -> class unchanged + obj <- structure(list(a = 1), class = "foo") + expect_equal(class(pecotmr:::.setFinemappingFitClass(obj, "weird")), "foo") +}) + +# ---- prepareSusieFromInfArgs ---- +test_that("prepareSusieFromInfArgs sets none-branch defaults", { + fit <- list(V = rep(1, 5)) + a <- pecotmr:::prepareSusieFromInfArgs(list(), fit, refineDefault = TRUE) + expect_true(a$refine) + expect_equal(a$unmappable_effects, "none") + expect_identical(a$model_init, fit) + expect_null(a$convergence_method) +}) + +test_that("prepareSusieFromInfArgs ash branch sets convergence, caps L_greedy, keeps preset refine", { + fit <- list(V = rep(1, 5)) + a <- pecotmr:::prepareSusieFromInfArgs(list(L = 3, L_greedy = 10), fit, + refineDefault = TRUE, unmappableEffects = "ash") + expect_equal(a$convergence_method, "pip") + expect_equal(a$L_greedy, 3) # min(length(V) = 5, L = 3) + expect_equal(a$unmappable_effects, "ash") + # a preset refine is not overwritten by refineDefault + b <- pecotmr:::prepareSusieFromInfArgs(list(refine = FALSE), fit, refineDefault = TRUE) + expect_false(b$refine) +}) + +# ---- .asEffectMatrix / .asLbfMatrix ---- +test_that(".asEffectMatrix handles NULL, list, matrix, and data.frame", { + expect_equal(dim(pecotmr:::.asEffectMatrix(NULL)), c(0L, 0L)) + expect_equal(pecotmr:::.asEffectMatrix(list(c(1, 2), c(3, 4))), + matrix(c(1, 2, 3, 4), nrow = 2, byrow = TRUE)) + df_out <- pecotmr:::.asEffectMatrix(data.frame(a = 1:2, b = 3:4)) + expect_true(is.matrix(df_out)) + expect_equal(dim(df_out), c(2L, 2L)) + m <- matrix(1:6, 2, 3) + expect_equal(pecotmr:::.asEffectMatrix(m), m) +}) + +test_that(".asLbfMatrix prefers lbf_variable, falls back to lBF, else NULL", { + expect_equal(pecotmr:::.asLbfMatrix(list(lbf_variable = matrix(1, 2, 2))), matrix(1, 2, 2)) + expect_equal(pecotmr:::.asLbfMatrix(list(lBF = matrix(2, 2, 2))), matrix(2, 2, 2)) + expect_null(pecotmr:::.asLbfMatrix(list(x = 1))) +}) + +# ---- .parseGrange ---- +test_that(".parseGrange returns NA for NULL/empty/invalid and parses valid regions", { + expect_equal(pecotmr:::.parseGrange(NULL), c(start = NA_integer_, end = NA_integer_)) + expect_equal(pecotmr:::.parseGrange(""), c(start = NA_integer_, end = NA_integer_)) + expect_equal(pecotmr:::.parseGrange("not_a_region"), c(start = NA_integer_, end = NA_integer_)) + expect_equal(pecotmr:::.parseGrange("chr10:10823338-14348298"), + c(start = 10823338L, end = 14348298L)) +}) + +# ---- selectEffects ---- +test_that("selectEffects returns integer(0) for empty alpha, V-filtered or all effects", { + expect_equal(pecotmr:::selectEffects(list(alpha = NULL)), integer(0)) + expect_equal(pecotmr:::selectEffects(list(alpha = matrix(0.2, 3, 4), V = c(1, 1e-20, 2))), + c(1L, 3L)) + expect_equal(pecotmr:::selectEffects(list(alpha = matrix(0.2, 3, 4))), 1:3) +}) + +# ---- .csPurityVec ---- +test_that(".csPurityVec uses purity, falls back to cs_corr, else NA", { + expect_equal( + pecotmr:::.csPurityVec(list(sets = list(purity = data.frame(min.abs.corr = c(0.8, 0.9)), + cs = list(1, 2)))), + c(0.8, 0.9)) + m1 <- matrix(c(1, 0.7, 0.7, 1), 2) + m2 <- matrix(1, 1, 1) + expect_equal( + pecotmr:::.csPurityVec(list(sets = list(cs = list(1, 2, 3)), + cs_corr = list(m1, m2, NULL))), + c(0.7, 1, NA)) + expect_equal(pecotmr:::.csPurityVec(list(sets = list(cs = list(1, 2)))), + c(NA_real_, NA_real_)) +}) + +# ---- .translateSusiePurity ---- +test_that(".translateSusiePurity renames df and matrix columns, leaves others alone", { + expect_null(pecotmr:::.translateSusiePurity(NULL)) + df <- data.frame(min.abs.corr = 1, mean.abs.corr = 2, median.abs.corr = 3, other = 4) + expect_equal(names(pecotmr:::.translateSusiePurity(df)), + c("minAbsCorr", "meanAbsCorr", "medianAbsCorr", "other")) + mm <- matrix(1:6, 2, 3) + colnames(mm) <- c("min.abs.corr", "mean.abs.corr", "median.abs.corr") + expect_equal(colnames(pecotmr:::.translateSusiePurity(mm)), + c("minAbsCorr", "meanAbsCorr", "medianAbsCorr")) + mm2 <- matrix(1:6, 2, 3) + expect_null(colnames(pecotmr:::.translateSusiePurity(mm2))) +}) + +# ---- .topLociForS4Slot ---- +test_that(".topLociForS4Slot returns an empty frame for NULL/0-row input", { + e <- pecotmr:::.topLociForS4Slot(NULL) + expect_equal(names(e), c("variant_id", "method")) + expect_equal(nrow(e), 0L) + expect_equal(nrow(pecotmr:::.topLociForS4Slot(data.frame(a = integer(0)))), 0L) +}) + +test_that(".topLociForS4Slot derives variant_id from variant and integer cs from cs_95", { + tl <- data.frame(variant = c("v1", "v2", "v3", "v4"), + cs_95 = c("susie_1", "susie_0", "susie_2", NA), + stringsAsFactors = FALSE) + r <- pecotmr:::.topLociForS4Slot(tl) + expect_equal(r$variant_id, c("v1", "v2", "v3", "v4")) + expect_equal(r$cs, c(1L, 0L, 2L, 0L)) + # a non-numeric cs_95 tail collapses to 0L + expect_equal(pecotmr:::.topLociForS4Slot( + data.frame(variant_id = "v1", cs_95 = "susie_x", stringsAsFactors = FALSE))$cs, 0L) +}) + +# ---- extractVariantNames ---- +test_that("extractVariantNames reads pip names, then alpha colnames, then a fallback", { + expect_equal( + pecotmr:::extractVariantNames(list(pip = setNames(c(0.1, 0.2), + c("chr1:100:A:G", "chr1:200:C:T")))), + c("chr1:100:A:G", "chr1:200:C:T")) + expect_equal( + pecotmr:::extractVariantNames(list( + pip = c(0.1, 0.2), + alpha = matrix(0, 1, 2, dimnames = list(NULL, c("chr1:100:A:G", "chr1:200:C:T"))))), + c("chr1:100:A:G", "chr1:200:C:T")) + r <- pecotmr:::extractVariantNames(list(pip = c(0.1, 0.2, 0.3))) + expect_length(r, 3) + expect_true(is.character(r)) +}) + +# ---- extractSumstats ---- +test_that("extractSumstats returns NULL / passthrough across non-regression branches", { + expect_null(pecotmr:::extractSumstats(list(), NULL, NULL)) + expect_equal(pecotmr:::extractSumstats(list(), NULL, list(z = c(1, 2)), method = "susieRss"), + list(z = c(1, 2))) + expect_equal(pecotmr:::extractSumstats(list(), NULL, + list(betahat = c(1, 2), sebetahat = c(0.1, 0.2))), + list(betahat = c(1, 2), sebetahat = c(0.1, 0.2))) + expect_null(pecotmr:::extractSumstats(list(), NULL, c(1, 2, 3))) # dataX NULL + expect_null(pecotmr:::extractSumstats(list(), matrix(0, 3, 2), matrix(0, 3, 2))) # multi-col dataY +}) + +test_that("extractSumstats runs univariate regression and applies x/y scalars", { + skip_if_not_installed("susieR") + set.seed(1) + X <- matrix(rnorm(60), 20, 3) + colnames(X) <- c("chr1:1:A:G", "chr1:2:A:G", "chr1:3:A:G") + y <- X[, 1] * 2 + rnorm(20) + s1 <- pecotmr:::extractSumstats(list(), X, y) + expect_named(s1, c("betahat", "sebetahat")) + s2 <- pecotmr:::extractSumstats(list(), X, y, yScalar = 2, xScalar = 1) + expect_equal(s2$betahat, s1$betahat * 2) + expect_equal(s2$sebetahat, s1$sebetahat * 2) +}) + +# ---- computeCsTable / computeCsTables ---- +test_that("computeCsTable fsusie branch returns empty sets and NULL cs_corr when no CS", { + ct <- pecotmr:::computeCsTable( + list(cs = list(), pip = setNames(c(0.1, 0.2), c("a", "b"))), + matrix(0, 5, 2), coverage = 0.95, csInput = "fsusie") + expect_equal(names(ct), c("sets", "cs_corr", "pip")) + expect_null(ct$cs_corr) + expect_length(ct$sets$cs, 0) +}) + +test_that("computeCsTable X and Xcorr branches return sets/pip/cs_corr", { + skip_if_not_installed("susieR") + d <- .make_univariate_data(seed = 7, n = 200, p = 8, effect_idx = c(2)) + fit <- susieR::susie(d$X, d$y, L = 4) + ctx <- pecotmr:::computeCsTable(fit, d$X, coverage = 0.95, csInput = "X") + expect_true(all(c("sets", "pip", "cs_corr") %in% names(ctx))) + ctc <- pecotmr:::computeCsTable(fit, cor(d$X), coverage = 0.95, csInput = "Xcorr") + expect_true(all(c("sets", "pip", "cs_corr") %in% names(ctc))) +}) + +test_that("computeCsTables names tables, sets coverage attr, defaults coverage from fit", { + skip_if_not_installed("susieR") + d <- .make_univariate_data(seed = 7, n = 200, p = 8, effect_idx = c(2)) + fit <- susieR::susie(d$X, d$y, L = 4) + cts <- pecotmr:::computeCsTables(fit, d$X, coverage = 0.95, + secondaryCoverage = c(0.7, 0.5), + method = "susie", csInput = "X") + expect_equal(attr(cts, "coverage"), c(0.95, 0.7, 0.5)) + expect_equal(names(cts), c("CS_95_susie", "CS_70_susie", "CS_50_susie")) + # coverage = NULL falls back to fit$sets$requested_coverage + fit2 <- fit + fit2$sets$requested_coverage <- 0.9 + cts2 <- pecotmr:::computeCsTables(fit2, d$X, coverage = NULL, + secondaryCoverage = 0.5, method = "susie", csInput = "X") + expect_equal(attr(cts2, "coverage"), c(0.9, 0.5)) +}) + +# ---- trimFinemappingFit ---- +test_that("trimFinemappingFit (susie) trims to selected effects and keeps scalar slots", { + fit <- list( + pip = setNames(c(0.5, 0.3), c("chr1:100:A:G", "chr1:200:C:T")), + alpha = matrix(c(0.5, 0.5, 0.3, 0.7), nrow = 2, byrow = TRUE), + mu = matrix(0.1, 2, 2), mu2 = matrix(0.02, 2, 2), + lbf_variable = matrix(0, 2, 2), + V = c(1, 1e-20), niter = 5, + theta = c(1, 2), omega_weights = c(0.5, 0.5), X_column_scale_factors = c(1, 1)) + eff <- pecotmr:::selectEffects(fit) # only effect 1 survives V filtering + csTables <- list( + list(sets = list(cs = list(L1 = c(1, 2)), purity = data.frame(min.abs.corr = 0.8)), + cs_corr = list(matrix(c(1, 0.8, 0.8, 1), 2)), pip = fit$pip), + list(sets = list(cs = list()), cs_corr = NULL, pip = fit$pip)) + tr <- pecotmr:::trimFinemappingFit(fit, eff, "susie", csTables) + expect_equal(nrow(tr$alpha), 1L) + expect_equal(nrow(tr$mu), 1L) + expect_equal(nrow(tr$mu2), 1L) + expect_equal(nrow(tr$lbf_variable), 1L) + expect_equal(tr$V, 1) + expect_equal(tr$theta, c(1, 2)) + expect_equal(tr$omega_weights, c(0.5, 0.5)) + expect_equal(tr$X_column_scale_factors, c(1, 1)) + expect_equal(tr$niter, 5) + expect_equal(tr$max_L, 2L) + expect_equal(tr$n_effects, 2L) + expect_equal(class(tr), "susie") + # secondary tables drop the pip element + expect_length(tr$sets_secondary, 1L) + expect_false("pip" %in% names(tr$sets_secondary[[1]])) +}) + +test_that("trimFinemappingFit (fsusie) retains coef and sets the fsusie/susie class", { + fit <- list(pip = setNames(c(0.4, 0.6), c("chr1:100:A:G", "chr1:200:C:T")), + alpha = matrix(0.5, 2, 2), coef = matrix(1, 2, 3)) + csTables <- list(list(sets = list(cs = list()), cs_corr = NULL, pip = fit$pip)) + tr <- pecotmr:::trimFinemappingFit(fit, c(1, 2), "fsusie", csTables) + expect_equal(tr$coef, matrix(1, 2, 3)) + expect_equal(class(tr), c("fsusie", "susie")) + expect_null(tr$V) +}) + +test_that("trimFinemappingFit (mvsusie) slices 3-D mu/mu2/mu2_diag/clfsr and stores coef", { + skip_if_not_installed("mvsusieR") + L <- 2L; p <- 3L; R <- 2L + fit <- list( + pip = setNames(seq(0.1, 0.3, length.out = p), + c("chr1:1:A:G", "chr1:2:A:G", "chr1:3:A:G")), + alpha = matrix(1 / p, L, p), + mu = array(rnorm(L * p * R), dim = c(L, p, R)), + mu2 = array(0.1, dim = c(L, p, R)), + mu2_diag = array(0.2, dim = c(L, p, R)), + V = c(1, 1e-20), + conditional_lfsr = array(0.5, dim = c(L, p, R)), + niter = 3) + eff <- pecotmr:::selectEffects(fit) # effect 1 + csTables <- list(list(sets = list(cs = list()), cs_corr = NULL, pip = fit$pip)) + fake_coef <- matrix(rnorm((p + 1) * R), nrow = p + 1, ncol = R) + local_mocked_bindings(coef.mvsusie = function(...) fake_coef, .package = "mvsusieR") + tr <- pecotmr:::trimFinemappingFit(fit, eff, "mvsusie", csTables) + expect_equal(dim(tr$mu), c(1L, p, R)) + expect_equal(dim(tr$mu2), c(1L, p, R)) + expect_equal(dim(tr$mu2_diag), c(1L, p, R)) + expect_equal(dim(tr$clfsr), c(1L, p, R)) + expect_equal(tr$coef, fake_coef[-1, , drop = FALSE]) + expect_equal(class(tr), c("mvsusie", "susie")) +}) + +# ---- postprocessFinemappingFit S3 methods ---- +test_that("postprocessFinemappingFit.susiF post-processes an fsusie fit (empty-CS path)", { + fit <- pecotmr:::.setFinemappingFitClass(list( + pip = setNames(c(0.5, 0.3), c("chr1:100:A:G", "chr1:200:C:T")), + alpha = matrix(0.5, 1, 2), mu = matrix(0.1, 1, 2), mu2 = matrix(0.02, 1, 2), + cs = list()), "fsusie") + expect_true("susiF" %in% class(fit)) + res <- pecotmr:::postprocessFinemappingFit( + fit, method = "fsusie", + dataX = matrix(0, 5, 2, dimnames = list(NULL, c("chr1:100:A:G", "chr1:200:C:T"))), + dataY = NULL, coverage = 0.95, + otherQuantities = list(condition_id = "ctx")) + expect_equal(res$method, "fsusie") + expect_equal(unique(res$top_loci$method), "fsusie") + expect_equal(res$otherQuantities, list(condition_id = "ctx")) + expect_equal(class(getSusieFit(res$finemappingEntry)), c("fsusie", "susie")) +}) + +test_that("postprocessFinemappingFit.susieInf labels credible sets with the susie_inf_ prefix", { + skip_if_not_installed("susieR") + d <- .make_univariate_data(seed = 7, n = 200, p = 8, effect_idx = c(2)) + fits <- fitSusieInfThenSusie(d$X, d$y) + res <- pecotmr:::postprocessFinemappingFit(fits$susieInf, method = "susieInf", + dataX = d$X, dataY = d$y, coverage = 0.95) + expect_equal(res$method, "susieInf") + expect_gt(nrow(res$top_loci), 0L) + expect_equal(unique(res$top_loci$method), "susieInf") + expect_true(all(grepl("^susie_inf_\\d+$", res$top_loci$cs_95))) +}) + +test_that(".postprocessFinemappingFitCommon trim=FALSE stores the untrimmed fit", { + fit <- pecotmr:::.setFinemappingFitClass(list( + pip = setNames(c(0.5, 0.3), c("chr1:100:A:G", "chr1:200:C:T")), + alpha = matrix(0.5, 1, 2), mu = matrix(0.1, 1, 2), mu2 = matrix(0.02, 1, 2), + cs = list(), extra_slot = "kept"), "fsusie") + res <- pecotmr:::postprocessFinemappingFit( + fit, method = "fsusie", trim = FALSE, + dataX = matrix(0, 5, 2, dimnames = list(NULL, c("chr1:100:A:G", "chr1:200:C:T"))), + dataY = NULL, coverage = 0.95) + expect_equal(getSusieFit(res$finemappingEntry)$extra_slot, "kept") +}) + +# ---- postprocessFinemappingFits / formatFinemappingOutput error branches ---- +test_that("postprocessFinemappingFits errors on empty or unnamed fit lists", { + expect_error(postprocessFinemappingFits(list(), dataX = matrix(0, 2, 2)), + "At least one fine-mapping fit") + expect_error(postprocessFinemappingFits(list(NULL), dataX = matrix(0, 2, 2)), + "At least one fine-mapping fit") + expect_error(postprocessFinemappingFits(list(matrix(0, 1, 1)), dataX = matrix(0, 2, 2)), + "named list") +}) + +test_that("formatFinemappingOutput errors when primaryMethod is absent", { + post <- list(finemappingResults = list(susie = list(method = "susie")), + top_loci = pecotmr:::.emptyTopLoci()) + expect_error(formatFinemappingOutput(post, "nonexistent"), "primaryMethod was not found") +}) + +# ---- buildTopLoci: marginal z/p passthrough ---- +test_that("buildTopLoci passes through marginal z and p supplied in sumstats", { + variant_ids <- c("chr1:100:A:G", "chr1:200:C:T") + inp <- .fake_fit_and_cs(variant_ids, + cs_at_cov = list("0.95" = list(c(1L, 2L)), + "0.7" = list(c(1L, 2L)), + "0.5" = list(c(1L, 2L))), + pip = c(0.9, 0.9)) + out <- .runBuildTopLoci(inp, method = "susie", + sumstats = list(z = c(2.5, -1.5), p = c(0.01, 0.13))) + expect_equal(out$marginal_z, c(2.5, -1.5)) + expect_equal(out$marginal_p, c(0.01, 0.13)) +}) + +# ---- lbfToAlpha single-column matrix branch ---- +test_that("lbfToAlpha handles a single-column matrix", { + lbf <- matrix(c(1, 2, 3), ncol = 1) + colnames(lbf) <- "v1" + res <- pecotmr:::lbfToAlpha(lbf) + expect_equal(dim(res), c(3L, 1L)) + expect_true(all(res == 1)) # single column -> the only entry carries all weight +}) + +# ---- fitSusieInfThenSusie ---- +test_that("fitSusieInfThenSusie returns classed susie and susieInf fits", { + skip_if_not_installed("susieR") + d <- .make_univariate_data(seed = 3, n = 200, p = 8, effect_idx = c(4)) + fits <- fitSusieInfThenSusie(d$X, d$y) + expect_named(fits, c("susie", "susieInf")) + expect_true("susie" %in% class(fits$susie)) + expect_true("susieInf" %in% class(fits$susieInf)) + expect_length(fits$susie$pip, ncol(d$X)) +}) + +test_that("fitSusieInfThenSusie reuses fittedModels without refitting", { + skip_if_not_installed("susieR") + d <- .make_univariate_data(seed = 3, n = 200, p = 8, effect_idx = c(4)) + fits <- fitSusieInfThenSusie(d$X, d$y) + again <- fitSusieInfThenSusie(d$X, d$y, + fittedModels = list(susie = fits$susie, susieInf = fits$susieInf)) + expect_equal(unname(again$susie$pip), unname(fits$susie$pip)) + expect_equal(unname(again$susieInf$pip), unname(fits$susieInf$pip)) +}) + +# ---- thin fit wrappers (mvSuSiE / fSuSiE) ---- +test_that("fitMvsusie forwards arguments to mvsusieR::mvsusie", { + skip_if_not_installed("mvsusieR") + local_mocked_bindings( + mvsusie = function(X, Y, prior_variance, coverage, ...) list(tag = "mv", coverage = coverage), + .package = "mvsusieR") + r <- fitMvsusie(matrix(0, 4, 2), matrix(0, 4, 2), prior_variance = 1, coverage = 0.9) + expect_equal(r$tag, "mv") + expect_equal(r$coverage, 0.9) +}) + +test_that("fitMvsusieRss forwards arguments to mvsusieR::mvsusie_rss", { + skip_if_not_installed("mvsusieR") + local_mocked_bindings( + mvsusie_rss = function(Z, R, N, prior_variance, coverage, ...) list(tag = "rss", N = N), + .package = "mvsusieR") + r <- fitMvsusieRss(matrix(0, 2, 1), diag(2), N = 100, prior_variance = 1) + expect_equal(r$tag, "rss") + expect_equal(r$N, 100) +}) + +test_that("fitFsusie forwards arguments to fsusieR::susiF", { + skip_if_not_installed("fsusieR") + local_mocked_bindings( + susiF = function(X, Y, pos, ...) list(tag = "fs", npos = length(pos)), + .package = "fsusieR") + r <- fitFsusie(matrix(0, 4, 3), matrix(0, 4, 2), pos = 1:2) + expect_equal(r$tag, "fs") + expect_equal(r$npos, 2) +}) + +# =========================================================================== +# SuSiE / mvSuSiE / fSuSiE weight-extractor tests (relocated to match the source move) +# =========================================================================== + +test_that(".susie_rss_extract_weights returns correct-length vector", { + skip_if_not_installed("susieR") + set.seed(42) + p <- 20 + n <- 500 + R <- diag(p) + z <- rnorm(p) + w <- pecotmr:::.susieRssExtractWeights( + fit = NULL, z = z, R = R, n = n, + requiredFields = c("alpha", "mu", "X_column_scale_factors"), + fitArgs = list(L = 5) + ) + expect_equal(length(w), p) + expect_true(all(is.finite(w))) +}) + +test_that("susieRssWeights follows (stat, LD) convention", { + skip_if_not_installed("susieR") + set.seed(42) + p <- 20 + n <- 500 + R <- diag(p) + z <- rnorm(p) + stat <- list(b = z / sqrt(n), cor = z / sqrt(n), z = z, n = rep(n, p)) + w <- susieRssWeights(stat, R, methodArgs = list(L = 5)) + expect_equal(length(w), p) + expect_true(all(is.finite(w))) +}) + +test_that("susieRssWeights retains fit when retainFit = TRUE", { + skip_if_not_installed("susieR") + set.seed(42) + p <- 20 + n <- 500 + R <- diag(p) + z <- rnorm(p) + stat <- list(b = z / sqrt(n), cor = z / sqrt(n), z = z, n = rep(n, p)) + w <- susieRssWeights(stat, R, retainFit = TRUE, methodArgs = list(L = 5)) + expect_false(is.null(attr(w, "fit"))) +}) + +test_that("susieInfRssWeights works", { + skip_if_not_installed("susieR") + set.seed(42) + p <- 20 + n <- 500 + R <- diag(p) + z <- rnorm(p) + stat <- list(b = z / sqrt(n), cor = z / sqrt(n), z = z, n = rep(n, p)) + w <- susieInfRssWeights(stat, R, methodArgs = list(L = 5)) + expect_equal(length(w), p) + expect_true(all(is.finite(w))) +}) + +test_that("mvsusieWeights real fit returns p x K weights or errors on unstable small data", { + skip_if_not_installed("mvsusieR") + m <- .rrwMulti(n = 80, p = 8, K = 2) + res <- tryCatch(suppressMessages(mvsusieWeights(X = m$X, Y = m$Y, L = 5, LGreedy = 2)), + error = function(e) e) + if (inherits(res, "error")) { + # mvSuSiE can be numerically unstable on tiny (X, Y); a clean error is + # acceptable here. The coef-extraction path is covered by the mocked tests + # in test_rrMrmashMvsusie.R. + succeed("mvsusieWeights errored on small data (documented instability)") + } else { + expect_equal(dim(res), c(m$p, m$K)) + expect_true(all(is.finite(res))) + } +}) + +test_that("susieAshRssWeights returns weights of length p", { + skip_if_not_installed("susieR") + f <- .rrwStatLd() + w <- susieAshRssWeights(f$stat, f$LD, methodArgs = list(L = 5)) + expect_length(w, f$p) + expect_true(all(is.finite(w))) +}) + +test_that("mvsusieRssWeights fits mvsusie_rss and returns p x K weights", { + skip_if_not_installed("mvsusieR") + m <- .rrwMulti(n = 80, p = 8, K = 2) + w <- mvsusieRssWeights(m$stat, m$LD, L = 5, LGreedy = 2) + expect_equal(dim(w), c(m$p, m$K)) + expect_true(all(is.finite(w))) +}) + +test_that("mvsusieRssWeights errors on single-context stat$z", { + skip_if_not_installed("mvsusieR") + f <- .rrwStatLd() + oneCol <- list(z = matrix(f$stat$z, ncol = 1), n = f$n) + expect_error(mvsusieRssWeights(oneCol, f$LD), ">= 2 columns") +}) + +# ---- mvsusieWeights ---- +test_that("mvsusieWeights errors when mvsusieR package is not available", { + skip_if(requireNamespace("mvsusieR", quietly = TRUE), + "mvsusieR is installed; skipping missing-package test") + + expect_error( + mvsusieWeights(mvsusieFit = NULL, X = matrix(1, 10, 5), Y = matrix(1, 10, 3)), + "mvsusieR" + ) +}) + +test_that("mvsusieWeights errors when X and Y are NULL and fit is NULL", { + skip_if_not(requireNamespace("mvsusieR", quietly = TRUE), + "mvsusieR not installed") + expect_error(mvsusieWeights(mvsusieFit = NULL, X = NULL, Y = NULL), + "Both X and Y must be provided") +}) + +test_that("mvsusieWeights fits model and returns coefficients when fit is NULL", { + skip_if_not(requireNamespace("mvsusieR", quietly = TRUE), + "mvsusieR not installed") + set.seed(42) + n <- 30 + p <- 5 + R <- 3 + X <- matrix(rnorm(n * p), n, p) + Y <- matrix(rnorm(n * R), n, R) + fake_coef <- matrix(rnorm((p + 1) * R), nrow = p + 1, ncol = R) + captured <- list() + + local_mocked_bindings( + create_mixture_prior = function(...) list(), + mvsusie = function(...) { + captured <<- list(...) + "mock_fit" + }, + coef.mvsusie = function(...) fake_coef, + .package = "mvsusieR" + ) + + result <- expect_message( + mvsusieWeights(X = X, Y = Y, L = 12, LGreedy = 4), + "mvsusieFit is not provided" + ) + # Should return coef without intercept row + expect_equal(dim(result), c(p, R)) + expect_equal(result, fake_coef[-1, ]) + expect_equal(captured$L, 12) + expect_equal(captured$L_greedy, 4) +}) + +test_that("mvsusieWeights returns coefficients from provided fit", { + skip_if_not(requireNamespace("mvsusieR", quietly = TRUE), + "mvsusieR not installed") + p <- 5 + R <- 3 + fake_coef <- matrix(rnorm((p + 1) * R), nrow = p + 1, ncol = R) + + local_mocked_bindings( + coef.mvsusie = function(...) fake_coef, + .package = "mvsusieR" + ) + + result <- mvsusieWeights(mvsusieFit = "precomputed_fit") + expect_equal(dim(result), c(p, R)) + expect_equal(result, fake_coef[-1, ]) +}) + +.fw_makeFsusieFit <- function(seed = 1, n = 150L, p = 24L, J = 16L) { + set.seed(seed) + X <- matrix(rnorm(n * p), n, p, + dimnames = list(paste0("s", seq_len(n)), paste0("v", seq_len(p)))) + b1 <- sin(seq(0, 2 * pi, length.out = J)) + b2 <- cos(seq(0, pi, length.out = J)) + Y <- X[, 3] %o% b1 + X[, 10] %o% b2 + + matrix(rnorm(n * J, sd = 0.3), n, J) + colnames(Y) <- paste0("f", seq_len(J)) + list(X = X, Y = Y, + fit = suppressWarnings(fsusieR::susiF( + X = X, Y = Y, pos = seq_len(J), L = 5, + post_processing = "none", verbose = FALSE))) +} + +test_that("fsusieWeights returns a variants x features matrix with variant rownames", { + skip_if_not_installed("fsusieR") + skip_if_not_installed("wavethresh") + obj <- .fw_makeFsusieFit() + W <- fsusieWeights(fsusieFit = obj$fit, variantIds = colnames(obj$X)) + expect_true(is.matrix(W)) + expect_equal(nrow(W), ncol(obj$X)) + expect_equal(ncol(W), ncol(obj$Y)) + expect_equal(rownames(W), colnames(obj$X)) +}) + +test_that("fsusieWeights matches fsusieR's own out_prep reconstruction (post_processing='none')", { + skip_if_not_installed("fsusieR") + skip_if_not_installed("wavethresh") + obj <- .fw_makeFsusieFit() + fit <- obj$fit + # The alpha-weighted sum over SNPs of the per-SNP feature-domain curves that + # fsusieWeights reconstructs must equal fSuSiE's own fitted_func[[l]] (built + # by out_prep.susiF) for every effect l. + csdX <- as.numeric(fit$csd_X) + perScale <- "mixture_normal_per_scale" %in% class(fsusieR::get_G_prior(fit)) + indxLst <- fsusieR::gen_wavelet_indx(log2(length(fit$outing_grid))) + scaleCols <- if (perScale) indxLst[[length(indxLst)]] + else ncol(as.matrix(fit$fitted_wc[[1L]])) + S <- pecotmr:::.fsusieSynthesisMatrix(fit$n_wac, scaleCols) + maxErr <- 0 + for (l in seq_along(fit$fitted_wc)) { + al <- as.numeric(fit$alpha[[l]]) + contrib <- colSums((al * (1 / csdX) * as.matrix(fit$fitted_wc[[l]])) %*% S) + maxErr <- max(maxErr, max(abs(contrib - as.numeric(fit$fitted_func[[l]])))) + } + expect_lt(maxErr, 1e-8) +}) + +test_that("fsusieWeights concentrates weight on the causal SNPs", { + skip_if_not_installed("fsusieR") + skip_if_not_installed("wavethresh") + obj <- .fw_makeFsusieFit() + W <- fsusieWeights(fsusieFit = obj$fit, variantIds = colnames(obj$X)) + rowNorm <- sqrt(rowSums(W^2)) + top2 <- names(sort(rowNorm, decreasing = TRUE))[1:2] + expect_setequal(top2, c("v3", "v10")) +}) + +test_that("fsusieWeights fast path returns precomputed $coef for a trimmed fit", { + # A trimmed fSuSiE fit drops fitted_wc but keeps the precomputed weight + # matrix in $coef; fsusieWeights returns it without touching wavelet slots. + W0 <- matrix(c(1, 0, 2, 0, 0, 3), nrow = 3, + dimnames = list(c("v1", "v2", "v3"), c("f1", "f2"))) + trimmed <- list(coef = W0, pip = c(0.1, 0.2, 0.7)) + class(trimmed) <- c("fsusie", "susie") + W <- fsusieWeights(fsusieFit = trimmed) + expect_identical(W, W0) +}) + +test_that("fsusieWeights errors without a fit and on an unusable (trimmed, no coef) fit", { + expect_error(fsusieWeights(fsusieFit = NULL), "is required") + bad <- list(pip = c(0.1, 0.9)) # no coef, no fitted_wc + class(bad) <- c("fsusie", "susie") + expect_error(fsusieWeights(fsusieFit = bad), "missing required slot") +}) + +# =========================================================================== +# mergeSusieCs — cross-condition credible-set merging on a QtlFineMappingResult +# (relocated from mashWrapper.R and adapted to consume the S4 result type) +# =========================================================================== + +.msc_entry <- function(vid, pip, cs, csName = "cs_95") { + tl <- data.frame(variant_id = vid, pip = pip, stringsAsFactors = FALSE) + tl[[csName]] <- cs + FineMappingEntry(variantIds = vid, susieFit = list(), topLoci = tl) +} +.msc_fmr <- function(entries, method = "susie") { + n <- length(entries) + QtlFineMappingResult(study = rep("S", n), context = paste0("c", seq_len(n)), + trait = rep("t", n), method = rep(method, n), entry = entries) +} + +test_that("mergeSusieCs: non-overlapping CSs keep distinct per-condition labels", { + fmr <- .msc_fmr(list( + .msc_entry(c("v1", "v2"), c(0.8, 0.6), c("susie_1", "susie_1")), + .msc_entry(c("v3", "v4"), c(0.9, 0.7), c("susie_1", "susie_2")))) + res <- mergeSusieCs(fmr) + expect_equal(res$variant_id, c("v1", "v2", "v3", "v4")) + expect_equal(res$credibleSetNames, c("cs_1_1", "cs_1_1", "cs_2_1", "cs_2_2")) + expect_equal(res$maxPip, c(0.8, 0.6, 0.9, 0.7)) +}) + +test_that("mergeSusieCs: a variant shared across conditions merges their credible sets", { + fmr <- .msc_fmr(list( + .msc_entry(c("v1", "v2", "v3"), c(0.9, 0.5, 0.8), c("susie_1", "susie_0", "susie_2")), + .msc_entry(c("v3", "v4"), c(0.7, 0.6), c("susie_1", "susie_1")))) + res <- mergeSusieCs(fmr) + expect_false("v2" %in% res$variant_id) # susie_0 -> not in a CS + expect_equal(res$credibleSetNames[res$variant_id == "v3"], "cs_1_2,cs_2_1") + expect_equal(res$credibleSetNames[res$variant_id == "v4"], "cs_1_2,cs_2_1") # via shared v3 + expect_equal(res$maxPip[res$variant_id == "v3"], 0.8) + expect_equal(res$medianPip[res$variant_id == "v3"], 0.75) # median(0.8, 0.7) +}) + +test_that("mergeSusieCs: coverage selects the cs_ column", { + fmr <- .msc_fmr(list( + .msc_entry(c("v1", "v2"), c(0.8, 0.6), c("susie_1", "susie_1"), csName = "cs_70"))) + res <- mergeSusieCs(fmr, coverage = 0.70) + expect_equal(res$variant_id, c("v1", "v2")) + expect_equal(res$credibleSetNames, c("cs_1_1", "cs_1_1")) +}) + +test_that("mergeSusieCs: a condition with no usable CS is skipped, valid ones kept", { + valid <- .msc_entry(c("v1", "v2"), c(0.9, 0.8), c("susie_1", "susie_1")) + noCs <- FineMappingEntry(variantIds = "v3", susieFit = list(), + topLoci = data.frame(variant_id = "v3", pip = 0.9)) + res <- mergeSusieCs(.msc_fmr(list(valid, noCs))) + expect_setequal(res$variant_id, c("v1", "v2")) # noCs condition skipped +}) + +test_that("mergeSusieCs: NULL when no condition contributes a credible set", { + expect_null(mergeSusieCs(.msc_fmr(list(.msc_entry("v1", 0.9, "susie_0"))))) # all _0 + noCs <- FineMappingEntry(variantIds = "v1", susieFit = list(), + topLoci = data.frame(variant_id = "v1", pip = 0.9)) + expect_null(mergeSusieCs(.msc_fmr(list(noCs)))) # no cs col +}) + +test_that("mergeSusieCs: single condition with one credible set", { + res <- mergeSusieCs(.msc_fmr(list( + .msc_entry(c("v1", "v2"), c(0.9, 0.8), c("susie_1", "susie_1"))))) + expect_equal(res$credibleSetNames, c("cs_1_1", "cs_1_1")) +}) + +test_that("mergeSusieCs: non-FineMappingResult input errors", { + expect_error(mergeSusieCs(list(1, 2)), "QtlFineMappingResult") +}) diff --git a/tests/testthat/test_genotypeHandle.R b/tests/testthat/test_genotypeHandle.R index 5a318d00..9634cc8d 100644 --- a/tests/testthat/test_genotypeHandle.R +++ b/tests/testthat/test_genotypeHandle.R @@ -394,4 +394,127 @@ test_that("genoMeta sharded handle show() reports the layout", { expect_match(out, "per-chromosome files") }) +# =========================================================================== +# .genotypeHandleFromLdMeta: row-resolution error branches. +# The real getRegionalLdMeta errors earlier for genuinely-uncovered regions +# (see the "region with no covering row" test above), so we mock it to drive +# the post-resolution branches (0 rows, >1 row, unsupported payload ext) +# without crafting whole on-disk meta layouts. +# =========================================================================== + +test_that(".genotypeHandleFromLdMeta errors when no LD-meta row covers the region", { + local_mocked_bindings( + getRegionalLdMeta = function(...) list( + intersections = list(LD_file_paths = character(0), bimFilePaths = NULL)), + .package = "pecotmr") + expect_error( + pecotmr:::.genotypeHandleFromLdMeta("dummy.tsv", "chr1:1-100"), + "no LD-meta row covers") +}) + +test_that(".genotypeHandleFromLdMeta errors when the region spans multiple rows", { + local_mocked_bindings( + getRegionalLdMeta = function(...) list( + intersections = list( + LD_file_paths = c("/tmp/a.bed", "/tmp/b.bed"), bimFilePaths = NULL)), + .package = "pecotmr") + expect_error( + pecotmr:::.genotypeHandleFromLdMeta("dummy.tsv", "chr1:1-100"), + "spans multiple LD-meta") +}) + +test_that(".genotypeHandleFromLdMeta errors on an unsupported payload extension", { + local_mocked_bindings( + getRegionalLdMeta = function(...) list( + intersections = list(LD_file_paths = "/tmp/foo.txt", bimFilePaths = NULL)), + .package = "pecotmr") + expect_error( + pecotmr:::.genotypeHandleFromLdMeta("dummy.tsv", "chr1:1-100"), + "unsupported LD-meta file extension") +}) + +# =========================================================================== +# .parseChromMeta: input-shape validation (no shard reads). +# =========================================================================== + +test_that(".parseChromMeta errors on a meta file with fewer than 2 columns", { + f <- tempfile(fileext = ".tsv") + on.exit(unlink(f), add = TRUE) + writeLines(c("chr", "21", "22"), f) + expect_error(pecotmr:::.parseChromMeta(f), "must have at least 2 columns") +}) + +test_that(".parseChromMeta errors on an unrecognized genoMeta shape", { + # An unnamed multi-element character vector (names forgotten) is neither a + # meta-file path nor a chrom->path map. + expect_error(pecotmr:::.parseChromMeta(c("21", "22")), + "expected a path to a") +}) + +# =========================================================================== +# .genotypeHandleFromChromMeta: empty meta input. +# =========================================================================== + +test_that(".genotypeHandleFromChromMeta errors when the meta has no rows", { + f <- tempfile(fileext = ".tsv") + on.exit(unlink(f), add = TRUE) + writeLines("#chr\tpath", f) # header only -> zero data rows + expect_error(pecotmr:::.genotypeHandleFromChromMeta(f), + "no chromosomes found") +}) + +# =========================================================================== +# .resolveGenotypeShard: explicit-format dispatch, extension auto-detection, +# prefix sidecar probing, and the unresolvable-format error. +# =========================================================================== + +test_that(".resolveGenotypeShard honours explicit plink1 format and .bed extension", { + skip_if_not_installed("snpStats") + h1 <- pecotmr:::.resolveGenotypeShard(plink_prefix, format = "plink1") + expect_s4_class(h1, "GenotypeHandle") + expect_equal(h1@format, "plink1") + h2 <- pecotmr:::.resolveGenotypeShard(paste0(plink_prefix, ".bed")) + expect_equal(h2@format, "plink1") +}) + +test_that(".resolveGenotypeShard honours explicit plink2 format and .pgen extension", { + skip_if_not_installed("pgenlibr") + h1 <- pecotmr:::.resolveGenotypeShard(plink_prefix, format = "plink2") + expect_s4_class(h1, "GenotypeHandle") + expect_equal(h1@format, "plink2") + h2 <- pecotmr:::.resolveGenotypeShard(paste0(plink_prefix, ".pgen")) + expect_equal(h2@format, "plink2") +}) + +test_that(".resolveGenotypeShard dispatches gds via explicit format and .gds extension", { + skip_if_not_installed("SNPRelate") + h1 <- pecotmr:::.resolveGenotypeShard(gds_path, format = "gds") + expect_equal(h1@format, "gds") + h2 <- pecotmr:::.resolveGenotypeShard(gds_path) + expect_equal(h2@format, "gds") +}) + +test_that(".resolveGenotypeShard dispatches vcf by extension", { + skip_if_not_installed("VariantAnnotation") + h <- pecotmr:::.resolveGenotypeShard(vcf_path) + expect_equal(h@format, "vcf") +}) + +test_that(".resolveGenotypeShard probes the .pgen sidecar for an extension-less prefix", { + # Only a .pgen sidecar exists (no .bed) so the prefix-probe routes to + # plink2; mock the reader so we exercise the dispatch, not real IO. + p <- tempfile() + on.exit(unlink(paste0(p, ".pgen")), add = TRUE) + file.create(paste0(p, ".pgen")) + local_mocked_bindings(.makePlink2Handle = function(...) "STUB_PLINK2", + .package = "pecotmr") + expect_identical(pecotmr:::.resolveGenotypeShard(p), "STUB_PLINK2") +}) + +test_that(".resolveGenotypeShard errors when the format cannot be determined", { + # No recognized extension and no .bed/.pgen sidecar to probe. + expect_error(pecotmr:::.resolveGenotypeShard(tempfile()), + "cannot determine genotype format") +}) + diff --git a/tests/testthat/test_genotypeIo.R b/tests/testthat/test_genotypeIo.R index ac1be263..d87dcf13 100644 --- a/tests/testthat/test_genotypeIo.R +++ b/tests/testthat/test_genotypeIo.R @@ -19,13 +19,13 @@ test_that("readBim dummy data works",{ test_that("readFam dummy data works",{ example_path <- "test_data/protocol_example.genotype.bed" - res <- readFam(example_path) + res <- pecotmr:::readFam(example_path) expect_equal(nrow(res), 100) }) test_that("openBed dummy data works",{ example_path <- "test_data/protocol_example.genotype.bed" - res <- openBed(example_path) + res <- pecotmr:::openBed(example_path) expect_equal(res$class, "pgen") }) @@ -741,7 +741,7 @@ test_that("matchVariantsToKeep filters to specified variants", { keep_df <- vi[c(1, 5, 10), c("chrom", "pos", "A2", "A1")] vroom::vroom_write(keep_df, keep_file, delim = "\t") - mask <- matchVariantsToKeep(vi, keep_file) + mask <- pecotmr:::matchVariantsToKeep(vi, keep_file) expect_type(mask, "logical") expect_equal(sum(mask), 3L) expect_true(mask[1]) @@ -761,7 +761,7 @@ test_that("matchVariantsToKeep returns all FALSE for non-matching variants", { A2 = c("A", "C"), A1 = c("T", "G")) vroom::vroom_write(keep_df, keep_file, delim = "\t") - mask <- matchVariantsToKeep(vi, keep_file) + mask <- pecotmr:::matchVariantsToKeep(vi, keep_file) expect_true(all(!mask)) }) @@ -776,7 +776,7 @@ test_that("readVariantMetadata reads 6-column bim file", { "1\trs1\t0\t100\tA\tG", "1\trs2\t0\t200\tC\tT" ), tmp) - res <- readVariantMetadata(tmp) + res <- pecotmr:::readVariantMetadata(tmp) expect_equal(nrow(res), 2) expect_true("gpos" %in% names(res)) expect_equal(as.character(res$chrom), c("1", "1")) @@ -790,14 +790,14 @@ test_that("readVariantMetadata reads 9-column bim file", { "1\trs1\t0\t100\tA\tG\t0.5\t0.3\t100", "1\trs2\t0\t200\tC\tT\t0.4\t0.2\t99" ), tmp) - res <- readVariantMetadata(tmp) + res <- pecotmr:::readVariantMetadata(tmp) expect_equal(nrow(res), 2) expect_true(all(c("variance", "allele_freq", "n_nomiss") %in% names(res))) }) test_that("readVariantMetadata delegates to readPvar for .pvar files", { pvar_path <- test_path("test_data", "test_variants.pvar") - res <- readVariantMetadata(pvar_path) + res <- pecotmr:::readVariantMetadata(pvar_path) expect_true(all(c("chrom", "id", "pos", "A1", "A2") %in% names(res))) expect_false("gpos" %in% names(res)) }) @@ -806,7 +806,7 @@ test_that("readVariantMetadata errors on unexpected column count", { tmp <- tempfile(fileext = ".bim") on.exit(unlink(tmp), add = TRUE) writeLines(c("1\trs1\t0\t100\tA"), tmp) - expect_error(readVariantMetadata(tmp), "Unexpected number of columns") + expect_error(pecotmr:::readVariantMetadata(tmp), "Unexpected number of columns") }) # =========================================================================== @@ -821,7 +821,7 @@ test_that("matchVariantsToKeep works with single-column variant ID file", { on.exit(unlink(keep_file), add = TRUE) writeLines(c("1:100:A:G", "1:300:G:A"), keep_file) - mask <- matchVariantsToKeep(vi, keep_file) + mask <- pecotmr:::matchVariantsToKeep(vi, keep_file) expect_type(mask, "logical") expect_equal(sum(mask), 2L) expect_true(mask[1]) @@ -840,7 +840,7 @@ test_that("matchVariantsToKeep uses position-only matching when no alleles", { keep_df <- vi[c(1, 5), c("chrom", "pos")] vroom::vroom_write(keep_df, keep_file, delim = "\t") - mask <- matchVariantsToKeep(vi, keep_file) + mask <- pecotmr:::matchVariantsToKeep(vi, keep_file) expect_type(mask, "logical") expect_equal(sum(mask), 2L) expect_true(mask[1]) @@ -1404,4 +1404,350 @@ test_that("genoMeta meta-file form matches the named-vector form", { expect_equal(.shardDose(hFile, 1:5), .shardDose(hVec, 1:5)) }) +# =========================================================================== +# Coverage top-ups for R/genotypeIo.R: dispatch/handle error stops, empty- and +# missing-chromosome block paths, format-specific extractor edge cases, the +# GDS LD path, .h2DetectFormat / .plinkStem branches, resolvePlink2Paths +# validation, readAfreq zstd guard, readStochasticMeta validation, the plink1 +# getRefVariantInfo path with a multi-row region, and the non-integer-dosage +# warning in loadGenotypeRegion. +# =========================================================================== + +# --- readGenotypes dispatch + handle-constructor file-not-found stops -------- + +test_that("readGenotypes errors on unsupported format", { + expect_error( + readGenotypes("/whatever/path", format = "bogusFormat"), + "Unsupported genotype format: bogusFormat") +}) + +test_that(".makeGdsHandle errors when GDS file is absent", { + skip_if_not_installed("SNPRelate") + skip_if_not_installed("gdsfmt") + expect_error( + pecotmr:::.makeGdsHandle("/no/such/file.gds"), + "GDS file not found") +}) + +test_that(".makeVcfHandle errors when VCF file is absent", { + skip_if_not_installed("VariantAnnotation") + expect_error( + pecotmr:::.makeVcfHandle("/no/such/file.vcf.gz"), + "VCF file not found") +}) + +test_that(".makePlink1Handle errors when plink1 trio is absent", { + skip_if_not_installed("snpStats") + expect_error( + pecotmr:::.makePlink1Handle(file.path(tempdir(), "missingPlink1Prefix")), + "Plink file not found") +}) + +# --- extractBlockGenotypes: NULL-from-extractor guard (line 237) ------------- + +test_that("extractBlockGenotypes returns NULL when extractor yields NULL", { + skip_if_not_installed("VariantAnnotation") + td <- test_path("test_data") + handle <- readGenotypes(file.path(td, "test_variants.vcf.gz"), format = "vcf") + # Force the format-specific extractor to return NULL so the early-return + # guard in extractBlockGenotypes is exercised. + testthat::local_mocked_bindings( + .extractBlockVcf = function(handle, snpIdx) NULL, .package = "pecotmr") + expect_null(extractBlockGenotypes(handle, 1:3)) +}) + +# --- .extractBlockSharded: empty block + missing-chromosome stop ------------- + +test_that("extractBlockGenotypes on a sharded handle handles an empty block", { + skip_if_not_installed("snpStats") + td <- test_path("test_data") + shard <- GenotypeHandle(genoMeta = c( + "21" = file.path(td, "test_variants"), + "22" = file.path(td, "test_variants_chr22"))) + se <- extractBlockGenotypes(shard, integer(0)) + expect_s4_class(se, "SummarizedExperiment") + dosage <- SummarizedExperiment::assay(se, "dosage") + expect_equal(nrow(dosage), 0L) + expect_equal(ncol(dosage), shard@nSamples) + expect_equal(colnames(dosage), shard@sampleIds) +}) + +test_that("sharded extraction errors for a chromosome with no payload", { + skip_if_not_installed("snpStats") + td <- test_path("test_data") + shard <- GenotypeHandle(genoMeta = c( + "21" = file.path(td, "test_variants"), + "22" = file.path(td, "test_variants_chr22"))) + # Drop the chr22 payload but keep its variants in @snpInfo, so routing a + # chr22 request finds no per-chromosome file. + shard@chromPaths <- shard@chromPaths["21"] + idx22 <- which(pecotmr:::.canonChr(shard@snpInfo$CHR) == "22")[1] + expect_error( + extractBlockGenotypes(shard, idx22), + "no per-chromosome file for chromosome") +}) + +# --- .extractBlockGds: NULL/empty result guard (line 336) -------------------- + +test_that(".extractBlockGds returns NULL when snpgdsGetGeno yields NULL", { + skip_if_not_installed("SNPRelate") + skip_if_not_installed("gdsfmt") + td <- test_path("test_data") + handle <- readGenotypes(file.path(td, "test_variants.gds"), format = "gds") + testthat::local_mocked_bindings( + snpgdsGetGeno = function(...) NULL, .package = "SNPRelate") + expect_null(pecotmr:::.extractBlockGds(handle, 1:3)) +}) + +# --- .extractBlockVcf: missing-genotype ("./.") parsing (line 361) ----------- + +.gioMakeMissingGtVcf <- function() { + dir <- tempfile("gioVcf_") + dir.create(dir) + plain <- file.path(dir, "mini.vcf") + writeLines(c( + "##fileformat=VCFv4.2", + "##contig=", + "##FORMAT=", + paste("#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO", + "FORMAT", "S1", "S2", "S3", sep = "\t"), + paste("21", "1000", "v1", "A", "G", ".", ".", ".", "GT", + "0/1", "./.", "1/1", sep = "\t"), + paste("21", "2000", "v2", "C", "T", ".", ".", ".", "GT", + "0|0", "1|1", "./.", sep = "\t")), plain) + bg <- Rsamtools::bgzip(plain, paste0(plain, ".gz"), overwrite = TRUE) + Rsamtools::indexTabix(bg, format = "vcf") + bg +} + +test_that(".extractBlockVcf parses missing genotypes as NA", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Rsamtools") + bg <- .gioMakeMissingGtVcf() + handle <- readGenotypes(bg, format = "vcf") + expect_equal(nrow(handle@snpInfo), 2L) + # meanImpute=FALSE keeps the "./." dosages as NA + se <- extractBlockGenotypes(handle, 1:2, meanImpute = FALSE) + dosage <- SummarizedExperiment::assay(se, "dosage") + expect_true(any(is.na(dosage))) + # Exactly the two "./." entries become NA (one per variant) + expect_equal(sum(is.na(dosage)), 2L) +}) + +# --- .extractBlockPlink2: stale-pointer reopen path (lines 402-403) ---------- + +test_that("plink2 extraction reopens a stale (deserialized) pgen pointer", { + skip_if_not_installed("pgenlibr") + td <- test_path("test_data") + handle <- readGenotypes(file.path(td, "test_variants"), format = "plink2") + ref <- SummarizedExperiment::assay(extractBlockGenotypes(handle, 1:5), "dosage") + # Round-tripping through saveRDS/readRDS staleness the cached @pgenPtr; the + # first ReadList errors and the tryCatch reopen path takes over. + tf <- tempfile(fileext = ".rds") + on.exit(unlink(tf), add = TRUE) + saveRDS(handle, tf) + reloaded <- readRDS(tf) + got <- SummarizedExperiment::assay(extractBlockGenotypes(reloaded, 1:5), "dosage") + expect_equal(got, ref) +}) + +# --- computeBlockLdCor: GDS internal path, NULL guard, single-col, computeLd -- + +test_that("computeBlockLdCor uses the native GDS path for internal backend", { + skip_if_not_installed("SNPRelate") + skip_if_not_installed("gdsfmt") + td <- test_path("test_data") + handle <- readGenotypes(file.path(td, "test_variants.gds"), format = "gds") + R <- computeBlockLdCor(handle, 1:6, backend = "internal") + expect_equal(dim(R), c(6L, 6L)) + expect_true(all(is.finite(R))) + expect_false(anyNA(R)) + expect_equal(diag(R), rep(1, 6), tolerance = 1e-6) +}) + +test_that("computeBlockLdCor returns identity when extraction is NULL", { + skip_if_not_installed("pgenlibr") + td <- test_path("test_data") + handle <- readGenotypes(file.path(td, "test_variants"), format = "plink2") + testthat::local_mocked_bindings( + extractBlockGenotypes = function(...) NULL, .package = "pecotmr") + R <- computeBlockLdCor(handle, 1:4, backend = "internal") + expect_equal(R, diag(4)) +}) + +test_that("computeBlockLdCor returns identity for a single-variant block", { + skip_if_not_installed("pgenlibr") + td <- test_path("test_data") + handle <- readGenotypes(file.path(td, "test_variants"), format = "plink2") + R <- computeBlockLdCor(handle, 1L, backend = "internal") + expect_equal(R, diag(1)) +}) + +test_that("computeBlockLdCor computes a correlation matrix via the general path", { + skip_if_not_installed("pgenlibr") + td <- test_path("test_data") + handle <- readGenotypes(file.path(td, "test_variants"), format = "plink2") + R <- computeBlockLdCor(handle, 1:5, backend = "internal") + expect_equal(dim(R), c(5L, 5L)) + expect_equal(unname(diag(R)), rep(1, 5), tolerance = 1e-6) +}) + +# --- .h2DetectFormat branches ------------------------------------------------ + +test_that(".h2DetectFormat detects .annot.gz as ldsc_annot", { + expect_equal(pecotmr:::.h2DetectFormat("something.annot.gz"), "ldsc_annot") +}) + +test_that(".h2DetectFormat detects a plink1 prefix by .bed sidecar", { + td <- test_path("test_data") + # protocol_example.genotype has .bed/.bim/.fam but no .pgen + expect_equal( + pecotmr:::.h2DetectFormat(file.path(td, "protocol_example.genotype")), + "plink1") +}) + +test_that(".h2DetectFormat detects a gds-only prefix", { + stem <- tempfile("gioGdsStem_") + file.create(paste0(stem, ".gds")) + on.exit(unlink(paste0(stem, ".gds")), add = TRUE) + expect_equal(pecotmr:::.h2DetectFormat(stem), "gds") +}) + +test_that(".h2DetectFormat errors on an unknown extension", { + expect_error( + pecotmr:::.h2DetectFormat(file.path(tempdir(), "thing.qzx")), + "Cannot detect format from extension") +}) + +test_that(".h2DetectFormat errors on an extensionless path with no sidecars", { + expect_error( + pecotmr:::.h2DetectFormat(file.path(tempdir(), "noSuchStemXyz")), + "Cannot detect genotype format for path") +}) + +# --- .plinkStem strips a recognized plink extension -------------------------- + +test_that(".plinkStem strips a known plink extension", { + td <- test_path("test_data") + expect_equal( + pecotmr:::.plinkStem(file.path(td, "test_variants.bed")), + file.path(td, "test_variants")) +}) + +# --- resolvePlink2Paths: .pvar.zst fallback + missing pvar/psam stops -------- + +.gioMakePlink2Stub <- function(which = c("pgen", "pvar", "pvarZst", "psam")) { + dir <- tempfile("gioP2_") + dir.create(dir) + prefix <- file.path(dir, "g") + if ("pgen" %in% which) file.create(paste0(prefix, ".pgen")) + if ("pvar" %in% which) file.create(paste0(prefix, ".pvar")) + if ("pvarZst" %in% which) file.create(paste0(prefix, ".pvar.zst")) + if ("psam" %in% which) file.create(paste0(prefix, ".psam")) + prefix +} + +test_that("resolvePlink2Paths falls back to .pvar.zst", { + prefix <- .gioMakePlink2Stub(c("pgen", "pvarZst", "psam")) + paths <- pecotmr:::resolvePlink2Paths(prefix) + expect_equal(paths$pvar, paste0(prefix, ".pvar.zst")) + expect_equal(paths$pgen, paste0(prefix, ".pgen")) + expect_equal(paths$psam, paste0(prefix, ".psam")) +}) + +test_that("resolvePlink2Paths errors when no .pvar/.pvar.zst exists", { + prefix <- .gioMakePlink2Stub(c("pgen", "psam")) + expect_error( + pecotmr:::resolvePlink2Paths(prefix), + "PLINK2 .pvar\\[.zst\\] file not found") +}) + +test_that("resolvePlink2Paths errors when .psam is missing", { + prefix <- .gioMakePlink2Stub(c("pgen", "pvar")) + expect_error( + pecotmr:::resolvePlink2Paths(prefix), + "PLINK2 .psam file not found") +}) + +# --- readAfreq: zstd CLI guard (line 655) ------------------------------------ + +test_that("readAfreq errors for .afreq.zst when zstd CLI is unavailable", { + skip_if_not_installed("withr") + td <- test_path("test_data") + zdir <- tempfile("gioZst_") + dir.create(zdir) + file.copy(file.path(td, "test_harmonize_regions.afreq.zst"), + file.path(zdir, "g.afreq.zst")) + # Empty PATH makes Sys.which("zstd") return "" so the guard fires before any + # subprocess is launched. + withr::local_envvar(PATH = "") + expect_error( + readAfreq(file.path(zdir, "g")), + "zstd CLI is required") +}) + +# --- readStochasticMeta: generic format missing required columns ------------- + +test_that("readStochasticMeta errors when generic file lacks required columns", { + tmp <- tempfile(fileext = ".tsv") + on.exit(unlink(tmp), add = TRUE) + writeLines(c("foo\tbar", "1\t2"), tmp) + expect_error( + pecotmr:::readStochasticMeta(tmp), + "must contain columns") +}) + +# --- getRefVariantInfo: plink1 .bed path + multi-row region filter ----------- + +test_that("getRefVariantInfo reads variant info from a plink1 source", { + td <- test_path("test_data") + meta_file <- file.path(td, "ld_meta_refinfo_plink1_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file) + cat(paste("22", "0", "0", "protocol_example.genotype", sep = "\t"), "\n", + file = meta_file, append = TRUE) + info <- getRefVariantInfo(meta_file) # no region -> all variants + expect_true(is.data.frame(info)) + expect_true(all(c("chrom", "id", "pos", "A2", "A1") %in% names(info))) + expect_gt(nrow(info), 0L) +}) + +test_that("getRefVariantInfo applies a multi-row data.frame region filter", { + skip_if_not_installed("pgenlibr") + td <- test_path("test_data") + meta_file <- file.path(td, "ld_meta_refinfo_multirow_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file) + cat(paste("21", "0", "0", "test_variants", sep = "\t"), "\n", + file = meta_file, append = TRUE) + # A two-row region (both chr21) drives the multi-row branch of the filter + # loop; together the rows cover the full block. + region_df <- data.frame( + chrom = c("21", "21"), + start = c(17513228L, 17550001L), + end = c(17550000L, 17592874L), + stringsAsFactors = FALSE) + info <- getRefVariantInfo(meta_file, region = region_df) + expect_true(is.data.frame(info)) + expect_equal(nrow(info), 349L) + expect_true(all(info$pos >= 17513228 & info$pos <= 17592874)) +}) + +# --- loadGenotypeRegion: non-integer dosages without a sidecar (warning) ----- + +test_that("loadGenotypeRegion warns on non-integer dosages without a sidecar", { + skip_if_not_installed("pgenlibr") + td <- test_path("test_data") + # dummy_data is a plink2 fixture with no .afreq / .stochastic_meta sidecar. + # Inject fractional dosages via the extractor so the no-sidecar branch sees + # non-integer values and emits its warning. + testthat::local_mocked_bindings( + .extractBlockPlink2 = function(handle, snpIdx) + matrix(0.5, nrow = handle@nSamples, ncol = length(snpIdx)), + .package = "pecotmr") + expect_warning( + loadGenotypeRegion(file.path(td, "dummy_data")), + "Non-integer genotype values detected") +}) + diff --git a/tests/testthat/test_h2Annotations.R b/tests/testthat/test_h2Annotations.R index 72f24946..c987e44c 100644 --- a/tests/testthat/test_h2Annotations.R +++ b/tests/testthat/test_h2Annotations.R @@ -434,6 +434,82 @@ test_that("readannotations with multiple files creates multi-column matrix", { expect_equal(as.numeric(result@annotations[, "ann2"]), c(0, 1)) }) +# ============================================================================= +# AnnotationMatrix argument guards, BigWig reader, LDSC CHR/BP guard +# ============================================================================= + +test_that("AnnotationMatrix rejects a non-data.frame annotationMeta", { + gr <- make_test_granges(10) + expect_error( + AnnotationMatrix(matrix(0, 10, 3), gr, annotationMeta = list(a = 1)), + "annotationMeta must be a data.frame" + ) +}) + +test_that("AnnotationMatrix rejects annotationMeta missing required columns", { + gr <- make_test_granges(10) + bad_meta <- data.frame(foo = c("a", "b", "c"), stringsAsFactors = FALSE) + expect_error( + AnnotationMatrix(matrix(0, 10, 3), gr, annotationMeta = bad_meta), + "must have columns: name, tier, type" + ) +}) + +test_that(".readBigwigAtSnps returns the mean BigWig score at each SNP", { + skip_if_not_installed("rtracklayer") + gr <- GenomicRanges::GRanges( + "chr1", + IRanges::IRanges(start = c(1, 201, 401), end = c(200, 400, 600)), + score = c(1.5, 2.5, 3.5)) + GenomeInfoDb::seqlengths(gr) <- c(chr1 = 1000L) + bw <- tempfile(fileext = ".bw") + on.exit(unlink(bw), add = TRUE) + rtracklayer::export(gr, bw, format = "bigWig") + + snp_gr <- GenomicRanges::GRanges( + "chr1", IRanges::IRanges(start = c(100, 300, 500, 800), width = 1)) + result <- pecotmr:::.readBigwigAtSnps(bw, snp_gr) + # Positions 100/300/500 fall in the three scored intervals; 800 has no + # coverage and defaults to 0. + expect_equal(result, c(1.5, 2.5, 3.5, 0)) +}) + +test_that("readAnnotations reads a BigWig file as a continuous annotation", { + skip_if_not_installed("rtracklayer") + gr <- GenomicRanges::GRanges( + "chr1", + IRanges::IRanges(start = c(1, 201, 401), end = c(200, 400, 600)), + score = c(1.5, 2.5, 3.5)) + GenomeInfoDb::seqlengths(gr) <- c(chr1 = 1000L) + bw <- tempfile(fileext = ".bw") + on.exit(unlink(bw), add = TRUE) + rtracklayer::export(gr, bw, format = "bigWig") + + snp_gr <- GenomicRanges::GRanges( + "chr1", IRanges::IRanges(start = c(100, 300, 800), width = 1)) + result <- readAnnotations(c(cons = bw), snp_gr) + expect_s4_class(result, "AnnotationMatrix") + # A .bw file is auto-detected as a continuous annotation. + expect_equal(result@annotationMeta$type, "continuous") + expect_equal(as.numeric(result@annotations[, 1]), c(1.5, 2.5, 0)) +}) + +test_that(".readLdscAnnot errors when CHR/BP columns are absent", { + # The annotation column is present but the positional CHR/BP columns are not. + annot_df <- data.frame( + SNP = c("rs1", "rs2"), CM = 0, my_annot = c(1, 0), + stringsAsFactors = FALSE) + annot_file <- tempfile(fileext = ".annot") + on.exit(unlink(annot_file), add = TRUE) + write.table(annot_df, annot_file, sep = "\t", row.names = FALSE, quote = FALSE) + snp_gr <- GenomicRanges::GRanges( + "chr1", IRanges::IRanges(start = 100, width = 1)) + expect_error( + pecotmr:::.readLdscAnnot(annot_file, snp_gr, "my_annot"), + "must contain CHR and BP columns" + ) +}) + # ============================================================================= # readsumstats edge cases (h2_sumstats.R) # ============================================================================= diff --git a/tests/testthat/test_h2EstimationWrappers.R b/tests/testthat/test_h2EstimationWrappers.R index 42be6b1a..a0c78057 100644 --- a/tests/testthat/test_h2EstimationWrappers.R +++ b/tests/testthat/test_h2EstimationWrappers.R @@ -1364,3 +1364,340 @@ test_that("gldscUnivariate with annotations returns scoreStats", { expect_true("annotationNames" %in% names(res$scoreStats)) expect_equal(res$scoreStats$annotationNames, "candidate1") }) + +# =========================================================================== +# Coverage-gap tests (appended) +# +# Targets the specific uncovered branches in R/h2EstimationWrappers.R: +# bplapplyBlocks, checkGenomeBuild (unknown type), weightedLsRidge +# (vector X ridge path), metaRandomEffects (length mismatch), .fglsSolve +# (diagonal path), .gldscLocal (fine-grained blocks), .hdlLocal (p<3 and +# tau=NULL), .lderLocalH2 (small block + baseline path), .hdlSeFisher- +# Stratified / .hdlJackknifeTau (lambda>0 + singular fallback), the +# multi-candidate (cor) and no-candidate (NULL) paths of the stratified +# score-statistic helpers, h2EstimateToSldscTrait (unnamed tauBlocks), and +# the multi-study estimateH2 dispatch error. +# =========================================================================== + +# Flexible annotation builder: one baseline column (all-ones or spatial +# half-on/half-off) plus `nCand` random binary candidate columns. +makeCoverageAnnot <- function(n_snps, baseline = "ones", nCand = 1L, + seed = 321) { + set.seed(seed) + snp_gr <- GenomicRanges::GRanges( + seqnames = rep("chr1", n_snps), + ranges = IRanges::IRanges( + start = seq(50, by = 100, length.out = n_snps), width = 1L)) + base_col <- if (identical(baseline, "spatial")) { + as.numeric(seq_len(n_snps) <= n_snps / 2) + } else { + rep(1, n_snps) + } + mat <- matrix(base_col, ncol = 1) + meta <- data.frame(name = "baseline1", tier = "baseline", + type = "binary", stringsAsFactors = FALSE) + if (nCand > 0L) { + cand <- vapply(seq_len(nCand), + function(k) as.numeric(rbinom(n_snps, 1, 0.3 + 0.1 * k)), + numeric(n_snps)) + mat <- cbind(mat, cand) + meta <- rbind(meta, data.frame( + name = paste0("cand", seq_len(nCand)), + tier = "candidate", type = "binary", stringsAsFactors = FALSE)) + } + colnames(mat) <- meta$name + AnnotationMatrix(mat, snp_gr, meta, genome = "hg19") +} + +# --------------------------------------------------------------------------- +# bplapplyBlocks +# --------------------------------------------------------------------------- + +test_that("bplapplyBlocks applies FUN per block with default BPPARAM", { + skip_if_not_installed("BiocParallel") + res <- pecotmr:::bplapplyBlocks(list(1, 2, 3), function(i, ...) i^2) + expect_equal(res, list(1, 4, 9)) +}) + +test_that("bplapplyBlocks works with SerialParam and extra args", { + skip_if_not_installed("BiocParallel") + res <- pecotmr:::bplapplyBlocks( + list(2, 3), function(i, k) i + k, + BPPARAM = BiocParallel::SerialParam(), k = 10) + expect_equal(res, list(12, 13)) +}) + +# --------------------------------------------------------------------------- +# checkGenomeBuild — unknown object type branch +# --------------------------------------------------------------------------- + +test_that("checkGenomeBuild errors on an unrecognized object type", { + expect_error( + pecotmr:::checkGenomeBuild(list(genome = "hg19")), + "Unknown object type" + ) +}) + +# --------------------------------------------------------------------------- +# weightedLsRidge — vector X inside the ridge (lambda > 0) path +# --------------------------------------------------------------------------- + +test_that("weightedLsRidge converts a vector X to a matrix in the ridge path", { + set.seed(1) + n <- 40 + x <- rnorm(n) + y <- 2 * x + rnorm(n, sd = 0.1) + w <- rep(1, n) + res <- pecotmr:::weightedLsRidge(y, x, w, lambda = 5) + expect_length(res$coef, 1) + expect_true(is.finite(res$coef[1])) + # single column => penalized (p > 1 is FALSE), so coef is shrunk vs OLS + res0 <- pecotmr:::weightedLsRidge(y, x, w, lambda = 0) + expect_true(abs(res$coef[1]) < abs(res0$coef[1])) +}) + +# --------------------------------------------------------------------------- +# metaRandomEffects — length-mismatch error +# --------------------------------------------------------------------------- + +test_that("metaRandomEffects errors when means and ses differ in length", { + expect_error( + pecotmr:::metaRandomEffects(c(1, 2), c(1, 2, 3)), + "must have the same length" + ) +}) + +# --------------------------------------------------------------------------- +# .fglsSolve — diagonal (numeric) precision path +# --------------------------------------------------------------------------- + +test_that(".fglsSolve with numeric (diagonal) weights matches weightedLs", { + set.seed(2) + n <- 30 + X <- cbind(rnorm(n), 1) + y <- X %*% c(1.5, 0.5) + rnorm(n, sd = 0.1) + w <- rep(1, n) + res <- pecotmr:::.fglsSolve(y, X, w) + ref <- pecotmr:::weightedLs(y, X, w) + expect_equal(res$coef, ref$coef) +}) + +test_that(".fglsSolve converts a vector X to a matrix", { + set.seed(3) + n <- 20 + x <- rnorm(n) + y <- 2 * x + rnorm(n, sd = 0.1) + res <- pecotmr:::.fglsSolve(y, x, rep(1, n)) + expect_length(res$coef, 1) + expect_true(is.finite(res$coef[1])) +}) + +# --------------------------------------------------------------------------- +# .gldscLocal — fine-grained (> 22) LD blocks +# --------------------------------------------------------------------------- + +test_that(".gldscLocal computes per-block local h2 with fine-grained blocks", { + ld_ref <- make_test_score_ref(nSnps = 46, nBlocks = 23) + set.seed(7) + z <- rnorm(46) + res <- pecotmr:::.gldscLocal(z, n = 50000, ldRef = ld_ref, + h2 = 0.3, intercept = 0.001) + expect_s3_class(res, "data.frame") + expect_true(all(c("blockId", "h2Local", "h2LocalSe") %in% colnames(res))) + expect_equal(nrow(res), 23L) + expect_true(all(is.finite(res$h2Local))) + expect_true(all(res$h2LocalSe > 0)) +}) + +# --------------------------------------------------------------------------- +# .hdlLocal — p < 3 (NA row) and tau = NULL (no-baseline) branches +# --------------------------------------------------------------------------- + +test_that(".hdlLocal returns an NA row for blocks with fewer than 3 SNPs", { + bd_small <- list(zRot = c(0.5, -0.3), d = c(1.2, 0.8), + ldAnnot = NULL, snpIdx = 1:2, p = 2) + res <- pecotmr:::.hdlLocal(list(bd_small), n = 50000, M = 100, + tau = 0.3, baselineMat = NULL) + expect_true(is.na(res$h2Local[1])) + expect_true(is.na(res$h2LocalSe[1])) +}) + +test_that(".hdlLocal handles tau = NULL (no baseline sigma2) path", { + set.seed(8) + bd <- list(zRot = rnorm(5), d = c(2, 1.5, 1, 0.8, 0.5), + ldAnnot = NULL, snpIdx = 1:5, p = 5) + res <- pecotmr:::.hdlLocal(list(bd), n = 1000, M = 100, + tau = NULL, baselineMat = NULL) + expect_s3_class(res, "data.frame") + expect_true(is.finite(res$h2Local[1])) + expect_true(is.finite(res$h2LocalSe[1])) +}) + +# --------------------------------------------------------------------------- +# .lderLocalH2 — small block (< 3 eigenvalues) + baseline path via top-level +# --------------------------------------------------------------------------- + +test_that(".lderLocalH2 returns NA for blocks with fewer than 3 eigenvalues", { + bd <- list(list(n_snps = 2, eigenvalues = c(1.1, 0.9), + chi2Rot = c(1.5, 0.8), ldAnnot = NULL)) + res <- pecotmr:::.lderLocalH2(bd, n = 50000, M = 100, tau = 0.3, + aGlobal = 0, baselineMat = NULL) + expect_true(is.na(res$h2Local[1])) + expect_true(is.na(res$h2LocalSe[1])) +}) + +test_that("lderUnivariate with annotations and local = TRUE returns local h2", { + annot <- makeCoverageAnnot(dat_annot$n_snps, baseline = "ones", nCand = 1L) + res <- pecotmr:::lderUnivariate(dat_annot$z, dat_annot$n, + dat_annot$eigen_ref, + annotations = annot, local = TRUE) + expect_true(is.data.frame(res$local)) + expect_true("h2Local" %in% colnames(res$local)) + expect_equal(nrow(res$local), 5L) +}) + +# --------------------------------------------------------------------------- +# .hdlSeFisherStratified / .hdlJackknifeTau — lambda > 0 ridge penalty +# --------------------------------------------------------------------------- + +test_that("hdlUnivariate with annotations and lambda > 0 exercises ridge penalty", { + annot <- makeCoverageAnnot(dat_annot$n_snps, baseline = "ones", nCand = 1L) + suppressWarnings( + res <- pecotmr:::hdlUnivariate(dat_annot$z, dat_annot$n, + dat_annot$eigen_ref, + annotations = annot, lambda = 1) + ) + expect_true(is.finite(res$h2)) + expect_true(is.matrix(res$tauBlocks)) + expect_true(all(is.finite(res$tauSe))) +}) + +# --------------------------------------------------------------------------- +# .hdlSeFisherStratified / .hdlJackknifeTau — singular info-matrix fallback +# (zero eigenvalues => zero Fisher information => solve() fails) +# --------------------------------------------------------------------------- + +test_that(".hdlSeFisherStratified falls back when the info matrix is singular", { + bd <- list(list(zRot = c(0.1, 0.2), d = c(0, 0), + ldAnnot = matrix(1, nrow = 2, ncol = 2))) + baselineMat <- matrix(1, nrow = 2, ncol = 2) + res <- pecotmr:::.hdlSeFisherStratified( + tau = c(0.1, 0.1), blockData = bd, n = 1000, M = 100, + baselineMat = baselineMat, lambda = 0) + expect_true(all(is.finite(res$tauSe))) + expect_true(is.finite(res$h2Se)) +}) + +test_that(".hdlJackknifeTau falls back when the info matrix is singular", { + bd <- list( + list(zRot = c(0.1, 0.2), d = c(0, 0), ldAnnot = matrix(1, 2, 2)), + list(zRot = c(0.3, 0.1), d = c(0, 0), ldAnnot = matrix(1, 2, 2))) + res <- pecotmr:::.hdlJackknifeTau( + tau = c(0.1, 0.1), blockData = bd, n = 1000, M = 100, + baselineMat = matrix(1, nrow = 4, ncol = 2), lambda = 0) + expect_true(is.matrix(res$looEstimates)) + expect_true(all(is.finite(res$tauSe))) +}) + +# --------------------------------------------------------------------------- +# Stratified score helpers — multiple-candidate (cor) branch +# --------------------------------------------------------------------------- + +test_that("lderUnivariate with 2 candidates returns a 2x2 score correlation", { + annot <- makeCoverageAnnot(dat_annot$n_snps, baseline = "ones", nCand = 2L) + res <- pecotmr:::lderUnivariate(dat_annot$z, dat_annot$n, + dat_annot$eigen_ref, annotations = annot) + expect_length(res$scoreStats$z, 2L) + expect_equal(dim(res$scoreStats$R), c(2L, 2L)) +}) + +test_that("hdlUnivariate with 2 candidates returns a 2x2 score correlation", { + annot <- makeCoverageAnnot(dat_annot$n_snps, baseline = "ones", nCand = 2L) + suppressWarnings( + res <- pecotmr:::hdlUnivariate(dat_annot$z, dat_annot$n, + dat_annot$eigen_ref, annotations = annot) + ) + expect_length(res$scoreStats$z, 2L) + expect_equal(dim(res$scoreStats$R), c(2L, 2L)) +}) + +test_that("gldscUnivariate with 2 candidates returns a 2x2 score correlation", { + annot <- makeCoverageAnnot(dat_annot$n_snps, baseline = "spatial", nCand = 2L) + res <- pecotmr:::gldscUnivariate(dat_annot$z, dat_annot$n, + dat_annot$ld_score_ref, annotations = annot) + expect_length(res$scoreStats$z, 2L) + expect_equal(dim(res$scoreStats$R), c(2L, 2L)) +}) + +# --------------------------------------------------------------------------- +# Stratified score helpers — no-candidate (NULL scoreStats) branch +# --------------------------------------------------------------------------- + +test_that("lderUnivariate with baseline-only annotations yields NULL scoreStats", { + annot <- makeCoverageAnnot(dat_annot$n_snps, baseline = "ones", nCand = 0L) + res <- pecotmr:::lderUnivariate(dat_annot$z, dat_annot$n, + dat_annot$eigen_ref, annotations = annot) + expect_null(res$scoreStats) +}) + +test_that("hdlUnivariate with baseline-only annotations yields NULL scoreStats", { + annot <- makeCoverageAnnot(dat_annot$n_snps, baseline = "ones", nCand = 0L) + suppressWarnings( + res <- pecotmr:::hdlUnivariate(dat_annot$z, dat_annot$n, + dat_annot$eigen_ref, annotations = annot) + ) + expect_null(res$scoreStats) +}) + +test_that("gldscUnivariate with baseline-only annotations yields NULL scoreStats", { + annot <- makeCoverageAnnot(dat_annot$n_snps, baseline = "spatial", nCand = 0L) + res <- pecotmr:::gldscUnivariate(dat_annot$z, dat_annot$n, + dat_annot$ld_score_ref, annotations = annot) + expect_null(res$scoreStats) +}) + +# --------------------------------------------------------------------------- +# h2EstimateToSldscTrait — tauBlocks present but without column names +# --------------------------------------------------------------------------- + +test_that("h2EstimateToSldscTrait assigns category names to unnamed tauBlocks", { + h2_obj <- make_test_h2estimate(with_enrichment = TRUE) + tb <- h2_obj@tauBlocks + colnames(tb) <- NULL + h2_obj@tauBlocks <- tb + result <- h2EstimateToSldscTrait(h2_obj) + expect_equal(colnames(result$tauBlocks), c("annot1", "annot2")) + expect_equal(result$nBlocks, 10L) +}) + +# --------------------------------------------------------------------------- +# estimateH2 dispatch — `study` required for a multi-study collection +# --------------------------------------------------------------------------- + +test_that("estimateH2 errors when study is omitted for a multi-study collection", { + eigen_ref <- make_test_eigen_ref() + n_snps <- nrow(eigen_ref@snpInfo) + set.seed(321) + df <- data.frame( + SNP = eigen_ref@snpInfo$SNP, + CHR = sub("^chr", "", eigen_ref@snpInfo$CHR), + BP = eigen_ref@snpInfo$BP, + A1 = eigen_ref@snpInfo$A1, + A2 = eigen_ref@snpInfo$A2, + Z = rnorm(n_snps), + N = rep(50000, n_snps), + stringsAsFactors = FALSE + ) + gr <- .dfToGwasGr(df) + ss2 <- GwasSumStats( + study = c("studyA", "studyB"), + entry = list(gr, gr), + genome = "hg19", + ldSketch = make_test_gwas_genotype_handle(), + varY = NA_real_ + ) + expect_error( + estimateH2(ss2, eigen_ref, method = "lder"), + "is required when" + ) +}) diff --git a/tests/testthat/test_jointEngine.R b/tests/testthat/test_jointEngine.R new file mode 100644 index 00000000..a1e84a4b --- /dev/null +++ b/tests/testthat/test_jointEngine.R @@ -0,0 +1,1140 @@ +# Tests for R/jointEngine.R — the unified joint-analysis engine. A joint fit over +# N conditions is SLICED into N per-context rows (real study/context/trait) that +# carry the ";"-joined co-fit members in jointStudies/jointContexts/jointTraits +# as provenance — exactly like running the univariate method per context, except +# the per-context rows share PIP/CS (fm) / the joint fit (twas). The fits are +# mocked so these assert the engine wiring (per-context expansion), not the fit. + +.je_mkGroup <- function(tid, n = 10L) { + X <- matrix(rnorm(n * 2), n, 2, + dimnames = list(paste0("s", seq_len(n)), c("v1", "v2"))) + Y <- matrix(rnorm(n * 2), n, 2, + dimnames = list(paste0("s", seq_len(n)), c("c1", "c2"))) + new("IndividualJointGroup", + conditions = data.frame(study = "S", context = c("c1", "c2"), + trait = tid, stringsAsFactors = FALSE), + X = X, Y = Y) +} + +.je_synthCell <- function(groups) { + new("JointDispatchCell", pattern = "context", dataForm = "individual", + enumerate = function(data, scope, args) groups, minGroup = 2L) +} + +# Mock postprocess: called once per condition (the fitter passes conditionIdx); +# returns one FineMappingEntry, so the fitter yields one entry per condition. +.je_mockPostprocess <- function(fit, method, dataX, dataY, coverage, + secondaryCoverage, signalCutoff, minAbsCorr, + csInput = NULL, af = NULL, region = NULL, + conditionIdx = NULL) { + vids <- colnames(dataX) + FineMappingEntry( + variantIds = vids, + susieFit = list(method = method, cond = conditionIdx), + topLoci = data.frame(variant_id = vids, + pip = seq(0.9, by = -0.1, length.out = length(vids)), + stringsAsFactors = FALSE)) +} + +test_that(".runJointCell: cross-context FM expands to per-context rows", { + set.seed(1) + cell <- .je_synthCell(list(.je_mkGroup("G1"), .je_mkGroup("G2"))) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95, cvFolds = 0)) + local_mocked_bindings(create_mixture_prior = function(...) "PRIOR", + .package = "mvsusieR") + local_mocked_bindings( + fitMvsusie = function(...) list(), + .fmPostprocessOne = .je_mockPostprocess, + .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mvsusie") + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 4L) # 2 genes x 2 ctx + expect_equal(as.character(res$context), c("c1", "c2", "c1", "c2")) # REAL + expect_equal(as.character(res$trait), c("G1", "G1", "G2", "G2")) + expect_equal(as.character(res$method), rep("mvsusie", 4L)) + expect_equal(as.character(res$jointContexts), rep("c1;c2", 4L)) # provenance +}) + +test_that(".runJointCell: cross-context FM uses the per-fold mr.mash CV prior", { + set.seed(2) + cell <- .je_synthCell(list(.je_mkGroup("G1"))) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95, cvFolds = 2)) + + # Prior mr.mash CV payload. Looked up by the FIXED axes (study=S, trait=G1) + # with context match-any, so a per-context OR a legacy "joint" row both match. + U <- list(K = diag(2)) + fp <- list(dataDrivenPriorMatrices = list(U = U, w = c(K = 1)), + w0 = c(K_grid1 = 1), V = diag(2)) + sp <- data.frame(Sample = paste0("s", 1:10), Fold = rep(1:2, each = 5), + stringsAsFactors = FALSE) + cvPayload <- list(samplePartition = sp, + foldFits = list(fold_1 = fp, fold_2 = fp)) + twEntry <- TwasWeightsEntry(variantIds = c("v1", "v2"), weights = c(0.1, 0.2), + fits = fp, cvResult = cvPayload) + tw <- TwasWeights(study = "S", context = "c1", trait = "G1", + method = "mrmash", entry = list(twEntry)) + + captured <- NULL + local_mocked_bindings(create_mixture_prior = function(...) "PRIOR", + .package = "mvsusieR") + local_mocked_bindings( + rescaleCovW0 = function(w0) c(K = 1), + fitMvsusie = function(...) list(), + .fmPostprocessOne = .je_mockPostprocess, + .fmCrossValidate = function(X, Y, tokens, methodArgs, fold, + samplePartition = NULL, coverage = 0.95, + pos = NULL, verbose = 1, mvPrior = NULL, + mvPriorCv = NULL) { + captured <<- list(mvPriorCv = mvPriorCv, samplePartition = samplePartition) + list(samplePartition = samplePartition, prediction = list(), + performance = list()) + }, + .fmSliceCv = function(cv, token) cv, + .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mvsusie", args = list(twasWeights = tw)) + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 2L) # per-context rows + # Per-fold priors built (one per fold) + threaded into CV, sharing the folds. + expect_false(is.null(captured$mvPriorCv)) + expect_equal(names(captured$mvPriorCv), c("1", "2")) + expect_identical(captured$samplePartition, sp) +}) + +# ---- twas column (mr.mash) -------------------------------------------------- + +.je_fakeMrmashFit <- function() { + list(dataDrivenPriorMatrices = list(U = list(K = diag(2)), w = c(K = 1)), + w0 = c(K_grid1 = 1), V = diag(2)) +} + +.je_mockLearnTwas <- function(X, Y, weightMethods, study, context, trait, + retainFits, retainFitDetail, standardized, + dataType, verbose, ...) { + W <- matrix(0.1, ncol(X), ncol(Y), + dimnames = list(colnames(X), colnames(Y))) + e <- TwasWeightsEntry(variantIds = colnames(X), weights = W, + fits = .je_fakeMrmashFit()) + TwasWeights(study = study, context = context, trait = trait, + method = "mrmash", entry = list(e)) +} + +.je_mockTwasCv <- function(X, Y, fold, samplePartitions = NULL, weightMethods, + retainFits, ..., verbose) { + sp <- data.frame(Sample = rownames(X), + Fold = rep(1:2, length.out = nrow(X)), + stringsAsFactors = FALSE) + metric6 <- c("corr", "rsq", "adj_rsq", "pval", "RMSE", "MAE") + list(samplePartition = sp, + prediction = list(mrmash_predicted = matrix( + 0, nrow(X), ncol(Y), dimnames = list(rownames(X), colnames(Y)))), + performance = list(mrmash_performance = matrix( + 0, ncol(Y), 6, dimnames = list(colnames(Y), metric6))), + foldFits = list(fold_1 = list(mrmash_weights = .je_fakeMrmashFit()), + fold_2 = list(mrmash_weights = .je_fakeMrmashFit()))) +} + +test_that(".runJointCell: cross-context twas expands to per-context weight vectors", { + set.seed(3) + cell <- .je_synthCell(list(.je_mkGroup("G1"))) + pipe <- new("TwasJointPipeline", config = list(cvFolds = 0)) + local_mocked_bindings(learnTwasWeights = .je_mockLearnTwas, .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mrmash") + expect_s4_class(res, "TwasWeights") + expect_equal(nrow(res), 2L) + expect_equal(as.character(res$context), c("c1", "c2")) + expect_equal(as.character(res$trait), c("G1", "G1")) + expect_equal(as.character(res$jointContexts), c("c1;c2", "c1;c2")) + # Each row carries that context's weight VECTOR (the matrix column). + w1 <- getWeights(res$entry[[1L]]) + expect_false(is.matrix(w1)); expect_length(w1, 2L) + expect_false(is.null(getFits(res$entry[[1L]]))) # shared joint fit on each +}) + +test_that(".runJointCell: cross-context twas attaches per-condition CV slices", { + set.seed(4) + cell <- .je_synthCell(list(.je_mkGroup("G1"))) + pipe <- new("TwasJointPipeline", config = list(cvFolds = 2)) + local_mocked_bindings(learnTwasWeights = .je_mockLearnTwas, + twasWeightsCv = .je_mockTwasCv, .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mrmash", + args = list(dataDrivenPriorMatricesCv = list(1, 2))) + expect_equal(nrow(res), 2L) + cv <- getCvResult(res$entry[[1L]]) + expect_equal(names(cv$foldFits), c("fold_1", "fold_2")) # shared per-fold fits + expect_false(is.null(cv$predictions)) # this context's slice + expect_false(is.null(cv$samplePartition)) + expect_false(is.matrix(getWeights(res$entry[[1L]]))) # per-context vector +}) + +test_that(".runJointCell: cross-context twas CV-only rows (fitFullData=FALSE)", { + set.seed(5) + cell <- .je_synthCell(list(.je_mkGroup("G1"))) + pipe <- new("TwasJointPipeline", config = list(cvFolds = 2, fitFullData = FALSE)) + local_mocked_bindings(twasWeightsCv = .je_mockTwasCv, .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mrmash") + expect_equal(nrow(res), 2L) + e <- res$entry[[1L]] + expect_length(getVariantIds(e), 0L) # placeholder weights + expect_null(getWeights(e)) + expect_equal(names(getCvResult(e)$foldFits), c("fold_1", "fold_2")) +}) + +# ---- sumstats column (RSS; no sample folds) --------------------------------- + +.je_mkSsGroup <- function(tid, p = 3L, k = 2L) { + Z <- matrix(rnorm(p * k), p, k, + dimnames = list(paste0("v", seq_len(p)), paste0("c", seq_len(k)))) + R <- diag(p); dimnames(R) <- list(paste0("v", seq_len(p)), paste0("v", seq_len(p))) + new("SumStatsJointGroup", + conditions = data.frame(study = "S", context = paste0("c", seq_len(k)), + trait = tid, stringsAsFactors = FALSE), + Z = Z, R = R, N = c(100, 120)) +} + +.je_ssCell <- function(groups) { + new("JointDispatchCell", pattern = "context", dataForm = "sumstats", + enumerate = function(data, scope, args) groups, minGroup = 2L) +} + +test_that(".runJointCell: cross-context FM sumstats (mvsusie_rss) -> per-context", { + set.seed(6) + cell <- .je_ssCell(list(.je_mkSsGroup("G1"))) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95)) + captured <- NULL + local_mocked_bindings(create_mixture_prior = function(...) "PRIOR", + .package = "mvsusieR") + local_mocked_bindings( + fitMvsusieRss = function(Z, R, N, prior_variance, coverage, ...) { + captured <<- list(N = N); list() }, + .fmPostprocessOne = .je_mockPostprocess, + .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mvsusie") + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 2L) + expect_equal(as.character(res$context), c("c1", "c2")) + expect_equal(as.character(res$jointContexts), c("c1;c2", "c1;c2")) + expect_equal(captured$N, 110) # median(c(100, 120)) passed once to mvsusie_rss +}) + +test_that(".runJointCell: cross-context twas sumstats (mr.mash.rss) -> per-context", { + set.seed(7) + cell <- .je_ssCell(list(.je_mkSsGroup("G1"))) + pipe <- new("TwasJointPipeline", config = list()) + local_mocked_bindings( + mrmashRssWeights = function(stat, LD, retainFit, fitDetail) { + W <- matrix(0.2, nrow(LD), ncol(stat$z), + dimnames = list(rownames(LD), colnames(stat$z))) + attr(W, "fit") <- .je_fakeMrmashFit() + W + }, + .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mrmash") + expect_s4_class(res, "TwasWeights") + expect_equal(nrow(res), 2L) + expect_equal(as.character(res$context), c("c1", "c2")) + expect_equal(as.character(res$jointContexts), c("c1;c2", "c1;c2")) + expect_false(is.matrix(getWeights(res$entry[[1L]]))) + expect_false(is.null(getFits(res$entry[[1L]]))) +}) + +test_that(".lookupJointCell: present cells resolve, absent cells error", { + for (df in c("individual", "sumstats")) { + expect_s4_class(pecotmr:::.lookupJointCell("context", df), "JointDispatchCell") + expect_s4_class(pecotmr:::.lookupJointCell("trait", df), "JointDispatchCell") + } + expect_s4_class(pecotmr:::.lookupJointCell("study", "sumstats"), + "JointDispatchCell") + expect_error(pecotmr:::.lookupJointCell("study", "individual"), + "No joint dispatch cell") +}) + +# ---- cross-study pattern (study jointed; sumstats-only) --------------------- + +test_that(".runJointCell: cross-study (twas sumstats) -> per-study rows + jointStudies", { + set.seed(10) + Z <- matrix(rnorm(6), 3, 2, dimnames = list(paste0("v", 1:3), c("S1", "S2"))) + R <- diag(3); dimnames(R) <- list(paste0("v", 1:3), paste0("v", 1:3)) + grp <- new("SumStatsJointGroup", + conditions = data.frame(study = c("S1", "S2"), context = "brain", + trait = "G1", stringsAsFactors = FALSE), + Z = Z, R = R, N = c(100, 120)) + cell <- new("JointDispatchCell", pattern = "study", dataForm = "sumstats", + enumerate = function(data, scope, args) list(grp), minGroup = 2L) + pipe <- new("TwasJointPipeline", config = list()) + local_mocked_bindings( + mrmashRssWeights = function(stat, LD, retainFit, fitDetail) { + W <- matrix(0.2, nrow(LD), ncol(stat$z), + dimnames = list(rownames(LD), colnames(stat$z))) + attr(W, "fit") <- .je_fakeMrmashFit(); W + }, .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mrmash") + expect_s4_class(res, "TwasWeights") + expect_equal(nrow(res), 2L) + expect_equal(as.character(res$study), c("S1", "S2")) # study is the jointed axis + expect_equal(as.character(res$context), c("brain", "brain")) + expect_equal(as.character(res$trait), c("G1", "G1")) + expect_equal(as.character(res$jointStudies), c("S1;S2", "S1;S2")) +}) + +# ---- composed pattern (>1 axis varies) -------------------------------------- + +test_that(".runJointCell: composed (context + trait vary) -> per-tuple rows", { + set.seed(11) + n <- 10L + X <- matrix(rnorm(n * 2), n, 2, + dimnames = list(paste0("s", seq_len(n)), c("v1", "v2"))) + Y <- matrix(rnorm(n * 3), n, 3, + dimnames = list(paste0("s", seq_len(n)), c("c1:gA", "c1:gB", "c2:gA"))) + conds <- data.frame(study = "S", context = c("c1", "c1", "c2"), + trait = c("gA", "gB", "gA"), stringsAsFactors = FALSE) + grp <- new("IndividualJointGroup", conditions = conds, X = X, Y = Y) + cell <- new("JointDispatchCell", pattern = "composed", dataForm = "individual", + enumerate = function(data, scope, args) list(grp), minGroup = 2L) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95, cvFolds = 0)) + local_mocked_bindings(create_mixture_prior = function(...) "PRIOR", + .package = "mvsusieR") + local_mocked_bindings(fitMvsusie = function(...) list(), + .fmPostprocessOne = .je_mockPostprocess, .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mvsusie") + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 3L) # one row per tuple + expect_equal(as.character(res$study), rep("S", 3L)) # constant -> fixed + expect_equal(as.character(res$context), c("c1", "c1", "c2")) # real per-tuple + expect_equal(as.character(res$trait), c("gA", "gB", "gA")) + # Both varying axes carry their provenance member list on every row. + expect_equal(as.character(res$jointContexts), rep("c1;c2", 3L)) + expect_equal(as.character(res$jointTraits), rep("gA;gB", 3L)) +}) + +# ---- cross-trait pattern (trait jointed, context fixed) --------------------- + +.je_mkTraitGroup <- function(cx, n = 10L) { + X <- matrix(rnorm(n * 2), n, 2, + dimnames = list(paste0("s", seq_len(n)), c("v1", "v2"))) + Y <- matrix(rnorm(n * 2), n, 2, # conditions are traits here + dimnames = list(paste0("s", seq_len(n)), c("G1", "G2"))) + new("IndividualJointGroup", + conditions = data.frame(study = "S", context = cx, + trait = c("G1", "G2"), stringsAsFactors = FALSE), + X = X, Y = Y) +} + +.je_traitCell <- function(groups) { + new("JointDispatchCell", pattern = "trait", dataForm = "individual", + enumerate = function(data, scope, args) groups, minGroup = 2L) +} + +test_that(".runJointCell: cross-trait FM -> per-trait rows (context fixed)", { + set.seed(8) + cell <- .je_traitCell(list(.je_mkTraitGroup("brain"))) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95, cvFolds = 0)) + local_mocked_bindings(create_mixture_prior = function(...) "PRIOR", + .package = "mvsusieR") + local_mocked_bindings(fitMvsusie = function(...) list(), + .fmPostprocessOne = .je_mockPostprocess, .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mvsusie") + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 2L) + expect_equal(as.character(res$context), c("brain", "brain")) # fixed + expect_equal(as.character(res$trait), c("G1", "G2")) # real per-trait + expect_equal(as.character(res$jointTraits), c("G1;G2", "G1;G2")) +}) + +test_that(".runJointCell: cross-trait twas -> per-trait weight vectors", { + set.seed(9) + cell <- .je_traitCell(list(.je_mkTraitGroup("brain"))) + pipe <- new("TwasJointPipeline", config = list(cvFolds = 0)) + local_mocked_bindings(learnTwasWeights = .je_mockLearnTwas, .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mrmash") + expect_s4_class(res, "TwasWeights") + expect_equal(nrow(res), 2L) + expect_equal(as.character(res$context), c("brain", "brain")) + expect_equal(as.character(res$trait), c("G1", "G2")) + expect_equal(as.character(res$jointTraits), c("G1;G2", "G1;G2")) + expect_false(is.matrix(getWeights(res$entry[[1L]]))) +}) + +test_that("fitJointGroup(Individual, Fm): fsusie returns one entry per trait", { + set.seed(12) + n <- 10L + X <- matrix(rnorm(n * 2), n, 2, + dimnames = list(paste0("s", seq_len(n)), c("v1", "v2"))) + Y <- matrix(rnorm(n * 2), n, 2, + dimnames = list(paste0("s", seq_len(n)), c("G1", "G2"))) + grp <- new("IndividualJointGroup", + conditions = data.frame(study = "S", context = "brain", + trait = c("G1", "G2"), stringsAsFactors = FALSE), + X = X, Y = Y, pos = c(100, 200)) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95)) + captured <- NULL + local_mocked_bindings( + fitFsusie = function(...) { captured <<- list(...); list() }, + .fmPostprocessOne = .je_mockPostprocess, .package = "pecotmr") + + entries <- pecotmr:::fitJointGroup(grp, pipe, "fsusie", list()) + expect_type(entries, "list") + expect_length(entries, 2L) # one per trait + expect_s4_class(entries[[1L]], "FineMappingEntry") + expect_equal(captured$pos, c(100, 200)) # functional domain threaded +}) + +test_that("fitJointGroup(Individual, Fm): fsusie without pos errors; unknown token errors", { + X <- matrix(0, 6, 2, dimnames = list(paste0("s", 1:6), c("v1", "v2"))) + Y <- matrix(0, 6, 2, dimnames = list(paste0("s", 1:6), c("G1", "G2"))) + grp <- new("IndividualJointGroup", + conditions = data.frame(study = "S", context = "brain", + trait = c("G1", "G2"), stringsAsFactors = FALSE), + X = X, Y = Y) # no pos + pipe <- new("FmJointPipeline", config = list(coverage = 0.95)) + expect_error(pecotmr:::fitJointGroup(grp, pipe, "fsusie", list()), "pos") + expect_error(pecotmr:::fitJointGroup(grp, pipe, "bogus", list()), + "unsupported token") +}) + +test_that(".runJointCell: composed/sumstats (context+trait vary) -> per-tuple rows", { + set.seed(13) + Z <- matrix(rnorm(9), 3, 3, + dimnames = list(paste0("v", 1:3), c("c1:gA", "c1:gB", "c2:gA"))) + R <- diag(3); dimnames(R) <- list(paste0("v", 1:3), paste0("v", 1:3)) + conds <- data.frame(study = "S", context = c("c1", "c1", "c2"), + trait = c("gA", "gB", "gA"), stringsAsFactors = FALSE) + grp <- new("SumStatsJointGroup", conditions = conds, Z = Z, R = R, + N = c(100, 100, 120)) + cell <- new("JointDispatchCell", pattern = "composed", dataForm = "sumstats", + enumerate = function(data, scope, args) list(grp), minGroup = 2L) + pipe <- new("TwasJointPipeline", config = list()) + local_mocked_bindings( + mrmashRssWeights = function(stat, LD, retainFit, fitDetail) { + W <- matrix(0.2, nrow(LD), ncol(stat$z), + dimnames = list(rownames(LD), colnames(stat$z))) + attr(W, "fit") <- .je_fakeMrmashFit(); W + }, .package = "pecotmr") + + res <- pecotmr:::.runJointCell(cell, pipe, data = NULL, scope = NULL, + tokens = "mrmash") + expect_equal(nrow(res), 3L) + expect_equal(as.character(res$context), c("c1", "c1", "c2")) + expect_equal(as.character(res$trait), c("gA", "gB", "gA")) + expect_equal(as.character(res$jointContexts), rep("c1;c2", 3L)) + expect_equal(as.character(res$jointTraits), rep("gA;gB", 3L)) + expect_false(is.matrix(getWeights(res$entry[[1L]]))) +}) + +# ---- SR-TWAS ensemble layer (.twasEnsembleLayer) ---------------------------- +# The ensemble orchestration formerly inside .twasWeightsPipelineMatrix is now a +# LAYER ON TOP of per-method fitting: per condition, read each method's retained +# out-of-fold CV predictions + weights + R^2, drop methods below the cutoff +# (needs >= 2), and stack via `ensembleWeights` per context. + +.je_ensGroup <- function(n = 40L, nCond = 2L) { + samp <- paste0("s", seq_len(n)) + new("IndividualJointGroup", + conditions = data.frame(study = "S", context = paste0("c", seq_len(nCond)), + trait = "g", stringsAsFactors = FALSE), + X = matrix(0, n, 3, dimnames = list(samp, paste0("v", 1:3))), + Y = matrix(rnorm(n * nCond), n, nCond, + dimnames = list(samp, paste0("c", seq_len(nCond))))) +} +# One method's per-condition entries; CV predictions correlate with Y by predCor +# so the R^2 the layer reads is controllable. +.je_ensEntries <- function(group, predCor) { + Y <- group@Y; vars <- colnames(group@X) + lapply(seq_len(ncol(Y)), function(r) { + pr <- predCor * Y[, r] + rnorm(nrow(Y), sd = 0.3); names(pr) <- rownames(Y) + rsq <- stats::cor(Y[, r], pr)^2 + TwasWeightsEntry(variantIds = vars, weights = rnorm(length(vars)), + cvResult = list(samplePartition = NULL, predictions = pr, + metrics = c(corr = sqrt(rsq), rsq = rsq, adj_rsq = rsq, pval = 0.01, + RMSE = 1, MAE = 1), foldFits = NULL)) + }) +} + +test_that(".twasEnsembleLayer: >= 2 methods passing -> per-condition ensemble entries", { + set.seed(1); g <- .je_ensGroup() + pte <- list(lasso = .je_ensEntries(g, 0.85), enet = .je_ensEntries(g, 0.70)) + ens <- pecotmr:::.twasEnsembleLayer(g, pte, list( + ensembleR2Threshold = 0.01, ensembleSolver = "quadprog", + ensembleAlpha = 1, standardized = FALSE)) + expect_length(ens, 2L) + expect_s4_class(ens[[1L]], "TwasWeightsEntry") + expect_length(getWeights(ens[[1L]]), 3L) + coef <- getCvResult(ens[[1L]])$methodCoef + expect_true(all(coef >= -1e-8)); expect_equal(sum(coef), 1, tolerance = 1e-6) +}) + +test_that(".twasEnsembleLayer: < 2 methods pass the R^2 cutoff -> NULL (skip)", { + set.seed(2); g <- .je_ensGroup() + pte <- list(lasso = .je_ensEntries(g, 0.85), enet = .je_ensEntries(g, 0.70)) + ens <- pecotmr:::.twasEnsembleLayer(g, pte, list( + ensembleR2Threshold = 0.999, ensembleSolver = "quadprog", + ensembleAlpha = 1, standardized = FALSE)) + expect_true(all(vapply(ens, is.null, logical(1)))) +}) + +# ---- engine twas fitter: orchestration absorbed from .twasWeightsPipelineMatrix + +test_that("fitJointGroup(twas): leakage warning when a full-data mr.mash prior is reused across folds", { + set.seed(3) + g <- .je_mkGroup("G1") + pipe <- new("TwasJointPipeline", config = list(cvFolds = 2L, ensemble = FALSE)) + local_mocked_bindings(learnTwasWeights = .je_mockLearnTwas, + twasWeightsCv = .je_mockTwasCv, .package = "pecotmr") + expect_warning( + pecotmr:::fitJointGroup(g, pipe, "mrmash", list(methodList = list( + mrmash_weights = list(dataDrivenPriorMatrices = list(U = diag(2)))))), + "information leakage") +}) + +test_that("fitJointGroup(twas): spike-and-slab pi is estimated from an internal mr.ash fit", { + set.seed(4); n <- 30L + X <- matrix(rnorm(n * 3), n, 3, + dimnames = list(paste0("s", 1:n), paste0("v", 1:3))) + Y <- matrix(rnorm(n), n, 1, dimnames = list(paste0("s", 1:n), "c1")) + g <- new("IndividualJointGroup", + conditions = data.frame(study = "S", context = "c1", trait = "g", + stringsAsFactors = FALSE), X = X, Y = Y) + pipe <- new("TwasJointPipeline", + config = list(cvFolds = 0L, ensemble = FALSE, estimatePi = TRUE)) + capturedPi <- NULL + local_mocked_bindings( + mrashWeights = function(X, y, ...) { + out <- matrix(0.05, ncol(X), 1L, dimnames = list(colnames(X), NULL)) + attr(out, "fit") <- list(pi = c(0.8, 0.1, 0.1)); out + }, + bayesCWeights = function(X, y, pi, ...) { + capturedPi <<- pi + matrix(0, ncol(X), 1L, dimnames = list(colnames(X), NULL)) + }, + .package = "pecotmr") + pecotmr:::fitJointGroup(g, pipe, "bayes_c", + list(methodList = list(bayes_c_weights = list()))) + expect_false(is.null(capturedPi)) + expect_equal(as.numeric(capturedPi), 1 - 0.8, tolerance = 1e-8) +}) + +test_that("fitJointGroup(twas): FM-derived method reuses fine-mapping's CV (handoff, no re-CV)", { + set.seed(5) + g <- .je_mkGroup("G1") # 2 conditions (c1, c2) + pipe <- new("TwasJointPipeline", config = list(cvFolds = 2L, ensemble = FALSE)) + samp <- rownames(g@X) + fmCv <- list( + samplePartition = data.frame(Sample = samp, + Fold = rep(1:2, length.out = length(samp))), + prediction = list(mvsusie_predicted = matrix( + rnorm(length(samp) * 2), length(samp), 2, + dimnames = list(samp, c("c1", "c2")))), + performance = list(mvsusie_performance = matrix( + 0.5, 2, 6, dimnames = list(c("c1", "c2"), + c("corr", "rsq", "adj_rsq", "pval", "RMSE", "MAE"))))) + cvCalled <- FALSE + local_mocked_bindings( + twasWeightsCv = function(...) { cvCalled <<- TRUE; .je_mockTwasCv(...) }, + .package = "pecotmr") + local_mocked_bindings( + mvsusieWeights = function(X, Y, mvsusieFit = NULL, ...) + matrix(0.1, ncol(X), ncol(Y), dimnames = list(colnames(X), colnames(Y))), + .package = "pecotmr") + entries <- pecotmr:::fitJointGroup(g, pipe, "mvsusie", list( + methodList = list(mvsusie_weights = list()), + fittedModels = list(mvsusie = list(dummy = TRUE)), + fineMappingCv = fmCv)) + expect_false(cvCalled) # handoff used, no re-CV + expect_false(is.null(getCvResult(entries[[1L]])$predictions)) +}) + +# ============================================================================= +# Enumerators (pattern x dataForm -> list) +# ----------------------------------------------------------------------------- +# The dispatch table stores each enumerator in a JointDispatchCell@enumerate +# slot; the pipeline reaches them via `cell@enumerate(...)`. covr cannot trace a +# function invoked through such a stored reference, so these call the enumerators +# DIRECTLY (pecotmr:::.enum*) with the per-group X/Y/Z builders mocked, asserting +# the enumeration wiring (scope gating + per-group conditions). +# ============================================================================= + +# A QtlSumStats-shaped data.frame: the sumstat enumerators only touch +# data$study / data$context / data$trait and nrow(data). +.je_ssDf <- function(studies = "S", contexts = c("c1", "c2"), traits = "t1") { + expand.grid(study = studies, context = contexts, trait = traits, + stringsAsFactors = FALSE) +} +# Mock .buildJointSumstatZMatrix: a (p x k) Z plus n vector, keyed by colLabels. +.je_mockJointZ <- function(data, tupleRows, colLabels, errorLabel) { + p <- 3L + list(Z = matrix(seq_len(p * length(colLabels)), p, length(colLabels), + dimnames = list(paste0("v", seq_len(p)), colLabels)), + nVec = rep(100, length(colLabels)), + variantIds = paste0("v", seq_len(p))) +} +.je_mockLd <- function(sketch, vids) + matrix(0, length(vids), length(vids), dimnames = list(vids, vids)) + +# A real SE (rowRanges carry the trait coordinates fsusie's `pos` needs). +.je_mkSe <- function(traits = c("G1", "G2"), n = 6L) { + rng <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(start = seq(100L, by = 100L, length.out = length(traits)), + width = 1L)) + names(rng) <- traits + SummarizedExperiment::SummarizedExperiment( + assays = list(e = matrix(0, length(traits), n, + dimnames = list(traits, paste0("s", seq_len(n))))), + rowRanges = rng) +} + +test_that(".enumCrossContextIndividual: one group per trait in >= 2 contexts", { + scope <- list(studies = "S", contexts = list(S = c("c1", "c2")), + traits = list(S = c("G1", "G2"))) + local_mocked_bindings( + getStudy = function(data) "S", + .buildIndividualCrossContextXY = function(data, tid, scopedContexts, + cisWindow, verbose, label, + region = NULL) { + if (tid == "G2") return(NULL) # skip branch (461) + X <- matrix(0, 4, 2, dimnames = list(paste0("s", 1:4), c("v1", "v2"))) + Y <- matrix(0, 4, 2, dimnames = list(paste0("s", 1:4), c("c1", "c2"))) + list(X = X, Y = Y, perTraitContexts = c("c1", "c2")) + }, + .package = "pecotmr") + g <- pecotmr:::.enumCrossContextIndividual(NULL, scope) + expect_length(g, 1L) # only G1 survives + expect_s4_class(g[[1L]], "IndividualJointGroup") + expect_equal(as.character(g[[1L]]@conditions$context), c("c1", "c2")) + expect_equal(as.character(g[[1L]]@conditions$trait), c("G1", "G1")) +}) + +test_that(".enumCrossContextIndividual: study not in scope / < 2 contexts -> empty", { + local_mocked_bindings(getStudy = function(data) "S", .package = "pecotmr") + expect_length(pecotmr:::.enumCrossContextIndividual( + NULL, list(studies = "OTHER", contexts = list(), traits = list())), 0L) + expect_length(pecotmr:::.enumCrossContextIndividual( + NULL, list(studies = "S", contexts = list(S = "c1"), + traits = list(S = "G1"))), 0L) +}) + +test_that(".enumCrossContextSumstats: groups per (study, trait) with >= 2 contexts", { + df <- .je_ssDf(studies = "S", contexts = c("c1", "c2"), traits = "t1") + scope <- list(studies = "S", contexts = list(S = c("c1", "c2")), + traits = list(S = "t1")) + local_mocked_bindings( + getLdSketch = function(x) "SKETCH", + .buildJointSumstatZMatrix = .je_mockJointZ, .fmLdFromSketch = .je_mockLd, + .package = "pecotmr") + g <- pecotmr:::.enumCrossContextSumstats(df, scope) + expect_length(g, 1L) + expect_s4_class(g[[1L]], "SumStatsJointGroup") + expect_equal(as.character(g[[1L]]@conditions$context), c("c1", "c2")) +}) + +test_that(".enumCrossContextSumstats: < 2 contexts and < 2 tuple rows skip", { + local_mocked_bindings( + getLdSketch = function(x) "SKETCH", + .buildJointSumstatZMatrix = .je_mockJointZ, .fmLdFromSketch = .je_mockLd, + .package = "pecotmr") + # study scoped to two contexts but only one row present -> < 2 tupleRows skip + df <- .je_ssDf(studies = "S", contexts = "c1", traits = "t1") + scope <- list(studies = "S", contexts = list(S = c("c1", "c2")), + traits = list(S = "t1")) + expect_length(pecotmr:::.enumCrossContextSumstats(df, scope), 0L) + # < 2 scoped contexts -> skip + scope2 <- list(studies = "S", contexts = list(S = "c1"), + traits = list(S = "t1")) + expect_length(pecotmr:::.enumCrossContextSumstats(df, scope2), 0L) +}) + +test_that(".enumCrossTraitIndividual: one group per context with >= 2 traits + pos", { + scope <- list(studies = "S", contexts = list(S = c("c1", "c2")), + traits = list(S = c("G1", "G2"))) + local_mocked_bindings( + getStudy = function(data) "S", + .buildIndividualCrossTraitXY = function(data, cx, scopedTraits, cisWindow, + verbose, label, study, + region = NULL) { + if (cx == "c2") return(NULL) # skip branch (511) + X <- matrix(0, 4, 2, dimnames = list(paste0("s", 1:4), c("v1", "v2"))) + Y <- matrix(0, 4, 2, dimnames = list(paste0("s", 1:4), c("G1", "G2"))) + list(X = X, Y = Y, traitsHere = c("G1", "G2"), se = .je_mkSe()) + }, + .package = "pecotmr") + g <- pecotmr:::.enumCrossTraitIndividual(NULL, scope) + expect_length(g, 1L) + expect_equal(as.character(g[[1L]]@conditions$context), c("c1", "c1")) + expect_equal(as.character(g[[1L]]@conditions$trait), c("G1", "G2")) + expect_equal(g[[1L]]@pos, c(100, 200)) # rowRanges midpoints +}) + +test_that(".enumCrossTraitIndividual: study not in scope -> empty", { + local_mocked_bindings(getStudy = function(data) "S", .package = "pecotmr") + expect_length(pecotmr:::.enumCrossTraitIndividual( + NULL, list(studies = "X", contexts = list(), traits = list())), 0L) +}) + +test_that(".enumCrossTraitSumstats: groups per (study, context) with >= 2 traits", { + df <- .je_ssDf(studies = "S", contexts = "c1", traits = c("t1", "t2")) + scope <- list(studies = "S", contexts = list(S = "c1"), + traits = list(S = c("t1", "t2"))) + local_mocked_bindings( + getLdSketch = function(x) "SKETCH", + .buildJointSumstatZMatrix = .je_mockJointZ, .fmLdFromSketch = .je_mockLd, + .package = "pecotmr") + g <- pecotmr:::.enumCrossTraitSumstats(df, scope) + expect_length(g, 1L) + expect_equal(as.character(g[[1L]]@conditions$trait), c("t1", "t2")) + # < 2 traits present -> skip + df1 <- .je_ssDf(studies = "S", contexts = "c1", traits = "t1") + expect_length(pecotmr:::.enumCrossTraitSumstats(df1, scope), 0L) +}) + +test_that(".enumCrossStudySumstats: group per (context, trait) in >= 2 studies", { + df <- .je_ssDf(studies = c("S1", "S2"), contexts = "c1", traits = "t1") + scope <- list(studies = c("S1", "S2"), + contexts = list(S1 = "c1", S2 = "c1"), + traits = list(S1 = "t1", S2 = "t1")) + local_mocked_bindings( + getLdSketch = function(x) "SKETCH", + .buildJointSumstatZMatrix = .je_mockJointZ, .fmLdFromSketch = .je_mockLd, + .package = "pecotmr") + g <- pecotmr:::.enumCrossStudySumstats(df, scope) + expect_length(g, 1L) + expect_equal(as.character(g[[1L]]@conditions$study), c("S1", "S2")) + # only one study in scope for the tuple -> filtered to < 2 -> skip + scope1 <- list(studies = c("S1", "S2"), + contexts = list(S1 = "c1", S2 = "other"), + traits = list(S1 = "t1", S2 = "t1")) + expect_length(pecotmr:::.enumCrossStudySumstats(df, scope1), 0L) +}) + +test_that(".enumComposedIndividual: one group joining every (context, trait) tuple", { + scope <- list(studies = "S", contexts = list(S = c("c1", "c2")), + traits = list(S = c("gA", "gB"))) + local_mocked_bindings( + getStudy = function(data) "S", + .buildComposedIndividualXY = function(data, scope, study, cisWindow, + verbose, label, region = NULL) { + Y <- matrix(0, 4, 3, dimnames = list(paste0("s", 1:4), + c("c1:gA", "c1:gB", "c2:gA"))) + X <- matrix(0, 4, 2, dimnames = list(paste0("s", 1:4), c("v1", "v2"))) + list(X = X, Y = Y, tuples = list()) + }, + .package = "pecotmr") + g <- pecotmr:::.enumComposedIndividual(NULL, scope) + expect_length(g, 1L) + expect_equal(as.character(g[[1L]]@conditions$context), c("c1", "c1", "c2")) + expect_equal(as.character(g[[1L]]@conditions$trait), c("gA", "gB", "gA")) +}) + +test_that(".enumComposedIndividual: study not in scope / NULL xy -> empty", { + local_mocked_bindings(getStudy = function(data) "S", + .buildComposedIndividualXY = function(...) NULL, .package = "pecotmr") + expect_length(pecotmr:::.enumComposedIndividual( + NULL, list(studies = "X")), 0L) # study not in scope + expect_length(pecotmr:::.enumComposedIndividual( + NULL, list(studies = "S", contexts = list(S = "c1"), + traits = list(S = "gA"))), 0L) # xy NULL +}) + +test_that(".enumUnivariateIndividual: one 1-condition group per (context, trait)", { + scope <- list(studies = "S", contexts = list(S = c("c1", "c2")), + traits = list(S = c("G1", "G2"))) + samp <- paste0("s", 1:5) + local_mocked_bindings( + getStudy = function(data) "S", + getPhenotypes = function(data, contexts) .je_mkSe(c("G1", "G2")), + .fmResidPheno = function(data, contexts, traitId, naAction = "drop") + matrix(0, 5, 1, dimnames = list(samp, traitId)), + .fmResidGeno = function(data, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 5, 2, dimnames = list(samp, c("v1", "v2"))), + .package = "pecotmr") + g <- pecotmr:::.enumUnivariateIndividual(NULL, scope) + expect_length(g, 4L) # 2 ctx x 2 traits + expect_true(all(vapply(g, function(x) nrow(x@conditions), integer(1)) == 1L)) +}) + +test_that(".enumUnivariateIndividual: too few shared samples skips the tuple", { + scope <- list(studies = "S", contexts = list(S = "c1"), + traits = list(S = "G1")) + local_mocked_bindings( + getStudy = function(data) "S", + getPhenotypes = function(data, contexts) .je_mkSe("G1"), + .fmResidPheno = function(data, contexts, traitId, naAction = "drop") + matrix(0, 1, 1, dimnames = list("s1", traitId)), # one sample + .fmResidGeno = function(data, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 1, 2, dimnames = list("s1", c("v1", "v2"))), + .package = "pecotmr") + expect_length(pecotmr:::.enumUnivariateIndividual(NULL, scope), 0L) +}) + +test_that(".enumUnivariateIndividual: study not in scope -> empty", { + local_mocked_bindings(getStudy = function(data) "S", .package = "pecotmr") + expect_length(pecotmr:::.enumUnivariateIndividual( + NULL, list(studies = "OTHER")), 0L) +}) + +test_that(".enumComposedSumstats: one group per fixed-axis row block", { + df <- .je_ssDf(studies = "S", contexts = c("c1", "c2"), traits = "t1") + scope <- list(studies = "S", contexts = list(S = c("c1", "c2")), + traits = list(S = "t1")) + local_mocked_bindings( + getLdSketch = function(x) "SKETCH", + .enumerateComposedSumstatGroups = function(spec, data, scope) + list(groups = list(c(1L, 2L)), + studyCol = c("S", "S"), contextCol = c("c1", "c2"), + traitCol = c("t1", "t1")), + .buildJointSumstatZMatrix = .je_mockJointZ, .fmLdFromSketch = .je_mockLd, + .package = "pecotmr") + g <- pecotmr:::.enumComposedSumstats(df, scope, args = list(axes = c("context", "trait"))) + expect_length(g, 1L) + expect_equal(as.character(g[[1L]]@conditions$context), c("c1", "c2")) +}) + +test_that(".enumComposedSumstats: NULL group index and singleton blocks skip", { + local_mocked_bindings(getLdSketch = function(x) "SKETCH", + .enumerateComposedSumstatGroups = function(spec, data, scope) NULL, + .package = "pecotmr") + expect_length(pecotmr:::.enumComposedSumstats( + .je_ssDf(), list(studies = "S")), 0L) # gi NULL (646) + local_mocked_bindings(getLdSketch = function(x) "SKETCH", + .enumerateComposedSumstatGroups = function(spec, data, scope) + list(groups = list(1L), studyCol = "S", contextCol = "c1", + traitCol = "t1"), + .buildJointSumstatZMatrix = .je_mockJointZ, .fmLdFromSketch = .je_mockLd, + .package = "pecotmr") + expect_length(pecotmr:::.enumComposedSumstats( + .je_ssDf(), list(studies = "S"), + args = list(axes = c("context", "trait"))), 0L) # < 2 gIdx (649) +}) + +# ============================================================================= +# fitJointGroup branches not covered by the happy-path tests above +# ============================================================================= + +test_that("fitJointGroup(Individual, Fm): fsusie honest per-fold CV is attached", { + set.seed(20); n <- 8L + X <- matrix(rnorm(n * 2), n, 2, dimnames = list(paste0("s", 1:n), c("v1", "v2"))) + Y <- matrix(rnorm(n * 2), n, 2, dimnames = list(paste0("s", 1:n), c("G1", "G2"))) + grp <- new("IndividualJointGroup", + conditions = data.frame(study = "S", context = "brain", + trait = c("G1", "G2"), stringsAsFactors = FALSE), + X = X, Y = Y, pos = c(100, 200)) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95, cvFolds = 3)) + cvCalled <- FALSE + local_mocked_bindings( + fitFsusie = function(...) list(), + fsusieWeights = function(fsusieFit, variantIds) NULL, + .fmPostprocessOne = .je_mockPostprocess, + .fmCrossValidate = function(X, Y, token, methodArgs, fold, ...) { + cvCalled <<- TRUE; list(samplePartition = NULL) }, + .fmSliceCv = function(cv, token) list(prediction = NULL), + .fmAttachCv = function(e, cv) e, + .package = "pecotmr") + entries <- pecotmr:::fitJointGroup(grp, pipe, "fsusie", list()) + expect_true(cvCalled) # CV path exercised + expect_length(entries, 2L) +}) + +test_that("fitJointGroup(Individual, Fm): SER pre-screen skips when < 2 survivors", { + g <- .je_mkGroup("G1") + pipe <- new("FmJointPipeline", config = list(coverage = 0.95, verbose = 1)) + local_mocked_bindings( + .fmScreenActive = function(cut) TRUE, + .fmSerScreenColumns = function(X, Y, cut) c(TRUE, FALSE), # 1 survivor + .package = "pecotmr") + entries <- suppressMessages( + pecotmr:::fitJointGroup(g, pipe, "mvsusie", list(pipCutoffToSkip = 0.8))) + expect_length(entries, 2L) # one per ORIGINAL cond + expect_true(all(vapply(entries, is.null, logical(1)))) # all-NULL (skipped) +}) + +test_that("fitJointGroup(Individual, Fm): SER pre-screen keeps a subset of conditions", { + set.seed(21); n <- 10L + X <- matrix(rnorm(n * 2), n, 2, dimnames = list(paste0("s", 1:n), c("v1", "v2"))) + Y <- matrix(rnorm(n * 3), n, 3, + dimnames = list(paste0("s", 1:n), c("c1", "c2", "c3"))) + g <- new("IndividualJointGroup", + conditions = data.frame(study = "S", context = c("c1", "c2", "c3"), + trait = "G1", stringsAsFactors = FALSE), X = X, Y = Y) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95, verbose = 1)) + local_mocked_bindings(create_mixture_prior = function(...) "PRIOR", + .package = "mvsusieR") + local_mocked_bindings( + .fmScreenActive = function(cut) TRUE, + .fmSerScreenColumns = function(X, Y, cut) c(TRUE, FALSE, TRUE), # drop c2 + fitMvsusie = function(...) list(), + .fmPostprocessOne = .je_mockPostprocess, + .package = "pecotmr") + entries <- suppressMessages( + pecotmr:::fitJointGroup(g, pipe, "mvsusie", list(pipCutoffToSkip = 0.8))) + expect_length(entries, 3L) + expect_null(entries[[2L]]) # screened-out condition + expect_s4_class(entries[[1L]], "FineMappingEntry") + expect_s4_class(entries[[3L]], "FineMappingEntry") +}) + +test_that("fitJointGroup(SumStats, Fm): fsusie and unknown tokens error", { + grp <- .je_mkSsGroup("G1") + pipe <- new("FmJointPipeline", config = list(coverage = 0.95)) + expect_error(pecotmr:::fitJointGroup(grp, pipe, "fsusie", list()), + "no RSS variant") + expect_error(pecotmr:::fitJointGroup(grp, pipe, "bogus", list()), + "unsupported") +}) + +test_that("fitJointGroup(SumStats, Fm): a reweighted-prior residual variance is threaded", { + grp <- .je_mkSsGroup("G1") + pipe <- new("FmJointPipeline", config = list(coverage = 0.95)) + captured <- NULL + local_mocked_bindings( + .buildMvsusieReweightedPrior = function(fitParts, conditions, ddCut) + list(priorVariance = "PV", residualVariance = diag(2)), + fitMvsusieRss = function(Z, R, N, prior_variance, coverage, + residual_variance = NULL, ...) { + captured <<- residual_variance; list() }, + .fmPostprocessOne = .je_mockPostprocess, + .package = "pecotmr") + pecotmr:::fitJointGroup(grp, pipe, "mvsusie", list()) + expect_false(is.null(captured)) # residual_variance set +}) + +test_that("fitJointGroup(twas): spike-and-slab pi feeds bayes_b probIn", { + set.seed(22); n <- 30L + X <- matrix(rnorm(n * 3), n, 3, dimnames = list(paste0("s", 1:n), paste0("v", 1:3))) + Y <- matrix(rnorm(n), n, 1, dimnames = list(paste0("s", 1:n), "c1")) + g <- new("IndividualJointGroup", + conditions = data.frame(study = "S", context = "c1", trait = "g", + stringsAsFactors = FALSE), X = X, Y = Y) + pipe <- new("TwasJointPipeline", + config = list(cvFolds = 0L, ensemble = FALSE, estimatePi = TRUE)) + capturedProbIn <- NULL + local_mocked_bindings( + mrashWeights = function(X, y, ...) { + out <- matrix(0.05, ncol(X), 1L, dimnames = list(colnames(X), NULL)) + attr(out, "fit") <- list(pi = c(0.7, 0.2, 0.1)); out + }, + bayesBWeights = function(X, y, probIn, ...) { + capturedProbIn <<- probIn + matrix(0, ncol(X), 1L, dimnames = list(colnames(X), NULL)) + }, + .package = "pecotmr") + pecotmr:::fitJointGroup(g, pipe, "bayes_b", + list(methodList = list(bayes_b_weights = list()))) + expect_equal(as.numeric(capturedProbIn), 1 - 0.7, tolerance = 1e-8) +}) + +test_that("fitJointGroup(SumStats, twas): a vector weight without rownames falls back to Z rows", { + grp <- .je_mkSsGroup("G1", p = 3L, k = 2L) + pipe <- new("TwasJointPipeline", config = list()) + local_mocked_bindings( + mrmashRssWeights = function(stat, LD, retainFit, fitDetail) { + # Return a bare numeric vector (one column collapsed) with no names. + w <- as.numeric(rep(0.2, nrow(LD) * ncol(stat$z))) + attr(w, "fit") <- .je_fakeMrmashFit(); w + }, + .package = "pecotmr") + res <- pecotmr:::.runJointCell( + .je_ssCell(list(grp)), pipe, data = NULL, scope = NULL, tokens = "mrmash") + expect_s4_class(res, "TwasWeights") + expect_equal(getVariantIds(res$entry[[1L]]), rownames(grp@Z)) # fallback vids +}) + +# ============================================================================= +# .runJointCell + ensemble-layer branches +# ============================================================================= + +test_that(".runJointCell: empty enumeration -> NULL", { + emptyCell <- new("JointDispatchCell", pattern = "context", + dataForm = "individual", + enumerate = function(data, scope, args) list(), minGroup = 2L) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95)) + expect_null(pecotmr:::.runJointCell(emptyCell, pipe, NULL, NULL, "mvsusie")) +}) + +test_that(".runJointCell: a fitter returning all-NULL entries yields no rows -> NULL", { + cell <- .je_synthCell(list(.je_mkGroup("G1"))) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95)) + local_mocked_bindings(create_mixture_prior = function(...) "PRIOR", + .package = "mvsusieR") + # fitJointGroup returns a list of NULLs (every condition screened out). + local_mocked_bindings( + fitJointGroup = function(group, pipeline, token, args) + vector("list", nrow(group@conditions)), + .package = "pecotmr") + expect_null(pecotmr:::.runJointCell(cell, pipe, NULL, NULL, "mvsusie")) +}) + +test_that(".runJointCell: twas ensemble layer adds 'ensemble' rows on top of >= 2 methods", { + set.seed(30) + cell <- .je_synthCell(list(.je_ensGroup(n = 40L, nCond = 2L))) + pipe <- new("TwasJointPipeline", config = list( + cvFolds = 2L, ensemble = TRUE, ensembleR2Threshold = 0.01, + ensembleSolver = "quadprog", ensembleAlpha = 1, standardized = FALSE)) + # Two methods, each returning per-condition entries with CV predictions. + local_mocked_bindings( + fitJointGroup = function(group, pipeline, token, args) + .je_ensEntries(group, if (token == "lasso") 0.85 else 0.7), + .package = "pecotmr") + res <- pecotmr:::.runJointCell(cell, pipe, NULL, NULL, + tokens = c("lasso", "enet")) + expect_s4_class(res, "TwasWeights") + expect_true("ensemble" %in% as.character(res$method)) +}) + +test_that(".twasEnsembleLayer: entries lacking CV predictions are skipped", { + set.seed(31); g <- .je_ensGroup(nCond = 1L) + good <- .je_ensEntries(g, 0.85) + # A method whose entry carries no CV predictions -> contributes nothing. + noCv <- list(TwasWeightsEntry(variantIds = colnames(g@X), + weights = rnorm(ncol(g@X)), cvResult = NULL)) + ens <- pecotmr:::.twasEnsembleLayer( + g, list(a = good, b = noCv, c = NULL), + list(ensembleR2Threshold = 0.01, ensembleSolver = "quadprog", + ensembleAlpha = 1, standardized = FALSE)) + expect_true(all(vapply(ens, is.null, logical(1)))) # < 2 usable -> NULL +}) + +test_that(".twasEnsembleLayer: ensembleWeights returning NULL -> NULL entry", { + set.seed(32); g <- .je_ensGroup(nCond = 1L) + pte <- list(lasso = .je_ensEntries(g, 0.85), enet = .je_ensEntries(g, 0.70)) + local_mocked_bindings(ensembleWeights = function(...) NULL, .package = "pecotmr") + ens <- pecotmr:::.twasEnsembleLayer(g, pte, list( + ensembleR2Threshold = 0.01, ensembleSolver = "quadprog", + ensembleAlpha = 1, standardized = FALSE)) + expect_true(all(vapply(ens, is.null, logical(1)))) +}) + +test_that(".twasEnsembleLayer: unnamed ensemble weights fall back to a method's variant ids", { + set.seed(33); g <- .je_ensGroup(nCond = 1L) + pte <- list(lasso = .je_ensEntries(g, 0.85), enet = .je_ensEntries(g, 0.70)) + local_mocked_bindings( + ensembleWeights = function(cvResults, Y, twasWeightList, contextIndex, + solver, alpha) + list(ensembleTwasWeights = as.numeric(rep(0.1, ncol(g@X))), # no names + methodCoef = c(0.5, 0.5), methodPerformance = c(0.8, 0.7)), + .package = "pecotmr") + ens <- pecotmr:::.twasEnsembleLayer(g, pte, list( + ensembleR2Threshold = 0.01, ensembleSolver = "quadprog", + ensembleAlpha = 1, standardized = FALSE)) + expect_s4_class(ens[[1L]], "TwasWeightsEntry") + expect_equal(getVariantIds(ens[[1L]]), colnames(g@X)) # fallback ids +}) + +# ============================================================================= +# Small slicers, .jointTwasCvResult, .twasFmHandoffCv, construct, .runJointSpecs +# ============================================================================= + +test_that(".fmSliceCvCondition / .sliceTwasCvResultToCondition: NULL passes through", { + expect_null(pecotmr:::.fmSliceCvCondition(NULL, 1L)) + expect_null(pecotmr:::.sliceTwasCvResultToCondition(NULL, 1L)) +}) + +test_that(".jointTwasCvResult: NULL cv and empty/absent payloads degrade gracefully", { + expect_null(pecotmr:::.jointTwasCvResult(NULL, "mrmash")) + # Empty prediction/performance lists and all-NULL foldFits -> NULL components. + cv <- list(samplePartition = data.frame(Sample = "s1", Fold = 1L), + prediction = list(), performance = list(), + foldFits = list(fold_1 = list(other = 1))) + out <- pecotmr:::.jointTwasCvResult(cv, "mrmash") + expect_null(out$predictions) # pickByBase empty (252) + expect_null(out$foldFits) # all-NULL ffKey (260) + # A method token absent from a non-empty prediction list -> NULL (255). + cv2 <- list(samplePartition = NULL, + prediction = list(lasso_predicted = matrix(0, 1, 1)), + performance = list()) + expect_null(pecotmr:::.jointTwasCvResult(cv2, "mrmash")$predictions) +}) + +test_that(".twasFmHandoffCv: a token absent from the FM CV predictions -> NULL", { + fmCv <- list(samplePartition = data.frame(Sample = "s1", Fold = 1L), + prediction = list(susie_predicted = matrix(0, 1, 1)), + performance = list()) + expect_null(pecotmr:::.twasFmHandoffCv(fmCv, "mvsusie")) + expect_null(pecotmr:::.twasFmHandoffCv(NULL, "mvsusie")) +}) + +test_that("construct: empty rows -> NULL for both pipelines", { + empty <- pecotmr:::.jointRows() + expect_null(pecotmr:::construct(new("FmJointPipeline", config = list()), empty)) + expect_null(pecotmr:::construct(new("TwasJointPipeline", config = list()), empty)) +}) + +test_that(".runJointSpecs: no methods or no specs -> NULL", { + pipe <- new("FmJointPipeline", config = list(ldSketch = NULL)) + expect_null(pecotmr:::.runJointSpecs(list(), NULL, "individual", pipe, + jointMethods = "mvsusie", + contexts = NULL, traitIds = NULL)) + expect_null(pecotmr:::.runJointSpecs(list(list(axes = "context")), NULL, + "individual", pipe, + jointMethods = character(0), + contexts = NULL, traitIds = NULL)) +}) + +# ============================================================================= +# Remaining branches: .twasGroupArgs CV-partition handoff, a token whose fit +# is NULL, and .runJointSpecs' region-mode trait restriction. +# ============================================================================= + +test_that(".twasGroupArgs: takes the CV partition from the fine-mapping CV when none is set", { + g <- .je_mkGroup("G1") # IndividualJointGroup + pipe <- new("TwasJointPipeline", config = list(cvFolds = 2L)) + sp <- data.frame(Sample = rownames(g@X), + Fold = rep(1:2, length.out = nrow(g@X)), + stringsAsFactors = FALSE) + local_mocked_bindings( + .twasFineMappingFits = function(fineMappingResult, study, context, trait) + list(), + .twasCvResultFor = function(fmRes, s, c, t) list(samplePartition = sp), + .package = "pecotmr") + out <- pecotmr:::.twasGroupArgs(g, pipe, list(fineMappingResult = "FMR")) + expect_identical(out$samplePartition, sp) # line 692 +}) + +test_that(".runJointCell: a token whose fitter returns NULL is skipped", { + cell <- .je_synthCell(list(.je_mkGroup("G1"))) + pipe <- new("FmJointPipeline", config = list(coverage = 0.95)) + local_mocked_bindings(fitJointGroup = function(...) NULL, .package = "pecotmr") + expect_null(pecotmr:::.runJointCell(cell, pipe, NULL, NULL, "mvsusie")) # 755 +}) + +test_that(".runJointSpecs: region mode without traitId restricts scoped traits to the locus", { + pipe <- new("FmJointPipeline", config = list(ldSketch = NULL)) + region <- GenomicRanges::GRanges("chr1", IRanges::IRanges(1, 100)) + captured <- NULL + local_mocked_bindings( + # S2 has no scoped contexts -> the region-restriction loop skips it (873). + .fmResolveSpecScope = function(spec, data, contexts, traitIds) + list(studies = c("S", "S2"), + contexts = list(S = c("c1", "c2"), S2 = character(0)), + traits = list(S = c("g1", "g2"), S2 = "g9")), + getPhenotypes = function(data, contexts) .je_mkSe(c("g1", "g2")), + .fmTraitsInRegion = function(se, traits, region) { captured <<- traits; "g1" }, + .lookupJointCell = function(pattern, dataForm) .je_synthCell(list()), + .package = "pecotmr") + res <- pecotmr:::.runJointSpecs( + list(list(axes = "context", scope = NULL)), data = NULL, + dataForm = "individual", pipeline = pipe, jointMethods = "mvsusie", + contexts = NULL, traitIds = NULL, args = list(region = region)) + expect_null(res) # empty cell -> NULL + expect_equal(captured, c("g1", "g2")) # 871-875 ran +}) diff --git a/tests/testthat/test_jointSpecification.R b/tests/testthat/test_jointSpecification.R index 4379f1e3..cbaa4bed 100644 --- a/tests/testthat/test_jointSpecification.R +++ b/tests/testthat/test_jointSpecification.R @@ -334,15 +334,28 @@ test_that("parseMethods: rejects multi-axis methods at per-trait level", { "per-trait") }) -test_that("parseMethods: rejects user-rejected tokens (mrmash in fineMapping)", { +test_that("parseMethods: a TWAS-only token (mrmash) is unknown to the fine-mapping grammar", { + # mr.mash is not in the fine-mapping capability table (it is TWAS-only), so it + # is rejected as an unknown fine-mapping token. qd <- .js_makeQtlDataset() expect_error( pecotmr:::parseMethods( methods = "mrmash", data = qd, caps = pecotmr:::.fineMappingMethodCapabilities, + multivariateMethods = c("mvsusie", "fsusie")), + "unknown method token") +}) + +test_that("parseMethods: rejectedAtUser tokens are refused", { + qd <- .js_makeQtlDataset() + expect_error( + pecotmr:::parseMethods( + methods = "mvsusie", + data = qd, + caps = pecotmr:::.fineMappingMethodCapabilities, multivariateMethods = c("mvsusie", "fsusie"), - rejectedAtUser = "mrmash"), + rejectedAtUser = "mvsusie"), "cannot be user-requested") }) @@ -649,7 +662,7 @@ context("joint dispatchers (fineMappingDispatcher / twasDispatcher)") .jd_mockPostprocess <- function() { function(fit, method, dataX, dataY, coverage, secondaryCoverage, signalCutoff, minAbsCorr, csInput = NULL, af = NULL, - region = NULL) { + region = NULL, conditionIdx = NULL) { if (is.matrix(dataX)) { vids <- colnames(dataX) } else if (is.list(dataY) && !is.null(dataY$z)) { @@ -708,9 +721,9 @@ test_that("fineMappingPipeline(QtlSumStats): jointSpec='context' fits one joint fineMappingPipeline(ss, methods = "mvsusie", jointSpecification = "context")) expect_s4_class(res, "QtlFineMappingResult") - expect_equal(nrow(res), 1L) - expect_equal(as.character(res$context), "joint") - expect_true(grepl("c1;c2|c2;c1", as.character(res$jointContexts))) + expect_equal(nrow(res), 2L) # per-context rows + expect_setequal(as.character(res$context), c("c1", "c2")) # REAL contexts + expect_true(all(grepl("c1;c2|c2;c1", as.character(res$jointContexts)))) }) test_that("fineMappingPipeline(QtlSumStats): jointSpec='context' with only one context skips", { @@ -746,8 +759,8 @@ test_that("fineMappingPipeline(QtlSumStats): jointSpec='trait' fits one joint pe fineMappingPipeline(ss, methods = "mvsusie", jointSpecification = "trait")) expect_s4_class(res, "QtlFineMappingResult") - expect_equal(nrow(res), 1L) - expect_equal(as.character(res$trait), "joint") + expect_equal(nrow(res), 2L) # per-trait rows + expect_setequal(as.character(res$trait), c("t1", "t2")) }) test_that("fineMappingPipeline(QtlSumStats): jointSpec='trait' with fsusie errors (no RSS variant)", { @@ -775,8 +788,8 @@ test_that("fineMappingPipeline(QtlSumStats): jointSpec='study' fits one joint pe fineMappingPipeline(ss, methods = "mvsusie", jointSpecification = "study")) expect_s4_class(res, "QtlFineMappingResult") - expect_equal(nrow(res), 1L) - expect_equal(as.character(res$study), "joint") + expect_equal(nrow(res), 2L) # per-study rows + expect_setequal(as.character(res$study), c("Q1", "Q2")) }) test_that("fineMappingPipeline(QtlSumStats): composed jointSpec axes={'study','context'} fits", { @@ -797,8 +810,9 @@ test_that("fineMappingPipeline(QtlSumStats): composed jointSpec axes={'study','c expect_s4_class(res, "QtlFineMappingResult") expect_true("jointStudies" %in% names(res)) expect_true("jointContexts" %in% names(res)) - expect_true(any(as.character(res$study) == "joint")) - expect_true(any(as.character(res$context) == "joint")) + expect_equal(nrow(res), 4L) # study x context + expect_setequal(as.character(res$study), c("Q1", "Q2")) # both vary -> real + expect_setequal(as.character(res$context), c("c1", "c2")) }) test_that("fineMappingPipeline(QtlSumStats): composed jointSpec rejects fsusie", { @@ -863,7 +877,8 @@ test_that("twasWeightsPipeline(QtlDataset): jointSpec='context' fits mr.mash per twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, jointSpecification = "context")) expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$context), "joint") + expect_equal(nrow(res), 2L) # per-context rows + expect_setequal(as.character(res$context), c("c1", "c2")) expect_true("jointContexts" %in% names(res)) }) @@ -895,7 +910,8 @@ test_that("twasWeightsPipeline(QtlDataset): jointSpec='trait' fits mr.mash per c twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, jointSpecification = "trait")) expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$trait), "joint") + expect_equal(nrow(res), 2L) # per-trait rows + expect_setequal(as.character(res$trait), c("t1", "t2")) expect_true("jointTraits" %in% names(res)) }) @@ -922,8 +938,9 @@ test_that("twasWeightsPipeline(QtlDataset): composed jointSpec axes=c('context', twasWeightsPipeline(qd, methods = "mrmash", cisWindow = 1000L, jointSpecification = list(c("context", "trait")))) expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$context), "joint") - expect_equal(as.character(res$trait), "joint") + expect_equal(nrow(res), 4L) # context x trait + expect_setequal(as.character(res$context), c("c1", "c2")) + expect_setequal(as.character(res$trait), c("t1", "t2")) }) test_that("twasWeightsPipeline(QtlDataset): composed jointSpec including 'study' errors", { @@ -952,7 +969,8 @@ test_that("twasWeightsPipeline(QtlSumStats): jointSpec='context' fits mr.mash.rs twasWeightsPipeline(ss, methods = "mrmash", jointSpecification = "context")) expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$context), "joint") + expect_equal(nrow(res), 2L) # per-context rows + expect_setequal(as.character(res$context), c("c1", "c2")) expect_true("jointContexts" %in% names(res)) }) @@ -967,7 +985,8 @@ test_that("twasWeightsPipeline(QtlSumStats): jointSpec='trait' fits mr.mash.rss twasWeightsPipeline(ss, methods = "mrmash", jointSpecification = "trait")) expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$trait), "joint") + expect_equal(nrow(res), 2L) # per-trait rows + expect_setequal(as.character(res$trait), c("t1", "t2")) }) test_that("twasWeightsPipeline(QtlSumStats): jointSpec='study' fits mr.mash.rss per (context, trait)", { @@ -981,7 +1000,8 @@ test_that("twasWeightsPipeline(QtlSumStats): jointSpec='study' fits mr.mash.rss twasWeightsPipeline(ss, methods = "mrmash", jointSpecification = "study")) expect_s4_class(res, "TwasWeights") - expect_equal(as.character(res$study), "joint") + expect_equal(nrow(res), 2L) # per-study rows + expect_setequal(as.character(res$study), c("Q1", "Q2")) }) test_that("twasWeightsPipeline(QtlSumStats): composed jointSpec axes=c('study','context') fits", { @@ -998,4 +1018,661 @@ test_that("twasWeightsPipeline(QtlSumStats): composed jointSpec axes=c('study',' expect_s4_class(res, "TwasWeights") expect_true("jointStudies" %in% names(res)) expect_true("jointContexts" %in% names(res)) + expect_equal(nrow(res), 4L) # study x context + expect_setequal(as.character(res$study), c("Q1", "Q2")) + expect_setequal(as.character(res$context), c("c1", "c2")) +}) + +# ============================================================================ +# Additional coverage: scope helpers, parser error branches, scope resolution, +# X/Y/Z builders' skip paths, multi-region merges, and MultiStudy dispatchers. +# ============================================================================ + +# ----------------------------------------------------------------------------- +# .sp* helpers: QtlSumStats / MultiStudy branches + unsupported-class errors +# ----------------------------------------------------------------------------- + +test_that(".sp* helpers: QtlSumStats study/context/trait listing + dataForm", { + ss <- .js_makeQtlSumStats(studies = c("A", "B"), contexts = c("c1", "c2"), + traits = c("t1", "t2")) + expect_setequal(pecotmr:::.spListStudies(ss), c("A", "B")) + expect_setequal(pecotmr:::.spListContexts(ss), c("c1", "c2")) # study=NULL + expect_setequal(pecotmr:::.spListContexts(ss, "A"), c("c1", "c2")) + expect_setequal(pecotmr:::.spListTraits(ss, study = "A", context = "c1"), + c("t1", "t2")) + expect_equal(pecotmr:::.spStudyDataForm(ss, "A"), "sumstats") + expect_error(pecotmr:::.spStudyDataForm(ss, "missing"), "not in QtlSumStats") +}) + +test_that(".spStudyDataForm: QtlDataset wrong study errors", { + qd <- .js_makeQtlDataset(study = "S1") + expect_error(pecotmr:::.spStudyDataForm(qd, "wrong"), "not in QtlDataset") +}) + +test_that(".sp* helpers: MultiStudy per-study context/trait routing", { + qd1 <- .js_makeQtlDataset(study = "indA", contexts = "brain", + traits = c("g1", "g2")) + ss <- .js_makeQtlSumStats(studies = "ssC", contexts = "DLPFC", + traits = "g3") + mt <- MultiStudyQtlDataset(qtlDatasets = list(indA = qd1), sumStats = ss) + expect_setequal(pecotmr:::.spListContexts(mt, "ssC"), "DLPFC") # ss branch + expect_setequal(pecotmr:::.spListContexts(mt, "indA"), "brain") + expect_setequal(pecotmr:::.spListContexts(mt), c("brain", "DLPFC")) # all + expect_equal(pecotmr:::.spListContexts(mt, "nope"), character(0)) + expect_setequal(pecotmr:::.spListTraits(mt, study = "ssC"), "g3") # ss branch + expect_setequal(pecotmr:::.spListTraits(mt, study = "indA"), c("g1", "g2")) + expect_equal(pecotmr:::.spListTraits(mt, study = "nope"), character(0)) + # study = NULL aggregates ALL traits across individual + sumstats components + # (regression guard: a present sumStats slot must not shadow the QtlDatasets). + expect_setequal(pecotmr:::.spListTraits(mt), c("g1", "g2", "g3")) +}) + +test_that(".spListTraits: study=NULL on a sumstats-free MultiStudy aggregates traits", { + qdA <- .js_makeQtlDataset(study = "indA", contexts = "brain", + traits = c("g1", "g2")) + qdB <- .js_makeQtlDataset(study = "indB", contexts = "liver", traits = "g3") + mt <- MultiStudyQtlDataset(qtlDatasets = list(indA = qdA, indB = qdB)) + expect_setequal(pecotmr:::.spListTraits(mt), c("g1", "g2", "g3")) # aggregate +}) + +test_that(".sp* helpers: unsupported class raises a labelled error", { + expect_error(pecotmr:::.spListStudies(42), "unsupported class") + expect_error(pecotmr:::.spStudyDataForm(42, "x"), "unsupported class") + expect_error(pecotmr:::.spListContexts(42), "unsupported class") + expect_error(pecotmr:::.spListTraits(42), "unsupported class") +}) + +# ----------------------------------------------------------------------------- +# parseJointSpecification / parseContexts / parseTraitIds error branches +# ----------------------------------------------------------------------------- + +test_that("parseJointSpecification: malformed inputs error", { + qd <- .js_makeQtlDataset() + expect_error(pecotmr:::parseJointSpecification(42, qd), "must be NULL") + expect_error(pecotmr:::parseJointSpecification(list(list(scope = NULL)), qd), + "missing `axes`") + expect_error(pecotmr:::parseJointSpecification(list(42), qd), + "character vector or a named list") + expect_error(pecotmr:::parseJointSpecification(list(list(axes = character(0))), + qd), "non-empty character") + expect_error(pecotmr:::parseJointSpecification( + list(list(axes = "context", scope = c("a", "b"))), qd), "named list") + expect_error(pecotmr:::parseJointSpecification( + list(list(axes = "context", scope = list(study = integer(0)))), qd), + "non-empty character vector") +}) + +test_that("parseContexts: malformed inputs error", { + qd <- .js_makeQtlDataset(study = "S1", contexts = c("brain", "liver")) + expect_error(pecotmr:::parseContexts(character(0), qd), "non-empty") + expect_error(pecotmr:::parseContexts(list(brain = "x"), qd), "unknown studies") + expect_error(pecotmr:::parseContexts(list(S1 = character(0)), qd), + "non-empty character vector") + expect_error(pecotmr:::parseContexts(list("brain"), qd), "named list") # unnamed + expect_error(pecotmr:::parseContexts(42, qd), "must be NULL") +}) + +test_that("parseContexts: vector form warns on contexts missing from a study", { + qd <- .js_makeQtlDataset(study = "S1", contexts = c("brain", "liver")) + expect_warning(out <- pecotmr:::parseContexts(c("brain", "absent"), qd), + "missing requested context") + expect_equal(out$S1, "brain") +}) + +test_that("parseTraitIds: malformed inputs error", { + qd <- .js_makeQtlDataset(study = "S1", contexts = "brain", + traits = c("ENSG1", "ENSG2")) + expect_error(pecotmr:::parseTraitIds(character(0), qd), "non-empty") + expect_error(pecotmr:::parseTraitIds(42, qd), "must be NULL") + expect_error(pecotmr:::parseTraitIds(list("ENSG1"), qd), "named by study") + expect_error(pecotmr:::parseTraitIds(list(nope = "ENSG1"), qd), + "unknown studies") + expect_error(pecotmr:::parseTraitIds(list(S1 = character(0)), qd), + "non-empty character vector") + expect_error(pecotmr:::parseTraitIds(list(S1 = list("brain")), qd), + "named by context") + expect_error(pecotmr:::parseTraitIds(list(S1 = list(brain = character(0))), qd), + "non-empty character vector") + expect_error(pecotmr:::parseTraitIds(list(S1 = list(brain = "nope")), qd), + "unknown traits") + expect_error(pecotmr:::parseTraitIds(list(S1 = 42), qd), + "character vector or a named list") +}) +# (The doubly-nested study->context success path is covered by the existing +# "parseTraitIds: doubly-nested study->context validates per context" test.) + +# ----------------------------------------------------------------------------- +# .spWalkMethods / parseMethods / validateMethodsVsJointSpec error branches +# ----------------------------------------------------------------------------- + +test_that(".spWalkMethods: structural errors via parseMethods", { + qd <- .js_makeQtlDataset(study = "S1", contexts = "brain", traits = "ENSG1") + caps <- list(lasso = list(multivariate = FALSE)) + walk <- function(m) pecotmr:::parseMethods(m, data = qd, caps = caps, + multivariateMethods = character(0)) + expect_error(walk(list(S1 = 42)), "character vector or a named list") + expect_error(walk(list(S1 = list(brain = list(ENSG1 = list(x = "lasso"))))), + "cannot nest below the trait level") + expect_error(walk(list("lasso")), "non-empty names") # unnamed list node + expect_error(walk(list(S1 = list())), "non-empty names") # inner empty node + # A named-but-empty list reaches the dedicated "empty named list" guard. + expect_error(pecotmr:::.spWalkMethods(setNames(list(), character(0))), + "empty named list") +}) + +test_that("parseMethods: split-form and leaf validation errors", { + qd <- .js_makeQtlDataset(study = "S1", contexts = "brain", traits = "ENSG1") + caps <- list(lasso = list(multivariate = FALSE), + mrmash = list(multivariate = TRUE)) + expect_error(pecotmr:::parseMethods( + methods = NULL, sumStatsMethods = character(0), qtlDatasetMethods = "lasso", + data = qd, caps = caps, multivariateMethods = "mrmash"), + "non-empty character vector") + expect_error(pecotmr:::parseMethods( + methods = NULL, sumStatsMethods = "lasso", qtlDatasetMethods = character(0), + data = qd, caps = caps, multivariateMethods = "mrmash"), + "non-empty character vector") + expect_error(pecotmr:::parseMethods( + list(S1 = character(0)), data = qd, caps = caps, + multivariateMethods = "mrmash"), "non-empty character vector") + expect_error(pecotmr:::parseMethods( + list(nope = "lasso"), data = qd, caps = caps, + multivariateMethods = "mrmash"), "unknown study") + expect_error(pecotmr:::parseMethods( + list(S1 = list(absent = "lasso")), data = qd, caps = caps, + multivariateMethods = "mrmash"), "unknown context") +}) + +test_that("validateMethodsVsJointSpec: empty spec is a no-op; per-trait nesting + trait axis errors", { + expect_null(pecotmr:::validateMethodsVsJointSpec( + list(shape = "primary", methods = "lasso"), list())) + parsed <- list(list(axes = "trait")) + methodsParsed <- list(shape = "primary", + methods = list(S1 = list(brain = list(ENSG1 = "lasso")))) + expect_error(pecotmr:::validateMethodsVsJointSpec(methodsParsed, parsed), + "nests per-trait") +}) + +# ----------------------------------------------------------------------------- +# .fmResolveSpecScope: scope$study / scope$context / scope$trait + filters +# ----------------------------------------------------------------------------- + +test_that(".fmResolveSpecScope: scope + contexts + traitIds filters intersect", { + qd <- .js_makeQtlDataset(study = "S1", contexts = c("brain", "liver"), + traits = c("ENSG1", "ENSG2")) + spec <- list(scope = list(study = "S1", context = "brain", trait = "ENSG1")) + out <- pecotmr:::.fmResolveSpecScope(spec, qd) + expect_equal(out$studies, "S1") + expect_equal(out$contexts$S1, "brain") + expect_equal(out$traits$S1, "ENSG1") + # Named-list contexts + study-keyed list traitIds filters. + out2 <- pecotmr:::.fmResolveSpecScope( + list(scope = NULL), qd, contexts = list(S1 = "liver"), + traitIds = list(S1 = "ENSG2")) + expect_equal(out2$contexts$S1, "liver") + expect_equal(out2$traits$S1, "ENSG2") +}) + +# ----------------------------------------------------------------------------- +# .buildJointSumstatZMatrix: SNP-order mismatch error +# ----------------------------------------------------------------------------- + +test_that(".buildJointSumstatZMatrix: a mismatched SNP order across entries errors", { + df <- data.frame(study = "S", context = c("c1", "c2"), trait = "t1", + stringsAsFactors = FALSE) + calls <- 0L + local_mocked_bindings( + getSumstatDf = function(x, study, context, trait, require, ...) { + calls <<- calls + 1L + vid <- if (calls == 1L) c("v1", "v2") else c("v2", "v1") # reordered + data.frame(variant_id = vid, z = c(1, 2), N = c(100, 100), + stringsAsFactors = FALSE) + }, + .package = "pecotmr") + expect_error( + pecotmr:::.buildJointSumstatZMatrix(df, c(1L, 2L), c("c1", "c2"), + errorLabel = "TESTLABEL"), + "identical SNP order") +}) + +# ----------------------------------------------------------------------------- +# Individual X/Y builders: the skip paths (return NULL / message branches) +# ----------------------------------------------------------------------------- + +test_that(".buildIndividualCrossContextXY: skips when a trait spans < 2 contexts", { + se1 <- .js_makeSe(traits = "g1") # g1 present + se0 <- .js_makeSe(traits = "other") # g1 absent + local_mocked_bindings( + getPhenotypes = function(data, contexts) + if (identical(contexts, "c1")) se1 else se0, + .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildIndividualCrossContextXY( + NULL, "g1", c("c1", "c2"), cisWindow = 1000L, verbose = 1, + label = "X"))) +}) + +test_that(".buildIndividualCrossContextXY: region path + complete-case skip", { + se <- .js_makeSe(traits = "g1", samples = paste0("s", 1:6)) + samp <- paste0("s", 1:6) + region <- GenomicRanges::GRanges("chr1", IRanges::IRanges(1, 10000)) + local_mocked_bindings( + getPhenotypes = function(data, contexts) se, + .fmResidGeno = function(x, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 6, 2, dimnames = list(samp, c("v1", "v2"))), + .fmResidPheno = function(x, contexts, traitId = NULL, ...) { + ym <- function(v) matrix(v, 6, 1, dimnames = list(samp, "g1")) + list(c1 = ym(c(NA, NA, NA, NA, NA, 1)), # mostly NA -> < 2 complete + c2 = ym(rnorm(6))) + }, + .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildIndividualCrossContextXY( + NULL, "g1", c("c1", "c2"), cisWindow = NULL, verbose = 1, + label = "X", region = region))) +}) + +test_that(".buildIndividualCrossContextXY: too few shared samples skips", { + se <- .js_makeSe(traits = "g1", samples = paste0("s", 1:6)) + local_mocked_bindings( + getPhenotypes = function(data, contexts) se, + .fmResidGeno = function(x, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 1, 2, dimnames = list("zz", c("v1", "v2"))), # disjoint sample + .fmResidPheno = function(x, contexts, traitId = NULL, ...) + list(c1 = matrix(0, 6, 1, dimnames = list(paste0("s", 1:6), "g1")), + c2 = matrix(0, 6, 1, dimnames = list(paste0("s", 1:6), "g1"))), + .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildIndividualCrossContextXY( + NULL, "g1", c("c1", "c2"), cisWindow = 1000L, verbose = 1, label = "X"))) +}) + +test_that(".fmTraitsInRegion: filters traits by phenotype overlap with the region", { + se <- .js_makeSe(traits = c("g1", "g2")) # g1@~100, g2@~200 + region <- GenomicRanges::GRanges("chr1", IRanges::IRanges(90, 160)) + expect_equal(pecotmr:::.fmTraitsInRegion(se, c("g1", "g2"), region), "g1") + expect_equal(pecotmr:::.fmTraitsInRegion(se, c("g1", "g2"), NULL), + c("g1", "g2")) # NULL region -> unchanged +}) + +test_that(".buildIndividualCrossTraitXY: skip branches (< 2 traits, region, complete)", { + se2 <- .js_makeSe(traits = c("g1", "g2"), samples = paste0("s", 1:6)) + samp <- paste0("s", 1:6) + # < 2 scoped traits in the context -> NULL. + local_mocked_bindings(getPhenotypes = function(data, contexts) se2, + .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildIndividualCrossTraitXY( + NULL, "cx", "g1", cisWindow = 1000L, verbose = 1, label = "X", + study = "S"))) + # region path + < 2 complete cases. + local_mocked_bindings( + getPhenotypes = function(data, contexts) se2, + .fmResidGeno = function(x, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 6, 2, dimnames = list(samp, c("v1", "v2"))), + .fmResidPheno = function(x, contexts, traitId = NULL, ...) + cbind(g1 = c(NA, NA, NA, NA, NA, 1), g2 = rnorm(6)) |> + `rownames<-`(samp), + .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildIndividualCrossTraitXY( + NULL, "cx", c("g1", "g2"), cisWindow = NULL, verbose = 1, label = "X", + study = "S", region = GenomicRanges::GRanges("chr1", + IRanges::IRanges(1, 9999))))) +}) + +test_that(".buildComposedIndividualXY: skip branches and single-context wrap", { + se <- .js_makeSe(traits = c("g1", "g2"), samples = paste0("s", 1:6)) + samp <- paste0("s", 1:6) + scope <- list(contexts = list(S = "c1"), traits = list(S = c("g1", "g2"))) + # Single context: YresList wrap branch, then a valid 2-tuple build. + local_mocked_bindings( + getPhenotypes = function(data, contexts) se, + .fmResidGeno = function(x, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 6, 2, dimnames = list(samp, c("v1", "v2"))), + .fmResidPheno = function(x, contexts, traitId = NULL, ...) + matrix(rnorm(12), 6, 2, dimnames = list(samp, c("g1", "g2"))), + .package = "pecotmr") + out <- suppressMessages(pecotmr:::.buildComposedIndividualXY( + NULL, scope, "S", cisWindow = 1000L, verbose = 1, label = "X")) + expect_equal(ncol(out$Y), 2L) + expect_setequal(colnames(out$Y), c("c1:g1", "c1:g2")) + # < 2 tuples -> NULL. + scope1 <- list(contexts = list(S = "c1"), traits = list(S = "g1")) + local_mocked_bindings(getPhenotypes = function(data, contexts) + .js_makeSe(traits = "g1"), .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildComposedIndividualXY( + NULL, scope1, "S", cisWindow = 1000L, verbose = 1, label = "X"))) +}) + +# ----------------------------------------------------------------------------- +# .enumerateComposedSumstatGroups: empty scope + all-axes (no complement) +# ----------------------------------------------------------------------------- + +test_that(".enumerateComposedSumstatGroups: empty scope -> NULL; no complement -> one block", { + df <- data.frame(study = "S", context = c("c1", "c2"), trait = "t1", + stringsAsFactors = FALSE) + empty <- pecotmr:::.enumerateComposedSumstatGroups( + list(axes = c("context", "trait")), df, + list(studies = character(0), contexts = list(), traits = list())) + expect_null(empty) + # axes = all three -> complement empty -> a single "__all__" block. + gi <- pecotmr:::.enumerateComposedSumstatGroups( + list(axes = c("study", "context", "trait")), df, + list(studies = "S", contexts = list(S = c("c1", "c2")), + traits = list(S = "t1"))) + expect_equal(length(gi$groups), 1L) + expect_equal(names(gi$groups), "__all__") +}) + +# ----------------------------------------------------------------------------- +# .fmSynthesizeJointSpec +# ----------------------------------------------------------------------------- + +test_that(".fmSynthesizeJointSpec: trait wins over context; single/single -> empty", { + expect_equal(pecotmr:::.fmSynthesizeJointSpec(3L, 2L)[[1L]]$axes, "trait") + expect_equal(pecotmr:::.fmSynthesizeJointSpec(2L, 1L)[[1L]]$axes, "context") + expect_equal(pecotmr:::.fmSynthesizeJointSpec(1L, 1L), list()) +}) + +# ----------------------------------------------------------------------------- +# Multi-region merges: .fmMergeResultsByKey / .twasMergeResultsByKey +# ----------------------------------------------------------------------------- + +.js_fmEntry <- function(vid = "v1") + FineMappingEntry(variantIds = vid, susieFit = list(), + topLoci = data.frame(variant_id = vid, pip = 0.9, + stringsAsFactors = FALSE)) + +test_that(".fmMergeResultsByKey: merges per-region entries by (s,c,t,method)", { + mk <- function() QtlFineMappingResult( + study = "S", context = "c1", trait = "t1", method = "mvsusie", + entry = list(.js_fmEntry())) + local_mocked_bindings( + .fmMergeEntries = function(entries) entries[[1L]], .package = "pecotmr") + out <- pecotmr:::.fmMergeResultsByKey(list(mk(), mk())) + expect_s4_class(out, "QtlFineMappingResult") + expect_equal(nrow(out), 1L) + # n == 0 short-circuit returns the (empty) base unchanged. + empty <- QtlFineMappingResult(study = character(0), context = character(0), + trait = character(0), method = character(0), entry = list()) + expect_equal(nrow(pecotmr:::.fmMergeResultsByKey(list(empty, empty))), 0L) +}) + +test_that(".twasMergeResultsByKey: merges per-region TwasWeights entries", { + mk <- function() TwasWeights( + study = "S", context = "c1", trait = "t1", method = "lasso", + entry = list(TwasWeightsEntry(variantIds = "v1", weights = 0.5))) + out <- pecotmr:::.twasMergeResultsByKey(list(mk(), mk()), c("r1", "r2")) + expect_s4_class(out, "TwasWeights") + expect_equal(nrow(out), 1L) + empty <- TwasWeights(study = character(0), context = character(0), + trait = character(0), method = character(0), entry = list()) + expect_equal(length(pecotmr:::.twasMergeResultsByKey( + list(empty), "r1")$method), 0L) +}) + +# ----------------------------------------------------------------------------- +# Multi-region QtlDataset dispatch (xRegions length 2 -> merge path) +# ----------------------------------------------------------------------------- + +test_that(".twasDispatchJointSpecsQtlDataset: two region blocks are merged by key", { + r1 <- GenomicRanges::GRanges("chr1", IRanges::IRanges(50, 250)) + r2 <- GenomicRanges::GRanges("chr1", IRanges::IRanges(300, 500)) + parsed <- list(list(axes = "context", scope = NULL)) + mkRegionRes <- function() TwasWeights( + study = c("Q1", "Q1"), context = c("c1", "c2"), trait = c("t1", "t1"), + method = c("mrmash", "mrmash"), + entry = list(TwasWeightsEntry(variantIds = "v1", weights = 0.5), + TwasWeightsEntry(variantIds = "v1", weights = 0.5))) + local_mocked_bindings( + .twasDispatchJointSpecsQtlDatasetOneRegion = function(...) mkRegionRes(), + .package = "pecotmr") + res <- pecotmr:::.twasDispatchJointSpecsQtlDataset( + parsed, data = NULL, methods = "mrmash", contexts = NULL, traitIds = NULL, + cisWindow = NULL, dataType = NULL, verbose = 0, xRegions = list(r1, r2)) + expect_s4_class(res, "TwasWeights") + expect_equal(nrow(res), 2L) # per-context, merged regions +}) + +# ----------------------------------------------------------------------------- +# MultiStudy dispatchers: .fmDispatchJointSpecsMultiStudy / +# .twasDispatchJointSpecsMultiStudy (leaf dispatchers mocked) +# ----------------------------------------------------------------------------- + +test_that(".fmDispatchJointSpecsMultiStudy: routes non-study specs to components, study spec to sumstats", { + qd <- .js_makeQtlDataset(study = "indA", contexts = c("c1", "c2"), + traits = "t1") + ss <- .js_makeQtlSumStats(studies = "ssC", contexts = "c1", traits = "t1") + mt <- MultiStudyQtlDataset(qtlDatasets = list(indA = qd), sumStats = ss) + parsed <- list(list(axes = "context", scope = NULL), + list(axes = "study", scope = NULL)) + qdRes <- QtlFineMappingResult(study = "indA", context = "c1", trait = "t1", + method = "mvsusie", entry = list(.js_fmEntry())) + ssRes <- QtlFineMappingResult(study = "ssC", context = "c1", trait = "t1", + method = "mvsusie", entry = list(.js_fmEntry()), + ldSketch = .js_makeGenotypeHandle()) + local_mocked_bindings( + .fmDispatchJointSpecsQtlDataset = function(...) qdRes, + .fmDispatchJointSpecsQtlSumStats = function(...) ssRes, + .package = "pecotmr") + out <- suppressMessages(pecotmr:::.fmDispatchJointSpecsMultiStudy( + parsed, mt, methods = "mvsusie", contexts = NULL, traitIds = NULL, + cisWindow = NULL, coverage = 0.95, secondaryCoverage = 0.5, + signalCutoff = 0.1, minAbsCorr = 0.5, verbose = 1)) + expect_s4_class(out, "QtlFineMappingResult") + expect_equal(nrow(out), 2L) # indA row + ssC row +}) + +test_that(".fmDispatchJointSpecsMultiStudy: study spec with no sumStats slot messages", { + qdA <- .js_makeQtlDataset(study = "indA", contexts = c("c1", "c2"), + traits = "t1") + qdB <- .js_makeQtlDataset(study = "indB", contexts = c("c1", "c2"), + traits = "t1") + mt <- MultiStudyQtlDataset(qtlDatasets = list(indA = qdA, indB = qdB)) # no ss + parsed <- list(list(axes = "study", scope = NULL)) + expect_message( + out <- pecotmr:::.fmDispatchJointSpecsMultiStudy( + parsed, mt, methods = "mvsusie", contexts = NULL, traitIds = NULL, + cisWindow = NULL, coverage = 0.95, secondaryCoverage = 0.5, + signalCutoff = 0.1, minAbsCorr = 0.5, verbose = 1), + "no sumStats slot") + expect_null(out) +}) + +test_that(".twasDispatchJointSpecsMultiStudy: routes components + sumstats and rbinds", { + qd <- .jd_makeQtlDataset(study = "indA", contexts = c("c1", "c2"), + traits = "t1") + ss <- .jd_makeQtlSumStats(studies = "ssC", contexts = "c1", traits = "t1") + mt <- MultiStudyQtlDataset(qtlDatasets = list(indA = qd), sumStats = ss) + parsed <- list(list(axes = "context", scope = NULL), + list(axes = "study", scope = NULL)) + qdRes <- TwasWeights(study = "indA", context = "c1", trait = "t1", + method = "mrmash", entry = list(TwasWeightsEntry(variantIds = "v1", + weights = 0.5))) + ssRes <- TwasWeights(study = "ssC", context = "c1", trait = "t1", + method = "mrmash", entry = list(TwasWeightsEntry(variantIds = "v1", + weights = 0.5)), + ldSketch = .jd_makeHandle()) + local_mocked_bindings( + .twasDispatchJointSpecsQtlDataset = function(...) qdRes, + .twasDispatchJointSpecsQtlSumStats = function(...) ssRes, + .package = "pecotmr") + out <- suppressMessages(pecotmr:::.twasDispatchJointSpecsMultiStudy( + parsed, mt, methods = "mrmash", contexts = NULL, traitIds = NULL, + cisWindow = NULL, dataType = NULL, verbose = 1)) + expect_s4_class(out, "TwasWeights") + expect_equal(nrow(out), 2L) +}) + +test_that(".twasDispatchJointSpecsMultiStudy: study spec, no sumStats -> message + NULL", { + qdA <- .jd_makeQtlDataset(study = "indA", contexts = c("c1", "c2"), + traits = "t1") + qdB <- .jd_makeQtlDataset(study = "indB", contexts = c("c1", "c2"), + traits = "t1") + mt <- MultiStudyQtlDataset(qtlDatasets = list(indA = qdA, indB = qdB)) + parsed <- list(list(axes = "study", scope = NULL)) + expect_message( + out <- pecotmr:::.twasDispatchJointSpecsMultiStudy( + parsed, mt, methods = "mrmash", contexts = NULL, traitIds = NULL, + cisWindow = NULL, dataType = NULL, verbose = 1), + "no sumStats slot") + expect_null(out) +}) + +# ============================================================================ +# Mop-up: remaining .sp* empties, parseContexts unnamed list, validate no-op, +# X/Y builder skip paths, region-missing keys in merges, FM multi-region merge. +# ============================================================================ + +test_that(".sp* QtlDataset: mismatched study / absent context return empty", { + qd <- .js_makeQtlDataset(study = "S1", contexts = "brain", + traits = c("g1", "g2")) + expect_equal(pecotmr:::.spListContexts(qd, "wrong"), character(0)) # 59 + expect_equal(pecotmr:::.spListTraits(qd, study = "wrong"), character(0)) # 93 + expect_equal(pecotmr:::.spListTraits(qd, context = "nope"), character(0)) # 98 +}) + +test_that("validateMethodsVsJointSpec: per-study methods with a context joint passes", { + mp <- list(shape = "primary", methods = list(S1 = "lasso")) # depth 1 + expect_null(pecotmr:::validateMethodsVsJointSpec( + mp, list(list(axes = "context")))) # 542 +}) + +test_that(".buildIndividualCrossTraitXY: disjoint X/Y samples skip the context", { + se <- .js_makeSe(traits = c("g1", "g2"), samples = paste0("s", 1:6)) + local_mocked_bindings( + getPhenotypes = function(data, contexts) se, + .fmResidGeno = function(x, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 1, 2, dimnames = list("zz", c("v1", "v2"))), # disjoint sample + .fmResidPheno = function(x, contexts, traitId = NULL, ...) + matrix(0, 6, 2, dimnames = list(paste0("s", 1:6), c("g1", "g2"))), + .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildIndividualCrossTraitXY( + NULL, "cx", c("g1", "g2"), cisWindow = 1000L, verbose = 1, label = "X", + study = "S"))) # 730 +}) + +test_that(".buildComposedIndividualXY: disjoint samples / missing trait col / NA rows skip", { + samp <- paste0("s", 1:6) + se <- .js_makeSe(traits = c("g1", "g2"), samples = samp) + scope <- list(contexts = list(S = c("c1", "c2")), + traits = list(S = c("g1", "g2"))) + # (a) disjoint X samples -> < 2 common -> NULL (774). + local_mocked_bindings( + getPhenotypes = function(data, contexts) se, + .fmResidGeno = function(x, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 1, 2, dimnames = list("zz", c("v1", "v2"))), + .fmResidPheno = function(x, contexts, traitId = NULL, ...) + setNames(lapply(c("c1", "c2"), function(.) + matrix(rnorm(12), 6, 2, dimnames = list(samp, c("g1", "g2")))), + c("c1", "c2")), + .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildComposedIndividualXY( + NULL, scope, "S", cisWindow = 1000L, verbose = 1, label = "X"))) # 774 + # (b) one context's Y lacks the trait column -> tuple skipped -> < 2 yCols (779/785). + local_mocked_bindings( + getPhenotypes = function(data, contexts) se, + .fmResidGeno = function(x, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 6, 2, dimnames = list(samp, c("v1", "v2"))), + .fmResidPheno = function(x, contexts, traitId = NULL, ...) + list(c1 = matrix(0, 6, 1, dimnames = list(samp, "g1")), + c2 = matrix(0, 6, 1, dimnames = list(samp, "zzz"))), # no g1/g2 + .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildComposedIndividualXY( + NULL, list(contexts = list(S = c("c1", "c2")), + traits = list(S = "g1")), + "S", cisWindow = 1000L, verbose = 1, label = "X"))) # 779/785 + # (c) two valid columns but < 2 complete rows (NA) -> NULL (788). + local_mocked_bindings( + getPhenotypes = function(data, contexts) se, + .fmResidGeno = function(x, contexts, traitId = NULL, cisWindow = NULL, + region = NULL) + matrix(0, 6, 2, dimnames = list(samp, c("v1", "v2"))), + .fmResidPheno = function(x, contexts, traitId = NULL, ...) + list(c1 = matrix(c(NA, NA, NA, NA, NA, 1), 6, 1, + dimnames = list(samp, "g1")), + c2 = matrix(c(NA, NA, NA, NA, NA, 1), 6, 1, + dimnames = list(samp, "g1"))), + .package = "pecotmr") + expect_null(suppressMessages(pecotmr:::.buildComposedIndividualXY( + NULL, list(contexts = list(S = c("c1", "c2")), + traits = list(S = "g1")), + "S", cisWindow = 1000L, verbose = 1, label = "X"))) # 788 +}) + +test_that(".fmMergeResultsByKey: a key missing from a later region contributes nothing", { + twoRow <- QtlFineMappingResult( + study = c("S", "S"), context = c("c1", "c2"), trait = c("t1", "t1"), + method = c("mvsusie", "mvsusie"), + entry = list(.js_fmEntry("v1"), .js_fmEntry("v2"))) + oneRow <- QtlFineMappingResult( + study = "S", context = "c1", trait = "t1", method = "mvsusie", + entry = list(.js_fmEntry("v1"))) # missing the c2 key + seen <- 0L + local_mocked_bindings( + .fmMergeEntries = function(entries) { seen <<- seen + length(entries) + entries[[1L]] }, .package = "pecotmr") + out <- pecotmr:::.fmMergeResultsByKey(list(twoRow, oneRow)) + expect_equal(nrow(out), 2L) + expect_equal(seen, 3L) # 2 for c1 row + 1 for c2 row (849 else) +}) + +test_that(".fmDispatchJointSpecsQtlDataset: two region blocks are merged", { + qd <- .jd_makeQtlDataset(study = "Q1", contexts = c("c1", "c2"), + traits = "t1") + r1 <- GenomicRanges::GRanges("chr1", IRanges::IRanges(50, 250)) + r2 <- GenomicRanges::GRanges("chr1", IRanges::IRanges(300, 500)) + mkRes <- function() QtlFineMappingResult( + study = c("Q1", "Q1"), context = c("c1", "c2"), trait = c("t1", "t1"), + method = c("mvsusie", "mvsusie"), + entry = list(.js_fmEntry("v1"), .js_fmEntry("v1"))) + local_mocked_bindings( + .fmDispatchJointSpecsQtlDatasetOneRegion = function(...) mkRes(), + .fmMergeEntries = function(entries) entries[[1L]], + .package = "pecotmr") + res <- pecotmr:::.fmDispatchJointSpecsQtlDataset( + list(list(axes = "context", scope = NULL)), qd, methods = "mvsusie", + contexts = NULL, traitIds = NULL, cisWindow = NULL, coverage = 0.95, + secondaryCoverage = 0.5, signalCutoff = 0.1, minAbsCorr = 0.5, verbose = 0, + xRegions = list(r1, r2)) + expect_s4_class(res, "QtlFineMappingResult") + expect_equal(nrow(res), 2L) # 907-910 +}) + +test_that(".fmDispatchJointSpecsQtlDataset: a single region returns directly; all-NULL -> NULL", { + qd <- .jd_makeQtlDataset(study = "Q1", contexts = c("c1", "c2"), traits = "t1") + res1 <- QtlFineMappingResult(study = "Q1", context = "c1", trait = "t1", + method = "mvsusie", entry = list(.js_fmEntry("v1"))) + local_mocked_bindings( + .fmDispatchJointSpecsQtlDatasetOneRegion = function(...) res1, + .package = "pecotmr") + out <- pecotmr:::.fmDispatchJointSpecsQtlDataset( + list(list(axes = "context", scope = NULL)), qd, methods = "mvsusie", + contexts = NULL, traitIds = NULL, cisWindow = 1000L, coverage = 0.95, + secondaryCoverage = 0.5, signalCutoff = 0.1, minAbsCorr = 0.5, verbose = 0) + expect_identical(out, res1) # length-1 short-circuit (909) + local_mocked_bindings( + .fmDispatchJointSpecsQtlDatasetOneRegion = function(...) NULL, + .package = "pecotmr") + expect_null(pecotmr:::.fmDispatchJointSpecsQtlDataset( + list(list(axes = "context", scope = NULL)), qd, methods = "mvsusie", + contexts = NULL, traitIds = NULL, cisWindow = 1000L, coverage = 0.95, + secondaryCoverage = 0.5, signalCutoff = 0.1, minAbsCorr = 0.5, + verbose = 0)) # all-NULL (908) +}) + +test_that(".twasMergeResultsByKey: a key absent from a later region contributes nothing", { + twoRow <- TwasWeights( + study = c("S", "S"), context = c("c1", "c2"), trait = c("t1", "t1"), + method = c("lasso", "lasso"), + entry = list(TwasWeightsEntry(variantIds = "v1", weights = 0.1), + TwasWeightsEntry(variantIds = "v2", weights = 0.2))) + oneRow <- TwasWeights(study = "S", context = "c1", trait = "t1", + method = "lasso", entry = list(TwasWeightsEntry(variantIds = "v1", + weights = 0.1))) + out <- pecotmr:::.twasMergeResultsByKey(list(twoRow, oneRow), c("rA", "rB")) + expect_equal(nrow(out), 2L) # 1063 else-branch }) diff --git a/tests/testthat/test_ld.R b/tests/testthat/test_ld.R index 6c34a722..d346dcff 100644 --- a/tests/testthat/test_ld.R +++ b/tests/testthat/test_ld.R @@ -2648,6 +2648,725 @@ test_that("computeLd with shrinkage > 0", { }) +# ============================================================================= +# Additional coverage: findIntersectionRows / getRegionalLdMeta edge cases +# ============================================================================= + +test_that("findIntersectionRows errors when region falls in a coverage gap", { + # Chromosome exists but there is a gap between the two blocks; after + # clamping, no single row covers the query start -> stop (line 33). + gd <- data.frame(chrom = c(1, 1), start = c(100, 300), end = c(150, 350)) + expect_error( + pecotmr:::findIntersectionRows(gd, 1, 200, 250), + "not covered by any rows" + ) +}) + +test_that("getRegionalLdMeta handles whole-chromosome 0:0 sentinel rows", { + meta_file <- file.path(geno_test_data_dir, "ld_meta_wholechrom_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file) + cat(paste("1", "0", "0", + "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim", + sep = "\t"), "\n", file = meta_file, append = TRUE) + result <- pecotmr:::getRegionalLdMeta(meta_file, "chr1:1000-1190") + expect_true(length(result$intersections$LD_file_paths) >= 1) + expect_true(all(file.exists(result$intersections$LD_file_paths))) + # 0:0 sentinel row should have had its end set to Inf internally (line 103) + expect_true(is.infinite(max(result$ldMetaData$end))) +}) + +test_that("getRegionalLdMeta validates complete coverage when required", { + meta_file <- file.path(geno_test_data_dir, "ld_meta_complete_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + lines <- c( + paste("chrom", "start", "end", "path", sep = "\t"), + paste("1", "1000", "1200", + "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim", + sep = "\t") + ) + writeLines(lines, meta_file) + # Region fully inside block 1 -> validateSelectedRegion passes (line 122) + result <- pecotmr:::getRegionalLdMeta(meta_file, "chr1:1050-1150", + completeCoverageRequired = TRUE) + expect_true(length(result$intersections$LD_file_paths) >= 1) +}) + +# ============================================================================= +# Additional coverage: processLdMatrix variant-file auto-detect / pvar / symm +# ============================================================================= + +test_that("processLdMatrix auto-detects companion .bim when snpFilePath is NULL", { + tmp_xz <- tempfile(fileext = ".txt.xz") + on.exit(unlink(c(tmp_xz, paste0(tmp_xz, ".bim"))), add = TRUE) + file.copy(file.path(geno_test_data_dir, "LD_block_1.chr1_1000_1200.float16.txt.xz"), + tmp_xz) + # Place the companion at ".bim" so the auto-detector finds it + file.copy(file.path(geno_test_data_dir, "LD_block_1.chr1_1000_1200.float16.bim"), + paste0(tmp_xz, ".bim")) + result <- pecotmr:::processLdMatrix(tmp_xz, snpFilePath = NULL) + expect_equal(nrow(result$ldMatrix), 5L) + expect_true(all(grepl("^chr1:", rownames(result$ldMatrix)))) +}) + +test_that("processLdMatrix errors when no companion variant file is found", { + tmp_xz <- tempfile(fileext = ".txt.xz") + on.exit(unlink(tmp_xz), add = TRUE) + file.copy(file.path(geno_test_data_dir, "LD_block_1.chr1_1000_1200.float16.txt.xz"), + tmp_xz) + expect_error( + pecotmr:::processLdMatrix(tmp_xz, snpFilePath = NULL), + "No variant file found" + ) +}) + +test_that("processLdMatrix reads .pvar metadata and symmetrizes an upper-triangular matrix", { + skip_if_not_installed("pgenlibr") + pvar <- file.path(geno_test_data_dir, "test_harmonize_regions.pvar") # 8 variants + # Build an 8x8 upper-triangular matrix (lower triangle exactly zero) so the + # lower.tri == 0 branch fires (line 183). + set.seed(1) + n <- 8 + U <- diag(n) + U[upper.tri(U)] <- round(runif(sum(upper.tri(U)), -0.4, 0.8), 3) + expect_true(all(U[lower.tri(U)] == 0)) + tmp_xz <- tempfile(fileext = ".txt.xz") + on.exit(unlink(tmp_xz), add = TRUE) + con <- xzfile(tmp_xz, "w") + writeLines(paste(as.vector(t(U)), collapse = " "), con) # row-major order + close(con) + + result <- pecotmr:::processLdMatrix(tmp_xz, pvar) + expect_equal(nrow(result$ldMatrix), n) + expect_true(isSymmetric(result$ldMatrix)) + # .pvar metadata has no gpos column; pos is derived from the variant id + expect_false("gpos" %in% names(result$ldVariants)) + expect_true(all(grepl("^chr21:", rownames(result$ldMatrix)))) +}) + +# ============================================================================= +# Additional coverage: createLdMatrix empty entry / loadLdMatrix dedup +# ============================================================================= + +test_that("createLdMatrix skips empty variant-list entries", { + m1 <- matrix(c(1, 0.5, 0.5, 1), 2, 2, + dimnames = list(c("v1", "v2"), c("v1", "v2"))) + variants <- list( + data.frame(variants = character(0)), # empty -> next (line 229) + data.frame(variants = c("v1", "v2")) + ) + result <- pecotmr:::createLdMatrix(list(matrix(0, 0, 0), m1), variants) + expect_equal(nrow(result), 2L) + expect_equal(rownames(result), c("v1", "v2")) + expect_equal(result["v1", "v2"], 0.5) +}) + +test_that("loadLdMatrix removes duplicate variants from the backend result", { + meta_file <- file.path(geno_test_data_dir, "ld_meta_dedup_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file) + cat(paste("1", "1000", "1200", + "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim", + sep = "\t"), "\n", file = meta_file, append = TRUE) + + # Build an LdData whose variant IDs contain a duplicate (index 3 == index 1) + variant_ids <- c("chr1:100:A:G", "chr1:200:C:T", "chr1:100:A:G") + ref <- data.frame(chrom = c(1, 1, 1), pos = c(100, 200, 100), + A2 = c("A", "C", "A"), A1 = c("G", "T", "G"), + variant_id = variant_ids, stringsAsFactors = FALSE) + gr <- pecotmr:::.refPanelToGranges(ref) + R <- matrix(c(1, 0.5, 0.9, 0.5, 1, 0.4, 0.9, 0.4, 1), 3, 3) + rownames(R) <- colnames(R) <- variant_ids + bm <- data.frame(blockId = 1L, chrom = "1", blockStart = 100L, blockEnd = 200L, + size = 3L, startIdx = 1L, endIdx = 3L, stringsAsFactors = FALSE) + dup_ld <- LdData(correlation = R, variants = gr, blockMetadata = bm) + + local_mocked_bindings( + loadLdFromBlocks = function(ldMetaFilePath, region, extractCoordinates = NULL, + nSample = NULL) dup_ld, + .package = "pecotmr" + ) + result <- loadLdMatrix(meta_file, "chr1:100-200") + ids <- getVariantIds(result) + expect_equal(length(ids), 2L) + expect_false(any(duplicated(ids))) + expect_equal(dim(getCorrelation(result)), c(2L, 2L)) +}) + +# ============================================================================= +# Additional coverage: resolveLdSource column check +# ============================================================================= + +test_that("resolveLdSource errors when metadata has fewer than 4 columns", { + meta_file <- file.path(geno_test_data_dir, "ld_meta_3col_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines(paste("chrom", "start", "end", sep = "\t"), meta_file) + cat(paste("1", "1000", "1200", sep = "\t"), "\n", file = meta_file, append = TRUE) + expect_error(pecotmr:::resolveLdSource(meta_file), "at least 4 columns") +}) + +# ============================================================================= +# Additional coverage: loadLdFromGenotype .afreq mismatch warning +# ============================================================================= + +test_that("loadLdFromGenotype warns when .afreq is missing some variants", { + skip_if_not_installed("pgenlibr") + plink_prefix <- file.path(geno_test_data_dir, "test_variants") + local_mocked_bindings( + readAfreq = function(prefix) { + data.frame(id = "no_such_variant", alt_freq = 0.1, stringsAsFactors = FALSE) + }, + .package = "pecotmr" + ) + expect_warning( + pecotmr:::loadLdFromGenotype(plink_prefix, geno_region_all), + "no allele frequency" + ) +}) + +# ============================================================================= +# Additional coverage: .ldFromSketch onMissing="drop" +# ============================================================================= + +test_that(".ldFromSketch drops variants absent from the panel when onMissing='drop'", { + skip_if_not_installed("pgenlibr") + h <- readGenotypes(file.path(geno_test_data_dir, "test_variants"), format = "plink2") + si <- getSnpInfo(h) + ids <- c(as.character(si$SNP[1]), "bogus:999:A:G", as.character(si$SNP[3])) + m <- pecotmr:::.ldFromSketch(h, ids, onMissing = "drop") + expect_equal(dim(m), c(2L, 2L)) + expect_equal(attr(m, "keptVariantIds"), + c(as.character(si$SNP[1]), as.character(si$SNP[3]))) + expect_equal(unname(diag(m)), c(1, 1)) +}) + +# ============================================================================= +# Additional coverage: .requireMatchingLdSketches error paths +# ============================================================================= + +test_that(".requireMatchingLdSketches errors when slots are not GenotypeHandle", { + expect_error( + pecotmr:::.requireMatchingLdSketches(list(a = 1), list(b = 2), "testPipeline"), + "must both be GenotypeHandle" + ) +}) + +test_that(".requireMatchingLdSketches errors when panels differ in a column", { + skip_if_not_installed("pgenlibr") + h <- readGenotypes(file.path(geno_test_data_dir, "test_variants"), format = "plink2") + si <- getSnpInfo(h) + si2 <- si + si2$A1[1] <- if (identical(si2$A1[1], "A")) "C" else "A" # mutate one allele + h2 <- new("GenotypeHandle", path = h@path, format = h@format, + snpInfo = si2, nSamples = h@nSamples, sampleIds = h@sampleIds, + pgenPtr = NULL, chromPaths = character(0)) + expect_error( + pecotmr:::.requireMatchingLdSketches(h, h2, "testPipeline"), + "differ in column" + ) +}) + +# ============================================================================= +# Additional coverage: loadLdSketch non-LdData guard +# ============================================================================= + +test_that("loadLdSketch errors when loadLdMatrix does not return an LdData", { + local_mocked_bindings( + loadLdMatrix = function(ldMetaFilePath, region, returnGenotype = FALSE, + nSample = NULL, ...) list(), + .package = "pecotmr" + ) + expect_error( + pecotmr::loadLdSketch("fake_path.tsv", "chr1:1-100"), + "must return an LdData" + ) +}) + +# ============================================================================= +# Additional coverage: loadLdFromBlocks empty-block handling +# ============================================================================= + +test_that("loadLdFromBlocks drops empty blocks and keeps non-empty ones", { + meta_file <- file.path(geno_test_data_dir, "ld_meta_emptyblock_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + lines <- c( + paste("chrom", "start", "end", "path", sep = "\t"), + paste("1", "1000", "1200", + "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim", + sep = "\t"), + paste("1", "1200", "1400", + "LD_block_2.chr1_1200_1400.float16.txt.xz,LD_block_2.chr1_1200_1400.float16.bim", + sep = "\t") + ) + writeLines(lines, meta_file) + # Region 1180-1260: block 1 (variants 1000..1160) is empty; block 2 keeps 1200,1240 + expect_message( + result <- pecotmr:::loadLdFromBlocks(meta_file, "chr1:1180-1260"), + "Removing 1 empty LD block" + ) + ids <- getVariantIds(result) + expect_true(length(ids) >= 1) + expect_true(all(grepl("^chr1:12", ids))) +}) + +test_that("loadLdFromBlocks errors when no block has variants in the region", { + meta_file <- file.path(geno_test_data_dir, "ld_meta_noblockvar_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + lines <- c( + paste("chrom", "start", "end", "path", sep = "\t"), + paste("1", "1000", "1200", + "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim", + sep = "\t") + ) + writeLines(lines, meta_file) + # Region 1165-1175 falls between block-1 variants 1160 and the next block + expect_error( + pecotmr:::loadLdFromBlocks(meta_file, "chr1:1165-1175"), + "No variants found in any LD block" + ) +}) + +test_that("loadLdFromBlocks derives variance from allele_freq + nSample when variance is NA", { + # 9-column bim with NA variance but a present allele_freq column. + bim_file <- file.path(geno_test_data_dir, "LD_block_1_navar_tmp.bim") + meta_file <- file.path(geno_test_data_dir, "ld_meta_navar_tmp.tsv") + on.exit(unlink(c(bim_file, meta_file)), add = TRUE) + bim_lines <- c( + "1\tchr1:1000_A_G\t0\t1000\tA\tG\tNA\t0.3\t500", + "1\tchr1:1040_A_G\t0\t1040\tA\tG\tNA\t0.4\t500", + "1\tchr1:1080_A_G\t0\t1080\tA\tG\tNA\t0.2\t500", + "1\tchr1:1120_A_G\t0\t1120\tA\tG\tNA\t0.5\t500", + "1\tchr1:1160_A_G\t0\t1160\tA\tG\tNA\t0.15\t500" + ) + writeLines(bim_lines, bim_file) + writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file) + cat(paste("1", "1000", "1200", + paste0("LD_block_1.chr1_1000_1200.float16.txt.xz,", basename(bim_file)), + sep = "\t"), "\n", file = meta_file, append = TRUE) + + result <- loadLdMatrix(meta_file, "chr1:1000-1190", nSample = 500L) + ref_mcols <- S4Vectors::mcols(getVariantInfo(result)) + expect_true("variance" %in% names(ref_mcols)) + expect_true(all(!is.na(ref_mcols$variance))) + expect_true(all(ref_mcols$variance > 0)) + expect_equal(ref_mcols$n_nomiss, rep(500, length(ref_mcols$variance))) +}) + +# ============================================================================= +# Additional coverage: filterVariantsByLdReference keepIndel = FALSE +# ============================================================================= + +test_that("filterVariantsByLdReference with keepIndel=FALSE drops indels", { + meta_file <- file.path(geno_test_data_dir, "ld_meta_filtind_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + lines <- c( + paste("chrom", "start", "end", "path", sep = "\t"), + paste("1", "1000", "1200", + "LD_block_1.chr1_1000_1200.float16.txt.xz,LD_block_1.chr1_1000_1200.float16.bim", + sep = "\t") + ) + writeLines(lines, meta_file) + # All three positions are on the reference panel, but the middle one is an indel. + variant_ids <- c("chr1:1000:A:G", "chr1:1040:AT:A", "chr1:1080:A:G") + result <- suppressMessages( + filterVariantsByLdReference(variant_ids, meta_file, keepIndel = FALSE) + ) + expect_false("chr1:1040:AT:A" %in% result$data) + expect_true("chr1:1000:A:G" %in% result$data) + expect_true("chr1:1080:A:G" %in% result$data) +}) + +# ============================================================================= +# Additional coverage: partitionLdMatrix / validateBlockStructure / extractBlockMatrices +# ============================================================================= + +test_that("partitionLdMatrix accepts LdBlocks blockMetadata", { + variant_ids <- paste0("chr1:", c(100, 200, 300, 400), ":A:G") + R <- diag(4) + rownames(R) <- colnames(R) <- variant_ids + gr_blocks <- GenomicRanges::GRanges("chr1", + IRanges::IRanges(start = c(100, 300), end = c(200, 400))) + S4Vectors::mcols(gr_blocks) <- S4Vectors::DataFrame( + blockId = c(1L, 2L), chrom = c("1", "1"), size = c(2L, 2L), + startIdx = c(1L, 3L), endIdx = c(2L, 4L), + blockStart = c(100L, 300L), blockEnd = c(200L, 400L) + ) + ldb <- new("LdBlocks", blocks = gr_blocks, genome = "hg38") + ref <- pecotmr:::parseVariantId(variant_ids) + ref$variant_id <- variant_ids + gr_vars <- pecotmr:::.refPanelToGranges(ref) + ld <- LdData(correlation = R, variants = gr_vars, blockMetadata = ldb) + result <- pecotmr:::partitionLdMatrix(ld, mergeSmallBlocks = FALSE) + expect_length(result$ldMatrices, 2) + expect_equal(nrow(result$ldMatrices[[1]]), 2L) +}) + +test_that("partitionLdMatrix errors on an empty correlation matrix", { + variant_ids <- "chr1:100:A:G" + ref <- pecotmr:::parseVariantId(variant_ids) + ref$variant_id <- variant_ids + gr <- pecotmr:::.refPanelToGranges(ref) + empty_R <- matrix(numeric(0), 0, 0) + bm <- data.frame(blockId = 1L, chrom = "1", blockStart = 100L, blockEnd = 100L, + size = 1L, startIdx = 1L, endIdx = 1L, stringsAsFactors = FALSE) + ld <- LdData(correlation = empty_R, variants = gr, blockMetadata = bm) + expect_error(pecotmr:::partitionLdMatrix(ld), "Empty or NULL LD matrix") +}) + +test_that("partitionLdMatrix errors when all blocks have invalid indices", { + variant_ids <- paste0("chr1:", c(100, 200, 300), ":A:G") + R <- diag(3) + rownames(R) <- colnames(R) <- variant_ids + ref <- pecotmr:::parseVariantId(variant_ids) + ref$variant_id <- variant_ids + gr <- pecotmr:::.refPanelToGranges(ref) + bm <- data.frame(blockId = 1L, chrom = "1", blockStart = 100L, blockEnd = 300L, + size = 3L, startIdx = 10L, endIdx = 20L, stringsAsFactors = FALSE) + ld <- LdData(correlation = R, variants = gr, blockMetadata = bm) + expect_error(pecotmr:::partitionLdMatrix(ld), "No valid LD blocks found") +}) + +test_that("partitionLdMatrix removes blocks with invalid indices and reindexes", { + variant_ids <- paste0("chr1:", seq(100, 600, by = 100), ":A:G") # 6 variants + R <- diag(6) + rownames(R) <- colnames(R) <- variant_ids + ref <- pecotmr:::parseVariantId(variant_ids) + ref$variant_id <- variant_ids + gr <- pecotmr:::.refPanelToGranges(ref) + bm <- data.frame( + blockId = c(1L, 2L), chrom = c("1", "1"), + blockStart = c(100L, 400L), blockEnd = c(300L, 600L), + size = c(3L, 3L), startIdx = c(1L, 50L), endIdx = c(3L, 60L), + stringsAsFactors = FALSE + ) + ld <- LdData(correlation = R, variants = gr, blockMetadata = bm) + expect_message( + result <- pecotmr:::partitionLdMatrix(ld, mergeSmallBlocks = FALSE), + "Removing 1 LD block" + ) + expect_length(result$ldMatrices, 1) + expect_equal(nrow(result$ldMatrices[[1]]), 3L) +}) + +test_that("validateBlockStructure flags out-of-range block indices", { + mat <- diag(4) + vnames <- paste0("v", 1:4) + rownames(mat) <- colnames(mat) <- vnames + bm <- data.frame(blockId = c(1L, 2L), chrom = c("1", "1"), size = c(2L, 2L), + startIdx = c(1L, 10L), endIdx = c(2L, 12L)) + expect_error( + pecotmr:::validateBlockStructure(mat, bm, vnames), + "Block indices out of range" + ) +}) + +test_that("extractBlockMatrices skips blocks where endIdx < startIdx", { + mat <- diag(4) + vnames <- paste0("v", 1:4) + rownames(mat) <- colnames(mat) <- vnames + bm <- data.frame( + blockId = c(1L, 2L), startIdx = c(1L, 3L), endIdx = c(2L, 2L), + chrom = c("1", "1"), blockStart = c(1L, 3L), blockEnd = c(2L, 4L), + size = c(2L, 1L), stringsAsFactors = FALSE + ) + result <- pecotmr:::extractBlockMatrices(mat, bm, vnames) + valid <- result$ldMatrices[!sapply(result$ldMatrices, is.null)] + expect_length(valid, 1) + expect_equal(nrow(valid[[1]]), 2L) +}) + +# ============================================================================= +# Additional coverage: ldPruneByCorrelation snprelate backend +# ============================================================================= + +test_that("ldPruneByCorrelation snprelate backend prunes correlated columns", { + skip_if_not_installed("SNPRelate") + skip_if_not_installed("gdsfmt") + set.seed(42) + n <- 100; p <- 6 + X <- matrix(rbinom(n * p, 2, 0.3), n, p) + X[, 2] <- X[, 1] # perfect LD between columns 1 and 2 + colnames(X) <- paste0("snp", 1:p) + result <- suppressMessages( + ldPruneByCorrelation(X, corThres = 0.5, backend = "snprelate", verbose = TRUE) + ) + expect_true(ncol(result$X.new) <= p) + expect_equal(length(result$filter.id), ncol(result$X.new)) + expect_true(all(result$filter.id %in% seq_len(p))) +}) + +# ============================================================================= +# Additional coverage: dropCollinearColumns verbose messages +# ============================================================================= + +test_that("dropCollinearColumns prints verbose messages for each strategy", { + set.seed(7) + X <- matrix(rnorm(100 * 4), 100, 4) + colnames(X) <- c("a", "b", "c", "d") + X[, "c"] <- X[, "c"] * 0.001 # lowest variance + y <- X[, "a"] * 2 + rnorm(100, sd = 0.1) + + expect_message( + pecotmr:::dropCollinearColumns(X, "b", strategy = "correlation", verbose = TRUE), + "removing single column" + ) + expect_message( + pecotmr:::dropCollinearColumns(X, c("a", "b", "c"), strategy = "variance", + verbose = TRUE), + "smallest variance" + ) + expect_message( + pecotmr:::dropCollinearColumns(X, c("a", "b"), strategy = "correlation", + verbose = TRUE), + "two candidates" + ) + expect_message( + pecotmr:::dropCollinearColumns(X, c("a", "b", "c"), strategy = "correlation", + verbose = TRUE), + "highest sum" + ) + expect_message( + pecotmr:::dropCollinearColumns(X, c("a", "b", "c"), + strategy = "response_correlation", + response = y, verbose = TRUE), + "smallest .* with response" + ) +}) + +# ============================================================================= +# Additional coverage: enforceDesignFullRank verbose / fallback branches +# ============================================================================= + +test_that("enforceDesignFullRank verbose: batch-removal success + iterative path", { + set.seed(11) + X <- matrix(rnorm(80 * 4), 80, 4) + X[, 4] <- X[, 1] + X[, 2] # rank deficient, fixable by removing one column + colnames(X) <- c("a", "b", "c", "d") + C <- matrix(rnorm(80), 80, 1) + expect_message( + result <- enforceDesignFullRank(X, C, strategy = "variance", verbose = TRUE), + "enforceDesignFullRank" + ) + full_design <- cbind(1, result, C) + expect_equal(qr(full_design)$rank, ncol(full_design)) +}) + +test_that("enforceDesignFullRank verbose: constant covariate triggers fallback", { + set.seed(12) + X <- matrix(rnorm(60 * 3), 60, 3) # X itself is full rank + colnames(X) <- c("a", "b", "c") + C <- matrix(1, 60, 1) # constant -> collinear with intercept, not fixable via X + # Iterative path finds no removable X column (break), then the correlation + # fallback runs over each threshold; design stays rank-deficient throughout. + expect_message( + result <- enforceDesignFullRank(X, C, strategy = "correlation", verbose = TRUE), + "ldPruneByCorrelation fallback" + ) + expect_true(is.matrix(result)) +}) + +test_that("enforceDesignFullRank verbose: batch removal insufficient path", { + set.seed(13) + X <- matrix(rnorm(60 * 3), 60, 3) + X <- cbind(X, X[, 1]) # duplicate of column 1 + colnames(X) <- c("a", "b", "c", "d") + C <- matrix(1, 60, 1) # constant covariate keeps design deficient + # Removing the QR-flagged X column cannot restore full rank (C is constant), + # so the batch-removal "insufficient" branch fires and skips iterative pruning. + expect_message( + result <- enforceDesignFullRank(X, C, strategy = "correlation", verbose = TRUE), + "batch removal insufficient" + ) + expect_true(is.matrix(result)) +}) + +# ============================================================================= +# Additional coverage: ldClumpByScore verbose + FBM input +# ============================================================================= + +test_that("ldClumpByScore prints verbose message for single-variant input", { + skip_if_not_installed("bigsnpr") + skip_if_not_installed("bigstatsr") + X <- matrix(c(0, 1, 2, 1, 0), ncol = 1) + expect_message( + ldClumpByScore(X, score = 1.0, chr = 1L, pos = 100L, verbose = TRUE), + "single variant" + ) +}) + +test_that("ldClumpByScore accepts a pre-built FBM and reports retained count (verbose)", { + skip_if_not_installed("bigsnpr") + skip_if_not_installed("bigstatsr") + set.seed(1) + n <- 200; p <- 8 + X <- matrix(rbinom(n * p, 2, 0.3), n, p) + X[, 2] <- X[, 1] # perfect LD + G <- bigstatsr::FBM.code256(nrow = n, ncol = p, init = X, + code = c(0, 1, 2, rep(NA_real_, 253L))) + score <- c(2, 1, runif(p - 2)) + chr <- rep(1L, p) + pos <- seq_len(p) * 1000L + expect_message( + keep <- ldClumpByScore(G, score = score, chr = chr, pos = pos, r2 = 0.2, + verbose = TRUE), + "variants retained" + ) + expect_true(1L %in% keep) + expect_false(2L %in% keep) +}) + +# ============================================================================= +# Additional coverage: extractLdMatrix +# ============================================================================= + +test_that("extractLdMatrix errors on non-LdData input", { + expect_error(pecotmr:::extractLdMatrix(list()), "must be an LdData object") +}) + +test_that("extractLdMatrix returns the genotype matrix when wantGenotype=TRUE", { + variant_ids <- paste0("chr1:", c(100, 200, 300), ":A:G") + X <- matrix(rnorm(15), 5, 3) + colnames(X) <- variant_ids + ref <- pecotmr:::parseVariantId(variant_ids) + ref$variant_id <- variant_ids + gr <- pecotmr:::.refPanelToGranges(ref) + bm <- data.frame(blockId = 1L, chrom = "1", blockStart = 100L, blockEnd = 300L, + size = 3L, startIdx = 1L, endIdx = 3L, stringsAsFactors = FALSE) + ld <- LdData(genotypeHandle = X, variants = gr, blockMetadata = bm) + result <- pecotmr:::extractLdMatrix(ld, wantGenotype = TRUE) + expect_equal(result, X) +}) + +# ============================================================================= +# Additional coverage: ldLoader region-mode subsample/scale + ldInfo auto-detect +# ============================================================================= + +test_that("ldLoader region mode subsamples and scales genotype matrices", { + skip_if_not_installed("pgenlibr") + meta_file <- file.path(test_data_dir, "ld_meta_region_geno_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file) + cat(paste("21", "0", "0", "test_variants", sep = "\t"), "\n", + file = meta_file, append = TRUE) + set.seed(1) + loader <- ldLoader(ldMetaPath = meta_file, regions = geno_region_all, + returnGenotype = TRUE, maxVariants = 20) + mat <- loader(1) + expect_equal(ncol(mat), 20L) + expect_equal(nrow(mat), 100L) # samples + expect_true(all(is.finite(mat))) # scaled, NAs replaced with 0 +}) + +test_that("ldLoader region mode subsamples a correlation matrix", { + skip_if_not_installed("pgenlibr") + meta_file <- file.path(test_data_dir, "ld_meta_region_corr_tmp.tsv") + on.exit(unlink(meta_file), add = TRUE) + writeLines(paste("chrom", "start", "end", "path", sep = "\t"), meta_file) + cat(paste("21", "0", "0", "test_variants", sep = "\t"), "\n", + file = meta_file, append = TRUE) + set.seed(2) + loader <- ldLoader(ldMetaPath = meta_file, regions = geno_region_all, + returnGenotype = FALSE, maxVariants = 15) + mat <- loader(1) + expect_equal(dim(mat), c(15L, 15L)) +}) + +test_that("ldLoader ldInfo auto-detects companion file when SNP_file is absent", { + ld_file <- file.path(test_data_dir, "LD_block_1.chr1_1000_1200.float16.txt.xz") + bim_file <- file.path(test_data_dir, "LD_block_1.chr1_1000_1200.float16.bim") + real_process <- pecotmr:::processLdMatrix + local_mocked_bindings( + processLdMatrix = function(LD_file_path, snp_file_path = NULL) { + # In the SNP_file-absent branch, snp_file_path is NULL (auto-detect). + result <- real_process(LD_file_path, bim_file) + variant_ids <- result$ldVariants$variants + ref <- pecotmr:::parseVariantId(variant_ids) + ref$variant_id <- variant_ids + gr <- pecotmr:::.refPanelToGranges(ref) + bm <- data.frame( + blockId = 1L, chrom = as.character(ref$chrom[1]), + blockStart = min(ref$pos), blockEnd = max(ref$pos), + size = length(variant_ids), startIdx = 1L, + endIdx = length(variant_ids), stringsAsFactors = FALSE + ) + LdData(correlation = result$ldMatrix, variants = gr, blockMetadata = bm) + }, + .package = "pecotmr" + ) + loader <- ldLoader(ldInfo = data.frame(LD_file = ld_file)) # no SNP_file column + mat <- loader(1) + expect_true(is.matrix(mat)) + expect_true(isSymmetric(mat)) + expect_true(nrow(mat) > 0) +}) + +# ============================================================================= +# Additional coverage: computeLd alternative backends + guard rails +# ============================================================================= + +test_that("computeLd snprelate backend returns a valid correlation matrix", { + skip_if_not_installed("SNPRelate") + skip_if_not_installed("gdsfmt") + set.seed(1) + X <- matrix(rbinom(100 * 5, 2, 0.3), 100, 5) + colnames(X) <- paste0("rs", 1:5) + R <- suppressMessages(computeLd(X, method = "sample", backend = "snprelate")) + expect_equal(dim(R), c(5L, 5L)) + expect_equal(unname(diag(R)), rep(1, 5)) + expect_true(all(is.finite(R))) + expect_equal(colnames(R), colnames(X)) +}) + +test_that("computeLd snpstats backend returns a valid correlation matrix", { + skip_if_not_installed("snpStats") + set.seed(1) + X <- matrix(rbinom(100 * 5, 2, 0.3), 100, 5) + colnames(X) <- paste0("rs", 1:5) + R <- computeLd(X, method = "sample", backend = "snpstats") + expect_equal(dim(R), c(5L, 5L)) + expect_equal(unname(diag(R)), rep(1, 5)) + expect_true(all(is.finite(R))) +}) + +test_that("computeLd errors when a non-internal backend is paired with non-sample method", { + set.seed(1) + X <- matrix(rbinom(100 * 3, 2, 0.3), 100, 3) + colnames(X) <- paste0("rs", 1:3) + expect_error( + computeLd(X, method = "population", backend = "snprelate"), + "only supported with method='sample'" + ) + expect_error( + computeLd(X, method = "gcta", backend = "snpstats"), + "only supported with method='sample'" + ) +}) + +# The Rfast-absent fallback branches (computeLd's `R <- cor(X_imp)` and +# ldPruneByCorrelation's `cor.X <- cor(X)`) are reachable by mocking the *base* +# `requireNamespace` (so it reports Rfast missing) for the duration of the call. +test_that("ldPruneByCorrelation and computeLd fall back to base cor() when Rfast is absent", { + with_mocked_bindings( + { + set.seed(1) + X <- matrix(rnorm(50 * 4), 50, 4) + colnames(X) <- paste0("s", 1:4) + pruned <- ldPruneByCorrelation(X, corThres = 0.9) # hits cor(X) at ld.R:1207 + R <- computeLd(X, method = "sample", backend = "internal") # hits cor(X_imp) at ld.R:1816 + expect_true(is.list(pruned)) + expect_true(all(c("X.new", "filter.id") %in% names(pruned))) + expect_equal(dim(R), c(4L, 4L)) + expect_equal(unname(diag(R)), rep(1, 4), tolerance = 1e-8) + expect_true(isSymmetric(unname(R))) + # base cor() fallback must agree with the direct base computation. + expect_equal(unname(R), unname(cor(X)), tolerance = 1e-10) + }, + requireNamespace = function(package, ...) + if (identical(package, "Rfast")) FALSE + else base::requireNamespace(package, ...), + .package = "base") +}) + # ============================================================================= # detectVariantConvention — uncovered line 586 # ============================================================================= diff --git a/tests/testthat/test_ldData.R b/tests/testthat/test_ldData.R index 3ec2ebdb..4f5fa508 100644 --- a/tests/testthat/test_ldData.R +++ b/tests/testthat/test_ldData.R @@ -492,3 +492,33 @@ test_that("LdData: mixtureWeights must be non-negative and sum to 1", { } +# =========================================================================== +# getGenotypeHandle / getMixtureWeights / getSnpIdx accessors +# =========================================================================== + +test_that("LdData: getGenotypeHandle / getMixtureWeights / getSnpIdx return their slots", { + gh1 <- .ld_makeHandle(path = "/tmp/h1.gds") + gh2 <- .ld_makeHandle(path = "/tmp/h2.gds") + ld <- LdData(correlation = NULL, + genotypeHandle = list(gh1, gh2), + snpIdx = 1:4, + variants = .ld_makeVariants(), + blockMetadata = S4Vectors::DataFrame(x = 1), + mixtureWeights = c(0.3, 0.7)) + expect_identical(getGenotypeHandle(ld), list(gh1, gh2)) + expect_equal(getMixtureWeights(ld), c(0.3, 0.7)) + expect_equal(getSnpIdx(ld), 1:4) +}) + +test_that("getCorrelation: errors when neither correlation nor genotypeHandle is set", { + # LdData validity forbids this state at construction, so build a valid + # object and drop the correlation slot post-hoc to reach the defensive + # runtime stop(). + ld <- LdData(correlation = diag(4), variants = .ld_makeVariants(), + blockMetadata = S4Vectors::DataFrame(x = 1)) + ld@correlation <- NULL + expect_error(getCorrelation(ld), + "No correlation matrix or genotype handle available") +}) + + diff --git a/tests/testthat/test_mashPipeline.R b/tests/testthat/test_mashPipeline.R index 903fbb0d..acfbe019 100644 --- a/tests/testthat/test_mashPipeline.R +++ b/tests/testthat/test_mashPipeline.R @@ -284,3 +284,187 @@ test_that("mashPipeline runs end-to-end on qtl_sumstats_multicontext_example", { expect_type(res$w, "double") expect_equal(sum(res$w), 1, tolerance = 1e-6) }) + +# --------------------------------------------------------------------------- +# fitMashContrast — condition grouping (>2 conditions, grouped replicates) +# --------------------------------------------------------------------------- + +test_that("fitMashContrast applies deviation + pairwise group adjustments", { + conds <- c("a", "b", "c", "d") + origMean <- matrix(c(0.5, 0.3, -0.2, 0.4), nrow = 1, + dimnames = list("v1", conds)) + pm <- matrix(c(0.5, 0.3, -0.2, 0.4), nrow = 1, + dimnames = list("v1", conds)) + pv <- array(0, dim = c(4, 4, 1), dimnames = list(conds, conds, NULL)) + pv[, , 1] <- diag(4) * 0.1 + # a,b share group 1 (replicates); c is its own group 2; d ungrouped (0). + # Non-NULL grouping triggers `grouping <- grouping[tested]`, the >2-condition + # deviation re-weighting loop, and the pairwise group-adjustment loop. + grouping <- setNames(c(1L, 1L, 2L, 0L), conds) + out <- fitMashContrast(1L, origMean, pm, pv, grouping = grouping) + expect_s3_class(out, "data.frame") + expect_equal(nrow(out), 1L) + # 4 deviation + choose(4,2)=6 pairwise = 10 contrasts -> 30 columns. + expect_equal(ncol(out), 30L) + contrastSuffix <- sub("^(mean|se|p)_contrast_", "", names(out)) + expect_true(any(grepl("_deviation$", contrastSuffix))) + expect_true(any(grepl("_vs_", contrastSuffix))) + expect_true(all(is.finite(unlist(out)))) +}) + +# --------------------------------------------------------------------------- +# updateMashModelCov — named data-driven cov matrices (the `[samples, samples]` +# else branch, distinct from the no-dimnames positional-slice branch) +# --------------------------------------------------------------------------- + +test_that("updateMashModelCov slices named data-driven cov matrices by sample", { + allSamples <- c("brain", "blood", "muscle") + ddMat <- matrix(seq_len(9), 3, 3, dimnames = list(allSamples, allSamples)) + U <- list(identity = diag(1, 3), dataDriven = ddMat) + pi <- setNames(c(0.5, 0.5), c("identity.scale1", "dataDriven.scale1")) + m <- list(fitted_g = list(Ulist = U, pi = pi)) + m2 <- updateMashModelCov(m, allSamples = allSamples, + samples = c("brain", "muscle")) + # The named data-driven matrix is sliced by name: cov[[d]][samples, samples]. + expect_equal(dim(m2$fitted_g$Ulist$dataDriven), c(2L, 2L)) + expect_equal(m2$fitted_g$Ulist$dataDriven, + ddMat[c("brain", "muscle"), c("brain", "muscle")]) + # identity collapses to a single 1 in the top-left corner. + expect_equal(m2$fitted_g$Ulist$identity[1, 1], 1) + expect_equal(sum(m2$fitted_g$Ulist$identity), 1) +}) + +# --------------------------------------------------------------------------- +# metaAnalysisPerCell — cells matching no contrast column are skipped +# --------------------------------------------------------------------------- + +test_that("metaAnalysisPerCell skips cells whose name matches no column", { + # The condition "x$y" yields a derived cell name "x$y"; used as a grep() + # pattern the embedded `$` anchor matches nothing, exercising the + # `if (length(cellIdx) == 0) next` skip branch. + cols <- "mean_contrast_x$y_vs_z" + es <- matrix(c(0.3, 0.5), nrow = 2, dimnames = list(c("v1", "v2"), cols)) + se <- matrix(c(0.1, 0.1), nrow = 2, dimnames = list(c("v1", "v2"), cols)) + out <- metaAnalysisPerCell(es, se) + # "x$y" is skipped; only the "z" cell survives. + expect_false("x$y" %in% out$cell) + expect_true("z" %in% out$cell) + expect_equal(nrow(out), 1L) +}) + +# --------------------------------------------------------------------------- +# mashPipeline — input validation (errors fire before any mashr call). +# Guarded by skips because the requireNamespace() checks run first; without +# mashr/flashier the function stops with an install message instead. +# --------------------------------------------------------------------------- + +test_that("mashPipeline rejects a sumStatsList that is not a named list", { + skip_if_not_installed("mashr") + skip_if_not_installed("flashier") + expect_error(mashPipeline(list(1, 2), alpha = 0), + "must be a named list") +}) + +test_that("mashPipeline errors when a required entry is missing", { + skip_if_not_installed("mashr") + skip_if_not_installed("flashier") + expect_error(mashPipeline(list(strong = 1), alpha = 0), + "missing required entr") +}) + +test_that("mashPipeline errors on unrecognised entries", { + skip_if_not_installed("mashr") + skip_if_not_installed("flashier") + expect_error( + mashPipeline(list(strong = 1, random = 1, bogus = 1), alpha = 0), + "unrecognised entries") +}) + +test_that("mashPipeline coerces a SimpleList before validating its names", { + skip_if_not_installed("mashr") + skip_if_not_installed("flashier") + skip_if_not_installed("S4Vectors") + # A SimpleList is converted to a base list first (the as.list branch), then + # validation runs; here it is missing both required entries. + expect_error( + mashPipeline(S4Vectors::SimpleList(bogus = 1), alpha = 0), + "missing required entr") +}) + +# --------------------------------------------------------------------------- +# mashPipeline — priorCovariances validation + bypass path. +# Supplying residualCorrelation makes random/null optional and short-circuits +# null-correlation estimation, so the supplied Vhat branch is also exercised. +# --------------------------------------------------------------------------- + +test_that("mashPipeline rejects priorCovariances not a non-empty named list", { + skip_if_not_installed("mashr") + skip_if_not_installed("flashier") + data(qtl_sumstats_multicontext_example) + ss <- qtl_sumstats_multicontext_example + vhat <- diag(3) + # Empty list. + expect_error(suppressMessages(suppressWarnings( + mashPipeline(list(strong = ss), alpha = 0, + residualCorrelation = vhat, priorCovariances = list()))), + "non-empty named") + # Unnamed list. + expect_error(suppressMessages(suppressWarnings( + mashPipeline(list(strong = ss), alpha = 0, + residualCorrelation = vhat, + priorCovariances = list(diag(3))))), + "non-empty named") +}) + +test_that("mashPipeline rejects priorCovariances with wrong dimensions", { + skip_if_not_installed("mashr") + skip_if_not_installed("flashier") + data(qtl_sumstats_multicontext_example) + ss <- qtl_sumstats_multicontext_example + vhat <- diag(3) + expect_error(suppressMessages(suppressWarnings( + mashPipeline(list(strong = ss), alpha = 0, + residualCorrelation = vhat, + priorCovariances = list(myU = diag(2))))), + "3 x 3 matrix") +}) + +test_that("mashPipeline passes supplied residualCorrelation + priorCovariances through", { + skip_if_not_installed("mashr") + skip_if_not_installed("flashier") + data(qtl_sumstats_multicontext_example) + ss <- qtl_sumstats_multicontext_example + vhat <- diag(3) + U0 <- list(identity = diag(3), effectA = diag(c(1, 0, 0))) + res <- suppressMessages(suppressWarnings( + mashPipeline(list(strong = ss), alpha = 0, + residualCorrelation = vhat, priorCovariances = U0))) + expect_named(res, c("U", "w")) + # priorCovariances passed straight through as the covariance list (bypass of + # the cov_canonical / cov_pca / cov_flash / cov_ed chain). + expect_identical(res$U, U0) + expect_type(res$w, "double") + expect_equal(sum(res$w), 1, tolerance = 1e-6) +}) + +# --------------------------------------------------------------------------- +# mashPipeline — null-based Vhat estimation + default nPcs +# --------------------------------------------------------------------------- + +test_that("mashPipeline estimates Vhat from a null set and defaults nPcs", { + skip_if_not_installed("mashr") + skip_if_not_installed("flashier") + data(qtl_sumstats_multicontext_example) + ss <- qtl_sumstats_multicontext_example + # Supplying `null` triggers estimate_null_correlation_simple; leaving nPcs + # NULL exercises the `nPcs <- ncol(Bhat) - 1` default in the cov_* chain. + res <- suppressMessages(suppressWarnings( + mashPipeline(list(strong = ss, random = ss, null = ss), + alpha = 0, setSeed = 1L))) + expect_named(res, c("U", "w")) + expect_gt(length(res$U), 0L) + expect_true(all(vapply(res$U, + function(m) all(dim(m) == c(3L, 3L)), + logical(1)))) + expect_equal(sum(res$w), 1, tolerance = 1e-6) +}) diff --git a/tests/testthat/test_mashWrapper.R b/tests/testthat/test_mashWrapper.R index 8cba2573..cfa3ba47 100644 --- a/tests/testthat/test_mashWrapper.R +++ b/tests/testthat/test_mashWrapper.R @@ -15,304 +15,6 @@ context("mash_wrapper") ) } -# =========================================================================== -# mergeSusieCs -# =========================================================================== - -test_that("mergeSusieCs merges credible sets correctly", { - # Test case 1: No overlapping credible sets - susie_fit_1 <- list( - list( - condition_1 = list( - top_loci = data.frame( - variant_id = c("variant1", "variant2"), - pip = c(0.8, 0.6), - CS_95_susie = c(1, 1) - ) - ), - condition_2 = list( - top_loci = data.frame( - variant_id = c("variant3", "variant4"), - pip = c(0.9, 0.7), - CS_95_susie = c(1, 2) - ) - ) - ) - ) - - expected_output_1 <- data.frame( - variant_id = c("variant1", "variant2", "variant3", "variant4"), - credibleSetNames = c("cs_1_1", "cs_1_1", "cs_2_1", "cs_2_2"), - maxPip = c(0.8, 0.6, 0.9, 0.7), - medianPip = c(0.8, 0.6, 0.9, 0.7), - stringsAsFactors = FALSE - ) - - expect_equal(mergeSusieCs(susie_fit_1), expected_output_1) - - # Test case 2: Overlapping credible sets - susie_fit_2 <- list( - list( - condition_1 = list( - top_loci = data.frame( - variant_id = c("variant1", "variant2"), - pip = c(0.8, 0.6), - CS_95_susie = c(1, 1) - ) - ), - condition_2 = list( - top_loci = data.frame( - variant_id = c("variant2", "variant3"), - pip = c(0.7, 0.9), - CS_95_susie = c(2, 2) - ) - ) - ) - ) - - expected_output_2 <- data.frame( - variant_id = c("variant1", "variant2", "variant3"), - credibleSetNames = c("cs_1_1,cs_2_2", "cs_1_1,cs_2_2", "cs_1_1,cs_2_2"), - maxPip = c(0.8, 0.7, 0.9), - medianPip = c(0.8, 0.65, 0.9), - stringsAsFactors = FALSE - ) - - expect_equal(mergeSusieCs(susie_fit_2), expected_output_2) - - # Test case 3: Empty input - susie_fit_3 <- list(condition_1 = list(top_loci = data.frame( - variant_id = character(), - credibleSetNames = character(), - maxPip = numeric(), - medianPip = numeric(), - stringsAsFactors = FALSE - ))) - - expected_output_3 <- NULL - - expect_equal(mergeSusieCs(susie_fit_3), expected_output_3) - - # Test case 4: Different coverage parameter - susie_fit_5 <- list( - list( - condition_1 = list( - top_loci = data.frame( - variant_id = c("variant1", "variant2"), - pip = c(0.8, 0.6), - CS_90_susie = c(1, 1) - ) - ), - condition_2 = list( - top_loci = data.frame( - variant_id = c("variant3", "variant4"), - pip = c(0.9, 0.7), - CS_90_susie = c(2, 2) - ) - ) - ) - ) - - expected_output_5 <- data.frame( - variant_id = c("variant1", "variant2", "variant3", "variant4"), - credibleSetNames = c("cs_1_1", "cs_1_1", "cs_2_2", "cs_2_2"), - maxPip = c(0.8, 0.6, 0.9, 0.7), - medianPip = c(0.8, 0.6, 0.9, 0.7), - stringsAsFactors = FALSE - ) - - expect_equal(mergeSusieCs(susie_fit_5, coverage = "CS_90_susie"), expected_output_5) - - # Test case 6: Multiple top_loci tables with mixed coverage indices - susie_fit_6 <- list( - list( - condition_1 = list( - top_loci = data.frame( - variant_id = c("variant1", "variant2", "variant3"), - pip = c(0.8, 0.6, 0.7), - CS_95_susie = c(1, 1, 2) - ) - ), - condition_2 = list( - top_loci = data.frame( - variant_id = c("variant4", "variant5"), - pip = c(0.9, 0.7), - CS_95_susie = c(2, 3) - ) - ), - condition_3 = list( - top_loci = data.frame( - variant_id = c("variant6", "variant7", "variant8"), - pip = c(0.85, 0.75, 0.8), - CS_95_susie = c(1, 3, 2) - ) - ) - ) - ) - - expected_output_6 <- data.frame( - variant_id = c("variant1", "variant2", "variant3", "variant4", "variant5", "variant6", "variant7", "variant8"), - credibleSetNames = c("cs_1_1", "cs_1_1", "cs_1_2", "cs_2_2", "cs_2_3", "cs_3_1", "cs_3_3", "cs_3_2"), - maxPip = c(0.8, 0.6, 0.7, 0.9, 0.7, 0.85, 0.75, 0.8), - medianPip = c(0.8, 0.6, 0.7, 0.9, 0.7, 0.85, 0.75, 0.8), - stringsAsFactors = FALSE - ) - - expect_equal(mergeSusieCs(susie_fit_6), expected_output_6) - - # Test case 7: Multiple top_loci tables with overlapping sets and mixed coverage indices - susie_fit_7 <- list( - list( - condition_1 = list( - top_loci = data.frame( - variant_id = c("variant1", "variant2", "variant3"), - pip = c(0.8, 0.6, 0.7), - CS_95_susie = c(1, 1, 2) - ) - ), - condition_2 = list( - top_loci = data.frame( - variant_id = c("variant2", "variant3", "variant4"), - pip = c(0.7, 0.9, 0.85), - CS_95_susie = c(2, 2, 1) - ) - ), - condition_3 = list( - top_loci = data.frame( - variant_id = c("variant4", "variant5"), - pip = c(0.75, 0.8), - CS_95_susie = c(3, 2) - ) - ) - ) - ) - - expected_output_7 <- data.frame( - variant_id = c("variant1", "variant2", "variant3", "variant4", "variant5"), - credibleSetNames = c("cs_1_1,cs_1_2,cs_2_2", "cs_1_1,cs_1_2,cs_2_2", "cs_1_1,cs_1_2,cs_2_2","cs_2_1,cs_3_3", "cs_3_2"), - maxPip = c(0.8, 0.7, 0.9, 0.85, 0.8), - medianPip = c(0.8, 0.65, 0.8, 0.8, 0.8), - stringsAsFactors = FALSE - ) - - expect_equal(mergeSusieCs(susie_fit_7), expected_output_7) - - # Test case 8: Multiple top_loci tables with different coverage indices and no overlapping sets - susie_fit_8 <- list( - list( - condition_1 = list( - top_loci = data.frame( - variant_id = c("variant1", "variant2", "variant3"), - pip = c(0.8, 0.6, 0.7), - CS_95_susie = c(1, 2, 3) - ) - ), - condition_2 = list( - top_loci = data.frame( - variant_id = c("variant4", "variant5"), - pip = c(0.9, 0.7), - CS_95_susie = c(3, 1) - ) - ), - condition_3 = list( - top_loci = data.frame( - variant_id = c("variant6", "variant7", "variant8"), - pip = c(0.85, 0.75, 0.8), - CS_95_susie = c(2, 3, 1) - ) - ) - ) - ) - - expected_output_8 <- data.frame( - variant_id = c("variant1", "variant2", "variant3", "variant4", "variant5", "variant6", "variant7", "variant8"), - credibleSetNames = c("cs_1_1", "cs_1_2", "cs_1_3", "cs_2_3", "cs_2_1", "cs_3_2", "cs_3_3", "cs_3_1"), - maxPip = c(0.8, 0.6, 0.7, 0.9, 0.7, 0.85, 0.75, 0.8), - medianPip = c(0.8, 0.6, 0.7, 0.9, 0.7, 0.85, 0.75, 0.8), - stringsAsFactors = FALSE - ) - - expect_equal(mergeSusieCs(susie_fit_8), expected_output_8) - - # Test case 9: Single top_loci table with mixed coverage indices - susie_fit_9 <- list( - list( - condition_1 = list( - top_loci = data.frame( - variant_id = c("variant1", "variant2", "variant3", "variant4", "variant5"), - pip = c(0.8, 0.6, 0.7, 0.9, 0.85), - CS_95_susie = c(1, 1, 2, 3, 2) - ) - ) - ) - ) - - expected_output_9 <- data.frame( - variant_id = c("variant1", "variant2", "variant3", "variant5", "variant4"), - credibleSetNames = c("cs_1_1", "cs_1_1", "cs_1_2", "cs_1_2", "cs_1_3"), - maxPip = c(0.8, 0.6, 0.7, 0.85, 0.9), - medianPip = c(0.8, 0.6, 0.7, 0.85, 0.9), - stringsAsFactors = FALSE - ) - - expect_equal(mergeSusieCs(susie_fit_9), expected_output_9) - - # Test case 10: Multiple top_loci tables with mixed coverage indices and overlapping sets - susie_fit_10 <- list( - list( - condition_1 = list( - top_loci = data.frame( - variant_id = c("variant1", "variant2", "variant3"), - pip = c(0.8, 0.6, 0.7), - CS_95_susie = c(1, 2, 1) - ) - ), - condition_2 = list( - top_loci = data.frame( - variant_id = c("variant2", "variant4", "variant5"), - pip = c(0.75, 0.9, 0.85), - CS_95_susie = c(2, 1, 3) - ) - ), - condition_3 = list( - top_loci = data.frame( - variant_id = c("variant3", "variant5", "variant6"), - pip = c(0.65, 0.8, 0.7), - CS_95_susie = c(3, 2, 1) - ) - ) - ) - ) - - expected_output_10 <- data.frame( - variant_id = c("variant1", "variant3", "variant2", "variant4", "variant5", "variant6"), - credibleSetNames = c("cs_1_1,cs_3_3", "cs_1_1,cs_3_3", "cs_1_2,cs_2_2", "cs_2_1", "cs_2_3,cs_3_2", "cs_3_1"), - maxPip = c(0.8, 0.7, 0.75, 0.9, 0.85, 0.7), - medianPip = c(0.8, 0.675, 0.675, 0.9, 0.825, 0.7), - stringsAsFactors = FALSE - ) - - expect_equal(mergeSusieCs(susie_fit_10), expected_output_10) -}) - -test_that("mergeSusieCs handles single condition with single CS", { - susie_fit <- list(list( - cond1 = list( - top_loci = data.frame( - variant_id = c("1:100:A:G", "1:200:C:T"), - pip = c(0.9, 0.1), - CS_95_susie = c(1, 1), - stringsAsFactors = FALSE - ) - ) - )) - - result <- pecotmr:::mergeSusieCs(susie_fit) - expect_s3_class(result, "data.frame") - expect_true("variant_id" %in% colnames(result)) - expect_true("maxPip" %in% colnames(result)) -}) - # =========================================================================== # filterInvalidSummaryStat # =========================================================================== @@ -1145,3 +847,106 @@ test_that(".mashSumStatsToMatrices errors when entry lacks SNP mcol", { pecotmr:::.mashSumStatsToMatrices(ss, "strong", inputScale = "auto"), "SNP") }) + +# =========================================================================== +# .mashSumStatsToMatrices — GwasSumStats path + input-validation errors +# =========================================================================== + +test_that(".mashSumStatsToMatrices on GwasSumStats: studies become columns", { + set.seed(11L) + gh <- new("GenotypeHandle", + path = "/tmp/sketch.gds", format = "gds", + snpInfo = data.frame(SNP = paste0("v", 1:3), CHR = "1", + BP = c(100L, 200L, 300L), + A1 = "A", A2 = "G", stringsAsFactors = FALSE), + nSamples = 50L, sampleIds = paste0("s", seq_len(50L)), pgenPtr = NULL) + mkGr <- function(snpIds) { + gr <- GenomicRanges::GRanges( + seqnames = "chr1", + ranges = IRanges::IRanges( + start = seq(100L, by = 100L, length.out = length(snpIds)), width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame( + SNP = snpIds, A1 = "A", A2 = "G", + Z = rnorm(length(snpIds)), + BETA = rnorm(length(snpIds), sd = 0.1), + SE = rep(0.05, length(snpIds))) + gr + } + # Each study is its own (study) block; columns of the mash matrix are the + # studies, so the result is block-diagonal with NA-fill off the diagonal. + ss <- GwasSumStats( + study = c("studyA", "studyB"), + entry = list(mkGr(paste0("v", 1:3)), mkGr(paste0("v", 1:3))), + genome = "hg19", ldSketch = gh, + qcInfo = list(prebuilt = "synthetic")) + out <- pecotmr:::.mashSumStatsToMatrices(ss, "strong", inputScale = "auto") + expect_equal(ncol(out$b), 2L) + expect_equal(colnames(out$b), c("studyA", "studyB")) + # 2 studies x 3 variants = 6 rows; rownames prefixed by the study block key. + expect_equal(nrow(out$b), 6L) + expect_setequal(rownames(out$b), + c(paste0("studyA::v", 1:3), paste0("studyB::v", 1:3))) + # studyA's rows are absent from studyB's column -> bhat 0 / shat 1000 fill. + studyArows <- grep("^studyA::", rownames(out$b)) + expect_equal(unname(out$b[studyArows, "studyB"]), rep(0, 3)) + expect_equal(unname(out$s[studyArows, "studyB"]), rep(1000, 3)) + # On the BETA scale, the present cells carry the small generated SEs. + expect_true(all(out$s[studyArows, "studyA"] < 1)) +}) + +test_that(".mashSumStatsToMatrices errors on a non-SumStats input", { + expect_error( + pecotmr:::.mashSumStatsToMatrices(list(a = 1), "strong"), + "must be a QtlSumStats or GwasSumStats") +}) + +test_that(".mashSumStatsToMatrices errors when SumStats has empty QC info", { + gh <- new("GenotypeHandle", + path = "/tmp/sketch.gds", format = "gds", + snpInfo = data.frame(SNP = paste0("v", 1:3), CHR = "1", + BP = c(100L, 200L, 300L), + A1 = "A", A2 = "G", stringsAsFactors = FALSE), + nSamples = 50L, sampleIds = paste0("s", seq_len(50L)), pgenPtr = NULL) + gr <- GenomicRanges::GRanges( + seqnames = "chr1", + ranges = IRanges::IRanges(start = c(100L, 200L, 300L), width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame( + SNP = paste0("v", 1:3), A1 = "A", A2 = "G", + Z = rnorm(3), BETA = rnorm(3, sd = 0.1), SE = rep(0.05, 3)) + ss <- QtlSumStats(study = "s1", context = "c1", trait = "g1", + entry = list(gr), genome = "hg19", ldSketch = gh, + qcInfo = list()) # empty QC info + expect_error( + pecotmr:::.mashSumStatsToMatrices(ss, "strong"), + "no QC info") +}) + +test_that(".mashSumStatsToMatrices errors when SumStats has zero entries", { + gh <- new("GenotypeHandle", + path = "/tmp/sketch.gds", format = "gds", + snpInfo = data.frame(SNP = paste0("v", 1:3), CHR = "1", + BP = c(100L, 200L, 300L), + A1 = "A", A2 = "G", stringsAsFactors = FALSE), + nSamples = 50L, sampleIds = paste0("s", seq_len(50L)), pgenPtr = NULL) + ss <- QtlSumStats(study = character(0), context = character(0), + trait = character(0), entry = list(), genome = "hg19", + ldSketch = gh, varY = numeric(0), + qcInfo = list(prebuilt = "synthetic")) + expect_error( + pecotmr:::.mashSumStatsToMatrices(ss, "strong"), + "no entries") +}) + +# =========================================================================== +# mergeMashData — oneData-empty branch (returns resData unchanged) +# =========================================================================== + +test_that("mergeMashData returns resData when oneData is NULL", { + d1 <- list(random = data.frame(a = 1:3, b = 4:6)) + expect_equal(mergeMashData(d1, NULL), d1) +}) + +test_that("mergeMashData returns resData when oneData is an empty list", { + d1 <- list(random = data.frame(a = 1:3)) + expect_equal(mergeMashData(d1, list()), d1) +}) diff --git a/tests/testthat/test_pvalCombine.R b/tests/testthat/test_pvalCombine.R index a7bc789e..701e2133 100644 --- a/tests/testthat/test_pvalCombine.R +++ b/tests/testthat/test_pvalCombine.R @@ -219,6 +219,85 @@ test_that("combinePValues: errors when named R is missing entries", { ) }) +# =========================================================================== +# Dispatch branches of .combinePvalSingle, pvalAcat non-finite guard, and +# .combinePvalAlignR matrix guards +# =========================================================================== + +test_that("pvalAcat returns NA when the Cauchy statistic is non-finite", { + # naRm = FALSE keeps the NaN, which propagates to a non-finite mean and + # trips the `!is.finite(stat)` guard. + expect_true(is.na(pecotmr:::pvalAcat(c(0.1, 0.2, NaN), naRm = FALSE))) +}) + +test_that(".combinePvalAlignR rejects a non-matrix R", { + expect_error(pecotmr:::.combinePvalAlignR(5, c("a", "b")), + "must be a matrix") +}) + +test_that(".combinePvalAlignR rejects R whose rownames and colnames differ", { + m <- matrix(1, 2, 2) + rownames(m) <- c("a", "b") + colnames(m) <- c("x", "y") + expect_error(pecotmr:::.combinePvalAlignR(m, c("a", "b")), + "rownames and colnames must be identical") +}) + +test_that(".combinePvalSingle errors on an unknown method", { + # Unreachable through combinePValues (the known-method guard fires first), + # so the default switch arm is exercised directly here. + expect_error( + pecotmr:::.combinePvalSingle("bogus", pvals = c(0.1, 0.2), + zScores = NULL, R = NULL), + "Unknown combination method" + ) +}) + +test_that("combinePValues dispatches HMP through the unified menu", { + skip_if_not_installed("harmonicmeanp") + res <- combinePValues(pvals = c(0.01, 0.05, 0.1), methods = "hmp") + expect_equal(res$results$hmp$method, "hmp") + expect_true(is.finite(res$results$hmp$pval)) + expect_gt(res$results$hmp$pval, 0) + expect_lte(res$results$hmp$pval, 1) +}) + +test_that("combinePValues dispatches poolr methods (fisher/stouffer/invchisq)", { + skip_if_not_installed("poolr") + res <- combinePValues(pvals = c(0.01, 0.05, 0.1), + methods = c("fisher", "stouffer", "invchisq"), + R = diag(3)) + for (m in c("fisher", "stouffer", "invchisq")) { + expect_true(is.finite(res$results[[m]]$pval)) + expect_gt(res$results[[m]]$pval, 0) + expect_lt(res$results[[m]]$pval, 1) + } +}) + +test_that("combinePValues dispatches the GBJ-family methods", { + skip_if_not_installed("GBJ") + z <- c(2.5, 1.8, 3.0) + methods <- c("gbj", "bj", "hc", "ghc", "minp", "gbj_omni") + res <- combinePValues(zScores = z, methods = methods, R = diag(3)) + for (m in methods) { + expect_true(is.numeric(res$results[[m]]$pval)) + expect_gte(res$results[[m]]$pval, 0) + expect_lte(res$results[[m]]$pval, 1) + } +}) + +test_that("combinePValues dispatches aSPU and GATES", { + skip_if_not_installed("aSPU") + set.seed(42) + res <- combinePValues(zScores = c(2.5, 1.8, 3.0), + pvals = c(0.01, 0.05, 0.1), + methods = c("aspu", "gates"), R = diag(3)) + expect_gte(res$results$aspu$pval, 0) + expect_lte(res$results$aspu$pval, 1) + expect_gte(res$results$gates$pval, 0) + expect_lte(res$results$gates$pval, 1) +}) + # =========================================================================== # Tests migrated from test_misc.R (p-value combiners + waldTestPval) # =========================================================================== diff --git a/tests/testthat/test_qtlEnrichmentPipeline.R b/tests/testthat/test_qtlEnrichmentPipeline.R index 94885a15..6ac54fca 100644 --- a/tests/testthat/test_qtlEnrichmentPipeline.R +++ b/tests/testthat/test_qtlEnrichmentPipeline.R @@ -414,3 +414,225 @@ test_that("qtlEnrichment: tracks unmatched QTL variants in the output", { }) +# =========================================================================== +# qtlEnrichment(): verbose messages + alignNames = FALSE branch +# =========================================================================== + +test_that("qtlEnrichment: verbose=TRUE emits 'Estimated piGwas'/'Estimated piQtl' messages", { + # numGwas = NULL and piQtl = NULL both trigger data-estimation paths; + # with verbose = TRUE each emits a message (lines 391 and 407). + fx <- .qep_makeRealKernelInputs(nSnps = 30, causalIdx = c(5, 15)) + msgs <- testthat::capture_messages( + suppressWarnings(qtlEnrichment( + gwasPip = fx$gwasPip, susieQtlRegions = fx$susieQtlRegions, + impN = 5, numThreads = 1, verbose = TRUE))) + allMsgs <- paste(msgs, collapse = "\n") + expect_match(allMsgs, "Estimated piGwas") + expect_match(allMsgs, "Estimated piQtl") +}) + +test_that("qtlEnrichment: alignNames=FALSE recomputes only the unmatched set", { + # Exercises the cheap set-membership branch (lines 438-444) used by + # qtlEnrichmentPipeline after it has already aligned QTL names against + # the union GWAS panel. Two injected names are absent from gwasPip, so + # the unmatched-variant accumulation (lines 441-442) runs. + fx <- .qep_makeRealKernelInputs(nSnps = 30, causalIdx = c(5, 15)) + newNames <- names(fx$susieQtlRegions$fit1$pip) + newNames[1:2] <- c("1:9999:A:G", "1:9998:A:G") + names(fx$susieQtlRegions$fit1$pip) <- newNames + colnames(fx$susieQtlRegions$fit1$alpha) <- newNames + res <- qtlEnrichment( + gwasPip = fx$gwasPip, susieQtlRegions = fx$susieQtlRegions, + numGwas = 3000, piQtl = 0.5, impN = 5, numThreads = 1, + verbose = FALSE, alignNames = FALSE) + expect_true("unused_xqtl_variants" %in% names(res)) + expect_true(any(c("1:9999:A:G", "1:9998:A:G") %in% + unlist(res$unused_xqtl_variants))) +}) + + +# =========================================================================== +# .enrFlattenEnrichment(): shape coercion + NA fallbacks +# =========================================================================== + +test_that(".enrFlattenEnrichment: numeric scalar falls back to all-NA", { + out <- pecotmr:::.enrFlattenEnrichment(1.5) + expect_equal(out, list(enrichment = NA_real_, enrichmentSe = NA_real_, + enrichmentLogOdds = NA_real_)) +}) + +test_that(".enrFlattenEnrichment: plain list picks named scalar fields", { + out <- pecotmr:::.enrFlattenEnrichment( + list(enrichment = 2.0, enrichmentSe = 0.1, enrichmentLogOdds = log(2))) + expect_equal(out$enrichment, 2.0) + expect_equal(out$enrichmentSe, 0.1) + expect_equal(out$enrichmentLogOdds, log(2)) +}) + +test_that(".enrFlattenEnrichment: non-empty matrix picks columns by name", { + m <- matrix(c(2.0, 0.1, log(2)), nrow = 1) + colnames(m) <- c("enrichment", "enrichmentSe", "enrichmentLogOdds") + out <- pecotmr:::.enrFlattenEnrichment(m) + expect_equal(out$enrichment, 2.0) + expect_equal(out$enrichmentSe, 0.1) + expect_equal(out$enrichmentLogOdds, log(2)) +}) + +test_that(".enrFlattenEnrichment: empty data.frame falls back to all-NA", { + df <- data.frame(enrichment = numeric(0), enrichmentSe = numeric(0), + enrichmentLogOdds = numeric(0)) + out <- pecotmr:::.enrFlattenEnrichment(df) + expect_equal(out, list(enrichment = NA_real_, enrichmentSe = NA_real_, + enrichmentLogOdds = NA_real_)) +}) + +test_that(".enrFlattenEnrichment: non-empty data.frame resolves alternate column names", { + df <- data.frame(Enrichment = 3.0, se = 0.2, log_odds = 1.1) + out <- pecotmr:::.enrFlattenEnrichment(df) + expect_equal(out$enrichment, 3.0) + expect_equal(out$enrichmentSe, 0.2) + expect_equal(out$enrichmentLogOdds, 1.1) +}) + + +# =========================================================================== +# .enrPickColumn(): candidate column resolution +# =========================================================================== + +test_that(".enrPickColumn: returns the first matching column's value", { + df <- data.frame(se = 0.5, enrichment = 2.5) + expect_equal(pecotmr:::.enrPickColumn(df, c("enrichment", "Enrichment")), 2.5) +}) + +test_that(".enrPickColumn: returns NA_real_ when no candidate matches", { + df <- data.frame(foo = 1, bar = 2) + expect_identical( + pecotmr:::.enrPickColumn(df, c("enrichment", "Enrichment")), NA_real_) +}) + + +# =========================================================================== +# .enrBuildGwasPipVector(): empty-index + length-mismatch skip branches +# =========================================================================== + +test_that(".enrBuildGwasPipVector: unknown study yields numeric(0)", { + gfmr <- .qep_makeGwasFmr() + expect_identical(pecotmr:::.enrBuildGwasPipVector(gfmr, "GHOST"), numeric(0)) +}) + +test_that(".enrBuildGwasPipVector: skips a block whose pip length disagrees with variantIds", { + # Unnamed pip of length 2 against 3 variant ids -> ids come from + # getVariantIds and length(ids) != length(pip) -> the block is skipped, + # leaving no pieces -> numeric(0). + badEntry <- FineMappingEntry( + variantIds = c("v1", "v2", "v3"), + susieFit = list(pip = c(0.1, 0.2)), + topLoci = data.frame(variant_id = c("v1", "v2", "v3"), + pip = c(0.5, 0.3, 0.2), + stringsAsFactors = FALSE)) + g <- GwasFineMappingResult( + study = "G1", method = "susie", entry = list(badEntry), + ldSketch = .qep_makeHandle()) + expect_identical(pecotmr:::.enrBuildGwasPipVector(g, "G1"), numeric(0)) +}) + + +# =========================================================================== +# .enrBuildQtlRegionsList(): incomplete-fit / no-prior / unnamed-pip branches +# =========================================================================== + +test_that(".enrBuildQtlRegionsList: skips an entry whose fit lacks alpha/pip", { + badEntry <- FineMappingEntry( + variantIds = "v1", susieFit = list(), + topLoci = data.frame(variant_id = "v1", pip = 0.1, + stringsAsFactors = FALSE)) + qfmr <- QtlFineMappingResult( + study = "Q1", context = "c1", trait = "t1", method = "susie", + entry = list(badEntry), ldSketch = NULL) + expect_equal(length(pecotmr:::.enrBuildQtlRegionsList(qfmr, "Q1", "c1")), 0L) +}) + +test_that(".enrBuildQtlRegionsList: skips a fit with no V and no prior_variance", { + noVfit <- list( + alpha = matrix(c(0.6, 0.4), nrow = 1), + pip = setNames(c(0.6, 0.4), c("v1", "v2"))) + entry <- FineMappingEntry( + variantIds = c("v1", "v2"), susieFit = noVfit, + topLoci = data.frame(variant_id = c("v1", "v2"), pip = c(0.6, 0.4), + stringsAsFactors = FALSE)) + qfmr <- QtlFineMappingResult( + study = "Q1", context = "c1", trait = "t1", method = "susie", + entry = list(entry), ldSketch = NULL) + expect_equal(length(pecotmr:::.enrBuildQtlRegionsList(qfmr, "Q1", "c1")), 0L) +}) + +test_that(".enrBuildQtlRegionsList: names an unnamed pip from the entry's variant ids", { + unnamedPipFit <- list( + alpha = matrix(c(0.6, 0.4), nrow = 1), + pip = c(0.6, 0.4), # unnamed -> names assigned from getVariantIds + V = 0.1) + entry <- FineMappingEntry( + variantIds = c("v1", "v2"), susieFit = unnamedPipFit, + topLoci = data.frame(variant_id = c("v1", "v2"), pip = c(0.6, 0.4), + stringsAsFactors = FALSE)) + qfmr <- QtlFineMappingResult( + study = "Q1", context = "c1", trait = "t1", method = "susie", + entry = list(entry), ldSketch = NULL) + out <- pecotmr:::.enrBuildQtlRegionsList(qfmr, "Q1", "c1") + expect_equal(length(out), 1L) + expect_equal(names(out[[1L]]$pip), c("v1", "v2")) + expect_equal(out[[1L]]$prior_variance, 0.1) +}) + + +# =========================================================================== +# qtlEnrichmentPipeline(): no-triples error, alignTuple cache reuse, +# and the per-tuple "no usable QTL regions" skip +# =========================================================================== + +test_that("qtlEnrichmentPipeline: empty QTL collection errors with the no-triples message", { + gfmr <- .qep_makeGwasFmr() + qfmrEmpty <- QtlFineMappingResult( + study = character(0), context = character(0), trait = character(0), + method = character(0), entry = list(), ldSketch = NULL) + expect_error( + qtlEnrichmentPipeline(gwasFineMappingResult = gfmr, + qtlFineMappingResult = qfmrEmpty), + "triples to compute") +}) + +test_that("qtlEnrichmentPipeline: alignTuple cache is reused across GWAS studies", { + # Two GWAS studies -> each QTL tuple's alignment is computed for the + # first study (cache miss) and served from cache for the second + # (alignTuple early-return branch, line 119). + e1 <- .qep_makeFmEntry(variant_ids = paste0("v", 1:3), pip = c(0.5, 0.2, 0.1)) + e2 <- .qep_makeFmEntry(variant_ids = paste0("v", 1:3), pip = c(0.4, 0.3, 0.2)) + gfmr <- GwasFineMappingResult( + study = c("G1", "G2"), method = c("susie", "susie"), + entry = list(e1, e2), ldSketch = .qep_makeHandle()) + qfmr <- .qep_makeQtlFmr() + local_mocked_bindings(qtlEnrichment = .qep_mockEnrichment(2.0), + .package = "pecotmr") + out <- qtlEnrichmentPipeline(gwasFineMappingResult = gfmr, + qtlFineMappingResult = qfmr) + expect_equal(nrow(out), 2L) # 2 GWAS studies * 1 QTL tuple + expect_setequal(out$gwasStudy, c("G1", "G2")) +}) + +test_that("qtlEnrichmentPipeline: a tuple with no usable QTL regions warns and is skipped", { + gfmr <- .qep_makeGwasFmr() + emptyQtlEntry <- FineMappingEntry( + variantIds = "v1", susieFit = list(), + topLoci = data.frame(variant_id = "v1", pip = 0.1, + stringsAsFactors = FALSE)) + qfmr <- QtlFineMappingResult( + study = "Q1", context = "c1", trait = "t1", method = "susie", + entry = list(emptyQtlEntry), ldSketch = .qep_makeHandle()) + expect_warning( + out <- qtlEnrichmentPipeline(gwasFineMappingResult = gfmr, + qtlFineMappingResult = qfmr), + "no usable QTL regions") + expect_equal(nrow(out), 0L) +}) + + diff --git a/tests/testthat/test_qtlSumStats.R b/tests/testthat/test_qtlSumStats.R index 844c8da9..d053f860 100644 --- a/tests/testthat/test_qtlSumStats.R +++ b/tests/testthat/test_qtlSumStats.R @@ -309,3 +309,16 @@ test_that("show prints entry count and genome build", { expect_output(show(obj), "QtlSumStats: 1 entries, genome build hg19") }) + +# =========================================================================== +# Empty-collection + duplicate-tuple selector error branches +# =========================================================================== + +test_that("getSumStats errors on an empty QtlSumStats collection", { + empty <- QtlSumStats( + study = character(0), context = character(0), trait = character(0), + entry = list(), genome = "hg19", + ldSketch = .qtlMakeGenotypeHandle(), varY = numeric(0)) + expect_equal(nrow(empty), 0L) + expect_error(getSumStats(empty), "has no rows") +}) diff --git a/tests/testthat/test_regularizedRegressionWrappers.R b/tests/testthat/test_regularizedRegressionWrappers.R index 2aed3b06..aa6e0b21 100644 --- a/tests/testthat/test_regularizedRegressionWrappers.R +++ b/tests/testthat/test_regularizedRegressionWrappers.R @@ -4,10 +4,10 @@ context("SS-TWAS: weights, pipeline, and omnibus combination") # `TwasWeights(weights = list(...), variantIds = ..., standardized = ...)`. # The new `TwasWeights` is a DFrame collection class with (study, # context, trait, method, entry) columns where each entry is a -# `TwasWeightsEntry` S4 object carrying weights / fits / cvPerformance. +# `TwasWeightsEntry` S4 object carrying weights / fits / cvResult. # Class-shape tests for the new collection should live alongside the # pipeline tests and assert via accessors (`getWeights`, `getStudy`, -# `getCvPerformance`, etc.) — not against legacy slot shapes. +# `getCvResult`, etc.) — not against legacy slot shapes. # # `twasAnalysis()` was collapsed into the unified `twasZ()` dispatcher # (task #37); its tests are removed here. @@ -23,46 +23,8 @@ context("SS-TWAS: weights, pipeline, and omnibus combination") # SuSiE-RSS weight extraction # ============================================================================= -test_that(".susie_rss_extract_weights returns correct-length vector", { - skip_if_not_installed("susieR") - set.seed(42) - p <- 20 - n <- 500 - R <- diag(p) - z <- rnorm(p) - w <- pecotmr:::.susieRssExtractWeights( - fit = NULL, z = z, R = R, n = n, - requiredFields = c("alpha", "mu", "X_column_scale_factors"), - fitArgs = list(L = 5) - ) - expect_equal(length(w), p) - expect_true(all(is.finite(w))) -}) -test_that("susieRssWeights follows (stat, LD) convention", { - skip_if_not_installed("susieR") - set.seed(42) - p <- 20 - n <- 500 - R <- diag(p) - z <- rnorm(p) - stat <- list(b = z / sqrt(n), cor = z / sqrt(n), z = z, n = rep(n, p)) - w <- susieRssWeights(stat, R, methodArgs = list(L = 5)) - expect_equal(length(w), p) - expect_true(all(is.finite(w))) -}) -test_that("susieRssWeights retains fit when retainFit = TRUE", { - skip_if_not_installed("susieR") - set.seed(42) - p <- 20 - n <- 500 - R <- diag(p) - z <- rnorm(p) - stat <- list(b = z / sqrt(n), cor = z / sqrt(n), z = z, n = rep(n, p)) - w <- susieRssWeights(stat, R, retainFit = TRUE, methodArgs = list(L = 5)) - expect_false(is.null(attr(w, "fit"))) -}) test_that("mrmashWeights fitDetail: slim default omits the full fit, full keeps it", { skip_if_not_installed("mr.mashr") @@ -88,18 +50,6 @@ test_that("mrmashWeights fitDetail: slim default omits the full fit, full keeps expect_identical(fitFull$w0, fakeFit$w0) # slim fields still present }) -test_that("susieInfRssWeights works", { - skip_if_not_installed("susieR") - set.seed(42) - p <- 20 - n <- 500 - R <- diag(p) - z <- rnorm(p) - stat <- list(b = z / sqrt(n), cor = z / sqrt(n), z = z, n = rep(n, p)) - w <- susieInfRssWeights(stat, R, methodArgs = list(L = 5)) - expect_equal(length(w), p) - expect_true(all(is.finite(w))) -}) # ============================================================================= # Two-stage SuSiE-RSS fitting @@ -410,3 +360,332 @@ test_that("computeCoefficientsUnivGlmnet handles NA in Y", { # The seed-check message on line 107-108 would require removing .Random.seed # from the global environment, which is not safe to do in tests. +# ============================================================================= +# Real-fit coverage for the solver wrappers in R/regularizedRegressionWrappers.R +# ----------------------------------------------------------------------------- +# The fine-mapping / TWAS pipelines MOCK these wrappers, so their bodies are +# otherwise untested. Here we drive each wrapper on a SMALL real fixture and +# assert the return shape (weight length == #variants; matrix for multivariate; +# attr(.,"fit") when retainFit = TRUE). Bayesian/MCMC iterations are kept tiny. +# fsusieWeights and the mock-based mrmash/mvsusie payload tests live in +# test_rrMrmashMvsusie.R and are not duplicated here. +# ============================================================================= + +# (Shared .rrwXy / .rrwStatLd / .rrwMulti fixtures live in helper-rrwFixtures.R +# so test_fineMappingWrappers.R can reuse them for the SuSiE weight extractors.) + +# -------------------------------- individual -------------------------------- + +test_that("lassoWeights / enetWeights (glmnet) return length-p weights", { + skip_if_not_installed("glmnet") + f <- .rrwXy() + expect_length(as.numeric(lassoWeights(f$X, f$y)), f$p) + expect_length(as.numeric(enetWeights(f$X, f$y)), f$p) +}) + +test_that("scadWeights / mcpWeights (ncvreg) return length-p weights", { + skip_if_not_installed("ncvreg") + f <- .rrwXy() + expect_length(as.numeric(scadWeights(f$X, f$y)), f$p) + expect_length(as.numeric(mcpWeights(f$X, f$y)), f$p) +}) + +test_that("l0learnWeights returns length-p weights", { + skip_if_not_installed("L0Learn") + f <- .rrwXy() + expect_length(as.numeric(l0learnWeights(f$X, f$y)), f$p) +}) + +test_that("mrashWeights returns length-p weights and can retain the fit", { + skip_if_not_installed("susieR") + skip_if_not_installed("glmnet") + f <- .rrwXy() + w <- mrashWeights(f$X, f$y, retainFit = TRUE) + expect_length(w, f$p) + expect_false(is.null(attr(w, "fit"))) +}) + +test_that("qgg Bayes-alphabet weights (N/L/A/C/R) return length-p weights", { + skip_if_not_installed("qgg") + f <- .rrwXy() + mc <- list(nit = 200, nburn = 20, nthin = 1) + expect_length(do.call(bayesNWeights, c(list(f$X, f$y), mc)), f$p) + expect_length(do.call(bayesLWeights, c(list(f$X, f$y), mc)), f$p) + expect_length(do.call(bayesAWeights, c(list(f$X, f$y), mc)), f$p) + expect_length(do.call(bayesCWeights, c(list(f$X, f$y), mc)), f$p) + expect_length(do.call(bayesRWeights, c(list(f$X, f$y), mc)), f$p) +}) + +test_that("bayesAlphabetWeights validates matching row counts before fitting", { + skip_if_not_installed("qgg") + f <- .rrwXy() + expect_error(bayesAlphabetWeights(f$X, f$y[-1], method = "bayesN"), + "same number of rows") + expect_error( + bayesAlphabetWeights(f$X, f$y, method = "bayesN", Z = matrix(1, f$n - 1, 1)), + "same number of rows") +}) + +test_that("bayesBWeights / bLassoWeights (BGLR) return length-p weights", { + skip_if_not_installed("BGLR") + f <- .rrwXy() + expect_length(bayesBWeights(f$X, f$y, nIter = 200, burnIn = 20, thin = 1), f$p) + expect_length(bLassoWeights(f$X, f$y, nIter = 200, burnIn = 20, thin = 1), f$p) +}) + +test_that("dprVbWeights returns length-p weights and retains the fit", { + skip_if_not_installed("RcppDPR") + f <- .rrwXy() + w <- dprVbWeights(f$X, f$y, retainFit = TRUE) + expect_length(w, f$p) + expect_false(is.null(attr(w, "fit"))) +}) + +test_that("dprGibbsWeights returns length-p weights", { + skip_if_not_installed("RcppDPR") + f <- .rrwXy() + invisible(capture.output(w <- dprGibbsWeights(f$X, f$y, sStep = 200))) + expect_length(w, f$p) +}) + +test_that("dprAdaptiveGibbsWeights returns length-p weights", { + skip_if_not_installed("RcppDPR") + f <- .rrwXy() + invisible(capture.output(w <- dprAdaptiveGibbsWeights(f$X, f$y, s_step = 100))) + expect_length(w, f$p) +}) + +test_that("mrmashWeights fits from (X, Y) and returns p x K weights", { + skip_if_not_installed("mr.mashr") + skip_if_not_installed("glmnet") + set.seed(3) + m <- .rrwMulti(n = 60, p = 6, K = 3) + w <- suppressMessages(mrmashWeights(X = m$X, Y = m$Y, canonicalPriorMatrices = TRUE)) + expect_equal(dim(w), c(m$p, m$K)) + expect_true(all(is.finite(w))) +}) + + +# ----------------------------- RSS solvers (C++) ---------------------------- + +test_that("lassosumRss returns a p x nlambda beta matrix", { + f <- .rrwStatLd() + out <- lassosumRss(f$stat$b, list(blk1 = f$LD), f$n) + expect_equal(nrow(out$beta), f$p) + expect_equal(ncol(out$beta), length(out$lambda)) + expect_length(out$conv, length(out$lambda)) +}) + +test_that("penalizedRss traces a solution path for MCP / SCAD / L0", { + f <- .rrwStatLd() + for (pen in c("MCP", "SCAD")) { + out <- penalizedRss(f$stat$b, list(blk1 = f$LD), f$n, penalty = pen) + expect_equal(nrow(out$beta), f$p) + } + outL0 <- penalizedRss(f$stat$b, list(blk1 = f$LD), f$n, + penalty = "L0", lambda0 = 0.01, lambda = c(0)) + expect_equal(nrow(outL0$beta), f$p) +}) + +test_that("prsCs returns posterior betaEst of length p", { + f <- .rrwStatLd() + out <- prsCs(f$stat$b, list(blk1 = f$LD), f$n, nIter = 100, nBurnin = 20, thin = 1) + expect_length(out$betaEst, f$p) + expect_true(all(is.finite(out$betaEst))) +}) + +test_that("sdpr returns betaEst of length p", { + f <- .rrwStatLd() + out <- sdpr(f$stat$b, list(blk1 = f$LD), f$n, + iter = 100, burn = 20, thin = 1, verbose = FALSE) + expect_length(out$betaEst, f$p) +}) + +test_that("RSS solvers validate their LD-list / sample-size / length inputs", { + f <- .rrwStatLd() + expect_error(prsCs(f$stat$b, f$LD, f$n), "list of LD blocks") + expect_error(prsCs(f$stat$b, list(blk1 = f$LD), -1), "sample size") + expect_error(prsCs(f$stat$b[-1], list(blk1 = f$LD), f$n), "same as the sum") + expect_error(sdpr(f$stat$b[-1], list(blk1 = f$LD), f$n), "same as the length") + expect_error(sdpr(f$stat$b, list(blk1 = f$LD), f$n, M = 2), "at least 4") + expect_error(lassosumRss(f$stat$b, f$LD, f$n), "list of LD blocks") + expect_error(penalizedRss(f$stat$b, f$LD, f$n), "list of LD blocks") +}) + +# --------------------------- RSS weight wrappers ---------------------------- + +test_that("lassosumRssWeights returns length-p weights and records the selection", { + f <- .rrwStatLd() + w <- lassosumRssWeights(f$stat, f$LD) + expect_length(w, f$p) + expect_equal(unname(attr(w, "lassosum_selection")["mode"]), "ld_quadratic") + expect_length(lassosumRssWeights(f$stat, f$LD, selection = "min_fbeta"), f$p) +}) + +test_that("scadRssWeights / mcpRssWeights / l0learnRssWeights return length-p weights", { + f <- .rrwStatLd() + expect_length(scadRssWeights(f$stat, f$LD), f$p) + expect_length(mcpRssWeights(f$stat, f$LD), f$p) + expect_length(l0learnRssWeights(f$stat, f$LD), f$p) +}) + +test_that("prsCsWeights and sdprWeights follow the (stat, LD) contract", { + f <- .rrwStatLd() + expect_length(prsCsWeights(f$stat, f$LD, nIter = 100, nBurnin = 20, thin = 1), f$p) + expect_length( + sdprWeights(f$stat, f$LD, iter = 100, burn = 20, thin = 1, verbose = FALSE), f$p) +}) + +test_that("mrAshRssWeights returns posterior-mean weights of length p", { + skip_if_not_installed("susieR") + f <- .rrwStatLd() + w <- mrAshRssWeights(f$stat, f$LD, varY = 1, sigma2E = 1, + s0 = c(0, 0.01, 0.1, 0.5, 1), w0 = rep(1 / 5, 5)) + expect_length(w, f$p) + expect_true(all(is.finite(w))) +}) + + +test_that("mrmashRssWeights fits mr.mash.rss and returns p x K weights", { + skip_if_not_installed("mr.mashr") + m <- .rrwMulti(n = 60, p = 6, K = 3) + w <- mrmashRssWeights(m$stat, m$LD) + expect_equal(dim(w), c(m$p, m$K)) + expect_true(all(is.finite(w))) +}) + +test_that("mrmashRssWeights errors on single-context stat$z", { + skip_if_not_installed("mr.mashr") + f <- .rrwStatLd() + oneCol <- list(z = matrix(f$stat$z, ncol = 1), n = f$n) + expect_error(mrmashRssWeights(oneCol, f$LD), ">= 2 columns") +}) + + + +# ------------------------------- pure helpers ------------------------------- + +test_that(".lassosumCorFromStat reads cor / z / b and validates length", { + f <- .rrwStatLd() + expect_length(pecotmr:::.lassosumCorFromStat(list(cor = f$stat$cor), n = f$n, p = f$p), f$p) + expect_equal(pecotmr:::.lassosumCorFromStat(list(z = f$stat$z), n = f$n, p = f$p), + as.numeric(f$stat$z) / sqrt(f$n)) + expect_equal(pecotmr:::.lassosumCorFromStat(list(b = f$stat$b), n = f$n, p = f$p), + as.numeric(f$stat$b)) + expect_error(pecotmr:::.lassosumCorFromStat(list(), n = f$n, p = f$p), "one of") + expect_error(pecotmr:::.lassosumCorFromStat(list(z = f$stat$z[-1]), n = f$n, p = f$p), + "must equal") +}) + +test_that(".lassosumClampCor scales values with |cor| >= 1 below 1", { + expect_equal(pecotmr:::.lassosumClampCor(c(0.1, 0.5)), c(0.1, 0.5)) + expect_lt(max(abs(pecotmr:::.lassosumClampCor(c(0.5, 1.5, -2)))), 1) +}) + +test_that(".lassosumFirstMax returns the first index of the maximum", { + expect_equal(pecotmr:::.lassosumFirstMax(c(1, 3, 3, 2)), 2L) + expect_equal(pecotmr:::.lassosumFirstMax(c(5, 1, 2)), 1L) +}) + +test_that(".lassosumSelectMinFbeta picks the minimum-fbeta candidate", { + set.seed(7) + cb <- matrix(rnorm(6 * 4), 6, 4) + r <- pecotmr:::.lassosumSelectMinFbeta(cb, data.frame(fbeta = c(3, 1, 2, 4))) + expect_equal(r$index, 2) + expect_equal(r$mode, "min_fbeta") + expect_equal(r$beta, cb[, 2]) +}) + +test_that(".lassosumSelectLdQuadratic scores candidates by c'b / sqrt(b'Rb)", { + f <- .rrwStatLd() + set.seed(8) + cb <- matrix(rnorm(f$p * 4), f$p, 4) + r <- pecotmr:::.lassosumSelectLdQuadratic(cb, f$stat$b, f$LD) + expect_equal(r$mode, "ld_quadratic") + expect_true(r$index %in% seq_len(4)) + expect_equal(r$beta, cb[, r$index]) +}) + +test_that("computeCovDiag returns a diagonal condition covariance", { + m <- .rrwMulti(n = 60, p = 6, K = 3) + cv <- computeCovDiag(m$Y) + expect_equal(dim(cv), c(m$K, m$K)) + expect_equal(cv[upper.tri(cv)], rep(0, sum(upper.tri(cv)))) + expect_equal(unname(diag(cv)), unname(apply(m$Y, 2, var))) +}) + +test_that("computeCovFlash returns a finite K x K covariance from FLASH", { + skip_if_not_installed("flashier") + skip_if_not_installed("ebnm") + m <- .rrwMulti(n = 80, p = 6, K = 3) + cv <- computeCovFlash(m$Y) + expect_equal(dim(cv), c(m$K, m$K)) + expect_true(all(is.finite(cv))) +}) + +test_that("buildMrmashPriorMatrices builds an expanded S0 list and a prior grid", { + skip_if_not_installed("mr.mashr") + set.seed(9) + res <- buildMrmashPriorMatrices( + Bhat = matrix(rnorm(18), 6, 3), Shat = matrix(0.2, 6, 3), K = 3) + expect_true(is.list(res$S0)) + expect_gt(length(res$S0), 1) + expect_true(is.numeric(res$priorGrid)) + expect_true(all(vapply(res$S0, function(s) all(dim(s) == c(3, 3)), logical(1)))) +}) + +test_that("buildMrmashPriorMatrices errors without canonical or data-driven priors", { + skip_if_not_installed("mr.mashr") + expect_error( + buildMrmashPriorMatrices(Bhat = matrix(rnorm(6), 3, 2), Shat = matrix(0.2, 3, 2), + K = 2, canonicalPriorMatrices = FALSE), + "dataDrivenPriorMatrices") +}) + + +# =========================================================================== +# mr.mash weight tests (relocated from test_rrMrmashMvsusie.R) +# =========================================================================== + +# ---- mrmashWeights ---- +test_that("mrmashWeights errors when mr.mashr package is not available", { + skip_if(requireNamespace("mr.mashr", quietly = TRUE), + "mr.mashr is installed; skipping missing-package test") + + expect_error( + mrmashWeights(mrmashFit = NULL, X = matrix(1, 10, 5), Y = matrix(1, 10, 3)), + "mr\\.mash\\.alpha" + ) +}) + +test_that("mrmashWeights errors when X and Y are NULL and fit is NULL", { + skip_if_not(requireNamespace("mr.mashr", quietly = TRUE), + "mr.mashr not installed") + expect_error(mrmashWeights(mrmashFit = NULL, X = NULL, Y = NULL), + "Both X and Y must be provided") +}) + +test_that("mrmashWeights(retainFit=TRUE) attaches {dataDrivenPriorMatrices, w0, V}", { + skip_if_not(requireNamespace("mr.mashr", quietly = TRUE), + "mr.mashr not installed") + # These are exactly the parts fineMappingPipeline needs to rebuild the + # mvSuSiE reweighted mixture prior (w0 -> rescaleCovW0, original $U) and the + # residual variance (V); the heavy mu1 coefficient matrix is not retained. + ddpm <- list(U = list(comp = diag(2))) + fakeFit <- structure( + list(w0 = c(null = 0.4, comp_grid1 = 0.6), V = diag(2) * 2), + class = "mr.mash") + fakeCoef <- matrix(0.1, nrow = 5, ncol = 2) + local_mocked_bindings(coef.mr.mash = function(object, ...) fakeCoef, + .package = "mr.mashr") + w <- mrmashWeights(mrmashFit = fakeFit, + dataDrivenPriorMatrices = ddpm, retainFit = TRUE) + fit <- attr(w, "fit") + expect_true(is.list(fit)) + expect_identical(fit$dataDrivenPriorMatrices, ddpm) + expect_identical(fit$w0, fakeFit$w0) + expect_identical(fit$V, fakeFit$V) + # Default (retainFit = FALSE) leaves the weights free of the fit attribute. + expect_null(attr( + mrmashWeights(mrmashFit = fakeFit, dataDrivenPriorMatrices = ddpm), "fit")) +}) diff --git a/tests/testthat/test_relatednessQc.R b/tests/testthat/test_relatednessQc.R index 03439ea1..efa717ba 100644 --- a/tests/testthat/test_relatednessQc.R +++ b/tests/testthat/test_relatednessQc.R @@ -71,11 +71,14 @@ test_that("large component pre-pruning removes individuals", { ) threshold <- 0.125 + # verbose = TRUE exercises the graph pre-pruning progress messages and the + # final exclusion-count message. result <- filterRelatedness( relatedness = rel, relatednessThreshold = threshold, analysisType = "maximize_unrelated", - maxComponentSize = 10 + maxComponentSize = 10, + verbose = TRUE ) expect_type(result, "character") @@ -144,3 +147,68 @@ test_that("maximize_cases errors without pheno_data", { "Must provide phenoData" ) }) + +test_that("maximize_cases excludes a control listed as IID1 paired with a kept case", { + skip_if_not_installed("igraph") + skip_if_not_installed("plinkQC") + + # X1-C1 is a control(IID1)-case(IID2) pair, exercising the mirror branch of + # the case/control exclusion loop. C1-C2 keeps both cases (sub-threshold); + # X2-X3 gives the control-control step a non-empty input. + rel <- data.frame( + IID1 = c("X1", "C1", "X2"), + IID2 = c("C1", "C2", "X3"), + PI_HAT = c(0.25, 0.05, 0.20), + stringsAsFactors = FALSE + ) + pheno <- data.frame( + IID = c("C1", "C2", "X1", "X2", "X3"), + pheno = c(1, 1, 0, 0, 0), + stringsAsFactors = FALSE + ) + + result <- filterRelatedness( + relatedness = rel, + relatednessThreshold = 0.125, + analysisType = "maximize_cases", + phenoData = pheno + ) + + expect_type(result, "character") + # X1 is a control related to the retained case C1, so it must be excluded. + expect_true("X1" %in% result) +}) + +test_that("iterative cleanup loops and warns when related pairs persist", { + skip_if_not_installed("igraph") + skip_if_not_installed("plinkQC") + + # Force plinkQC to exclude nobody, so related pairs survive Phase 2 and the + # Phase-3 iterative cleanup loop runs to exhaustion, emitting the warning. + local_mocked_bindings( + relatednessFilter = function(...) + list(failIDs = data.frame(IID = character(0), stringsAsFactors = FALSE)), + .package = "plinkQC" + ) + + rel <- data.frame( + IID1 = c("A", "B", "C"), + IID2 = c("B", "C", "D"), + PI_HAT = c(0.30, 0.30, 0.30), + stringsAsFactors = FALSE + ) + + expect_warning( + result <- filterRelatedness( + relatedness = rel, + relatednessThreshold = 0.125, + analysisType = "maximize_unrelated", + maxIterations = 2L, + verbose = TRUE + ), + "related pairs remain" + ) + # Nobody is excluded because the (mocked) filter never fails anyone. + expect_type(result, "character") + expect_equal(length(result), 0L) +}) diff --git a/tests/testthat/test_rrMrmashMvsusie.R b/tests/testthat/test_rrMrmashMvsusie.R deleted file mode 100644 index 61468a81..00000000 --- a/tests/testthat/test_rrMrmashMvsusie.R +++ /dev/null @@ -1,193 +0,0 @@ -context("regularized_regression - mrmash / mvsusie / fsusie") - -# ---- mrmashWeights ---- -test_that("mrmashWeights errors when mr.mashr package is not available", { - skip_if(requireNamespace("mr.mashr", quietly = TRUE), - "mr.mashr is installed; skipping missing-package test") - - expect_error( - mrmashWeights(mrmashFit = NULL, X = matrix(1, 10, 5), Y = matrix(1, 10, 3)), - "mr\\.mash\\.alpha" - ) -}) - -test_that("mrmashWeights errors when X and Y are NULL and fit is NULL", { - skip_if_not(requireNamespace("mr.mashr", quietly = TRUE), - "mr.mashr not installed") - expect_error(mrmashWeights(mrmashFit = NULL, X = NULL, Y = NULL), - "Both X and Y must be provided") -}) - -test_that("mrmashWeights(retainFit=TRUE) attaches {dataDrivenPriorMatrices, w0, V}", { - skip_if_not(requireNamespace("mr.mashr", quietly = TRUE), - "mr.mashr not installed") - # These are exactly the parts fineMappingPipeline needs to rebuild the - # mvSuSiE reweighted mixture prior (w0 -> rescaleCovW0, original $U) and the - # residual variance (V); the heavy mu1 coefficient matrix is not retained. - ddpm <- list(U = list(comp = diag(2))) - fakeFit <- structure( - list(w0 = c(null = 0.4, comp_grid1 = 0.6), V = diag(2) * 2), - class = "mr.mash") - fakeCoef <- matrix(0.1, nrow = 5, ncol = 2) - local_mocked_bindings(coef.mr.mash = function(object, ...) fakeCoef, - .package = "mr.mashr") - w <- mrmashWeights(mrmashFit = fakeFit, - dataDrivenPriorMatrices = ddpm, retainFit = TRUE) - fit <- attr(w, "fit") - expect_true(is.list(fit)) - expect_identical(fit$dataDrivenPriorMatrices, ddpm) - expect_identical(fit$w0, fakeFit$w0) - expect_identical(fit$V, fakeFit$V) - # Default (retainFit = FALSE) leaves the weights free of the fit attribute. - expect_null(attr( - mrmashWeights(mrmashFit = fakeFit, dataDrivenPriorMatrices = ddpm), "fit")) -}) - -# ---- mvsusieWeights ---- -test_that("mvsusieWeights errors when mvsusieR package is not available", { - skip_if(requireNamespace("mvsusieR", quietly = TRUE), - "mvsusieR is installed; skipping missing-package test") - - expect_error( - mvsusieWeights(mvsusieFit = NULL, X = matrix(1, 10, 5), Y = matrix(1, 10, 3)), - "mvsusieR" - ) -}) - -test_that("mvsusieWeights errors when X and Y are NULL and fit is NULL", { - skip_if_not(requireNamespace("mvsusieR", quietly = TRUE), - "mvsusieR not installed") - expect_error(mvsusieWeights(mvsusieFit = NULL, X = NULL, Y = NULL), - "Both X and Y must be provided") -}) - -test_that("mvsusieWeights fits model and returns coefficients when fit is NULL", { - skip_if_not(requireNamespace("mvsusieR", quietly = TRUE), - "mvsusieR not installed") - set.seed(42) - n <- 30 - p <- 5 - R <- 3 - X <- matrix(rnorm(n * p), n, p) - Y <- matrix(rnorm(n * R), n, R) - fake_coef <- matrix(rnorm((p + 1) * R), nrow = p + 1, ncol = R) - captured <- list() - - local_mocked_bindings( - create_mixture_prior = function(...) list(), - mvsusie = function(...) { - captured <<- list(...) - "mock_fit" - }, - coef.mvsusie = function(...) fake_coef, - .package = "mvsusieR" - ) - - result <- expect_message( - mvsusieWeights(X = X, Y = Y, L = 12, LGreedy = 4), - "mvsusieFit is not provided" - ) - # Should return coef without intercept row - expect_equal(dim(result), c(p, R)) - expect_equal(result, fake_coef[-1, ]) - expect_equal(captured$L, 12) - expect_equal(captured$L_greedy, 4) -}) - -test_that("mvsusieWeights returns coefficients from provided fit", { - skip_if_not(requireNamespace("mvsusieR", quietly = TRUE), - "mvsusieR not installed") - p <- 5 - R <- 3 - fake_coef <- matrix(rnorm((p + 1) * R), nrow = p + 1, ncol = R) - - local_mocked_bindings( - coef.mvsusie = function(...) fake_coef, - .package = "mvsusieR" - ) - - result <- mvsusieWeights(mvsusieFit = "precomputed_fit") - expect_equal(dim(result), c(p, R)) - expect_equal(result, fake_coef[-1, ]) -}) - -# ---- fsusieWeights ---- -# Collapse a functional SuSiE fit to a variants x features TWAS weight matrix -# (the all-SNP wavelet posterior mean, the coef.susie analog). - -.fw_makeFsusieFit <- function(seed = 1, n = 150L, p = 24L, J = 16L) { - set.seed(seed) - X <- matrix(rnorm(n * p), n, p, - dimnames = list(paste0("s", seq_len(n)), paste0("v", seq_len(p)))) - b1 <- sin(seq(0, 2 * pi, length.out = J)) - b2 <- cos(seq(0, pi, length.out = J)) - Y <- X[, 3] %o% b1 + X[, 10] %o% b2 + - matrix(rnorm(n * J, sd = 0.3), n, J) - colnames(Y) <- paste0("f", seq_len(J)) - list(X = X, Y = Y, - fit = suppressWarnings(fsusieR::susiF( - X = X, Y = Y, pos = seq_len(J), L = 5, - post_processing = "none", verbose = FALSE))) -} - -test_that("fsusieWeights returns a variants x features matrix with variant rownames", { - skip_if_not_installed("fsusieR") - skip_if_not_installed("wavethresh") - obj <- .fw_makeFsusieFit() - W <- fsusieWeights(fsusieFit = obj$fit, variantIds = colnames(obj$X)) - expect_true(is.matrix(W)) - expect_equal(nrow(W), ncol(obj$X)) - expect_equal(ncol(W), ncol(obj$Y)) - expect_equal(rownames(W), colnames(obj$X)) -}) - -test_that("fsusieWeights matches fsusieR's own out_prep reconstruction (post_processing='none')", { - skip_if_not_installed("fsusieR") - skip_if_not_installed("wavethresh") - obj <- .fw_makeFsusieFit() - fit <- obj$fit - # The alpha-weighted sum over SNPs of the per-SNP feature-domain curves that - # fsusieWeights reconstructs must equal fSuSiE's own fitted_func[[l]] (built - # by out_prep.susiF) for every effect l. - csdX <- as.numeric(fit$csd_X) - perScale <- "mixture_normal_per_scale" %in% class(fsusieR::get_G_prior(fit)) - indxLst <- fsusieR::gen_wavelet_indx(log2(length(fit$outing_grid))) - scaleCols <- if (perScale) indxLst[[length(indxLst)]] - else ncol(as.matrix(fit$fitted_wc[[1L]])) - S <- pecotmr:::.fsusieSynthesisMatrix(fit$n_wac, scaleCols) - maxErr <- 0 - for (l in seq_along(fit$fitted_wc)) { - al <- as.numeric(fit$alpha[[l]]) - contrib <- colSums((al * (1 / csdX) * as.matrix(fit$fitted_wc[[l]])) %*% S) - maxErr <- max(maxErr, max(abs(contrib - as.numeric(fit$fitted_func[[l]])))) - } - expect_lt(maxErr, 1e-8) -}) - -test_that("fsusieWeights concentrates weight on the causal SNPs", { - skip_if_not_installed("fsusieR") - skip_if_not_installed("wavethresh") - obj <- .fw_makeFsusieFit() - W <- fsusieWeights(fsusieFit = obj$fit, variantIds = colnames(obj$X)) - rowNorm <- sqrt(rowSums(W^2)) - top2 <- names(sort(rowNorm, decreasing = TRUE))[1:2] - expect_setequal(top2, c("v3", "v10")) -}) - -test_that("fsusieWeights fast path returns precomputed $coef for a trimmed fit", { - # A trimmed fSuSiE fit drops fitted_wc but keeps the precomputed weight - # matrix in $coef; fsusieWeights returns it without touching wavelet slots. - W0 <- matrix(c(1, 0, 2, 0, 0, 3), nrow = 3, - dimnames = list(c("v1", "v2", "v3"), c("f1", "f2"))) - trimmed <- list(coef = W0, pip = c(0.1, 0.2, 0.7)) - class(trimmed) <- c("fsusie", "susie") - W <- fsusieWeights(fsusieFit = trimmed) - expect_identical(W, W0) -}) - -test_that("fsusieWeights errors without a fit and on an unusable (trimmed, no coef) fit", { - expect_error(fsusieWeights(fsusieFit = NULL), "is required") - bad <- list(pip = c(0.1, 0.9)) # no coef, no fitted_wc - class(bad) <- c("fsusie", "susie") - expect_error(fsusieWeights(fsusieFit = bad), "missing required slot") -}) diff --git a/tests/testthat/test_sldscPostprocessingPipeline.R b/tests/testthat/test_sldscPostprocessingPipeline.R new file mode 100644 index 00000000..d133fe25 --- /dev/null +++ b/tests/testthat/test_sldscPostprocessingPipeline.R @@ -0,0 +1,170 @@ +# Tests for R/sldscPostprocessingPipeline.R +# The pipeline is pure computation over an in-memory SldscData: no file I/O, +# no mocks. Fixtures come from helper-sldsc.R (.sldscMkData / .sldscMkRun). + +test_that("pipeline runs end-to-end on a single + joint SldscData", { + sd <- .sldscMkData() + res <- suppressMessages(sldscPostprocessingPipeline(sd, mafCutoff = 0.05)) + + expect_named(res, c("per_trait", "meta", "params")) + expect_named(res$per_trait, c("traitX", "traitY")) + expect_named(res$meta, c("tauStar", "enrichment", "enrichstat")) + expect_true(is.data.frame(res$per_trait$traitX$summary)) + expect_true("isBinary" %in% names(res$per_trait$traitX$summary)) + + # meta tauStar carries both single and joint channels + expect_true(all(c("singleMean", "singleSe", "singleP", + "jointMean", "jointSe", "jointP") %in% names(res$meta$tauStar))) + expect_equal(nrow(res$meta$tauStar), 2L) + + expect_gt(res$params$M_ref, 0) + expect_equal(res$params$maf_cutoff, 0.05) + expect_equal(res$params$target_categories, c("annot_A_0", "annot_B_0")) + expect_equal(res$params$trait_names, c("traitX", "traitY")) + # baseline annotation detected from the joint run + expect_true("baselineLD_0" %in% res$params$baseline_categories) +}) + +test_that("pipeline without joint runs yields NA joint meta", { + sd <- .sldscMkData(withJoint = FALSE) + res <- suppressMessages(sldscPostprocessingPipeline( + sd, targetCategories = c("annot_A_0", "annot_B_0"))) + expect_true(all(is.na(res$meta$tauStar$jointMean))) + expect_equal(res$params$n_baseline, 0L) +}) + +test_that("pipeline applies targetLabels", { + sd <- .sldscMkData() + res <- suppressMessages(sldscPostprocessingPipeline( + sd, targetLabels = c("Pretty_A", "Pretty_B"))) + expect_equal(res$params$target_categories, c("Pretty_A", "Pretty_B")) + expect_false(is.null(res$params$target_categories_orig)) + expect_setequal(res$meta$tauStar$target, c("Pretty_A", "Pretty_B")) + expect_setequal(res$per_trait$traitX$summary$target, c("Pretty_A", "Pretty_B")) +}) + +test_that("pipeline errors on wrong targetLabels length", { + sd <- .sldscMkData() + expect_error( + suppressMessages(sldscPostprocessingPipeline(sd, targetLabels = c("only_one"))), + "targetLabels") +}) + +test_that("pipeline errors on non-SldscData input", { + expect_error(sldscPostprocessingPipeline(list(a = 1)), + "must be an SldscData object") +}) + +test_that("pipeline errors when the SldscData has no traits", { + sd <- SldscData(annot = data.frame(CHR = 1, SNP = "rs1", annot_A = 1)) + expect_error(suppressMessages(sldscPostprocessingPipeline(sd)), + "no traits") +}) + +test_that("pipeline takes an explicit targetCategories (skips auto-detect)", { + sd <- .sldscMkData() + res <- suppressMessages(sldscPostprocessingPipeline( + sd, targetCategories = c("annot_A_0"))) + expect_equal(res$params$target_categories, "annot_A_0") + expect_equal(nrow(res$meta$tauStar), 1L) +}) + +test_that("pipeline falls back to positional rename when names don't match", { + # annot columns -> annot_A_0/annot_B_0, but the runs' categories use the + # polyfun --snp-list "L2" naming, so the intersect is empty and the pipeline + # renames the first length(sdAnnot) .results rows positionally. + annot <- data.frame(CHR = c(1, 1, 2, 2), SNP = paste0("rs", 1:4), + annot_A = c(1, 0, 1, 0), annot_B = c(2.1, 1.9, 2.4, 2.0), + stringsAsFactors = FALSE) + frq <- data.frame(CHR = c(1, 1, 2, 2), SNP = paste0("rs", 1:4), + MAF = rep(0.2, 4), stringsAsFactors = FALSE) + mkTrait <- function() list( + single = list(.sldscMkRun(c("L2_1", "base1")), .sldscMkRun(c("L2_2", "base1"))), + joint = .sldscMkRun(c("L2_1", "L2_2", "base1"))) + sd <- SldscData(annot, frq, list(traitX = mkTrait(), traitY = mkTrait())) + res <- suppressMessages(sldscPostprocessingPipeline(sd)) + # target categories were renamed positionally to the first 2 .results rows + expect_equal(res$params$target_categories, c("L2_1", "L2_2")) +}) + +test_that("pipeline breaks the single loop when a trait has fewer runs than targets", { + # traitX has only ONE single run but there are TWO target categories: the + # i > length(singleRuns) break is hit on the 2nd target. + annot <- data.frame(CHR = c(1, 1, 2, 2), SNP = paste0("rs", 1:4), + annot_A = c(1, 0, 1, 0), annot_B = c(2.1, 1.9, 2.4, 2.0), + stringsAsFactors = FALSE) + frq <- data.frame(CHR = c(1, 1, 2, 2), SNP = paste0("rs", 1:4), + MAF = rep(0.2, 4), stringsAsFactors = FALSE) + traits <- list(traitX = list( + single = list(.sldscMkRun(c("annot_A_0", "baselineLD_0"))), # only 1 run + joint = .sldscMkRun(c("annot_A_0", "annot_B_0", "baselineLD_0")))) + sd <- SldscData(annot, frq, traits) + res <- suppressMessages(sldscPostprocessingPipeline( + sd, targetCategories = c("annot_A_0", "annot_B_0"))) + # The break stops before annot_B_0's single run; the single-keyed summary + # therefore carries only annot_A_0. + sm <- res$per_trait$traitX$summary + expect_equal(sm$target, "annot_A_0") + expect_false(is.na(sm$tauStarSingle)) +}) + +test_that("pipeline warns and skips a single run that fails to standardize", { + # traitX's 2nd single run lacks the target category annot_B_0, so + # standardizeSldscTrait errors and the pipeline's tryCatch warns + skips it. + annot <- data.frame(CHR = c(1, 1, 2, 2), SNP = paste0("rs", 1:4), + annot_A = c(1, 0, 1, 0), annot_B = c(2.1, 1.9, 2.4, 2.0), + stringsAsFactors = FALSE) + frq <- data.frame(CHR = c(1, 1, 2, 2), SNP = paste0("rs", 1:4), + MAF = rep(0.2, 4), stringsAsFactors = FALSE) + traits <- list(traitX = list( + single = list(.sldscMkRun(c("annot_A_0", "baselineLD_0")), + .sldscMkRun(c("WRONG_0", "baselineLD_0"))), # missing annot_B_0 + joint = .sldscMkRun(c("annot_A_0", "annot_B_0", "baselineLD_0")))) + sd <- SldscData(annot, frq, traits) + expect_warning( + res <- suppressMessages(sldscPostprocessingPipeline( + sd, targetCategories = c("annot_A_0", "annot_B_0"))), + "Failed to standardize single") + # run1 (annot_A_0) succeeded; run2 (annot_B_0) failed to standardize and was + # skipped, so the single-keyed summary carries only annot_A_0. + sm <- res$per_trait$traitX$summary + expect_equal(sm$target, "annot_A_0") +}) + +# annotation + frq with enough per-chromosome variance for the compute steps +# to succeed before the branch under test is reached. +.sldscBranchAnnotFrq <- function() { + list( + annot = data.frame(CHR = c(1, 1, 1, 2, 2, 2), SNP = paste0("rs", 1:6), + annot_A = c(1, 0, 1, 0, 1, 0), + annot_B = c(2.1, 1.8, 2.5, 1.9, 2.3, 2.0), + stringsAsFactors = FALSE), + frq = data.frame(CHR = c(1, 1, 1, 2, 2, 2), SNP = paste0("rs", 1:6), + MAF = rep(0.2, 6), stringsAsFactors = FALSE)) +} + +test_that("pipeline errors when targetCategories cannot be auto-detected", { + af <- .sldscBranchAnnotFrq() + # The single trait has no joint run and an empty single list, so there is no + # pivot run to auto-detect categories from. + sd <- SldscData(af$annot, af$frq, + list(traitX = list(single = list(), joint = NULL))) + expect_error(suppressMessages(sldscPostprocessingPipeline(sd)), + "cannot auto-detect") +}) + +test_that("pipeline warns and skips a joint run that fails to standardize", { + af <- .sldscBranchAnnotFrq() + # The joint run lacks the target category annot_A_0, so its standardization + # errors and the pipeline's joint-side tryCatch warns + skips it. + traits <- list(traitX = list( + single = list(.sldscMkRun(c("annot_A_0", "baselineLD_0"))), + joint = .sldscMkRun(c("WRONG_0", "baselineLD_0")))) + sd <- SldscData(af$annot, af$frq, traits) + expect_warning( + res <- suppressMessages(sldscPostprocessingPipeline( + sd, targetCategories = c("annot_A_0"))), + "Failed to standardize joint") + # the single side still produced an estimate + expect_false(is.na(res$per_trait$traitX$summary$tauStarSingle[1])) +}) diff --git a/tests/testthat/test_sldscWrapper.R b/tests/testthat/test_sldscWrapper.R index eb2c0555..3598749f 100644 --- a/tests/testthat/test_sldscWrapper.R +++ b/tests/testthat/test_sldscWrapper.R @@ -1,182 +1,13 @@ # Tests for R/sldscWrapper.R # -# Fixture convention: -# - 2 chromosomes (1, 2), 50 SNPs each → 100 total -# - 2 target annotations: "annot_A" (binary), "annot_B" (continuous) -# - 97 baseline annotations (baselineLD_0 .. baselineLD_96) in joint run -# - 10 jackknife blocks -# - Polyfun appends "_0" to target annotation names in .results - -# ============================================================================= -# Fixture generators -# ============================================================================= - -# Create a single .annot.gz file for one chromosome -# Real polyfun .annot.gz files have CHR, SNP, BP, CM + annotation columns only -# (no MAF/A1/A2 — those come from the .frq / PLINK files). -.make_annot_gz <- function(dir, chrom, nSnps = 50) { - df <- data.frame( - CHR = chrom, - SNP = paste0("rs", (chrom - 1L) * 100L + seq_len(nSnps)), - BP = seq_len(nSnps) * 1000L, - CM = seq_len(nSnps) * 0.01, - annot_A = sample(c(0L, 1L), nSnps, replace = TRUE), - annot_B = rnorm(nSnps, 2, 0.5), - stringsAsFactors = FALSE - ) - path <- file.path(dir, sprintf("target.%d.annot.gz", chrom)) - gz <- gzfile(path, "wb") - vroom::vroom_write(df, gz, delim = "\t") - close(gz) - invisible(df) -} - -# Create a PLINK .frq file for one chromosome -.make_frq <- function(dir, chrom, plinkName ="ref_chr", nSnps = 50) { - df <- data.frame( - CHR = chrom, - SNP = paste0("rs", (chrom - 1L) * 100L + seq_len(nSnps)), - A1 = "A", - A2 = "G", - MAF = runif(nSnps, 0.01, 0.49), - NCHROBS = 200L, - stringsAsFactors = FALSE - ) - path <- file.path(dir, sprintf("%s%d.frq", plinkName, chrom)) - vroom::vroom_write(df, path, delim = "\t") - invisible(df) -} - -# Create the three polyfun output files (.results, .log, .part_delete) -# for a single-target run. Real polyfun output includes baseline categories -# even in single-target mode, so we add 2 dummy baseline categories. -.make_polyfun_single <- function(dir, prefix, target_name, nBlocks = 10, - h2g = 0.3, tau = 1e-7, enrichment = 2.5, - n_baseline = 2) { - target_cat <- paste0(target_name, "_0") - baseline_cats <- paste0("baselineLD_", seq_len(n_baseline) - 1L) - all_cats <- c(target_cat, baseline_cats) - n_cats <- length(all_cats) - - taus_all <- c(tau, rep(1e-8, n_baseline)) - enrichments_all <- c(enrichment, rep(1.0, n_baseline)) - - results <- data.frame( - Category = all_cats, - Coefficient = taus_all, - Coefficient_std_error = abs(taus_all) * 0.3, - Enrichment = enrichments_all, - Enrichment_std_error = enrichments_all * 0.2, - Enrichment_p = rep(0.01, n_cats), - `Prop._h2` = c(0.15, rep(0.425, n_baseline)), - `Prop._SNPs` = c(0.06, rep(0.47, n_baseline)), - check.names = FALSE, - stringsAsFactors = FALSE - ) - vroom::vroom_write(results, paste0(prefix, ".results"), delim = "\t") - - writeLines(c( - "Analysis started at 2024-01-01", - sprintf("Total Observed scale h2: %g (0.05)", h2g), - "Analysis finished" - ), paste0(prefix, ".log")) - - blocks <- matrix(rnorm(nBlocks * n_cats, - mean = rep(taus_all, each = nBlocks), - sd = abs(rep(taus_all, each = nBlocks)) * 0.5), - nrow = nBlocks, ncol = n_cats) - colnames(blocks) <- all_cats - vroom::vroom_write(as.data.frame(blocks), paste0(prefix, ".part_delete"), delim = "\t") - invisible(NULL) -} - -# Create polyfun output files for a joint run (target + baseline annotations) -.make_polyfun_joint <- function(dir, prefix, target_names, - n_baseline = 3, nBlocks = 10, h2g = 0.3) { - target_cats <- paste0(target_names, "_0") - baseline_cats <- paste0("baselineLD_", seq_len(n_baseline) - 1L) - all_cats <- c(target_cats, baseline_cats) - n_cats <- length(all_cats) - - taus <- c(rep(1e-7, length(target_cats)), rep(1e-8, n_baseline)) - enrichments <- c(rep(2.0, length(target_cats)), rep(1.0, n_baseline)) - - results <- data.frame( - Category = all_cats, - Coefficient = taus, - Coefficient_std_error = abs(taus) * 0.3, - Enrichment = enrichments, - Enrichment_std_error = enrichments * 0.2, - Enrichment_p = rep(0.05, n_cats), - `Prop._h2` = rep(1 / n_cats, n_cats), - `Prop._SNPs` = rep(1 / n_cats, n_cats), - check.names = FALSE, - stringsAsFactors = FALSE - ) - vroom::vroom_write(results, paste0(prefix, ".results"), delim = "\t") - - writeLines(c( - "Analysis started at 2024-01-01", - sprintf("Total Observed scale h2: %g (0.05)", h2g), - "Analysis finished" - ), paste0(prefix, ".log")) - - blocks <- matrix(rnorm(nBlocks * n_cats, mean = rep(taus, each = nBlocks), - sd = abs(rep(taus, each = nBlocks)) * 0.5), - nrow = nBlocks, ncol = n_cats) - colnames(blocks) <- all_cats - vroom::vroom_write(as.data.frame(blocks), paste0(prefix, ".part_delete"), delim = "\t") - invisible(NULL) -} - - -# Build a complete fixture directory for the full pipeline -.make_sldsc_fixtures <- function(envir = parent.frame()) { - base_dir <- withr::local_tempdir(.local_envir = envir) - - anno_dir <- file.path(base_dir, "annot") - frq_dir <- file.path(base_dir, "frq") - out_dir <- file.path(base_dir, "output") - dir.create(anno_dir) - dir.create(frq_dir) - dir.create(out_dir) - - plink_name <- "ref_chr" - - # Annotation + freq files for 2 chromosomes - for (chr in 1:2) { - .make_annot_gz(anno_dir, chr) - .make_frq(frq_dir, chr, plinkName =plink_name) - } - - targets <- c("annot_A", "annot_B") - - # Single-target runs: 2 targets x 2 traits - for (trait in c("traitX", "traitY")) { - for (i in seq_along(targets)) { - pref <- file.path(out_dir, sprintf("%s_single_%s", trait, targets[i])) - .make_polyfun_single(out_dir, pref, targets[i], h2g = 0.3 + (i - 1) * 0.05) - } - } - - # Joint runs: 1 per trait - - for (trait in c("traitX", "traitY")) { - pref <- file.path(out_dir, sprintf("%s_joint", trait)) - .make_polyfun_joint(out_dir, pref, targets, h2g = 0.3) - } - - list( - base_dir = base_dir, - anno_dir = anno_dir, - frq_dir = frq_dir, - out_dir = out_dir, - plinkName =plink_name, - targets = targets, - trait_names = c("traitX", "traitY") - ) -} - +# File-based fixture builders (.make_annot_gz / .make_frq / .make_polyfun_single +# / .make_polyfun_joint / .make_sldsc_fixtures) and the in-memory SldscData +# builders (.sldscMkRun / .sldscMkData) live in helper-sldsc.R. +# +# The reader functions (readSldscTrait/readSldscAnnot/readSldscFrq) do file I/O +# and are tested against real fixture files. The compute functions +# (computeSldscMRef/computeSldscAnnotSd/isBinarySldscAnnot/standardizeSldscTrait) +# operate on an in-memory SldscData and are tested with in-memory fixtures. # ============================================================================= # .sldscChromFromFilename @@ -218,7 +49,6 @@ test_that("readSldscTrait reads polyfun outputs correctly", { result <- readSldscTrait(prefix) expect_true(is.list(result)) - # 1 target + 2 baseline categories expect_true("myannot_0" %in% result$categories) expect_equal(result$h2g, 0.25) expect_equal(result$nBlocks, 5L) @@ -237,7 +67,6 @@ test_that("readSldscTrait errors when h2 not in log", { dir <- withr::local_tempdir() prefix <- file.path(dir, "bad_log") .make_polyfun_single(dir, prefix, "a", nBlocks = 3) - # Overwrite the log with no h2 line writeLines("No heritability here", paste0(prefix, ".log")) expect_error(readSldscTrait(prefix), "Total Observed scale h2") }) @@ -246,240 +75,220 @@ test_that("readSldscTrait errors on column mismatch in part_delete", { dir <- withr::local_tempdir() prefix <- file.path(dir, "bad_delete") .make_polyfun_single(dir, prefix, "a", nBlocks = 3) - # Overwrite part_delete with wrong number of columns (need != 3) vroom::vroom_write(data.frame(x = 1:3, y = 4:6, z = 7:9, w = 10:12), paste0(prefix, ".part_delete"), delim = "\t") expect_error(readSldscTrait(prefix), "part_delete") }) +test_that("readSldscTrait errors when the h2g value is non-numeric", { + dir <- withr::local_tempdir() + prefix <- file.path(dir, "bad_h2") + .make_polyfun_single(dir, prefix, "a", nBlocks = 3) + writeLines(c("start", "Total Observed scale h2: abc (0.05)", "end"), + paste0(prefix, ".log")) + expect_error(readSldscTrait(prefix), "failed to parse h2g numeric") +}) + # ============================================================================= -# computeSldscAnnotSd +# readSldscAnnot # ============================================================================= -test_that("computeSldscAnnotSd computes SDs with MAF filtering", { +test_that("readSldscAnnot stacks per-chromosome .annot.gz into one table", { dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - frq_dir <- file.path(dir, "frq") - dir.create(anno_dir) - dir.create(frq_dir) - for (chr in 1:2) { - .make_annot_gz(anno_dir, chr) - .make_frq(frq_dir, chr, plinkName ="ref_chr") - } + for (chr in 1:2) .make_annot_gz(dir, chr) + df <- readSldscAnnot(dir) + expect_s3_class(df, "data.frame") + expect_true(all(c("CHR", "SNP", "annot_A", "annot_B") %in% names(df))) + expect_equal(nrow(df), 100L) # 2 chroms x 50 SNPs + expect_setequal(unique(df$CHR), c(1, 2)) +}) - sds <- computeSldscAnnotSd(anno_dir, frqfileDir =frq_dir, - plinkName ="ref_chr", mafCutoff =0.05) +test_that("readSldscAnnot respects annotCols", { + dir <- withr::local_tempdir() + for (chr in 1:2) .make_annot_gz(dir, chr) + df <- readSldscAnnot(dir, annotCols = "annot_A") + expect_true("annot_A" %in% names(df)) + expect_false("annot_B" %in% names(df)) +}) + +test_that("readSldscAnnot errors on missing dir / no files", { + expect_error(readSldscAnnot("/nonexistent/dir"), "does not exist") + empty <- withr::local_tempdir() + expect_error(readSldscAnnot(empty), "no .annot.gz") +}) + +test_that("readSldscAnnot errors when annotCols resolves to nothing", { + dir <- withr::local_tempdir() + .make_annot_gz(dir, 1) + expect_error(readSldscAnnot(dir, annotCols = character(0)), + "no annotation columns") +}) + + +# ============================================================================= +# readSldscFrq +# ============================================================================= + +test_that("readSldscFrq stacks per-chromosome .frq into one table", { + dir <- withr::local_tempdir() + for (chr in 1:2) .make_frq(dir, chr, plinkName = "ref_chr") + df <- readSldscFrq(dir, plinkName = "ref_chr") + expect_s3_class(df, "data.frame") + expect_true(all(c("CHR", "SNP", "MAF") %in% names(df))) + expect_equal(nrow(df), 100L) +}) + +test_that("readSldscFrq falls back to a generic .frq glob when the prefix misses", { + dir <- withr::local_tempdir() + for (chr in 1:2) .make_frq(dir, chr, plinkName = "other_chr") + df <- readSldscFrq(dir, plinkName = "nomatch_chr") + expect_equal(nrow(df), 100L) +}) + +test_that("readSldscFrq errors on missing dir / no files", { + expect_error(readSldscFrq("/nonexistent/dir"), "does not exist") + empty <- withr::local_tempdir() + expect_error(readSldscFrq(empty), "no .frq") +}) + + +# ============================================================================= +# computeSldscAnnotSd (operates on SldscData) +# ============================================================================= + +test_that("computeSldscAnnotSd computes SDs with MAF filtering", { + sds <- computeSldscAnnotSd(.sldscMkData(), mafCutoff = 0.05) expect_true(is.numeric(sds)) expect_equal(length(sds), 2L) expect_named(sds, c("annot_A", "annot_B")) expect_true(all(sds > 0)) }) -test_that("computeSldscAnnotSd works with mafCutoff =0", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - dir.create(anno_dir) - for (chr in 1:2) .make_annot_gz(anno_dir, chr) - - sds <- computeSldscAnnotSd(anno_dir, mafCutoff =0) +test_that("computeSldscAnnotSd works with mafCutoff = 0 (no frq needed)", { + sds <- computeSldscAnnotSd(.sldscMkData(withFrq = FALSE), mafCutoff = 0) expect_true(all(sds > 0)) }) -test_that("computeSldscAnnotSd respects annot_cols argument (character)", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - dir.create(anno_dir) - for (chr in 1:2) .make_annot_gz(anno_dir, chr) - - sds <- computeSldscAnnotSd(anno_dir, mafCutoff =0, - annotCols ="annot_A") +test_that("computeSldscAnnotSd respects annotCols (character)", { + sds <- computeSldscAnnotSd(.sldscMkData(), mafCutoff = 0, annotCols = "annot_A") expect_equal(length(sds), 1L) expect_named(sds, "annot_A") }) -test_that("computeSldscAnnotSd respects annot_cols argument (numeric)", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - dir.create(anno_dir) - for (chr in 1:2) .make_annot_gz(anno_dir, chr) - - sds <- computeSldscAnnotSd(anno_dir, mafCutoff =0, annotCols =2L) +test_that("computeSldscAnnotSd respects annotCols (numeric)", { + sds <- computeSldscAnnotSd(.sldscMkData(), mafCutoff = 0, annotCols = 2L) expect_equal(length(sds), 1L) expect_named(sds, "annot_B") }) -test_that("computeSldscAnnotSd errors on missing frqfile_dir when maf > 0", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - dir.create(anno_dir) - .make_annot_gz(anno_dir, 1) - expect_error(computeSldscAnnotSd(anno_dir, frqfileDir =NULL, mafCutoff =0.05), - "frqfileDir") +test_that("computeSldscAnnotSd errors when mafCutoff > 0 but no frq data", { + expect_error( + computeSldscAnnotSd(.sldscMkData(withFrq = FALSE), mafCutoff = 0.05), + "requires frq data") }) -test_that("computeSldscAnnotSd errors on missing anno dir", { - expect_error(computeSldscAnnotSd("/nonexistent/dir", mafCutoff =0), - "does not exist") +test_that("computeSldscAnnotSd errors when there are no annotation columns", { + expect_error( + computeSldscAnnotSd(.sldscMkData(), mafCutoff = 0, annotCols = character(0)), + "no annotation columns") }) -test_that("computeSldscAnnotSd errors on empty anno dir", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "empty") - dir.create(anno_dir) - expect_error(computeSldscAnnotSd(anno_dir, mafCutoff =0), "no .annot.gz") +test_that("computeSldscAnnotSd errors on non-SldscData input", { + expect_error(computeSldscAnnotSd(list(a = 1)), "must be an SldscData") }) -test_that("computeSldscAnnotSd errors on missing .frq file", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - frq_dir <- file.path(dir, "frq") - dir.create(anno_dir) - dir.create(frq_dir) - .make_annot_gz(anno_dir, 1) - # No .frq file for chr1 - expect_error(computeSldscAnnotSd(anno_dir, frqfileDir =frq_dir, - plinkName ="ref_chr", mafCutoff =0.05), - "frq file not found") +test_that("computeSldscAnnotSd errors with zero degrees of freedom", { + # One SNP per chromosome: after the per-chromosome split each block has + # nrow <= 1 (the `next`), so no variance accumulates and den stays 0. + annot <- data.frame(CHR = c(1, 2), SNP = c("rs1", "rs2"), + annot_A = c(1, 0), annot_B = c(2.1, 1.9), + stringsAsFactors = FALSE) + frq <- data.frame(CHR = c(1, 2), SNP = c("rs1", "rs2"), MAF = c(0.2, 0.2), + stringsAsFactors = FALSE) + sd <- SldscData(annot, frq, list()) + expect_error(computeSldscAnnotSd(sd, mafCutoff = 0.05), + "zero degrees of freedom") }) # ============================================================================= -# computeSldscMRef +# computeSldscMRef (operates on SldscData) # ============================================================================= -test_that("computeSldscMRef counts SNPs from .frq files with MAF cutoff", { - dir <- withr::local_tempdir() - frq_dir <- file.path(dir, "frq") - dir.create(frq_dir) - for (chr in 1:2) .make_frq(frq_dir, chr, plinkName ="ref_chr") - - M <- computeSldscMRef(frqfileDir =frq_dir, plinkName ="ref_chr", - mafCutoff =0.05) +test_that("computeSldscMRef counts MAF > cutoff SNPs from the frq table", { + M <- computeSldscMRef(.sldscMkData(), mafCutoff = 0.05) expect_true(is.integer(M)) - expect_true(M > 0) - expect_true(M <= 100L) # 2 chroms x 50 SNPs max + expect_equal(M, 6L) # all 6 frq SNPs have MAF 0.2 > 0.05 }) -test_that("computeSldscMRef counts all SNPs when mafCutoff =0", { - dir <- withr::local_tempdir() - frq_dir <- file.path(dir, "frq") - dir.create(frq_dir) - for (chr in 1:2) .make_frq(frq_dir, chr, plinkName ="ref_chr") - - M <- computeSldscMRef(frqfileDir =frq_dir, plinkName ="ref_chr", - mafCutoff =0) - expect_equal(M, 100L) -}) - -test_that("computeSldscMRef falls back to .l2.ldscore with mafCutoff =0", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - dir.create(anno_dir) - # Create a fake .l2.ldscore.gz file with 40 rows - df <- data.frame(CHR = 1, SNP = paste0("rs", 1:40), BP = 1:40, L2 = runif(40)) - gz <- gzfile(file.path(anno_dir, "scores.l2.ldscore.gz"), "wb") - vroom::vroom_write(df, gz, delim = "\t") - close(gz) - - M <- computeSldscMRef(targetAnnoDir =anno_dir, mafCutoff =0) - expect_equal(M, 40L) +test_that("computeSldscMRef counts all SNPs when mafCutoff = 0", { + expect_equal(computeSldscMRef(.sldscMkData(), mafCutoff = 0), 6L) }) -test_that("computeSldscMRef errors when maf > 0 and no frq dir", { - expect_error(computeSldscMRef(mafCutoff =0.05), "frqfileDir") +test_that("computeSldscMRef applies the MAF cutoff", { + annot <- data.frame(CHR = 1, SNP = paste0("rs", 1:4), a = c(1, 0, 1, 0), + stringsAsFactors = FALSE) + frq <- data.frame(CHR = 1, SNP = paste0("rs", 1:4), + MAF = c(0.2, 0.01, 0.3, 0.02), stringsAsFactors = FALSE) + sd <- SldscData(annot, frq, list()) + expect_equal(computeSldscMRef(sd, mafCutoff = 0.05), 2L) # rs1, rs3 }) -test_that("computeSldscMRef errors with no dirs at all", { - expect_error(computeSldscMRef(mafCutoff =0), "need frqfileDir") +test_that("computeSldscMRef falls back to annot row count when frq absent and mafCutoff = 0", { + expect_equal(computeSldscMRef(.sldscMkData(withFrq = FALSE), mafCutoff = 0), 6L) }) -test_that("computeSldscMRef warns on l2.ldscore fallback", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - dir.create(anno_dir) - df <- data.frame(CHR = 1, SNP = paste0("rs", 1:20), BP = 1:20, L2 = runif(20)) - gz <- gzfile(file.path(anno_dir, "scores.l2.ldscore.gz"), "wb") - vroom::vroom_write(df, gz, delim = "\t") - close(gz) - - expect_warning( - computeSldscMRef(targetAnnoDir =anno_dir, mafCutoff =0), - "UNDERCOUNTS" - ) +test_that("computeSldscMRef errors when mafCutoff > 0 but no frq data", { + expect_error(computeSldscMRef(.sldscMkData(withFrq = FALSE), mafCutoff = 0.05), + "requires frq data") }) -test_that("computeSldscMRef uses generic .frq glob when plink_name pattern fails", { - dir <- withr::local_tempdir() - frq_dir <- file.path(dir, "frq") - dir.create(frq_dir) - # Name doesn't match the plink_name pattern - for (chr in 1:2) .make_frq(frq_dir, chr, plinkName ="other_chr") - - M <- computeSldscMRef(frqfileDir =frq_dir, plinkName ="nomatch_chr", - mafCutoff =0) - expect_equal(M, 100L) +test_that("computeSldscMRef errors on non-SldscData input", { + expect_error(computeSldscMRef(list(a = 1)), "must be an SldscData") }) # ============================================================================= -# isBinarySldscAnnot +# isBinarySldscAnnot (operates on SldscData) # ============================================================================= test_that("isBinarySldscAnnot detects binary and continuous annotations", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - dir.create(anno_dir) - for (chr in 1:2) .make_annot_gz(anno_dir, chr) - - result <- isBinarySldscAnnot(anno_dir) + result <- isBinarySldscAnnot(.sldscMkData()) expect_true(is.logical(result)) expect_named(result, c("annot_A", "annot_B")) - expect_true(result[["annot_A"]]) # binary (0/1) - expect_false(result[["annot_B"]]) # continuous + expect_true(result[["annot_A"]]) # binary (0/1) + expect_false(result[["annot_B"]]) # continuous }) -test_that("isBinarySldscAnnot respects annot_cols (character)", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - dir.create(anno_dir) - for (chr in 1:2) .make_annot_gz(anno_dir, chr) - - result <- isBinarySldscAnnot(anno_dir, annotCols ="annot_A") +test_that("isBinarySldscAnnot respects annotCols (character)", { + result <- isBinarySldscAnnot(.sldscMkData(), annotCols = "annot_A") expect_equal(length(result), 1L) expect_true(result[["annot_A"]]) }) -test_that("isBinarySldscAnnot respects annot_cols (numeric)", { - dir <- withr::local_tempdir() - anno_dir <- file.path(dir, "annot") - dir.create(anno_dir) - for (chr in 1:2) .make_annot_gz(anno_dir, chr) - - result <- isBinarySldscAnnot(anno_dir, annotCols =2L) +test_that("isBinarySldscAnnot respects annotCols (numeric)", { + result <- isBinarySldscAnnot(.sldscMkData(), annotCols = 2L) expect_equal(length(result), 1L) expect_named(result, "annot_B") }) -test_that("isBinarySldscAnnot errors on empty dir", { - dir <- withr::local_tempdir() - expect_error(isBinarySldscAnnot(dir), "no .annot.gz") +test_that("isBinarySldscAnnot errors on non-SldscData input", { + expect_error(isBinarySldscAnnot(list(a = 1)), "must be an SldscData") }) # ============================================================================= -# standardizeSldscTrait +# standardizeSldscTrait (operates on SldscData via getTraitRun) # ============================================================================= -# Helper to build a trait_data list (as from readSldscTrait) +# Build a readSldscTrait-shaped run with specific values the tests assert on. .make_trait_data <- function(cats = c("A_0", "B_0"), nBlocks = 10, h2g = 0.3) { n <- length(cats) taus <- rep(1e-7, n) - blocks <- matrix(rnorm(nBlocks * n, mean = rep(taus, each = nBlocks), - sd = 1e-8), + blocks <- matrix(rnorm(nBlocks * n, mean = rep(taus, each = nBlocks), sd = 1e-8), nrow = nBlocks, ncol = n) colnames(blocks) <- cats - list( categories = cats, tau = setNames(taus, cats), @@ -491,16 +300,20 @@ test_that("isBinarySldscAnnot errors on empty dir", { propSnps = setNames(rep(0.06, n), cats), h2g = h2g, tauBlocks = blocks, - nBlocks = nBlocks - ) + nBlocks = nBlocks) +} + +# Wrap a run (and optional joint run) in a minimal SldscData so the new +# standardizeSldscTrait(sldscData, trait, mode, idx, ...) API can reach it. +.wrapRun <- function(run, joint = run) { + SldscData(annot = data.frame(CHR = 1, SNP = "rs1", A = 1, stringsAsFactors = FALSE), + traits = list(t = list(single = list(run), joint = joint))) } test_that("standardizeSldscTrait works in single mode", { td <- .make_trait_data() - sd_annot <- c(A_0 = 0.5, B_0 = 1.2) - M_ref <- 1000L - - result <- standardizeSldscTrait(td, sd_annot, M_ref, mode = "single") + result <- standardizeSldscTrait(.wrapRun(td), "t", mode = "single", idx = 1L, + sdAnnot = c(A_0 = 0.5, B_0 = 1.2), MRef = 1000L) expect_true(is.list(result)) expect_equal(result$mode, "single") expect_equal(result$h2g, 0.3) @@ -511,79 +324,82 @@ test_that("standardizeSldscTrait works in single mode", { expect_equal(nrow(s), 2L) expect_true(all(c("tauStar", "tauStarSe", "enrichment", "enrichmentSe", "enrichmentP", "enrichstat", "enrichstatSe") %in% names(s))) - # tauStar = tau * sd * M_ref / h2g - expected_ts <- unname(td$tau) * c(0.5, 1.2) * 1000 / 0.3 - expect_equal(s$tauStar, expected_ts) - - # tau_star_blocks has correct dimensions + expect_equal(s$tauStar, unname(td$tau) * c(0.5, 1.2) * 1000 / 0.3) expect_true(is.matrix(result$tau_star_blocks)) expect_equal(dim(result$tau_star_blocks), c(10L, 2L)) }) test_that("standardizeSldscTrait works in joint mode", { td <- .make_trait_data() - sd_annot <- c(A_0 = 0.5, B_0 = 1.2) - M_ref <- 1000L - - result <- standardizeSldscTrait(td, sd_annot, M_ref, mode = "joint") + result <- standardizeSldscTrait(.wrapRun(td), "t", mode = "joint", + sdAnnot = c(A_0 = 0.5, B_0 = 1.2), MRef = 1000L) expect_equal(result$mode, "joint") - s <- result$summary - # joint mode should NOT have enrichment columns - expect_false("enrichment" %in% names(s)) - expect_true("tauStar" %in% names(s)) + expect_false("enrichment" %in% names(result$summary)) + expect_true("tauStar" %in% names(result$summary)) }) test_that("standardizeSldscTrait auto-detects target categories", { td <- .make_trait_data() - sd_annot <- c(A_0 = 0.5) # only one overlaps - - result <- standardizeSldscTrait(td, sd_annot, 1000L, mode = "joint") + result <- standardizeSldscTrait(.wrapRun(td), "t", mode = "joint", + sdAnnot = c(A_0 = 0.5), MRef = 1000L) expect_equal(nrow(result$summary), 1L) expect_equal(result$summary$target, "A_0") }) test_that("standardizeSldscTrait errors on empty categories", { td <- .make_trait_data() - sd_annot <- c(X_0 = 0.5) # no overlap - expect_error(standardizeSldscTrait(td, sd_annot, 1000L), "no target categories") + expect_error( + standardizeSldscTrait(.wrapRun(td), "t", mode = "single", idx = 1L, + sdAnnot = c(X_0 = 0.5), MRef = 1000L), + "no target categories") }) test_that("standardizeSldscTrait errors on missing categories", { - td <- .make_trait_data(cats = c("A_0")) - sd_annot <- c(A_0 = 0.5, B_0 = 1.0) + td <- .make_trait_data(cats = "A_0") expect_error( - standardizeSldscTrait(td, sd_annot, 1000L, - targetCategories =c("A_0", "B_0")), - "missing categories" - ) + standardizeSldscTrait(.wrapRun(td), "t", mode = "single", idx = 1L, + sdAnnot = c(A_0 = 0.5, B_0 = 1.0), MRef = 1000L, + targetCategories = c("A_0", "B_0")), + "missing categories") }) test_that("standardizeSldscTrait warns on zero sd", { td <- .make_trait_data(cats = "A_0") - sd_annot <- c(A_0 = 0) expect_warning( - standardizeSldscTrait(td, sd_annot, 1000L, mode = "joint"), - "zero/NA sd" - ) + standardizeSldscTrait(.wrapRun(td), "t", mode = "joint", + sdAnnot = c(A_0 = 0), MRef = 1000L), + "zero/NA sd") }) test_that("standardizeSldscTrait enrichstatSe handles p = 0", { td <- .make_trait_data(cats = "A_0") - td$enrichmentP <- c(A_0 = 0) # p = 0 → abs_z = Inf → se = 0 - sd_annot <- c(A_0 = 0.5) - - result <- standardizeSldscTrait(td, sd_annot, 1000L, mode = "single") - # enrichstatSe should be NA when abs_z is infinite + td$enrichmentP <- c(A_0 = 0) # p = 0 -> abs_z = Inf -> SE = NA + result <- standardizeSldscTrait(.wrapRun(td), "t", mode = "single", idx = 1L, + sdAnnot = c(A_0 = 0.5), MRef = 1000L) expect_true(is.na(result$summary$enrichstatSe)) }) +test_that("standardizeSldscTrait errors when the requested run is absent", { + td <- .make_trait_data() + expect_error( + standardizeSldscTrait(.wrapRun(td), "t", mode = "single", idx = 5L, + sdAnnot = c(A_0 = 0.5), MRef = 1000L), + "no single run") +}) + +test_that("standardizeSldscTrait errors on non-SldscData input", { + expect_error( + standardizeSldscTrait(list(a = 1), "t", mode = "single", idx = 1L, + sdAnnot = c(A_0 = 0.5), MRef = 1000L), + "must be an SldscData") +}) + # ============================================================================= -# metaSldscRandom +# metaSldscRandom (unchanged: operates on standardized per-trait estimates) # ============================================================================= -# Helper to build per_trait_estimates for metaSldscRandom .make_per_trait_meta <- function(nTraits = 3, category = "A_0", means = NULL, ses = NULL) { if (is.null(means)) means <- rnorm(nTraits, 1e-5, 1e-6) @@ -643,7 +459,6 @@ test_that("metaSldscRandom returns NA with < 2 traits", { test_that("metaSldscRandom skips traits with missing category", { pt <- .make_per_trait_meta(nTraits = 3) - # Change category in trait2 pt$trait2$summary$target <- "other" result <- metaSldscRandom(pt, "A_0", "tauStar") expect_equal(result$nTraits, 2L) @@ -656,7 +471,7 @@ test_that("metaSldscRandom skips traits with NA or zero SE", { pt$trait3$summary$tauStarSe <- 0 result <- metaSldscRandom(pt, "A_0", "tauStar") expect_equal(result$nTraits, 1L) - expect_true(is.na(result$mean)) # < 2 valid + expect_true(is.na(result$mean)) }) test_that("metaSldscRandom skips NULL entries", { @@ -673,6 +488,26 @@ test_that("metaSldscRandom generates names for unnamed list", { expect_equal(result$traitsUsed, c("1", "2")) }) +test_that("metaSldscRandom skips NULL entries and NULL summaries", { + valid <- .make_per_trait_meta(nTraits = 2, means = c(1e-5, 2e-5), + ses = c(1e-6, 1e-6)) + # list() keeps explicit NULLs (unlike `x$y <- NULL`), so trait3/trait4 model + # a NULL entry and a NULL-summary entry the loop must skip. + pt <- c(valid, list(trait3 = NULL, trait4 = list(summary = NULL))) + result <- metaSldscRandom(pt, "A_0", "tauStar") + expect_equal(result$nTraits, 2L) +}) + +test_that("metaSldscRandom skips a trait whose summary lacks the quantity columns", { + valid <- .make_per_trait_meta(nTraits = 2, means = c(1e-5, 2e-5), + ses = c(1e-6, 1e-6)) + # trait3's row matches the category but has no tauStar/tauStarSe columns. + pt <- c(valid, list(trait3 = list( + summary = data.frame(target = "A_0", foo = 1, stringsAsFactors = FALSE)))) + result <- metaSldscRandom(pt, "A_0", "tauStar") + expect_equal(result$nTraits, 2L) +}) + # ============================================================================= # .sldscAssembleTraitSummary @@ -692,7 +527,6 @@ test_that(".sldscAssembleTraitSummary combines single and joint", { enrichstat = c(0.001, 0.002), enrichstatSe = c(0.0003, 0.0004), stringsAsFactors = FALSE ) - joint_df <- data.frame( target = targets, tau = c(1.1e-7, 2.1e-7), tauSe = c(3.1e-8, 4.1e-8), @@ -715,8 +549,7 @@ test_that(".sldscAssembleTraitSummary handles NULL single", { fn <- pecotmr:::.sldscAssembleTraitSummary joint_df <- data.frame(target = "A_0", tauStar = 0.01, tauStarSe = 0.003, stringsAsFactors = FALSE) - is_bin <- c(A_0 = TRUE) - result <- fn(NULL, joint_df, "A_0", is_bin) + result <- fn(NULL, joint_df, "A_0", c(A_0 = TRUE)) expect_equal(nrow(result), 1L) expect_true(all(is.na(result$tauStarSingle))) expect_equal(result$tauStarJoint, 0.01) @@ -727,18 +560,15 @@ test_that(".sldscAssembleTraitSummary handles NULL joint", { single_df <- data.frame(target = "A_0", tauStar = 0.01, tauStarSe = 0.003, enrichment = 2.0, enrichmentSe = 0.4, enrichmentP = 0.01, enrichstat = 0.001, - enrichstatSe = 0.0003, - stringsAsFactors = FALSE) - is_bin <- c(A_0 = TRUE) - result <- fn(single_df, NULL, "A_0", is_bin) + enrichstatSe = 0.0003, stringsAsFactors = FALSE) + result <- fn(single_df, NULL, "A_0", c(A_0 = TRUE)) expect_equal(result$tauStarSingle, 0.01) expect_true(all(is.na(result$tauStarJoint))) }) test_that(".sldscAssembleTraitSummary handles both NULL", { fn <- pecotmr:::.sldscAssembleTraitSummary - is_bin <- c(A_0 = TRUE) - result <- fn(NULL, NULL, "A_0", is_bin) + result <- fn(NULL, NULL, "A_0", c(A_0 = TRUE)) expect_equal(nrow(result), 1L) expect_equal(result$target, "A_0") }) @@ -769,8 +599,7 @@ test_that(".sldscViewForMeta extracts single-mode columns", { test_that(".sldscViewForMeta returns NULL for missing summary", { fn <- pecotmr:::.sldscViewForMeta - per_trait <- list(traitX = list(summary = NULL)) - view <- fn(per_trait, "single") + view <- fn(list(traitX = list(summary = NULL)), "single") expect_null(view$traitX) }) @@ -778,168 +607,7 @@ test_that(".sldscViewForMeta returns NULL when no matching columns", { fn <- pecotmr:::.sldscViewForMeta per_trait <- list( traitX = list(summary = data.frame(target = "A_0", other_col = 1, - stringsAsFactors = FALSE)) - ) + stringsAsFactors = FALSE))) view <- fn(per_trait, "single") expect_null(view$traitX) }) - - -# ============================================================================= -# sldscPostprocessingPipeline (integration) -# ============================================================================= - -test_that("sldscPostprocessingPipeline runs end-to-end", { - fix <- .make_sldsc_fixtures() - - trait_single_prefixes <- list( - traitX = c( - file.path(fix$out_dir, "traitX_single_annot_A"), - file.path(fix$out_dir, "traitX_single_annot_B") - ), - traitY = c( - file.path(fix$out_dir, "traitY_single_annot_A"), - file.path(fix$out_dir, "traitY_single_annot_B") - ) - ) - trait_joint_prefix <- c( - traitX = file.path(fix$out_dir, "traitX_joint"), - traitY = file.path(fix$out_dir, "traitY_joint") - ) - - result <- suppressMessages(sldscPostprocessingPipeline( - traitSinglePrefixes =trait_single_prefixes, - traitJointPrefix = trait_joint_prefix, - targetAnnoDir =fix$anno_dir, - frqfileDir =fix$frq_dir, - plinkName =fix$plinkName, - mafCutoff =0.05 - )) - - expect_true(is.list(result)) - expect_named(result, c("per_trait", "meta", "params")) - - # per_trait - expect_equal(length(result$per_trait), 2L) - expect_named(result$per_trait, c("traitX", "traitY")) - - pt <- result$per_trait$traitX - expect_true(is.data.frame(pt$summary)) - expect_true("isBinary" %in% names(pt$summary)) - expect_true(is.numeric(pt$h2g)) - - # meta - expect_named(result$meta, c("tauStar", "enrichment", "enrichstat")) - for (nm in names(result$meta)) { - m <- result$meta[[nm]] - expect_true(is.data.frame(m)) - expect_true("target" %in% names(m)) - expect_true("isBinary" %in% names(m)) - } - - # params - expect_equal(result$params$M_ref > 0, TRUE) - expect_equal(result$params$maf_cutoff, 0.05) - expect_equal(length(result$params$target_categories), 2L) - expect_equal(result$params$trait_names, c("traitX", "traitY")) -}) - -test_that("sldscPostprocessingPipeline works without joint runs", { - fix <- .make_sldsc_fixtures() - - trait_single_prefixes <- list( - traitX = c( - file.path(fix$out_dir, "traitX_single_annot_A"), - file.path(fix$out_dir, "traitX_single_annot_B") - ), - traitY = c( - file.path(fix$out_dir, "traitY_single_annot_A"), - file.path(fix$out_dir, "traitY_single_annot_B") - ) - ) - - # No joint prefix - result <- suppressMessages(sldscPostprocessingPipeline( - traitSinglePrefixes =trait_single_prefixes, - traitJointPrefix = NULL, - targetAnnoDir =fix$anno_dir, - frqfileDir =fix$frq_dir, - plinkName =fix$plinkName, - mafCutoff =0.05 - )) - - expect_true(is.list(result)) - expect_true(all(is.na(result$meta$tauStar$jointMean))) -}) - -test_that("sldscPostprocessingPipeline applies target_labels", { - fix <- .make_sldsc_fixtures() - - trait_single_prefixes <- list( - traitX = c( - file.path(fix$out_dir, "traitX_single_annot_A"), - file.path(fix$out_dir, "traitX_single_annot_B") - ), - traitY = c( - file.path(fix$out_dir, "traitY_single_annot_A"), - file.path(fix$out_dir, "traitY_single_annot_B") - ) - ) - trait_joint_prefix <- c( - traitX = file.path(fix$out_dir, "traitX_joint"), - traitY = file.path(fix$out_dir, "traitY_joint") - ) - - result <- suppressMessages(sldscPostprocessingPipeline( - traitSinglePrefixes =trait_single_prefixes, - traitJointPrefix = trait_joint_prefix, - targetAnnoDir =fix$anno_dir, - frqfileDir =fix$frq_dir, - plinkName =fix$plinkName, - mafCutoff =0.05, - targetLabels =c("Pretty_A", "Pretty_B") - )) - - # Check relabeling - expect_equal(result$params$target_categories, c("Pretty_A", "Pretty_B")) - expect_true(!is.null(result$params$target_categories_orig)) - expect_true(all(result$meta$tauStar$target %in% c("Pretty_A", "Pretty_B"))) - expect_true(all(result$per_trait$traitX$summary$target %in% c("Pretty_A", "Pretty_B"))) -}) - -test_that("sldscPostprocessingPipeline errors on wrong target_labels length", { - fix <- .make_sldsc_fixtures() - - trait_single_prefixes <- list( - traitX = c( - file.path(fix$out_dir, "traitX_single_annot_A"), - file.path(fix$out_dir, "traitX_single_annot_B") - ) - ) - trait_joint_prefix <- c( - traitX = file.path(fix$out_dir, "traitX_joint") - ) - - expect_error( - suppressMessages(sldscPostprocessingPipeline( - traitSinglePrefixes =trait_single_prefixes, - traitJointPrefix = trait_joint_prefix, - targetAnnoDir =fix$anno_dir, - frqfileDir =fix$frq_dir, - plinkName =fix$plinkName, - targetLabels =c("only_one") - )), - "targetLabels" - ) -}) - -test_that("sldscPostprocessingPipeline errors on unnamed prefixes", { - expect_error( - sldscPostprocessingPipeline( - traitSinglePrefixes =list(c("a", "b")), - traitJointPrefix = NULL, - targetAnnoDir ="." - ), - "named list" - ) -}) diff --git a/tests/testthat/test_sumstatsQc.R b/tests/testthat/test_sumstatsQc.R index a91de8ae..0c29c5c9 100644 --- a/tests/testthat/test_sumstatsQc.R +++ b/tests/testthat/test_sumstatsQc.R @@ -4476,3 +4476,902 @@ test_that("summaryStatsQc: preserves optional nCase/nControl columns through QC" expect_equal(out$nCase, 500) expect_equal(out$nControl, 1500) }) + + +# =========================================================================== +# mergeVariantInfo (data.frame + GRanges, flip-aware, all = TRUE/FALSE) +# =========================================================================== + +test_that("mergeVariantInfo (data.frame): all = TRUE returns flip-corrected union", { + v1 <- data.frame(chrom = c("1", "1", "2"), pos = c(100, 200, 300), + alt = c("A", "C", "G"), ref = c("G", "T", "A"), + stringsAsFactors = FALSE) + # row1 exact; row2 alt/ref swapped vs v1 (flip); row3 brand new variant. + v2 <- data.frame(chrom = c("1", "1", "3"), pos = c(100, 200, 400), + alt = c("A", "T", "C"), ref = c("G", "C", "A"), + stringsAsFactors = FALSE) + out <- mergeVariantInfo(v1, v2, all = TRUE) + expect_s3_class(out, "data.frame") + expect_setequal(colnames(out), c("chrom", "pos", "alt", "ref")) + # The flipped row2 collapses onto v1's orientation, so only the genuinely new + # variant (chrom 3) is added to the 3 from v1. + expect_equal(nrow(out), 4L) + flipped <- out[out$chrom == "1" & out$pos == 200, ] + expect_equal(flipped$alt, "C") + expect_equal(flipped$ref, "T") + expect_true(any(out$chrom == "3" & out$pos == 400)) +}) + +test_that("mergeVariantInfo (data.frame): all = FALSE returns only flip-corrected variants2", { + v1 <- data.frame(chrom = c("1", "1"), pos = c(100, 200), + alt = c("A", "C"), ref = c("G", "T"), + stringsAsFactors = FALSE) + v2 <- data.frame(chrom = c("1", "1"), pos = c(100, 200), + alt = c("A", "T"), ref = c("G", "C"), + stringsAsFactors = FALSE) + out <- mergeVariantInfo(v1, v2, all = FALSE) + expect_equal(nrow(out), 2L) + # row2 was a flip of v1; mergeVariantInfo rewrites it to v1's orientation. + expect_equal(out$alt, c("A", "C")) + expect_equal(out$ref, c("G", "T")) +}) + +test_that("mergeVariantInfo (GRanges): converts GRanges inputs and detects flips", { + ssqcMakeGr <- function(chrom, pos, alt, ref) { + gr <- GenomicRanges::GRanges( + seqnames = paste0("chr", chrom), + ranges = IRanges::IRanges(start = pos, width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame(alt = alt, ref = ref) + gr + } + g1 <- ssqcMakeGr(c(1, 1, 2), c(100, 200, 300), + c("A", "C", "G"), c("G", "T", "A")) + g2 <- ssqcMakeGr(c(1, 1, 3), c(100, 200, 400), + c("A", "T", "C"), c("G", "C", "A")) + out <- mergeVariantInfo(g1, g2, all = TRUE) + expect_s3_class(out, "data.frame") + expect_equal(nrow(out), 4L) + flipped <- out[out$chrom == "chr1" & out$pos == 200, ] + expect_equal(flipped$alt, "C") + expect_equal(flipped$ref, "T") +}) + +test_that("mergeVariantInfo does not warn on length-mismatch recycling", { + # Regression guard: the flip vector must be full-length so `hasMatch & ...` + # never recycles a subset-length comparison. + v1 <- data.frame(chrom = c("1", "1", "2"), pos = c(100, 200, 300), + alt = c("A", "C", "G"), ref = c("G", "T", "A"), + stringsAsFactors = FALSE) + v2 <- data.frame(chrom = c("1", "1", "3"), pos = c(100, 200, 400), + alt = c("A", "T", "C"), ref = c("G", "C", "A"), + stringsAsFactors = FALSE) + expect_no_warning(out <- mergeVariantInfo(v1, v2, all = TRUE)) + # Behaviour is unchanged: the flipped row collapses onto v1's orientation. + flipped <- out[out$chrom == "1" & out$pos == 200, ] + expect_equal(flipped$alt, "C") + expect_equal(flipped$ref, "T") +}) + +# =========================================================================== +# .matchRefPanel uncovered branches +# =========================================================================== + +test_that(".matchRefPanel: accepts a bare variant-id character vector (targetData)", { + res <- pecotmr:::.matchRefPanel( + c("chr1:100:A:G", "chr1:200:C:T"), + c("chr1:100:A:G", "chr1:200:C:T"), + matchMinProp = 0) + expect_equal(nrow(res$harmonizedData), 2L) +}) + +test_that(".matchRefPanel: strips merge-conflicting columns (variant_id) from targetData", { + target <- data.frame(chrom = c(1, 1), pos = c(100, 200), + A2 = c("A", "C"), A1 = c("G", "T"), + variant_id = c("old1", "old2"), z = c(1, 2), + stringsAsFactors = FALSE) + ref <- data.frame(chrom = c(1, 1), pos = c(100, 200), + A2 = c("A", "C"), A1 = c("G", "T"), + stringsAsFactors = FALSE) + res <- pecotmr:::.matchRefPanel(target, ref, colToFlip = "z", matchMinProp = 0) + expect_equal(nrow(res$harmonizedData), 2L) + # The input variant_id was stripped; the returned id is rebuilt from QC'd alleles. + expect_false(any(c("old1", "old2") %in% res$harmonizedData$variant_id)) +}) + +test_that(".matchRefPanel: errors when colToFlip column is absent", { + target <- data.frame(chrom = 1, pos = 100, A2 = "A", A1 = "G", z = 1, + stringsAsFactors = FALSE) + ref <- data.frame(chrom = 1, pos = 100, A2 = "A", A1 = "G", + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.matchRefPanel(target, ref, colToFlip = "missingCol", matchMinProp = 0), + "not found in targetData") +}) + +test_that(".matchRefPanel: errors when colToComplement column is absent", { + target <- data.frame(chrom = 1, pos = 100, A2 = "A", A1 = "G", z = 1, + stringsAsFactors = FALSE) + ref <- data.frame(chrom = 1, pos = 100, A2 = "A", A1 = "G", + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.matchRefPanel(target, ref, colToComplement = "missingAf", + matchMinProp = 0), + "not found in targetData") +}) + +test_that(".matchRefPanel: complements colToComplement (1 - af) on an allele swap", { + # pos 100 exact; pos 200 allele-swapped -> sign_flip => z negated, af -> 1-af. + target <- data.frame(chrom = c(1, 1), pos = c(100, 200), + A2 = c("A", "A"), A1 = c("G", "G"), + z = c(1, 2), af = c(0.3, 0.4), + stringsAsFactors = FALSE) + ref <- data.frame(chrom = c(1, 1), pos = c(100, 200), + A2 = c("A", "G"), A1 = c("G", "A"), + stringsAsFactors = FALSE) + res <- pecotmr:::.matchRefPanel(target, ref, colToFlip = "z", + colToComplement = "af", matchMinProp = 0) + hd <- res$harmonizedData + row200 <- hd[hd$pos == 200, ] + expect_equal(row200$af, 0.6) # 1 - 0.4 + expect_equal(row200$z, -2) # sign-flipped +}) + +test_that(".matchRefPanel: removeDups = TRUE warns and drops duplicate variants", { + target <- data.frame(chrom = c(1, 1), pos = c(100, 100), + A2 = c("A", "A"), A1 = c("G", "G"), + stringsAsFactors = FALSE) + ref <- data.frame(chrom = 1, pos = 100, A2 = "A", A1 = "G", + stringsAsFactors = FALSE) + expect_warning( + res <- pecotmr:::.matchRefPanel(target, ref, removeDups = TRUE, + matchMinProp = 0), + "Removed 1 duplicate") + expect_equal(nrow(res$harmonizedData), 1L) +}) + +test_that(".matchRefPanel: errors when duplicated variant IDs remain (removeDups = FALSE)", { + target <- data.frame(chrom = c(1, 1), pos = c(100, 100), + A2 = c("A", "A"), A1 = c("G", "G"), + stringsAsFactors = FALSE) + ref <- data.frame(chrom = 1, pos = 100, A2 = "A", A1 = "G", + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.matchRefPanel(target, ref, removeDups = FALSE, matchMinProp = 0), + "Duplicated variant IDs remain") +}) + +test_that(".matchRefPanel: errors when too few variants match (matchMinProp)", { + target <- data.frame(chrom = 1, pos = 100, A2 = "A", A1 = "G", + stringsAsFactors = FALSE) + ref <- data.frame(chrom = rep(1, 10), pos = seq(100, 1000, 100), + A2 = rep("A", 10), A1 = rep("G", 10), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.matchRefPanel(target, ref, matchMinProp = 0.9), + "Not enough variants") +}) + +# =========================================================================== +# addDupsBackDentist: dimension-mismatch stops +# =========================================================================== + +test_that("addDupsBackDentist stops when dentistOutput nrow != count of non-duplicates", { + dentistOutput <- data.frame( + original_z = c(1, 2, 3), imputed_z = c(1, 2, 3), + iter_to_correct = c(1, 1, 1), rsq = c(0.5, 0.5, 0.5), + z_diff = c(0.1, 0.1, 0.1)) + # 4 non-duplicate markers but only 3 rows in dentistOutput. + findDupOutput <- list(dupBearer = c(-1, -1, -1, -1), sign = rep(1, 4)) + expect_error( + pecotmr:::addDupsBackDentist(rep(0, 4), dentistOutput, findDupOutput), + "does not match the occurrences") +}) + +test_that("addDupsBackDentist stops on inconsistent zScore / findDupOutput length", { + dentistOutput <- data.frame( + original_z = c(1, 2), imputed_z = c(1, 2), + iter_to_correct = c(1, 1), rsq = c(0.5, 0.5), z_diff = c(0.1, 0.1)) + findDupOutput <- list(dupBearer = c(-1, -1), sign = c(1, 1)) + # nrow(dentistOutput) == sum(dupBearer == -1) == 2, but zScore has length 3. + expect_error( + pecotmr:::addDupsBackDentist(rep(0, 3), dentistOutput, findDupOutput), + "inconsistent dimension") +}) + +# =========================================================================== +# slalom: non-matrix X coerced to matrix +# =========================================================================== + +test_that("slalom coerces a non-matrix X (data.frame) to a matrix", { + set.seed(11) + n_samples <- 80 + n_snps <- 6 + Xdf <- as.data.frame(matrix(sample(0:2, n_samples * n_snps, replace = TRUE), + nrow = n_samples, ncol = n_snps)) + z <- rnorm(n_snps) + result <- slalom(zScore = z, X = Xdf) + expect_equal(nrow(result$data), n_snps) +}) + +# =========================================================================== +# getSusieResult: trimmed fit is empty +# =========================================================================== + +test_that("getSusieResult returns NULL when the trimmed susie fit is empty", { + conData <- list(finemappingEntry = FineMappingEntry( + variantIds = c("1:100:A:G", "1:200:C:T"), + susieFit = list(), + topLoci = data.frame(variant_id = character(0), pip = numeric(0), + stringsAsFactors = FALSE))) + expect_null(getSusieResult(conData)) +}) + +# =========================================================================== +# autoDecision: high-correlation tagging branch is reached +# =========================================================================== + +test_that("autoDecision evaluates the high-corr tagging expression for non-top CS", { + # A non-top CS with a small p-value forces evaluation of the highCorrCols + # branch (the `..col` accessor errors without data.table; we only need the + # line to be exercised). + df <- data.frame(cs_name = c("L1", "L2"), + top_z = c(5.0, 3.0), + p_value = c(1e-10, 1e-10), + stringsAsFactors = FALSE) + expect_error(suppressWarnings(autoDecision(df, highCorrCols = c("cs_corr_1")))) +}) + +# =========================================================================== +# raissSingleMatrix / raissSingleMatrixFromX uncovered branches +# =========================================================================== + +test_that("raissSingleMatrix coerces a data.frame LD matrix and is verbose", { + set.seed(99) + p <- 8 + ref_panel <- data.frame(chrom = rep(1, p), pos = seq(10, p * 10, 10), + variant_id = paste0("rs", 1:p), + A1 = rep("A", p), A2 = rep("G", p), + stringsAsFactors = FALSE) + known_idx <- c(1, 3, 5) + known_zscores <- data.frame(chrom = rep(1, 3), pos = ref_panel$pos[known_idx], + variant_id = ref_panel$variant_id[known_idx], + A1 = rep("A", 3), A2 = rep("G", 3), + z = rnorm(3), stringsAsFactors = FALSE) + ldDf <- as.data.frame(diag(p)) + res <- pecotmr:::raissSingleMatrix(ref_panel, known_zscores, ldDf, + r2Threshold = 0, minimumLd = 0, + verbose = FALSE) + expect_true(is.list(res)) + expect_equal(nrow(res$resultNofilter), p) +}) + +test_that("raissSingleMatrix emits verbose 'No known variants' message", { + ref_panel <- data.frame(chrom = rep(1, 4), pos = seq(10, 40, 10), + variant_id = paste0("rs", 1:4), + A1 = rep("A", 4), A2 = rep("G", 4), + stringsAsFactors = FALSE) + known_zscores <- data.frame(chrom = rep(1, 2), pos = c(500, 600), + variant_id = c("ghost1", "ghost2"), + A1 = rep("A", 2), A2 = rep("G", 2), + z = rnorm(2), stringsAsFactors = FALSE) + expect_message( + res <- pecotmr:::raissSingleMatrix(ref_panel, known_zscores, diag(4), + verbose = TRUE), + "No known variants") + expect_null(res) +}) + +test_that("raissSingleMatrix emits verbose 'No unknown variants' message", { + ref_panel <- data.frame(chrom = rep(1, 4), pos = seq(10, 40, 10), + variant_id = paste0("rs", 1:4), + A1 = rep("A", 4), A2 = rep("G", 4), + stringsAsFactors = FALSE) + known_zscores <- data.frame(chrom = rep(1, 4), pos = seq(10, 40, 10), + variant_id = paste0("rs", 1:4), + A1 = rep("A", 4), A2 = rep("G", 4), + z = rnorm(4), stringsAsFactors = FALSE) + expect_message( + res <- pecotmr:::raissSingleMatrix(ref_panel, known_zscores, diag(4), + verbose = TRUE), + "No unknown variants") + expect_equal(res$resultNofilter, known_zscores) +}) + +test_that("raissSingleMatrixFromX stops on unsorted positions", { + ref_panel <- data.frame(chrom = rep(1, 4), pos = c(40, 30, 20, 10), + variant_id = paste0("rs", 1:4), + A1 = rep("A", 4), A2 = rep("G", 4), + stringsAsFactors = FALSE) + known_zscores <- data.frame(chrom = 1, pos = 10, variant_id = "rs4", + A1 = "A", A2 = "G", z = 1, stringsAsFactors = FALSE) + X <- matrix(rnorm(20 * 4), nrow = 20) + colnames(X) <- ref_panel$variant_id + expect_error( + pecotmr:::raissSingleMatrixFromX(ref_panel, known_zscores, X, verbose = FALSE), + "increasing order of pos") +}) + +test_that("raissSingleMatrixFromX emits verbose no-known / no-unknown messages", { + set.seed(7) + p <- 5 + ref_panel <- data.frame(chrom = rep(1, p), pos = seq(10, p * 10, 10), + variant_id = paste0("rs", 1:p), + A1 = rep("A", p), A2 = rep("G", p), + stringsAsFactors = FALSE) + X <- scale(matrix(sample(0:2, 40 * p, replace = TRUE), nrow = 40)) + X[is.na(X)] <- 0 + colnames(X) <- ref_panel$variant_id + ghost <- data.frame(chrom = rep(1, 2), pos = c(900, 1000), + variant_id = c("g1", "g2"), A1 = rep("A", 2), + A2 = rep("G", 2), z = rnorm(2), stringsAsFactors = FALSE) + expect_message( + res_no_known <- pecotmr:::raissSingleMatrixFromX(ref_panel, ghost, X, + verbose = TRUE), + "No known variants") + expect_null(res_no_known) + + all_known <- data.frame(chrom = rep(1, p), pos = seq(10, p * 10, 10), + variant_id = paste0("rs", 1:p), A1 = rep("A", p), + A2 = rep("G", p), z = rnorm(p), stringsAsFactors = FALSE) + expect_message( + res_no_unknown <- pecotmr:::raissSingleMatrixFromX(ref_panel, all_known, X, + verbose = TRUE), + "No unknown variants") + expect_equal(res_no_unknown$resultNofilter, all_known) +}) + +# =========================================================================== +# raiss: genotypeMatrix dispatch verbose / error branches +# =========================================================================== + +test_that("raiss genotypeMatrix path: single-matrix, list, all-fail, and bad-type", { + data <- generate_X_test_data(n = 60, p = 20, n_known = 10, seed = 3) + + # Single matrix, verbose -> "Processing genotype matrix via SVD..." + expect_message( + res_single <- raiss(data$ref_panel, data$known_zscores, + genotypeMatrix = data$X, r2Threshold = 0, + minimumLd = 0, verbose = TRUE), + "Processing genotype matrix") + expect_equal(nrow(res_single$resultNofilter), nrow(data$ref_panel)) + + # List of (one) matrix, verbose -> "Processing multiple genotype matrix blocks" + expect_message( + res_list <- raiss(data$ref_panel, data$known_zscores, + genotypeMatrix = list(data$X), r2Threshold = 0, + minimumLd = 0, verbose = TRUE), + "Processing multiple genotype matrix blocks") + expect_true(is.list(res_list)) + + # List where every block fails (foreign known variants) -> NULL + message + ghost <- data.frame(chrom = 1, pos = 99999, variant_id = "zzz", + A1 = "A", A2 = "G", z = 1, stringsAsFactors = FALSE) + expect_message( + res_fail <- raiss(data$ref_panel, ghost, + genotypeMatrix = list(data$X[, 1:10, drop = FALSE], + data$X[, 11:20, drop = FALSE]), + verbose = TRUE), + "No blocks could be processed") + expect_null(res_fail) + + # Neither matrix nor list -> hard error + expect_error( + raiss(data$ref_panel, data$known_zscores, genotypeMatrix = 42), + "must be a matrix or a list") +}) + +# =========================================================================== +# raiss: multi-LD-block verbose / merge / error branches +# =========================================================================== + +# Build a 2-block LD list whose shared boundary variant is IMPUTED (not known) +# in both blocks, so its raissR2 is a finite value on both sides and the +# boundary-merge R2 comparison branches are exercised. +ssqcOverlapImputedBlocks <- function(seed = 5) { + set.seed(seed) + vid <- paste0("v", 1:8) + pos <- seq(10, 80, by = 10) + ref_panel <- data.frame(chrom = rep(1, 8), pos = pos, variant_id = vid, + A1 = rep("A", 8), A2 = rep("G", 8), + stringsAsFactors = FALSE) + # v4 (the boundary) is left out of the known set so it is imputed in both blocks. + known_idx <- c(1, 2, 3, 5, 6, 7, 8) + known_zscores <- data.frame(chrom = rep(1, length(known_idx)), + pos = pos[known_idx], variant_id = vid[known_idx], + A1 = rep("A", length(known_idx)), + A2 = rep("G", length(known_idx)), + z = rnorm(length(known_idx)), + stringsAsFactors = FALSE) + mkBlock <- function(ids) { + nb <- length(ids) + m <- matrix(0, nb, nb) + for (a in 1:nb) for (b in 1:nb) m[a, b] <- if (a == b) 1 else 0.9^abs(a - b) + rownames(m) <- colnames(m) <- ids + m + } + block1_ids <- vid[1:4] + block2_ids <- vid[4:8] + variantIndices <- rbind( + data.frame(variant_id = block1_ids, blockId = 1L, stringsAsFactors = FALSE), + data.frame(variant_id = block2_ids, blockId = 2L, stringsAsFactors = FALSE)) + blockMetadata <- data.frame(blockId = c(1L, 2L), chrom = c(1, 1), + size = c(4L, 5L), startIdx = c(1L, 4L), + endIdx = c(4L, 8L), stringsAsFactors = FALSE) + ldBlocks <- list( + ldMatrices = list(mkBlock(block1_ids), mkBlock(block2_ids)), + variantIndices = variantIndices, + blockMetadata = blockMetadata, + ldVariants = vid) + list(ref_panel = ref_panel, known_zscores = known_zscores, + LD_matrix_blocks = ldBlocks) +} + +test_that("raiss multi-LD-block: verbose messages and an imputed boundary merge", { + td <- ssqcOverlapImputedBlocks(seed = 5) + expect_message( + res <- raiss(td$ref_panel, td$known_zscores, + ldMatrix = td$LD_matrix_blocks, lamb = 0.01, rcond = 0.01, + r2Threshold = 0, minimumLd = 0, verbose = TRUE), + "Processing multiple LD blocks") + expect_true(is.list(res)) + # Boundary variant v4 appears exactly once after the merge. + expect_equal(sum(res$resultNofilter$variant_id == "v4"), 1L) +}) + +test_that("raiss multi-LD-block: stops on a block dimension mismatch", { + td <- generate_block_diagonal_test_data(seed = 2, + block_structure = "non_overlapping", + n_variants = 30) + blocks <- td$LD_matrix_blocks + # Shrink block 1's matrix so it no longer matches its variant count. + blocks$ldMatrices[[1]] <- blocks$ldMatrices[[1]][-1, -1, drop = FALSE] + expect_error( + raiss(td$ref_panel, td$known_zscores, ldMatrix = blocks, verbose = FALSE), + "LD matrix dimension does not match") +}) + +test_that("raiss multi-LD-block: returns NULL when no block has known variants", { + td <- generate_block_diagonal_test_data(seed = 3, + block_structure = "non_overlapping", + n_variants = 30) + ghost <- data.frame(chrom = 1, pos = 99999, variant_id = "zzz", + A1 = "A", A2 = "G", z = 1, stringsAsFactors = FALSE) + expect_message( + res <- raiss(td$ref_panel, ghost, ldMatrix = td$LD_matrix_blocks, + verbose = TRUE), + "No blocks could be processed") + expect_null(res) +}) + +# =========================================================================== +# raissModel: batch = FALSE with condition-number reporting +# =========================================================================== + +test_that("raissModel batch = FALSE reports the condition number", { + zt <- c(1.2, 0.5) + sig_t <- matrix(c(1, 0.5, 0.5, 1), nrow = 2) + sig_i_t <- matrix(c(0.5, 0.2), nrow = 1) # single unknown -> 1 x 2 + res <- pecotmr:::raissModel(zt, sig_t, sig_i_t, batch = FALSE, + reportConditionNumber = TRUE) + expect_true(is.numeric(res$conditionNumber)) + expect_true(is.finite(res$conditionNumber)) + # checkInversion() returns all.equal()'s value: TRUE when the inverse + # reproduces sigT within tolerance, otherwise a character diff string. + expect_true(isTRUE(res$correctInversion) || is.character(res$correctInversion)) +}) + +# =========================================================================== +# krigingOutlierQc: non-square LD stop + variantIds defaulting to rownames +# =========================================================================== + +test_that("krigingOutlierQc requires a square LD matrix aligned to zScore", { + expect_error(krigingOutlierQc(c(1, 2, 3), diag(2), n = 100), + "square LD matrix") +}) + +test_that("krigingOutlierQc defaults variantIds to rownames(R)", { + skip_if_not("kriging_rss" %in% getNamespaceExports("susieR"), + "installed susieR has no kriging_rss") + m <- 6 + R <- matrix(0.6, m, m); diag(R) <- 1 + ids <- paste0("1:", seq_len(m) * 100, ":A:G") + rownames(R) <- colnames(R) <- ids + z <- rep(2, m) + kr <- krigingOutlierQc(z, R, n = 1000) # no variantIds passed + expect_equal(kr$diagnostics$variant_id, ids) +}) + +# =========================================================================== +# .safeSvd: all singular values below tolerance +# =========================================================================== + +test_that(".safeSvd stops when all singular values fall below tolerance", { + set.seed(1) + mat <- matrix(rnorm(20), nrow = 5, ncol = 4) + # tol >= 1 forces even the largest (ratio == 1) singular value below threshold. + expect_error(pecotmr:::.safeSvd(mat, tol = 2), + "below the tolerance threshold") +}) + +# =========================================================================== +# .applyContentFilters (MAF / FRQ, INFO, N-MAD filters + missing-column stops) +# =========================================================================== + +test_that(".applyContentFilters: MAF filter drops low-frequency variants", { + df <- data.frame(SNP = paste0("rs", 1:4), + MAF = c(0.30, 0.001, 0.20, 0.0005), + stringsAsFactors = FALSE) + out <- pecotmr:::.applyContentFilters(df, mafCutoff = 0.01, nCutoff = 0) + expect_equal(nrow(out$df), 2L) + expect_equal(out$audit$mafDropped, 2L) +}) + +test_that(".applyContentFilters: FRQ is normalized to MAF via min(af, 1 - af)", { + df <- data.frame(SNP = paste0("rs", 1:3), + FRQ = c(0.5, 0.995, 0.001), + stringsAsFactors = FALSE) + out <- pecotmr:::.applyContentFilters(df, mafCutoff = 0.01, nCutoff = 0) + # FRQ 0.995 -> MAF 0.005 (dropped); 0.001 dropped; 0.5 kept. + expect_equal(nrow(out$df), 1L) + expect_equal(out$audit$mafDropped, 2L) +}) + +test_that(".applyContentFilters: mafCutoff > 0 without MAF/FRQ column errors", { + df <- data.frame(SNP = paste0("rs", 1:3), stringsAsFactors = FALSE) + expect_error(pecotmr:::.applyContentFilters(df, mafCutoff = 0.01), + "requires a MAF or FRQ column") +}) + +test_that(".applyContentFilters: INFO filter drops low-INFO variants", { + df <- data.frame(SNP = paste0("rs", 1:4), + INFO = c(0.99, 0.10, 0.80, 0.30), + stringsAsFactors = FALSE) + out <- pecotmr:::.applyContentFilters(df, infoCutoff = 0.5, nCutoff = 0) + expect_equal(nrow(out$df), 2L) + expect_equal(out$audit$infoDropped, 2L) +}) + +test_that(".applyContentFilters: infoCutoff > 0 without INFO column errors", { + df <- data.frame(SNP = paste0("rs", 1:3), stringsAsFactors = FALSE) + expect_error(pecotmr:::.applyContentFilters(df, infoCutoff = 0.5), + "requires an INFO column") +}) + +test_that(".applyContentFilters: N-outlier (MAD) filter drops extreme N", { + df <- data.frame(SNP = paste0("rs", 1:5), + N = c(1000, 1010, 1005, 995, 100000), + stringsAsFactors = FALSE) + out <- pecotmr:::.applyContentFilters(df, nCutoff = 3) + expect_equal(nrow(out$df), 4L) + expect_equal(out$audit$nDropped, 1L) +}) + +test_that(".applyContentFilters: NA N values are always dropped", { + df <- data.frame(SNP = paste0("rs", 1:4), + N = c(1000, NA, 1005, 1010), + stringsAsFactors = FALSE) + out <- pecotmr:::.applyContentFilters(df, nCutoff = 5) + expect_false(any(is.na(out$df$N))) + expect_equal(out$audit$nDropped, 1L) +}) + +# =========================================================================== +# .runEntrySummaryStatsQc / summaryStatsQc deep branches +# =========================================================================== + +test_that("summaryStatsQc: emit() uses the no-label form for an empty study id", { + # An empty study id resolves the per-entry label to NA, exercising the + # unlabeled emit() branch. + ss <- GwasSumStats(study = "", entry = list(.ssQ_makeEntryGr()), + genome = "hg19", ldSketch = .ssQ_makeHandle()) + msgs <- capture_messages(summaryStatsQc(ss, nCutoff = 0)) + joined <- paste(msgs, collapse = "") + expect_match(joined, "QC summary:") + # No bracketed label prefix appears on the rollup line. + expect_true(any(grepl("^QC summary:", msgs))) +}) + +test_that("summaryStatsQc: content (N) filter emits its 'kept N of M' message + rollup nCutoff segment", { + gr <- .ssQ_makeEntryGr() + S4Vectors::mcols(gr)$N <- c(1000L, 1010L, 1005L, 100000L) # last is an N outlier + ss <- GwasSumStats(study = "g1", entry = list(gr), genome = "hg19", + ldSketch = .ssQ_makeHandle()) + msgs <- capture_messages(res <- summaryStatsQc(ss, nCutoff = 3)) + joined <- paste(msgs, collapse = "") + expect_match(joined, "MAF/INFO/N filters kept") + expect_match(joined, "nCutoff 1") + ea <- getQcInfo(res)$entryAudit[[1L]] + expect_equal(ea$contentFilters$nDropped, 1L) +}) + +test_that("summaryStatsQc: derives BETA/SE from Z+MAF+N and records the audit", { + gr <- GenomicRanges::GRanges(seqnames = rep("chr1", 4), + ranges = IRanges::IRanges(start = c(100L, 200L, 300L, 400L), width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame( + SNP = paste0("rs", 1:4), A1 = rep("A", 4), A2 = rep("G", 4), + Z = c(1, 2, 3, 4), N = rep(1000L, 4), MAF = c(0.2, 0.3, 0.4, 0.25)) + ss <- GwasSumStats(study = "g1", entry = list(gr), genome = "hg19", + ldSketch = .ssQ_makeHandle()) + res <- summaryStatsQc(ss, nCutoff = 0) + ea <- getQcInfo(res)$entryAudit[[1L]] + expect_equal(ea$betaSeFromZ$nDerived, 4L) +}) + +test_that("summaryStatsQc: clamps tiny Z-derived P values and accumulates the audit", { + gr <- .ssQ_makeEntryGr() + S4Vectors::mcols(gr)$Z <- c(50, 1, 2, 3) # |Z| = 50 underflows P to 0 + ss <- GwasSumStats(study = "g1", entry = list(gr), genome = "hg19", + ldSketch = .ssQ_makeHandle()) + res <- summaryStatsQc(ss, nCutoff = 0) + ea <- getQcInfo(res)$entryAudit[[1L]] + expect_true(!is.null(ea$sanityChecks$smallPClamped)) + expect_gte(ea$sanityChecks$smallPClamped, 1L) +}) + +test_that("summaryStatsQc: early-exits when fewer than two variants survive pre-harmonization QC", { + gr <- .ssQ_makeEntryGr(snp_ids = "rs1", positions = 100L) + ss <- GwasSumStats(study = "g1", entry = list(gr), genome = "hg19", + ldSketch = .ssQ_makeHandle()) + res <- summaryStatsQc(ss, nCutoff = 0) + ea <- getQcInfo(res)$entryAudit[[1L]] + expect_match(ea$earlyExit, "fewer than two variants") + expect_equal(length(res$entry[[1L]]), 1L) +}) + +test_that("summaryStatsQc: kriging prefilter runs, records outliers, and adds the rollup segment", { + skip_if_not("kriging_rss" %in% getNamespaceExports("susieR"), + "installed susieR has no kriging_rss") + ss <- GwasSumStats( + study = "g1", + entry = list(.ssQ_makeEntryGr( + snp_ids = paste0("rs", 1:8), + positions = seq(100L, by = 100L, length.out = 8L))), + genome = "hg19", + ldSketch = .ssQ_makeHandleVid(snp_n = 8L)) + local_mocked_bindings( + extractBlockGenotypes = .ssQ_mockExtractor(), + .package = "pecotmr") + msgs <- capture_messages( + res <- summaryStatsQc(ss, alleleFlipKriging = TRUE, pipCutoffToSkip = 0, + nCutoff = 0)) + joined <- paste(msgs, collapse = "") + expect_match(joined, "kriging prefilter removed") + expect_match(joined, "kriging ") + ea <- getQcInfo(res)$entryAudit[[1L]] + expect_true("krigingOutliersDropped" %in% names(ea)) +}) + +test_that("summaryStatsQc: impute = TRUE assembles BETA/SE/N and median-fills missing N", { + gr <- GenomicRanges::GRanges(seqnames = rep("chr1", 4), + ranges = IRanges::IRanges(start = c(100L, 200L, 300L, 400L), width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame( + SNP = paste0("rs", 1:4), A1 = rep("A", 4), A2 = rep("G", 4), + Z = c(1, 2, 3, 4), N = rep(1000L, 4), + BETA = c(0.1, 0.2, 0.3, 0.4), SE = rep(0.1, 4)) + ss <- GwasSumStats(study = "g1", entry = list(gr), genome = "hg19", + ldSketch = .ssQ_makeHandle(snp_n = 8L, n_samples = 60L)) + local_mocked_bindings( + extractBlockGenotypes = .ssQ_mockExtractor(), + raiss = function(refPanel, knownZscores, genotypeMatrix, ...) { + added <- refPanel[refPanel$variant_id %in% c("rs5", "rs6"), , drop = FALSE] + added$z <- c(1.5, -2.0) + added$n <- c(1000, NA) # NA triggers the median fill + added$beta <- c(0.11, -0.22) + added$se <- c(0.05, 0.06) + list(resultFilter = rbind(knownZscores, added)) + }, + .package = "pecotmr") + res <- summaryStatsQc(ss, impute = TRUE) + ea <- getQcInfo(res)$entryAudit[[1L]] + expect_equal(ea$raissTotalVariants, 6L) + expect_equal(ea$raissImputedVariants, 2L) + mc <- S4Vectors::mcols(res$entry[[1L]]) + expect_true(all(c("BETA", "SE", "N") %in% colnames(mc))) + expect_false(any(is.na(mc$N))) # median-filled +}) + +test_that("summaryStatsQc: per-entry rollup enumerates every removed-step segment", { + # One entry that trips each sanity / content / harmonization drop so the + # rollup segment strings are all assembled. + df <- data.frame( + seqnames = c("chr1", "chr1", "chr1", "chr1", "chr99", "chr1", "chr1", + "chr1", "chr1", "chr1", "chr1", "chr1"), + pos = c(100L, 200L, 300L, 9999L, 100L, 500L, 600L, 700L, 800L, + 150L, 250L, 350L), + SNP = c("rs1", "rs2", "rs3", "rsOff", "rsChr", "rsMiss", "rsBadP", + "rsZero", "rsBadSE", "rsMaf", "rsInfo", "rsN"), + A1 = c("A", "A", "A", "A", "A", NA, "A", "A", "A", "A", "A", "A"), + A2 = rep("G", 12), + Z = c(5, 4, 3, 6, 2, 2, 2, 2, 2, 2, 2, 2), + N = c(1000, 1001, 1002, 1000, 1000, 1000, 1000, 1000, 1000, 1000, + 1000, 100000), + MAF = c(0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.30, 0.001, + 0.30, 0.30), + INFO = c(0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, + 0.10, 0.95), + P = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.5, 0.5, 0.5, 0.5, 0.5, 0.5), + BETA = c(0.5, 0.4, 0.3, 0.6, 0.2, 0.2, 0.2, 0.0, 0.2, 0.2, 0.2, 0.2), + SE = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, -1.0, 0.1, 0.1, 0.1), + stringsAsFactors = FALSE) + gr <- GenomicRanges::GRanges( + seqnames = df$seqnames, + ranges = IRanges::IRanges(start = df$pos, width = 1L)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame( + SNP = df$SNP, A1 = df$A1, A2 = df$A2, Z = df$Z, N = df$N, + MAF = df$MAF, INFO = df$INFO, P = df$P, BETA = df$BETA, SE = df$SE) + ss <- GwasSumStats(study = "g1", entry = list(gr), genome = "hg19", + ldSketch = .ssQ_makeHandle()) + msgs <- capture_messages( + res <- summaryStatsQc(ss, mafCutoff = 0.01, infoCutoff = 0.5, nCutoff = 3)) + joined <- paste(msgs, collapse = "") + ea <- getQcInfo(res)$entryAudit[[1L]] + expect_equal(ea$sanityChecks$nonstandardChrDropped, 1L) + expect_equal(ea$sanityChecks$missDataDropped, 1L) + expect_equal(ea$sanityChecks$pOutOfRangeDropped, 1L) + expect_equal(ea$sanityChecks$zeroEffectDropped, 1L) + expect_equal(ea$sanityChecks$nonpositiveSeDropped, 1L) + expect_equal(ea$contentFilters$mafDropped, 1L) + expect_equal(ea$contentFilters$infoDropped, 1L) + expect_equal(ea$contentFilters$nDropped, 1L) + # The rollup line names each removed step. + for (seg in c("nonstdChr", "missData", "badP", "zeroEffect", "badSE", + "maf ", "info ", "nCutoff", "harmonization")) { + expect_match(joined, seg, fixed = TRUE) + } +}) + +test_that("dentistSingleWindow: rsq-warning capture path on a near-singular LD block", { + # Highly collinear LD with a moderately strong signal can push the C++ + # adjusted rsq_eigen above 1, exercising the withCallingHandlers capture / + # summary-warning path. The call is wrapped to stay robust either way. + set.seed(123) + m <- 60 + R <- matrix(0.999, m, m); diag(R) <- 1 + z <- as.numeric(R %*% rnorm(m, sd = 3)) + res <- suppressWarnings( + dentistSingleWindow(z, R = R, nSample = 50, propSVD = 0.95, + duprThreshold = 1.0)) + expect_equal(nrow(res), m) +}) + +# =========================================================================== +# Final-coverage closeouts: a handful of branch/error-handler lines that the +# existing suite leaves untouched. +# =========================================================================== + +test_that(".matchRefPanel sanitizes an empty-named target column to 'unnamed_N'", { + # 5 columns whose first four are positional (NOT literally named + # chrom/pos/A2/A1), so .matchRefPanel routes through variantIdToDf -- which + # preserves the extra column -- rather than the select()-based path. The 5th + # column has an empty name, so the joined matchResult carries an empty-named + # column and sanitizeNames() rewrites it to "unnamed_1" (sumstatsQc.R:83). + target <- data.frame(CHR = c(1, 1, 1), POS = c(100, 200, 300), + ref = c("A", "C", "G"), alt = c("G", "T", "A"), + extra = c(0.1, 0.2, 0.3), stringsAsFactors = FALSE) + names(target)[5] <- "" + ref <- data.frame(chrom = c(1, 1, 1), pos = c(100, 200, 300), + A2 = c("A", "C", "G"), A1 = c("G", "T", "A"), + stringsAsFactors = FALSE) + res <- pecotmr:::.matchRefPanel(target, ref, matchMinProp = 0) + expect_equal(nrow(res$harmonizedData), 3L) + nm <- colnames(res$harmonizedData) + expect_false(any(nm == "" | is.na(nm))) # the empty name was repaired + expect_true(any(grepl("^unnamed_", nm))) # ... to an "unnamed_*" label +}) + +test_that("dentistSingleWindow captures cpp11 rsq>1 warnings and re-emits a summary", { + # Mock the cpp11 imputer so it (a) emits the inner "Adjusted rsq_eigen value + # exceeding 1" warning the handler captures + muffles (sumstatsQc.R:688-689) + # and (b) returns the documented raw columns so the rest of the function runs. + # The captured warning triggers the summary warning (sumstatsQc.R:704-705). + set.seed(1) + m <- 5 + R <- diag(m) + z <- c(1, 2, 3, 4, 5) + local_mocked_bindings( + dentistIterativeImpute = function(ldMatR, nSample, zScoreR, ...) { + warning("Adjusted rsq_eigen value exceeding 1: capped at 1.0") + n <- length(zScoreR) + list(originalZ = as.numeric(zScoreR), imputedZ = as.numeric(zScoreR) * 0.5, + rsq = rep(0.3, n), zDiff = rep(0.1, n), iterToCorrect = rep(1L, n)) + }, + .package = "pecotmr") + # Capture every warning (the <2000-variant note and the rsq summary) so we can + # assert on the rsq summary specifically. + seen <- character(0) + res <- withCallingHandlers( + dentistSingleWindow(z, R = R, nSample = 1000, duprThreshold = 1.0), + warning = function(w) { + seen <<- c(seen, conditionMessage(w)); invokeRestart("muffleWarning") + }) + expect_true(any(grepl("rsq_eigen values exceeded 1", seen))) # summary re-emitted + # The post-warning code (renames, outlier stat, z_diff drop) ran successfully. + expect_equal(nrow(res), m) + expect_equal(res$original_z, z) + expect_true(all(c("imputed_z", "rsq", "outlier_stat", "outlier") %in% colnames(res))) + expect_false("z_diff" %in% colnames(res)) +}) + +test_that("segmentByDist keeps the last window when its span clears the cutoff", { + # A single dense block spanning 1.25x the distance cutoff produces two windows + # whose final window is wide enough that adjustLastFn does NOT shrink it: the + # else branch returns the current startIdx unchanged (sumstatsQc.R:1095). + pos <- as.integer(round(seq(1, 1250000, length.out = 600))) + win <- pecotmr:::segmentByDist(pos, maxDist = 1000000, minDim = 500) + expect_true(is.data.frame(win)) + expect_gte(nrow(win), 2L) + expect_true(all(c("windowStartIdx", "windowEndIdx", "fillStartIdx", "fillEndIdx") + %in% colnames(win))) + # Window indices stay in range (end indices are 1-based exclusive). + expect_true(all(win$windowStartIdx >= 1L)) + expect_true(all(win$windowEndIdx <= length(pos) + 1L)) + # Fill regions tile every variant index exactly once. + covered <- integer(0) + for (k in seq_len(nrow(win))) + covered <- c(covered, win$fillStartIdx[k]:(win$fillEndIdx[k] - 1L)) + expect_equal(sort(unique(covered)), seq_along(pos)) +}) + +test_that("raiss multi-LD-block skips a NULL middle block and keeps the rest (line 1965)", { + td <- generate_block_diagonal_test_data(seed = 11, + block_structure = "non_overlapping", + n_variants = 30) + # Drop every known z-score in block 2 (var11..var20) so raissSingleMatrix + # returns NULL for it. Blocks 1 and 3 still succeed, so resultsList carries a + # NULL hole at index 2 and combineWithBoundaryCheck(accumulated, NULL) returns + # the accumulated result unchanged (sumstatsQc.R:1965). + kz <- dplyr::filter(td$known_zscores, !variant_id %in% paste0("var", 11:20)) + res <- raiss(td$ref_panel, kz, ldMatrix = td$LD_matrix_blocks, + lamb = 0.01, rcond = 0.01, r2Threshold = 0, minimumLd = 0, + verbose = FALSE) + expect_true(is.list(res)) + expect_false(is.null(res$resultNofilter)) + # Block 2 was skipped entirely, so none of its variants survive. + expect_false(any(paste0("var", 11:20) %in% res$resultNofilter$variant_id)) + # Blocks 1 and 3 (var1..10, var21..30) are present. + expect_true(all(c("var1", "var21") %in% res$resultNofilter$variant_id)) +}) + +test_that("summaryStatsQc kriging prefilter drops an LD-inconsistent variant (line 3063)", { + skip_if_not("kriging_rss" %in% getNamespaceExports("susieR"), + "installed susieR has no kriging_rss") + # All eight genotype columns load on one common factor (pairwise rho ~0.7), + # but rs4 carries a z-score (-15) grossly inconsistent with its strongly + # correlated neighbours. krigingOutlierQc flags it, so the prefilter actually + # drops a row (sumstatsQc.R:3063, the nKr > 0 branch). + corrExtractor <- function(handle, snpIdx, meanImpute = TRUE) { + set.seed(42) + n <- length(handle@sampleIds) + k <- length(snpIdx) + f <- rnorm(n) # shared latent factor + M <- sapply(seq_len(k), + function(j) sqrt(0.7) * f + sqrt(0.3) * rnorm(n)) + rr <- GenomicRanges::GRanges( + seqnames = paste0("chr", handle@snpInfo$CHR[snpIdx]), + ranges = IRanges::IRanges(start = handle@snpInfo$BP[snpIdx], width = 1L)) + S4Vectors::mcols(rr) <- S4Vectors::DataFrame( + SNP = handle@snpInfo$SNP[snpIdx], A1 = handle@snpInfo$A1[snpIdx], + A2 = handle@snpInfo$A2[snpIdx]) + dosage <- t(M) + rownames(dosage) <- handle@snpInfo$SNP[snpIdx] + colnames(dosage) <- handle@sampleIds + SummarizedExperiment::SummarizedExperiment( + assays = list(dosage = dosage), rowRanges = rr, + colData = S4Vectors::DataFrame(sampleId = handle@sampleIds, + row.names = handle@sampleIds)) + } + gr <- .ssQ_makeEntryGr(snp_ids = paste0("rs", 1:8), + positions = seq(100L, by = 100L, length.out = 8L)) + mc <- S4Vectors::mcols(gr) + mc$Z <- c(4, 4, 4, -15, 4, 4, 4, 4) + mc$N <- rep(3000L, 8) + S4Vectors::mcols(gr) <- mc + ss <- GwasSumStats(study = "g1", entry = list(gr), genome = "hg19", + ldSketch = .ssQ_makeHandleVid(snp_n = 8L, n_samples = 300L)) + local_mocked_bindings(extractBlockGenotypes = corrExtractor, .package = "pecotmr") + res <- suppressWarnings( + summaryStatsQc(ss, alleleFlipKriging = TRUE, pipCutoffToSkip = 0, nCutoff = 0)) + ea <- getQcInfo(res)$entryAudit[[1L]] + expect_gte(ea$krigingOutliersDropped, 1L) # at least one dropped + expect_equal(length(res$entry[[1L]]), 8L - ea$krigingOutliersDropped) +}) diff --git a/tests/testthat/test_twasWeights.R b/tests/testthat/test_twasWeights.R index d3e6c8a5..94ce8ab7 100644 --- a/tests/testthat/test_twasWeights.R +++ b/tests/testthat/test_twasWeights.R @@ -765,313 +765,15 @@ test_that("twasWeightsCv: multivariate Y with multiple columns", { # # =========================================================================== -test_that("twasWeightsPipeline: returns list with expected structure (mocked)", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - - local_mocked_bindings( - susie = mock_susie, - enetWeights = function(X, y, ...) rep(0.1, ncol(X)), - lassoWeights = function(X, y, ...) rep(0.2, ncol(X)), - bayesRWeights = function(X, y, ...) rep(0, ncol(X)), - bayesCWeights = function(X, y, ...) rep(0, ncol(X)), - mrashWeights = function(X, y, ...) rep(0, ncol(X)), - mcpWeights = function(X, y, ...) rep(0, ncol(X)), - scadWeights = function(X, y, ...) rep(0, ncol(X)), - l0learnWeights =function(X, y, ...) rep(0, ncol(X)), - susieWeights = function(X, y, ...) rep(0, ncol(X)), - susieInfWeights = function(X, y, ...) rep(0, ncol(X)) - ) - - result <- pecotmr:::.twasWeightsPipelineMatrix(d$X, y_vec, susieFit = NULL, cvFolds = 0, - estimatePi = FALSE) - - expect_true(is.list(result)) - expect_true("twasWeights" %in% names(result)) - expect_true("twasPredictions" %in% names(result)) - expect_true("totalTimeElapsed" %in% names(result)) - # Verify that mock values appear in the weight matrices - enet_w <- .weightsByMethod(result$twasWeights, "enet") - expect_true(all(enet_w[, 1] == 0.1)) - lasso_w <- .weightsByMethod(result$twasWeights, "lasso") - expect_true(all(lasso_w[, 1] == 0.2)) - # The number of weight methods should equal the 10 default methods - expect_equal(length(getMethodNames(result$twasWeights)), 10) -}) - -test_that("twasWeightsPipeline: twasWeights contains all default methods", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - - local_mocked_bindings( - susie = mock_susie, - enetWeights = function(X, y, ...) rep(0.1, ncol(X)), - lassoWeights = function(X, y, ...) rep(0.2, ncol(X)), - bayesRWeights = function(X, y, ...) rep(0, ncol(X)), - bayesCWeights = function(X, y, ...) rep(0, ncol(X)), - mrashWeights = function(X, y, ...) rep(0, ncol(X)), - mcpWeights = function(X, y, ...) rep(0, ncol(X)), - scadWeights = function(X, y, ...) rep(0, ncol(X)), - l0learnWeights =function(X, y, ...) rep(0, ncol(X)), - susieWeights = function(X, y, ...) rep(0, ncol(X)), - susieInfWeights = function(X, y, ...) rep(0, ncol(X)) - ) - result <- pecotmr:::.twasWeightsPipelineMatrix(d$X, y_vec, susieFit = NULL, cvFolds = 0, - estimatePi = FALSE) - expected_methods <- c( - "enet", "lasso", "bayes_r", - "bayes_c", "mrash", "mcp", - "scad", "l0learn", "susie", - "susie_inf" - ) - expect_true(all(expected_methods %in% getMethodNames(result$twasWeights))) -}) - -test_that("twasWeightsPipeline: stores ensemble weights when ensemble is fitted", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - - cv_perf <- matrix(NA_real_, nrow = 1, ncol = 6) - colnames(cv_perf) <- c("corr", "rsq", "adj_rsq", "pval", "RMSE", "MAE") - rownames(cv_perf) <- "outcome_1" - cv_perf[1, "rsq"] <- 0.5 - - local_mocked_bindings( - enetWeights = function(X, y, ...) rep(0.1, ncol(X)), - lassoWeights = function(X, y, ...) rep(0.2, ncol(X)), - twasWeightsCv = function(X, Y, ...) { - list( - prediction = list( - enetPredicted = matrix(as.numeric(Y), ncol = 1, dimnames = list(rownames(X), "outcome_1")), - lassoPredicted = matrix(as.numeric(Y), ncol = 1, dimnames = list(rownames(X), "outcome_1")) - ), - performance = list( - enetPerformance = cv_perf, - lassoPerformance = cv_perf - ) - ) - }, - ensembleWeights = function(cvResults, Y, twasWeightList, ...) { - list( - methodCoef = c(enet = 0.5, lasso = 0.5), - ensembleTwasWeights = (twasWeightList$enet_weights + twasWeightList$lasso_weights) / 2 - ) - } - ) - - result <- pecotmr:::.twasWeightsPipelineMatrix( - d$X, y_vec, - weightMethods = list(enetWeights = list(), lassoWeights = list()), - cvFolds = 2, - ensemble = TRUE, - ensembleR2Threshold = 0, - estimatePi = FALSE - ) - - expect_true("ensemble" %in% getMethodNames(result$twasWeights)) - expect_true("ensemble_predicted" %in% names(result$twasPredictions)) - expect_true("ensemble" %in% names(result)) -}) - -test_that("twasWeightsPipeline: predictions have _predicted suffix", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - - local_mocked_bindings( - susie = mock_susie, - enetWeights = function(X, y, ...) rep(0, ncol(X)), - lassoWeights = function(X, y, ...) rep(0, ncol(X)), - bayesRWeights = function(X, y, ...) rep(0, ncol(X)), - bayesCWeights = function(X, y, ...) rep(0, ncol(X)), - mrashWeights = function(X, y, ...) rep(0, ncol(X)), - mcpWeights = function(X, y, ...) rep(0, ncol(X)), - scadWeights = function(X, y, ...) rep(0, ncol(X)), - l0learnWeights =function(X, y, ...) rep(0, ncol(X)), - susieWeights = function(X, y, ...) rep(0, ncol(X)), - susieInfWeights = function(X, y, ...) rep(0, ncol(X)) - ) - - result <- pecotmr:::.twasWeightsPipelineMatrix(d$X, y_vec, susieFit = NULL, cvFolds = 0, - estimatePi = FALSE) - - expected_pred_names <- c( - "enet_predicted", "lasso_predicted", "bayes_r_predicted", - "bayes_c_predicted", "mrash_predicted", "mcp_predicted", - "scad_predicted", "l0learn_predicted", "susie_predicted", - "susie_inf_predicted" - ) - expect_true(all(expected_pred_names %in% names(result$twasPredictions))) -}) - -test_that("twasWeightsPipeline: cv_folds=0 skips cross-validation", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - local_mocked_bindings( - susie = mock_susie, - enetWeights = function(X, y, ...) rep(0, ncol(X)), - lassoWeights = function(X, y, ...) rep(0, ncol(X)), - bayesRWeights = function(X, y, ...) rep(0, ncol(X)), - bayesCWeights = function(X, y, ...) rep(0, ncol(X)), - mrashWeights = function(X, y, ...) rep(0, ncol(X)), - mcpWeights = function(X, y, ...) rep(0, ncol(X)), - scadWeights = function(X, y, ...) rep(0, ncol(X)), - l0learnWeights =function(X, y, ...) rep(0, ncol(X)), - susieWeights = function(X, y, ...) rep(0, ncol(X)), - susieInfWeights = function(X, y, ...) rep(0, ncol(X)) - ) - - result <- pecotmr:::.twasWeightsPipelineMatrix(d$X, y_vec, susieFit = NULL, cvFolds = 0, - estimatePi = FALSE) - - expect_false("twasCvResult" %in% names(result)) - # All mock weights were zero, so all predictions should be zero - for (pred_name in names(result$twasPredictions)) { - expect_true(all(result$twasPredictions[[pred_name]] == 0), - info = paste("Non-zero prediction in", pred_name)) - } - # Weight dimensions should match ncol(X) - for (w_name in getMethodNames(result$twasWeights)) { - expect_equal(nrow(.weightsByMethod(result$twasWeights, w_name)), ncol(d$X), - info = paste("Wrong nrow for", w_name)) - } -}) - -test_that("twasWeightsPipeline: custom weight_methods are respected", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - - local_mocked_bindings( - lassoWeights = function(X, y, ...) rep(1, ncol(X)), - enetWeights = function(X, y, ...) rep(2, ncol(X)) - ) - - result <- pecotmr:::.twasWeightsPipelineMatrix( - d$X, y_vec, susieFit = NULL, cvFolds = 0, - weightMethods = list(lassoWeights = list(), enetWeights = list()) - ) - - expect_equal(sort(getMethodNames(result$twasWeights)), sort(c("lasso", "enet"))) -}) - -test_that("twasWeightsPipeline: accepts 'fast_default' preset string", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - - local_mocked_bindings( - susie = mock_susie, - enetWeights = function(X, y, ...) rep(0, ncol(X)), - lassoWeights = function(X, y, ...) rep(0, ncol(X)), - mrashWeights = function(X, y, ...) rep(0, ncol(X)), - mcpWeights = function(X, y, ...) rep(0, ncol(X)), - scadWeights = function(X, y, ...) rep(0, ncol(X)), - l0learnWeights =function(X, y, ...) rep(0, ncol(X)), - susieWeights = function(X, y, ...) rep(0, ncol(X)), - susieInfWeights = function(X, y, ...) rep(0, ncol(X)) - ) - - result <- pecotmr:::.twasWeightsPipelineMatrix( - d$X, y_vec, susieFit = NULL, cvFolds = 0, - weightMethods = "fast_default" - ) - - expected_methods <- c("susie", "susie_inf", "mrash", - "enet", "lasso", "mcp", - "scad", "l0learn") - expect_equal(sort(getMethodNames(result$twasWeights)), sort(expected_methods)) -}) - -test_that("twasWeightsPipeline: accepts custom short-name vector", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - - local_mocked_bindings( - lassoWeights = function(X, y, ...) rep(1, ncol(X)), - enetWeights = function(X, y, ...) rep(2, ncol(X)) - ) - - result <- pecotmr:::.twasWeightsPipelineMatrix( - d$X, y_vec, susieFit = NULL, cvFolds = 0, - weightMethods = c("lasso", "enet") - ) - - expect_equal(sort(getMethodNames(result$twasWeights)), sort(c("lasso", "enet"))) -}) - -test_that("twasWeightsPipeline: with fitted_models stores SuSiE intermediates", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - - fake_susie <- make_fake_susie_fit(p = 10, L = 5) - local_mocked_bindings( - enetWeights = function(X, y, ...) rep(0, ncol(X)), - lassoWeights = function(X, y, ...) rep(0, ncol(X)), - bayesRWeights = function(X, y, ...) rep(0, ncol(X)), - bayesCWeights = function(X, y, ...) rep(0, ncol(X)), - mrashWeights = function(X, y, ...) rep(0, ncol(X)), - mcpWeights = function(X, y, ...) rep(0, ncol(X)), - scadWeights = function(X, y, ...) rep(0, ncol(X)), - l0learnWeights =function(X, y, ...) rep(0, ncol(X)), - susieWeights = function(X, y, ...) rep(0, ncol(X)), - susieInfWeights = function(X, y, ...) rep(0, ncol(X)) - ) - result <- pecotmr:::.twasWeightsPipelineMatrix( - d$X, y_vec, - fittedModels = list(susie = fake_susie), - cvFolds = 0, - estimatePi = FALSE - ) - expect_true("susieWeightsIntermediate" %in% names(result)) - expect_true("mu" %in% names(result$susieWeightsIntermediate)) -}) -test_that("twasWeightsPipeline: fitted_models are injected into SuSiE-family weights", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - fake_susie <- make_fake_susie_fit(p = 10, L = 5) - fake_susie_inf <- make_fake_susie_fit(p = 10, L = 5, inf = TRUE) - susie_received_fit <- FALSE - susie_inf_received_fit <- FALSE - local_mocked_bindings( - enetWeights = function(X, y, ...) rep(0, ncol(X)), - lassoWeights = function(X, y, ...) rep(0, ncol(X)), - bayesRWeights = function(X, y, ...) rep(0, ncol(X)), - bayesCWeights = function(X, y, ...) rep(0, ncol(X)), - mrashWeights = function(X, y, ...) rep(0, ncol(X)), - mcpWeights = function(X, y, ...) rep(0, ncol(X)), - scadWeights = function(X, y, ...) rep(0, ncol(X)), - l0learnWeights =function(X, y, ...) rep(0, ncol(X)), - susieInfWeights = function(X, y, ...) { - args <- list(...) - if (!is.null(args$susieInfFit) && "susieInf" %in% class(args$susieInfFit)) { - susie_inf_received_fit <<- TRUE - } - rep(0, ncol(X)) - }, - susieWeights = function(X, y, ...) { - args <- list(...) - if (!is.null(args$susieFit) && "susie" %in% class(args$susieFit)) { - susie_received_fit <<- TRUE - } - rep(0, ncol(X)) - } - ) - result <- pecotmr:::.twasWeightsPipelineMatrix( - d$X, y_vec, - fittedModels = list(susie = fake_susie, susieInf = fake_susie_inf), - cvFolds = 0, - estimatePi = FALSE - ) - expect_true(susie_received_fit) - expect_true(susie_inf_received_fit) -}) test_that("twasWeights: SuSiE-inf is fitted before and initializes ordinary SuSiE", { d <- make_data(n = 50, p = 10) @@ -1112,26 +814,6 @@ test_that("twasWeights: SuSiE-inf is fitted before and initializes ordinary SuSi expect_equal(susie_calls[[2]]$L_greedy, 5) }) -test_that("twasWeightsPipeline: weight dimensions match input", { - d <- make_data(n = 50, p = 10) - y_vec <- as.numeric(d$Y) - - local_mocked_bindings( - lassoWeights = function(X, y, ...) rep(0.5, ncol(X)), - enetWeights = function(X, y, ...) rep(0.3, ncol(X)) - ) - - result <- pecotmr:::.twasWeightsPipelineMatrix( - d$X, y_vec, susieFit = NULL, cvFolds = 0, - weightMethods = list(lassoWeights = list(), enetWeights = list()) - ) - - for (method_name in getMethodNames(result$twasWeights)) { - w <- .weightsByMethod(result$twasWeights, method_name) - expect_equal(nrow(w), ncol(d$X)) - expect_equal(ncol(w), 1) - } -}) # =========================================================================== # twasWeightsCv: extra split_data / sample-name / variant-selection branches @@ -1249,55 +931,19 @@ test_that("twasWeightsCv: multivariate cv_args data_driven_prior_matrices_cv is weightMethods = list(mrmashWeights = list()), data_driven_prior_matrices_cv = prior_cv ) - # mrmashWeights mock should have been called and received the per-fold prior matrix + # mrmashWeights mock should have been called and received the per-fold prior + # matrix under the camelCase name that actually binds mrmashWrapper's + # `dataDrivenPriorMatrices` argument (the snake_case form was a latent no-op). expect_true(length(captured_args) >= 1) expect_true(any(vapply(captured_args, function(a) - "data_driven_prior_matrices" %in% names(a), logical(1)))) + "dataDrivenPriorMatrices" %in% names(a), logical(1)))) }) # =========================================================================== # twasWeightsPipeline: removed_methods warning + max_cv_variants subsampling # =========================================================================== -test_that("twasWeightsPipeline: warns when methods are removed because all weights are zero", { - d <- make_data(n = 30, p = 6) - local_mocked_bindings( - lassoWeights = function(X, y, ...) rep(0, ncol(X)), - enetWeights = function(X, y, ...) rep(0, ncol(X)) - ) - set.seed(42) - expect_warning( - suppressMessages(pecotmr:::.twasWeightsPipelineMatrix( - d$X, d$Y, susieFit = NULL, cvFolds = 2, - weightMethods = list(lassoWeights = list(), enetWeights = list()) - )), - "are removed from CV because all their weights are zeros" - ) -}) -test_that("twasWeightsPipeline: max_cv_variants subsamples colnames of X", { - d <- make_data(n = 30, p = 20) - captured_keep <- NULL - local_mocked_bindings( - lassoWeights = function(X, y, ...) { - w <- rep(0, ncol(X)); w[1] <- 0.5; w - }, - twasWeightsCv = function(X, Y, fold, samplePartitions, weightMethods, - maxNumVariants, numThreads, variantsToKeep, ...) { - captured_keep <<- variantsToKeep - list(samplePartition = data.frame(Sample = rownames(X), Fold = 1), - prediction = list(), performance = list(), timeElapsed = 0) - } - ) - set.seed(42) - suppressMessages(suppressWarnings(pecotmr:::.twasWeightsPipelineMatrix( - d$X, d$Y, susieFit = NULL, cvFolds = 2, - weightMethods = list(lassoWeights = list()), - maxCvVariants = 5 - ))) - expect_equal(length(captured_keep), 5) - expect_true(all(captured_keep %in% colnames(d$X))) -}) # =========================================================================== # twasWeights: dim-fix branch when nrow(weights_matrix) != length(valid_columns) @@ -1477,19 +1123,21 @@ test_that("TwasWeights: rejects duplicate 4-tuples", { test_that("TwasWeights: joint columns work the same as on the FMR class", { e <- .sc_makeTwasWeightsEntry() + # Univariate lasso at c1 + the c1 slice of an mr.mash joint over (c1, c2). tw <- TwasWeights( study = c("s1", "s1"), - context = c("c1", "joint"), + context = c("c1", "c1"), trait = c("t1", "t1"), method = c("lasso", "mrmash"), entry = list(e, e), jointContexts = c(NA_character_, "c1;c2")) expect_true("jointContexts" %in% names(tw)) expect_identical(tw$jointContexts, c(NA_character_, "c1;c2")) - # uniqueness: distinct jointContexts -> distinct rows + # uniqueness: same (s1, c1, t1, mrmash) tuple from two joint fits over + # (c1, c2) and (c1, c3) -> distinct rows via jointContexts. tw2 <- TwasWeights( study = c("s1", "s1"), - context = c("joint", "joint"), + context = c("c1", "c1"), trait = c("t1", "t1"), method = c("mrmash", "mrmash"), entry = list(e, e), @@ -1545,4 +1193,431 @@ test_that("show.TwasWeights reports ldSketch when present", { expect_true(any(grepl("LD sketch: gds @ /tmp/test.gds", out))) }) +# =========================================================================== +# +# .normalizeCvFolds: fold-spec normalization (direct internal calls) +# +# =========================================================================== + +test_that(".normalizeCvFolds: list cvFolds + samplePartition are mutually exclusive", { + expect_error( + pecotmr:::.normalizeCvFolds( + cvFolds = list(c("s1", "s2"), c("s3", "s4")), + samplePartition = data.frame(Sample = "s1", Fold = 1)), + "not both" + ) +}) + +test_that(".normalizeCvFolds: samplePartition must have Sample and Fold columns", { + expect_error( + pecotmr:::.normalizeCvFolds(samplePartition = data.frame(a = 1, b = 2)), + "must have columns" + ) +}) + +test_that(".normalizeCvFolds: a sample assigned to >1 fold errors", { + sp <- data.frame(Sample = c("s1", "s1", "s2"), Fold = c(1, 2, 2), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.normalizeCvFolds(samplePartition = sp), + "more than one fold" + ) +}) + +test_that(".normalizeCvFolds: unknown sample (vs sampleNames) errors", { + sp <- data.frame(Sample = c("s1", "s99"), Fold = c(1, 2), + stringsAsFactors = FALSE) + expect_error( + pecotmr:::.normalizeCvFolds(samplePartition = sp, + sampleNames = c("s1", "s2")), + "unknown sample" + ) +}) + +test_that(".normalizeCvFolds: uncovered samples error", { + sp <- data.frame(Sample = "s1", Fold = 1, stringsAsFactors = FALSE) + expect_error( + pecotmr:::.normalizeCvFolds(samplePartition = sp, + sampleNames = c("s1", "s2", "s3")), + "does not cover" + ) +}) + +test_that(".normalizeCvFolds: valid samplePartition returns df + nFolds", { + sp <- data.frame(Sample = c("s1", "s2", "s3", "s4"), + Fold = c(1, 1, 2, 2), stringsAsFactors = FALSE) + res <- pecotmr:::.normalizeCvFolds(samplePartition = sp, + sampleNames = c("s1", "s2", "s3", "s4")) + expect_equal(res$nFolds, 2L) + expect_equal(nrow(res$samplePartition), 4L) + expect_setequal(unique(res$samplePartition$Fold), c(1, 2)) +}) + +test_that(".normalizeCvFolds: list-form requires at least 2 folds", { + expect_error( + pecotmr:::.normalizeCvFolds(cvFolds = list(c("s1", "s2"))), + "at least 2 folds" + ) +}) + +test_that(".normalizeCvFolds: numeric fold ids require sampleNames", { + expect_error( + pecotmr:::.normalizeCvFolds(cvFolds = list(c(1, 2), c(3, 4))), + "Numeric fold vectors require" + ) +}) + +test_that(".normalizeCvFolds: numeric fold ids out of range error", { + expect_error( + pecotmr:::.normalizeCvFolds( + cvFolds = list(c(1, 2), c(3, 99)), + sampleNames = c("s1", "s2", "s3", "s4")), + "out-of-range" + ) +}) + +test_that(".normalizeCvFolds: numeric fold ids resolve via sampleNames", { + res <- pecotmr:::.normalizeCvFolds( + cvFolds = list(c(1, 2), c(3, 4)), + sampleNames = c("s1", "s2", "s3", "s4")) + expect_equal(res$nFolds, 2L) + expect_setequal(res$samplePartition$Sample, c("s1", "s2", "s3", "s4")) + expect_equal( + res$samplePartition$Fold[res$samplePartition$Sample == "s1"], 1) +}) + +test_that(".normalizeCvFolds: character fold-id vectors are used as-is", { + res <- pecotmr:::.normalizeCvFolds( + cvFolds = list(c("s1", "s2"), c("s3", "s4")), + sampleNames = c("s1", "s2", "s3", "s4")) + expect_equal(res$nFolds, 2L) + expect_equal(nrow(res$samplePartition), 4L) +}) + +test_that(".normalizeCvFolds: integer cvFolds returns NULL partition + nFolds = k", { + res <- pecotmr:::.normalizeCvFolds(cvFolds = 5) + expect_null(res$samplePartition) + expect_equal(res$nFolds, 5L) +}) + +test_that(".normalizeCvFolds: a non-integer scalar cvFolds errors", { + expect_error( + pecotmr:::.normalizeCvFolds(cvFolds = "not_a_fold_spec"), + "single integer, a list of fold vectors" + ) +}) + +# =========================================================================== +# +# TwasWeights constructor: length-mismatch guards + accessors +# +# =========================================================================== + +test_that("TwasWeights: mismatched core-vector lengths error", { + e <- .sc_makeTwasWeightsEntry() + expect_error( + TwasWeights(study = c("s1", "s2"), context = "c1", + trait = "t1", method = "lasso", entry = list(e)), + "same length" + ) +}) + +test_that("TwasWeights: joint* column length must match study", { + e <- .sc_makeTwasWeightsEntry() + expect_error( + TwasWeights(study = "s1", context = "c1", trait = "t1", + method = "lasso", entry = list(e), + jointStudies = c("a", "b")), + "same length as" + ) +}) + +test_that("TwasWeights: getStandardized/getDataType/getVariantIds delegate to the entry", { + e1 <- TwasWeightsEntry(variantIds = paste0("v", 1:4), weights = rnorm(4), + standardized = TRUE, dataType = "expression") + e2 <- TwasWeightsEntry(variantIds = paste0("v", 1:4), weights = rnorm(4), + standardized = FALSE, dataType = "splicing") + tw <- TwasWeights( + study = c("s1", "s1"), context = c("c1", "c1"), + trait = c("t1", "t1"), method = c("lasso", "enet"), + entry = list(e1, e2)) + + expect_true(getStandardized(tw, study = "s1", context = "c1", + trait = "t1", method = "lasso")) + expect_false(getStandardized(tw, study = "s1", context = "c1", + trait = "t1", method = "enet")) + expect_equal(getDataType(tw, study = "s1", context = "c1", + trait = "t1", method = "enet"), "splicing") + expect_equal(getVariantIds(tw, study = "s1", context = "c1", + trait = "t1", method = "lasso"), + paste0("v", 1:4)) +}) + +test_that("TwasWeights: getStudy returns unique study labels", { + e <- .sc_makeTwasWeightsEntry() + tw <- TwasWeights( + study = c("s1", "s2"), context = c("c1", "c2"), + trait = c("t1", "t1"), method = c("lasso", "lasso"), + entry = list(e, e)) + expect_setequal(getStudy(tw), c("s1", "s2")) +}) + +test_that("TwasWeights: getWeights/getCvResult/getFits/getLdSketch delegate per tuple", { + w1 <- rnorm(4) + e1 <- TwasWeightsEntry(variantIds = paste0("v", 1:4), weights = w1, + fits = list(tag = "fitA"), + cvResult = list(rsq = 0.42)) + e2 <- TwasWeightsEntry(variantIds = paste0("v", 1:4), weights = rnorm(4)) + tw <- TwasWeights( + study = c("s1", "s1"), context = c("c1", "c1"), + trait = c("t1", "t1"), method = c("lasso", "enet"), + entry = list(e1, e2), + ldSketch = .sh_makeGenotypeHandle()) + + expect_equal(getWeights(tw, study = "s1", context = "c1", + trait = "t1", method = "lasso"), w1) + expect_equal(getCvResult(tw, study = "s1", context = "c1", + trait = "t1", method = "lasso")$rsq, 0.42) + expect_equal(getFits(tw, study = "s1", context = "c1", + trait = "t1", method = "lasso")$tag, "fitA") + expect_s4_class(getLdSketch(tw), "GenotypeHandle") +}) + +# =========================================================================== +# +# .resolveMethodFunction: unresolvable-key fallback +# +# =========================================================================== + +test_that(".resolveMethodFunction: unresolvable key falls back to the key itself", { + expect_equal(pecotmr:::.resolveMethodFunction("no_such_fn_xyz"), + "no_such_fn_xyz") +}) + +# =========================================================================== +# +# .prepareSusieWeightMethods: seed susie from a supplied susieInf fit +# +# =========================================================================== + +test_that(".prepareSusieWeightMethods: seeds susie_weights from a supplied susieInf fit (vector Y)", { + d <- make_data(n = 40, p = 8) + y_vec <- as.numeric(d$Y) # vector -> exercises the Y matrix coercion + infFit <- make_fake_susie_fit(p = 8, L = 3, inf = TRUE) + + wm <- pecotmr:::.prepareSusieWeightMethods( + d$X, y_vec, + weightMethods = list(susie_weights = list(L = 5), + susie_inf_weights = list()), + fittedModels = list(susieInf = infFit)) + + # The supplied susieInf fit is class-tagged and propagated onto susie_inf_weights, + # and susie_weights is rebuilt from it (model_init carries the inf fit). + expect_true("susieInf" %in% class(wm$susie_inf_weights$susieInfFit)) + expect_true("susieInf" %in% class(wm$susie_weights$model_init)) + expect_equal(wm$susie_weights$unmappable_effects, "none") +}) + +# =========================================================================== +# +# twasWeightsCv: no-seed warning + multivariate/univariate fitter branches +# +# =========================================================================== + +test_that("twasWeightsCv: warns when no random seed has been set", { + d <- make_data(n = 20, p = 5) + # make_data() set a seed; drop it just before the call so the unset-seed + # branch (verbose>=1) is exercised. The fold-sampling at line ~713 restores + # .Random.seed afterwards, so later tests are unaffected. + if (exists(".Random.seed", envir = .GlobalEnv)) + rm(".Random.seed", envir = .GlobalEnv) + expect_message( + twasWeightsCv(d$X, d$Y, fold = 2, weightMethods = NULL), + "No seed has been set" + ) +}) + +test_that("twasWeightsCv: mvsusie per-fold reweighted prior is plumbed (verbose=2)", { + set.seed(42) + n <- 24; p <- 4 + X <- matrix(rnorm(n * p), nrow = n) + colnames(X) <- paste0("v", seq_len(p)); rownames(X) <- paste0("s", seq_len(n)) + Y <- matrix(rnorm(n * 2), nrow = n) + colnames(Y) <- c("y1", "y2"); rownames(Y) <- rownames(X) + + captured <- list() + local_mocked_bindings( + mvsusieWeights = function(X, Y, ...) { + captured[[length(captured) + 1]] <<- list(...) + matrix(0, nrow = ncol(X), ncol = ncol(Y), + dimnames = list(colnames(X), colnames(Y))) + } + ) + prior_cv <- list(matrix(1, 2, 2), matrix(2, 2, 2)) + set.seed(1) + result <- suppressMessages(twasWeightsCv( + X, Y, fold = 2, + weightMethods = list(mvsusieWeights = list()), + reweightedMixturePriorCv = prior_cv, + verbose = 2)) + expect_true("prediction" %in% names(result)) + # the per-fold prior_variance was forwarded to the multivariate fitter + expect_true(any(vapply(captured, function(a) + "prior_variance" %in% names(a), logical(1)))) +}) + +test_that("twasWeightsCv: retainFits forwards retainFit to a multivariate fitter that supports it", { + set.seed(42) + n <- 24; p <- 4 + X <- matrix(rnorm(n * p), nrow = n) + colnames(X) <- paste0("v", seq_len(p)); rownames(X) <- paste0("s", seq_len(n)) + Y <- matrix(rnorm(n * 2), nrow = n) + colnames(Y) <- c("y1", "y2"); rownames(Y) <- rownames(X) + + captured <- list() + local_mocked_bindings( + mrmashWeights = function(X, Y, retainFit = FALSE, ...) { + captured[[length(captured) + 1]] <<- list(retainFit = retainFit) + matrix(0, nrow = ncol(X), ncol = ncol(Y), + dimnames = list(colnames(X), colnames(Y))) + } + ) + set.seed(1) + result <- suppressMessages(twasWeightsCv( + X, Y, fold = 2, + weightMethods = list(mrmashWeights = list()), + retainFits = TRUE)) + expect_true("foldFits" %in% names(result)) + expect_true(all(vapply(captured, function(a) isTRUE(a$retainFit), logical(1)))) +}) + +test_that("twasWeightsCv: univariate fitter runs under verbose=2 (no quiet wrapper)", { + d <- make_data(n = 30, p = 6) + local_mocked_bindings( + lassoWeights = function(X, y, ...) { w <- rep(0, ncol(X)); w[1] <- 0.3; w } + ) + set.seed(1) + result <- suppressMessages(twasWeightsCv( + d$X, d$Y, fold = 2, + weightMethods = list(lassoWeights = list()), + verbose = 2)) + expect_true("prediction" %in% names(result)) + expect_equal(nrow(result$prediction[["lassoPredicted"]]), nrow(d$X)) +}) + +test_that("twasWeightsCv: parallel fold path (numThreads = 2)", { + d <- make_data(n = 30, p = 6) + local_mocked_bindings( + lassoWeights = function(X, y, ...) { w <- rep(0, ncol(X)); w[1] <- 0.4; w } + ) + set.seed(1) + result <- suppressMessages(twasWeightsCv( + d$X, d$Y, fold = 2, + weightMethods = list(lassoWeights = list()), + numThreads = 2)) + expect_true("prediction" %in% names(result)) + expect_equal(nrow(result$prediction[["lassoPredicted"]]), nrow(d$X)) +}) + +# =========================================================================== +# +# learnTwasWeights: retainFits plumbing + verbose=2 + parallel +# +# =========================================================================== + +test_that("learnTwasWeights: multivariate fitter with retainFits + verbose=2 (fitDetail forwarded)", { + set.seed(42) + n <- 24; p <- 5 + X <- matrix(rnorm(n * p), nrow = n) + colnames(X) <- paste0("v", seq_len(p)); rownames(X) <- paste0("s", seq_len(n)) + Y <- matrix(rnorm(n * 2), nrow = n) + colnames(Y) <- c("y1", "y2") + + captured <- list() + local_mocked_bindings( + mrmashWeights = function(X, Y, retainFit = FALSE, + fitDetail = c("slim", "full"), ...) { + captured[[length(captured) + 1]] <<- + list(retainFit = retainFit, fitDetail = fitDetail) + matrix(0, nrow = ncol(X), ncol = ncol(Y), + dimnames = list(colnames(X), colnames(Y))) + } + ) + result <- suppressMessages(learnTwasWeights( + X, Y, + weightMethods = list(mrmashWeights = list()), + retainFits = TRUE, verbose = 2)) + expect_true(is(result, "TwasWeights")) + expect_true(isTRUE(captured[[1]]$retainFit)) + expect_equal(captured[[1]]$fitDetail, "slim") +}) + +test_that("learnTwasWeights: legacy retain_fit alias is forwarded to methods exposing it", { + d <- make_data(n = 30, p = 6) + captured <- list() + local_mocked_bindings( + bayesRWeights = function(X, y, retain_fit = FALSE, ...) { + captured[[length(captured) + 1]] <<- list(retain_fit = retain_fit) + rep(0, ncol(X)) + } + ) + result <- suppressMessages(learnTwasWeights( + d$X, d$Y, + weightMethods = list(bayesRWeights = list()), + retainFits = TRUE)) + expect_true(is(result, "TwasWeights")) + expect_true(isTRUE(captured[[1]]$retain_fit)) +}) + +test_that("learnTwasWeights: univariate fitter runs under verbose=2", { + d <- make_data(n = 30, p = 6) + local_mocked_bindings( + lassoWeights = function(X, y, ...) rep(0.2, ncol(X)) + ) + result <- suppressMessages(learnTwasWeights( + d$X, d$Y, + weightMethods = list(lassoWeights = list()), + verbose = 2)) + expect_true(is(result, "TwasWeights")) + expect_equal(nrow(.weightsByMethod(result, "lassoWeights")), ncol(d$X)) +}) + +test_that("learnTwasWeights: parallel weights path (numThreads = 2)", { + d <- make_data(n = 30, p = 6) + local_mocked_bindings( + lassoWeights = function(X, y, ...) rep(0.1, ncol(X)), + enetWeights = function(X, y, ...) rep(0.2, ncol(X)) + ) + result <- suppressMessages(learnTwasWeights( + d$X, d$Y, + weightMethods = list(lassoWeights = list(), enetWeights = list()), + numThreads = 2)) + expect_true(is(result, "TwasWeights")) + expect_setequal(getMethodNames(result), c("lasso", "enet")) +}) + +# =========================================================================== +# +# twasPredict: TwasWeights S4-collection path +# +# =========================================================================== + +test_that("twasPredict: accepts a TwasWeights S4 collection", { + set.seed(42) + p <- 5 + w1 <- rnorm(p); w2 <- rnorm(p) + e1 <- TwasWeightsEntry(variantIds = paste0("v", seq_len(p)), weights = w1) + e2 <- TwasWeightsEntry(variantIds = paste0("v", seq_len(p)), weights = w2) + tw <- TwasWeights( + study = c("s1", "s1"), context = c("c1", "c1"), + trait = c("t1", "t1"), method = c("lasso", "enet"), + entry = list(e1, e2)) + + X <- matrix(rnorm(8 * p), nrow = 8, ncol = p) + res <- twasPredict(X, tw) + expect_equal(names(res), c("lasso_predicted", "enet_predicted")) + expect_equal(res[["lasso_predicted"]], X %*% matrix(w1, ncol = 1)) + expect_equal(res[["enet_predicted"]], X %*% matrix(w2, ncol = 1)) +}) + diff --git a/tests/testthat/test_twasWeightsPipeline.R b/tests/testthat/test_twasWeightsPipeline.R index 62a8cee9..d0cd5a78 100644 --- a/tests/testthat/test_twasWeightsPipeline.R +++ b/tests/testthat/test_twasWeightsPipeline.R @@ -1213,167 +1213,10 @@ test_that("ensembleWeights: end-to-end with twasWeightsCv output", { # twasWeightsPipeline ensemble integration # =========================================================================== -test_that("pipeline: ensemble=TRUE with only 1 method prints skip message", { - skip_if_not_installed("glmnet") - - set.seed(42) - n <- 100 - p <- 20 - X <- matrix(rnorm(n * p), nrow = n, ncol = p) - colnames(X) <- paste0("var_", seq_len(p)) - rownames(X) <- paste0("sample_", seq_len(n)) - - beta <- c(1.5, -1.0, 0.8, rep(0, p - 3)) - y <- as.numeric(X %*% beta + rnorm(n, sd = 0.5)) - - msgs <- testthat::capture_messages( - res <- pecotmr:::.twasWeightsPipelineMatrix( - X, y, cvFolds = 3, - weightMethods = list(lassoWeights = list()), - ensemble = TRUE - ) - ) - - # Should see the skip message - expect_true(any(grepl("Ensemble model skipped.*only 1 weight method provided", msgs))) - - # No ensemble result should be present - expect_null(res$ensemble) - expect_false("ensemble" %in% getMethodNames(res$twasWeights)) -}) - -test_that("pipeline: ensemble=TRUE skips when methods fail R^2 cutoff", { - skip_if_not_installed("glmnet") - - set.seed(42) - n <- 100 - p <- 20 - X <- matrix(rnorm(n * p), nrow = n, ncol = p) - colnames(X) <- paste0("var_", seq_len(p)) - rownames(X) <- paste0("sample_", seq_len(n)) - - # Use signal so methods produce non-zero weights, but set threshold very high - beta <- c(1.5, -1.0, 0.8, rep(0, p - 3)) - y <- as.numeric(X %*% beta + rnorm(n, sd = 0.5)) - - msgs <- testthat::capture_messages( - res <- pecotmr:::.twasWeightsPipelineMatrix( - X, y, cvFolds = 3, - weightMethods = list(lassoWeights = list(), enetWeights = list()), - ensemble = TRUE, - ensembleR2Threshold = 0.99 # impossibly high threshold - ) - ) - - expect_true(any(grepl("Ensemble TWAS skipped", msgs))) - expect_null(res$ensemble) - expect_false("ensemble" %in% getMethodNames(res$twasWeights)) -}) - -test_that("pipeline: ensemble=TRUE succeeds and adds ensembleWeights", { - skip_if_not_installed("glmnet") - - set.seed(42) - n <- 100 - p <- 20 - X <- matrix(rnorm(n * p), nrow = n, ncol = p) - colnames(X) <- paste0("var_", seq_len(p)) - rownames(X) <- paste0("sample_", seq_len(n)) - - beta <- c(1.5, -1.0, 0.8, rep(0, p - 3)) - y <- as.numeric(X %*% beta + rnorm(n, sd = 0.5)) - - msgs <- testthat::capture_messages( - res <- pecotmr:::.twasWeightsPipelineMatrix( - X, y, cvFolds = 3, - weightMethods = list(lassoWeights = list(), enetWeights = list()), - ensemble = TRUE - ) - ) - - expect_true(any(grepl("Computing ensemble TWAS weights", msgs))) - - # Ensemble weights added alongside individual methods - expect_true("ensemble" %in% getMethodNames(res$twasWeights)) - expect_true("lasso" %in% getMethodNames(res$twasWeights)) - expect_true("enet" %in% getMethodNames(res$twasWeights)) - - # Ensemble predictions added - expect_true("ensemble_predicted" %in% names(res$twasPredictions)) - - # Ensemble result metadata present - expect_false(is.null(res$ensemble)) - expect_true(all(res$ensemble$methodCoef >= 0)) - expect_equal(sum(res$ensemble$methodCoef), 1, tolerance = 1e-6) - - # Ensemble weights should have same length as individual weights - expect_equal(length(getWeights(res$twasWeights, - study = "", context = "", trait = "", - method = "ensemble")), - length(getWeights(res$twasWeights, - study = "", context = "", trait = "", - method = "lasso"))) -}) - -test_that("pipeline: ensemble=FALSE does not run ensemble", { - skip_if_not_installed("glmnet") - - set.seed(42) - n <- 100 - p <- 20 - X <- matrix(rnorm(n * p), nrow = n, ncol = p) - colnames(X) <- paste0("var_", seq_len(p)) - rownames(X) <- paste0("sample_", seq_len(n)) - - beta <- c(1.5, -1.0, 0.8, rep(0, p - 3)) - y <- as.numeric(X %*% beta + rnorm(n, sd = 0.5)) - res <- suppressMessages(pecotmr:::.twasWeightsPipelineMatrix( - X, y, cvFolds = 3, - weightMethods = list(lassoWeights = list(), enetWeights = list()), - ensemble = FALSE - )) - - expect_null(res$ensemble) - expect_false("ensemble" %in% getMethodNames(res$twasWeights)) -}) -test_that("pipeline: ensemble_r2_threshold filters methods for ensemble", { - skip_if_not_installed("glmnet") - set.seed(42) - n <- 100 - p <- 20 - X <- matrix(rnorm(n * p), nrow = n, ncol = p) - colnames(X) <- paste0("var_", seq_len(p)) - rownames(X) <- paste0("sample_", seq_len(n)) - beta <- c(1.5, -1.0, 0.8, rep(0, p - 3)) - y <- as.numeric(X %*% beta + rnorm(n, sd = 0.5)) - - # Run with very low threshold - both methods should pass - msgs_low <- testthat::capture_messages( - res_low <- pecotmr:::.twasWeightsPipelineMatrix( - X, y, cvFolds = 3, - weightMethods = list(lassoWeights = list(), enetWeights = list()), - ensemble = TRUE, - ensembleR2Threshold = 0.001 - ) - ) - expect_false(is.null(res_low$ensemble)) - - # Run with very high threshold - neither should pass - msgs_high <- testthat::capture_messages( - res_high <- pecotmr:::.twasWeightsPipelineMatrix( - X, y, cvFolds = 3, - weightMethods = list(lassoWeights = list(), enetWeights = list()), - ensemble = TRUE, - ensembleR2Threshold = 0.99 - ) - ) - expect_true(any(grepl("Ensemble TWAS skipped", msgs_high))) - expect_null(res_high$ensemble) -}) # =========================================================================== # Solver alternatives @@ -1433,90 +1276,8 @@ test_that("ensembleWeights: invalid solver errors", { "arg") }) -test_that("pipeline: ensemble_solver='nnls' works end-to-end", { - skip_if_not_installed("glmnet") - skip_if_not_installed("nnls") - - set.seed(42) - n <- 100 - p <- 20 - X <- matrix(rnorm(n * p), nrow = n, ncol = p) - colnames(X) <- paste0("var_", seq_len(p)) - rownames(X) <- paste0("sample_", seq_len(n)) - - beta <- c(1.5, -1.0, 0.8, rep(0, p - 3)) - y <- as.numeric(X %*% beta + rnorm(n, sd = 0.5)) - - msgs <- testthat::capture_messages( - res <- pecotmr:::.twasWeightsPipelineMatrix( - X, y, cvFolds = 3, - weightMethods = list(lassoWeights = list(), enetWeights = list()), - ensemble = TRUE, - ensembleSolver = "nnls" - ) - ) - - expect_true(any(grepl("Computing ensemble TWAS weights", msgs))) - expect_true("ensemble" %in% getMethodNames(res$twasWeights)) - expect_true(all(res$ensemble$methodCoef >= 0)) - expect_equal(sum(res$ensemble$methodCoef), 1, tolerance = 1e-6) -}) - -test_that("pipeline: ensemble_solver='lbfgsb' works end-to-end", { - skip_if_not_installed("glmnet") - - set.seed(42) - n <- 100 - p <- 20 - X <- matrix(rnorm(n * p), nrow = n, ncol = p) - colnames(X) <- paste0("var_", seq_len(p)) - rownames(X) <- paste0("sample_", seq_len(n)) - - beta <- c(1.5, -1.0, 0.8, rep(0, p - 3)) - y <- as.numeric(X %*% beta + rnorm(n, sd = 0.5)) - - msgs <- testthat::capture_messages( - res <- pecotmr:::.twasWeightsPipelineMatrix( - X, y, cvFolds = 3, - weightMethods = list(lassoWeights = list(), enetWeights = list()), - ensemble = TRUE, - ensembleSolver = "lbfgsb" - ) - ) - expect_true(any(grepl("Computing ensemble TWAS weights", msgs))) - expect_true("ensemble" %in% getMethodNames(res$twasWeights)) - expect_true(all(res$ensemble$methodCoef >= 0)) - expect_equal(sum(res$ensemble$methodCoef), 1, tolerance = 1e-6) -}) -test_that("pipeline: ensemble_solver='glmnet' works end-to-end", { - skip_if_not_installed("glmnet") - - set.seed(42) - n <- 100 - p <- 20 - X <- matrix(rnorm(n * p), nrow = n, ncol = p) - colnames(X) <- paste0("var_", seq_len(p)) - rownames(X) <- paste0("sample_", seq_len(n)) - - beta <- c(1.5, -1.0, 0.8, rep(0, p - 3)) - y <- as.numeric(X %*% beta + rnorm(n, sd = 0.5)) - - msgs <- testthat::capture_messages( - res <- pecotmr:::.twasWeightsPipelineMatrix( - X, y, cvFolds = 3, - weightMethods = list(lassoWeights = list(), enetWeights = list()), - ensemble = TRUE, - ensembleSolver = "glmnet" - ) - ) - - expect_true(any(grepl("Computing ensemble TWAS weights", msgs))) - expect_true("ensemble" %in% getMethodNames(res$twasWeights)) - expect_true(all(res$ensemble$methodCoef >= 0)) - expect_equal(sum(res$ensemble$methodCoef), 1, tolerance = 1e-6) -}) test_that("ensembleWeights: solver='glmnet' respects alpha parameter", { skip_if_not_installed("glmnet") @@ -2005,72 +1766,11 @@ test_that(".twasLdFromSketch: returns a square LD matrix named by variantIds", { # .twasWeightsPipelineMatrix: susieFit pre-fit pass-through # =========================================================================== -test_that(".twasWeightsPipelineMatrix: susieFit pre-fit is recorded in res", { - set.seed(0) - n <- 30; p <- 5 - X <- matrix(rnorm(n * p), n, p, - dimnames = list(paste0("s", 1:n), paste0("v", 1:p))) - y <- as.numeric(X %*% c(1.0, -0.5, 0, 0, 0) + rnorm(n, sd = 0.2)) - - # Build a stub susie fit shape; the pipeline records its intermediate. - fake_susie <- list( - alpha = matrix(1/p, nrow = 2, ncol = p), - mu = matrix(0, nrow = 2, ncol = p), - X_column_scale_factors = rep(1, p), - pip = rep(0.1, p)) - - # The intermediate-recording branch keys on snake_case `susie_weights`. - res <- suppressMessages( - pecotmr:::.twasWeightsPipelineMatrix( - X = X, y = y, - susieFit = fake_susie, - cvFolds = 0, - weightMethods = list(susie_weights = list()), - estimatePi = FALSE, - verbose = 0)) - expect_true("susieWeightsIntermediate" %in% names(res)) - expect_true("twasWeights" %in% names(res)) -}) # =========================================================================== # .twasWeightsPipelineMatrix: empirical pi path via mr.ash (mocked) # =========================================================================== -test_that(".twasWeightsPipelineMatrix: empirical pi from mr.ash gets propagated", { - set.seed(1) - n <- 30; p <- 5 - X <- matrix(rnorm(n * p), n, p, - dimnames = list(paste0("s", 1:n), paste0("v", 1:p))) - y <- as.numeric(X %*% c(0.5, 0, 0, 0, 0) + rnorm(n, sd = 0.2)) - - # Mock mrashWeights to return a fake matrix carrying a fit$pi attribute. - local_mocked_bindings( - mrashWeights = function(X, y, ...) { - out <- matrix(rep(0.05, ncol(X)), ncol = 1) - attr(out, "fit") <- list(pi = c(0.8, 0.1, 0.1)) - rownames(out) <- colnames(X) - out - }, - bayesCWeights = function(X, y, pi, ...) { - # Capture the pi the pipeline injected. - out <- matrix(pi, nrow = ncol(X), ncol = 1) - rownames(out) <- colnames(X) - out - }, - .package = "pecotmr" - ) - - res <- suppressMessages( - pecotmr:::.twasWeightsPipelineMatrix( - X = X, y = y, - cvFolds = 0, - weightMethods = list(mrash_weights = list(), - bayes_c_weights = list()), - estimatePi = TRUE, - verbose = 0)) - expect_true("empiricalPi" %in% names(res)) - expect_equal(as.numeric(res$empiricalPi), 1 - 0.8, tolerance = 1e-12) -}) # =========================================================================== # Multi-region / jointRegions (P3) @@ -2093,7 +1793,7 @@ test_that(".twasRegionLabel / .twasFitsForRegion select per-region fits", { test_that(".twasMergeRegionEntries stacks weights and builds a flat per-region CV df", { mk <- function(vids, w, rsq) TwasWeightsEntry( variantIds = vids, weights = w, - cvPerformance = list(samplePartition = NULL, predictions = NULL, + cvResult = list(samplePartition = NULL, predictions = NULL, metrics = c(rsq = rsq, pval = 0.01))) e1 <- mk(c("v1", "v2"), c(0.1, 0.2), 0.3) e2 <- mk(c("v3", "v4"), c(0.3, 0.4), 0.5) @@ -2102,7 +1802,7 @@ test_that(".twasMergeRegionEntries stacks weights and builds a flat per-region C expect_s4_class(m, "TwasWeightsEntry") expect_equal(getVariantIds(m), c("v1", "v2", "v3", "v4")) expect_equal(unname(getWeights(m)), c(0.1, 0.2, 0.3, 0.4)) - cv <- getCvPerformance(m) + cv <- getCvResult(m) expect_s3_class(cv, "data.frame") expect_equal(cv$region, c("chr1:1-100", "chr1:200-300")) expect_equal(cv$rsq, c(0.3, 0.5)) @@ -2220,37 +1920,618 @@ test_that(".twasCvResultFor returns NULL when no entry carries CV", { expect_null(pecotmr:::.twasCvResultFor(fmr, "S", "C", "T")) }) -test_that(".twasWeightsPipelineMatrix merges fineMappingCv predictions and adopts its partition", { + +test_that(".unpackMashPrior routes a MashPrior into the internal CV arguments", { + unpack <- pecotmr:::.unpackMashPrior + U <- list(U1 = diag(2)) + sp <- data.frame(Sample = paste0("s", 1:6), Fold = rep(1:3, each = 2), + stringsAsFactors = FALSE) + spX <- data.frame(Sample = paste0("s", 1:6), Fold = rep(c(1, 2), 3), + stringsAsFactors = FALSE) + pf <- list(list(U = U), list(U = U), list(U = U)) + + # NULL -> all NULL, but the explicitly supplied partition is preserved. + r1 <- unpack(NULL, sp) + expect_null(r1$fullPrior) + expect_null(r1$dataDrivenPriorMatricesCv) + expect_identical(r1$samplePartition, sp) + + # full-only. + r2 <- unpack(MashPrior(fullFit = list(U = U)), NULL) + expect_identical(r2$fullPrior$U, U) + expect_null(r2$dataDrivenPriorMatricesCv) + + # cv -> per-fold priors + the partition the priors were computed on. + mpC <- MashPrior(cvFits = list(samplePartition = sp, perFoldFits = pf)) + r3 <- unpack(mpC, NULL) + expect_length(r3$dataDrivenPriorMatricesCv, 3L) + expect_identical(r3$samplePartition, sp) + + # An explicit partition wins over the bundle's. + expect_identical(unpack(mpC, spX)$samplePartition, spX) + + # A non-MashPrior is rejected. + expect_error(unpack(list(U = U)), "MashPrior") +}) + + + +# =========================================================================== +# Additional coverage: rbind/region-merge helpers, normalize/capability +# helpers, FM-fit lookups, solver fallbacks, ensembleWeights error branches, +# QtlDataset/QtlSumStats method branches, and the MultiStudyQtlDataset method. +# =========================================================================== + +.tp_tw <- function(study = "S", context = "c1", trait = "t1", method = "lasso", + vid = "v1", w = 0.5) { + TwasWeights(study = study, context = context, trait = trait, method = method, + entry = list(TwasWeightsEntry(variantIds = vid, weights = w))) +} + +# ----------------------------------------------------------------------------- +# .rbindTwasWeights +# ----------------------------------------------------------------------------- + +test_that(".rbindTwasWeights: concatenates two collections and rejects non-TwasWeights", { + out <- pecotmr:::.rbindTwasWeights(.tp_tw(method = "lasso"), + .tp_tw(method = "enet")) + expect_s4_class(out, "TwasWeights") + expect_equal(nrow(out), 2L) + expect_setequal(as.character(out$method), c("lasso", "enet")) + expect_error(pecotmr:::.rbindTwasWeights(list(), .tp_tw()), + "expects two TwasWeights") +}) + +# ----------------------------------------------------------------------------- +# .twasMergeRegions / .twasMergeRegionEntries / .twasFitsForRegion (dead-ish +# multi-region helpers and per-region fit selection) +# ----------------------------------------------------------------------------- + +test_that(".twasMergeRegions: stacks per-region entries under one key", { + r1 <- TwasWeights(study = "S", context = "c1", trait = "t1", method = "lasso", + entry = list(TwasWeightsEntry(variantIds = "v1", weights = 0.1))) + r2 <- TwasWeights(study = "S", context = "c1", trait = "t1", method = "lasso", + entry = list(TwasWeightsEntry(variantIds = "v2", weights = 0.2))) + out <- pecotmr:::.twasMergeRegions(list(r1, r2), c("rA", "rB")) + expect_s4_class(out, "TwasWeights") + expect_equal(nrow(out), 1L) + expect_setequal(getVariantIds(out$entry[[1L]]), c("v1", "v2")) # stacked + # Single-element and all-NULL short-circuits. + expect_identical(pecotmr:::.twasMergeRegions(list(r1), "rA"), r1) + expect_null(pecotmr:::.twasMergeRegions(list(NULL), "rA")) +}) + +test_that(".twasMergeRegionEntries: all-NULL entries -> NULL", { + expect_null(pecotmr:::.twasMergeRegionEntries(list(NULL, NULL), c("a", "b"))) +}) +# (.twasFitsForRegion per-region selection is covered by the existing +# ".twasRegionLabel / .twasFitsForRegion select per-region fits" test.) + +# ----------------------------------------------------------------------------- +# .twasBuildFromCachedRows +# ----------------------------------------------------------------------------- + +test_that(".twasBuildFromCachedRows: assembles a TwasWeights keyed by method names", { + rows <- list(lasso = TwasWeightsEntry(variantIds = "v1", weights = 0.1), + enet = TwasWeightsEntry(variantIds = "v1", weights = 0.2)) + out <- pecotmr:::.twasBuildFromCachedRows(rows, "S", "c1", "t1") + expect_s4_class(out, "TwasWeights") + expect_setequal(as.character(out$method), c("lasso", "enet")) + expect_null(pecotmr:::.twasBuildFromCachedRows(list(), "S", "c1", "t1")) +}) + +# ----------------------------------------------------------------------------- +# .twasNormalizeMethods / .twasTokensFromMethodList / .twasIsMultivariateToken +# ----------------------------------------------------------------------------- + +test_that(".twasNormalizeMethods: a fine-mapping token without a learner gets a stub entry", { + norm <- pecotmr:::.twasNormalizeMethods(c("lasso", "fsusie")) + expect_true("fsusie" %in% norm$tokens) + expect_true("fsusie_weights" %in% names(norm$methodList)) + expect_equal(norm$methodList$fsusie_weights, list()) +}) + +test_that(".twasTokensFromMethodList: unknown snake keys fall back to the bare name", { + toks <- pecotmr:::.twasTokensFromMethodList( + list(susie_inf_weights = list(), totally_made_up_weights = list())) + expect_true("susieInf" %in% toks) # known mapping + expect_true("totally_made_up" %in% toks) # unknown -> fallback +}) + +test_that(".twasIsMultivariateToken: TWAS table, FM table, and unknown tokens", { + expect_true(pecotmr:::.twasIsMultivariateToken("mrmash")) # TWAS table + expect_true(pecotmr:::.twasIsMultivariateToken("mvsusie")) # FM table + expect_false(pecotmr:::.twasIsMultivariateToken("lasso")) + expect_false(pecotmr:::.twasIsMultivariateToken("nonexistent")) +}) + +# ----------------------------------------------------------------------------- +# .twasFineMappingFitFor / .twasCvResultFor +# ----------------------------------------------------------------------------- + +test_that(".twasFineMappingFitFor: NULL fineMappingResult -> NULL", { + expect_null(pecotmr:::.twasFineMappingFitFor(NULL, "S", "c1", "t1", "susie")) +}) + +test_that(".twasCvResultFor: NULL / non-FMR / no-match all return NULL", { + expect_null(pecotmr:::.twasCvResultFor(NULL, "S", "c1", "t1")) + expect_null(pecotmr:::.twasCvResultFor(list(a = 1), "S", "c1", "t1")) + fmr <- .tp_makeStubFineMappingResult(study = "study1", contexts = "brain", + traits = "ENSG_A", method = "susie") + expect_null(pecotmr:::.twasCvResultFor(fmr, "study1", "brain", "OTHER")) +}) + +test_that(".twasCvResultFor: a multi-region (per-region list) cvResult is unwrapped", { + sp <- data.frame(Sample = paste0("s", 1:4), Fold = rep(1:2, 2), + stringsAsFactors = FALSE) + nested <- list(region1 = list( + samplePartition = sp, + prediction = list(susie_predicted = matrix(0, 4, 1, + dimnames = list(paste0("s", 1:4), NULL))), + performance = list(susie_performance = matrix(0, 1, 6)))) + entry <- FineMappingEntry(variantIds = "v1", susieFit = list(), + topLoci = data.frame(variant_id = "v1", pip = 0.9), + cvResult = nested) + fmr <- QtlFineMappingResult(study = "S", context = "c1", trait = "t1", + method = "susie", entry = list(entry)) + out <- pecotmr:::.twasCvResultFor(fmr, "S", "c1", "t1") + expect_false(is.null(out)) + expect_identical(out$samplePartition, sp) + expect_true("susie_predicted" %in% names(out$prediction)) +}) + +# ----------------------------------------------------------------------------- +# Ensemble stacking solver fallbacks (quadprog + lbfgsb: failure & all-zero) +# ----------------------------------------------------------------------------- + +test_that(".solveEnsembleQuadprog: solver failure and all-zero solution fall back to equal weights", { + P <- matrix(rnorm(20), 10, 2); y <- rnorm(10) + local_mocked_bindings(solve.QP = function(...) stop("boom"), .package = "pecotmr") + expect_warning(z <- pecotmr:::.solveEnsembleQuadprog(P, y, 2L), "QP solver failed") + expect_equal(z, c(0.5, 0.5)) + local_mocked_bindings(solve.QP = function(...) list(solution = c(0, 0)), + .package = "pecotmr") + expect_warning(z2 <- pecotmr:::.solveEnsembleQuadprog(P, y, 2L), "all-zero") + expect_equal(z2, c(0.5, 0.5)) +}) + +test_that(".solveEnsembleLbfgsb: solver failure and all-zero solution fall back to equal weights", { + P <- matrix(rnorm(20), 10, 2); y <- rnorm(10) + local_mocked_bindings(optim = function(...) stop("boom"), .package = "pecotmr") + expect_warning(z <- pecotmr:::.solveEnsembleLbfgsb(P, y, 2L), "L-BFGS-B solver failed") + expect_equal(z, c(0.5, 0.5)) + local_mocked_bindings(optim = function(...) list(par = c(0, 0)), + .package = "pecotmr") + expect_warning(z2 <- pecotmr:::.solveEnsembleLbfgsb(P, y, 2L), "all-zero") + expect_equal(z2, c(0.5, 0.5)) +}) + +# ----------------------------------------------------------------------------- +# ensembleWeights: error branches and the single-valid-method path +# ----------------------------------------------------------------------------- + +.tp_predBlock <- function(n = 30L, methods = c("a", "b")) { + samp <- paste0("s", seq_len(n)) + setNames(lapply(methods, function(.) matrix(rnorm(n), n, 1, + dimnames = list(samp, NULL))), + paste0(methods, "_predicted")) +} + +test_that("ensembleWeights: multi-dataset input validation errors", { + pb <- .tp_predBlock() + y <- rnorm(30) + expect_error(ensembleWeights(cvResults = list(), Y = list(y)), + "non-empty list") + expect_error( + ensembleWeights(cvResults = list(list(prediction = pb), list(prediction = pb)), + Y = list(y, y), + twasWeightList = list(list())), # wrong length + "must be a list of the same length") + expect_error( + ensembleWeights(cvResults = list(list(prediction = pb), list(foo = 1)), + Y = list(y, y)), + "does not contain") +}) + +test_that("ensembleWeights: unnamed prediction list errors", { + pb <- .tp_predBlock(); names(pb) <- NULL + expect_error(ensembleWeights(cvResults = list(prediction = pb), Y = rnorm(30)), + "must be a named list") +}) + +test_that("ensembleWeights: too few complete observations errors", { + samp <- paste0("s", 1:2) # 2 obs < K + 1 = 3 + pb <- list(a_predicted = matrix(rnorm(2), 2, 1, dimnames = list(samp, NULL)), + b_predicted = matrix(rnorm(2), 2, 1, dimnames = list(samp, NULL))) + expect_error( + ensembleWeights(cvResults = list(prediction = pb), + Y = setNames(rnorm(2), samp)), + "Too few complete observations") +}) + +test_that("ensembleWeights: only one method with signal -> it gets full weight", { + samp <- paste0("s", 1:30) + y <- rnorm(30); names(y) <- samp + pb <- list(a_predicted = matrix(y + rnorm(30, sd = 0.1), 30, 1, + dimnames = list(samp, NULL)), + b_predicted = matrix(0, 30, 1, dimnames = list(samp, NULL))) # flat + res <- suppressMessages(ensembleWeights(cvResults = list(prediction = pb), Y = y)) + expect_equal(unname(res$methodCoef[["a"]]), 1) + expect_equal(unname(res$methodCoef[["b"]]), 0) +}) + +test_that("ensembleWeights: twasWeightList non-list and dimension-mismatch warnings", { + samp <- paste0("s", 1:30) + y <- rnorm(30); names(y) <- samp + pb <- list(a_predicted = matrix(y + rnorm(30, sd = .3), 30, 1, + dimnames = list(samp, NULL)), + b_predicted = matrix(y + rnorm(30, sd = .5), 30, 1, + dimnames = list(samp, NULL))) + # twasWeightList[[1]] empty -> warns and skips the weight combination. + expect_warning( + r1 <- ensembleWeights(cvResults = list(prediction = pb), Y = y, + twasWeightList = list()), + "empty or not a list") + expect_null(r1$ensembleTwasWeights) + # Mismatched dims across method weight matrices -> warn + skip that method. + wl <- list(a_weights = matrix(0.1, 3, 1, dimnames = list(paste0("v", 1:3), NULL)), + b_weights = matrix(0.1, 2, 1, dimnames = list(paste0("v", 1:2), NULL))) + expect_warning( + ensembleWeights(cvResults = list(prediction = pb), Y = y, + twasWeightList = wl), + "inconsistent dimensions") +}) + +# ----------------------------------------------------------------------------- +# twasWeightsPipeline(QtlDataset): remaining branches +# ----------------------------------------------------------------------------- + +test_that("twasWeightsPipeline(QtlDataset): fitFullData=FALSE without CV errors", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + expect_error( + twasWeightsPipeline(qd, methods = list(lasso_weights = list()), + fitFullData = FALSE, cvFolds = 0), + "fitFullData = FALSE requires cross-validation") +}) + +test_that("twasWeightsPipeline(QtlDataset): mashPrior with no mrmash warns and is ignored", { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor()), + .tp_mockIndividualWeights(), list(.package = "pecotmr"))) + expect_warning( + suppressMessages(twasWeightsPipeline( + qd, methods = list(lasso_weights = list()), + mashPrior = MashPrior(fullFit = list(U = list(comp = diag(2)))), + cisWindow = 1000L, cvFolds = 0, ensemble = FALSE, estimatePi = FALSE, + verbose = 0)), + "not among `methods`") +}) + +test_that("twasWeightsPipeline(QtlDataset): mashPrior full prior is threaded into mr.mash args", { + qd <- .tp_makeQtlDataset(contexts = c("brain", "liver"), traits = "ENSG_A") + ddpm <- list(U = list(comp = diag(2))) + captured <- NULL + local_mocked_bindings( + extractBlockGenotypes = .tp_mockExtractor(), + mrmashWeights = function(X, Y, ...) { + dots <- list(...) + captured <<- dots$dataDrivenPriorMatrices + matrix(0, ncol(X), ncol(Y), dimnames = list(colnames(X), colnames(Y))) + }, + .package = "pecotmr") + suppressMessages(suppressWarnings(twasWeightsPipeline( + qd, methods = list(mrmash_weights = list()), + mashPrior = MashPrior(fullFit = ddpm), + cisWindow = 1000L, cvFolds = 0, ensemble = FALSE, estimatePi = FALSE, + verbose = 0))) + expect_identical(captured, ddpm) +}) + +test_that("twasWeightsPipeline(QtlDataset): jointSpec mr.mash + univariate lasso both run", { + qd <- .tp_makeQtlDataset(contexts = c("brain", "liver"), traits = "ENSG_A") + jointRes <- .tp_tw(study = "study1", context = "brain", trait = "ENSG_A", + method = "mrmash") + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor(), + .twasDispatchJointSpecsQtlDataset = function(...) jointRes), + .tp_mockIndividualWeights(), list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings(twasWeightsPipeline( + qd, methods = c("mrmash", "lasso"), + jointSpecification = "context", cisWindow = 1000L, cvFolds = 0, + ensemble = FALSE, estimatePi = FALSE, verbose = 0))) + expect_s4_class(res, "TwasWeights") + expect_true("mrmash" %in% as.character(res$method)) # from jointResult + expect_true("lasso" %in% as.character(res$method)) # from per-tuple loop +}) + +test_that("twasWeightsPipeline(QtlDataset): region selects overlapping traits", { + qd <- .tp_makeQtlDataset(contexts = "brain", + traits = c("ENSG_A", "ENSG_B")) # @1000, @2000 + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor()), + .tp_mockIndividualWeights(), list(.package = "pecotmr"))) + region <- GenomicRanges::GRanges("chr1", IRanges::IRanges(900, 1600)) + res <- suppressMessages(suppressWarnings(twasWeightsPipeline( + qd, methods = list(lasso_weights = list()), region = region, + cvFolds = 0, ensemble = FALSE, estimatePi = FALSE, verbose = 0))) + expect_setequal(getTraits(res), "ENSG_A") # only the overlapping gene +}) + +# ----------------------------------------------------------------------------- +# twasWeightsPipeline(QtlSumStats): remaining branches +# ----------------------------------------------------------------------------- + +test_that("twasWeightsPipeline(QtlSumStats): NULL methods uses the default RSS preset", { + ss <- .tp_makeQtlSumStats() + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor(), + prsCsWeights = function(stat, LD, ...) rep(0, nrow(LD)), + sdprWeights = function(stat, LD, ...) rep(0, nrow(LD))), + .tp_mockSumstatWeights(), list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(ss, methods = NULL, verbose = 0))) + expect_s4_class(res, "TwasWeights") + expect_setequal(getMethodNames(res), c("lasso", "prsCs", "dpr_gibbs")) +}) + +test_that("twasWeightsPipeline(QtlSumStats): named-list methods and invalid type", { + ss <- .tp_makeQtlSumStats() + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor()), + .tp_mockSumstatWeights(), list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(ss, methods = list(lasso = list()), verbose = 0))) + expect_setequal(getMethodNames(res), "lasso") + expect_error(twasWeightsPipeline(ss, methods = 42), + "must be NULL, a character vector, or a named list") +}) + +test_that("twasWeightsPipeline(QtlSumStats): traitId filter selects matching rows", { + ss <- .tp_makeQtlSumStats() + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor()), + .tp_mockSumstatWeights(), list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(ss, methods = "lasso", traitId = "t1", verbose = 0))) + expect_setequal(getTraits(res), "t1") + expect_error( + twasWeightsPipeline(ss, methods = "lasso", traitId = "absent"), + "no entries matched") +}) + +test_that("twasWeightsPipeline(QtlSumStats): multivariate mr.mash returns a column per context", { + ss <- .tp_makeQtlSumStats(n_entries = 2L) # 2 contexts of (s1, t1) + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor(), + mrmashRssWeights = function(stat, LD, ...) { + k <- if (is.matrix(stat$z)) ncol(stat$z) else 1L + matrix(0, nrow(LD), k, dimnames = list(rownames(LD), NULL)) + }), + .tp_mockSumstatWeights(), list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(ss, methods = "mrmash", verbose = 0))) + expect_s4_class(res, "TwasWeights") + expect_equal(nrow(res), 2L) # one row per context +}) + +test_that("twasWeightsPipeline(QtlSumStats): mvsusie with no matching FM fit warns and skips", { + ss <- .tp_makeQtlSumStats(n_entries = 2L) + # FineMappingResult that does NOT contain an mvsusie fit for (s1, t1). + fmr <- .tp_makeStubFineMappingResult(study = "s1", contexts = "c1", + traits = "t1", method = "susie") + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor()), + .tp_mockSumstatWeights(), list(.package = "pecotmr"))) + expect_error( + suppressWarnings(suppressMessages( + twasWeightsPipeline(ss, methods = "mvsusie", fineMappingResult = fmr, + verbose = 0))), + "no entries produced weights") +}) + +# ----------------------------------------------------------------------------- +# twasWeightsPipeline(MultiStudyQtlDataset) +# ----------------------------------------------------------------------------- + +.tp_makeMultiStudy <- function() { + qd <- .tp_makeQtlDataset(contexts = "brain", traits = "ENSG_A") # study1 + ss <- .tp_makeQtlSumStats() # s1 + MultiStudyQtlDataset(qtlDatasets = list(study1 = qd), sumStats = ss) +} + +test_that("twasWeightsPipeline(MultiStudyQtlDataset): recurses into components and rbinds", { + mt <- .tp_makeMultiStudy() + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor()), + .tp_mockIndividualWeights(), .tp_mockSumstatWeights(), + list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings(twasWeightsPipeline( + mt, methods = "lasso", cisWindow = 1000L, cvFolds = 0, ensemble = FALSE, + estimatePi = FALSE, verbose = 0))) + expect_s4_class(res, "TwasWeights") + expect_setequal(as.character(res$study), c("study1", "s1")) # both phases +}) + +test_that("twasWeightsPipeline(MultiStudyQtlDataset): region + cisWindow is rejected", { + mt <- .tp_makeMultiStudy() + expect_error( + twasWeightsPipeline(mt, methods = "lasso", + region = GenomicRanges::GRanges("chr1", + IRanges::IRanges(1, 100)), + cisWindow = 1000L), + "specify either") +}) + +test_that("twasWeightsPipeline(MultiStudyQtlDataset): jointSpec-only mr.mash returns the joint result", { + mt <- .tp_makeMultiStudy() + jointRes <- .tp_tw(study = "s1", context = "c1", trait = "t1", method = "mrmash") + local_mocked_bindings( + .twasDispatchJointSpecsMultiStudy = function(...) jointRes, + .package = "pecotmr") + res <- suppressMessages(suppressWarnings(twasWeightsPipeline( + mt, methods = "mrmash", jointSpecification = "context", + cisWindow = 1000L, verbose = 0))) + expect_s4_class(res, "TwasWeights") + expect_setequal(as.character(res$method), "mrmash") +}) + +test_that("twasWeightsPipeline(MultiStudyQtlDataset): jointSpec mr.mash + univariate lasso are combined", { + mt <- .tp_makeMultiStudy() + jointRes <- .tp_tw(study = "s1", context = "c1", trait = "t1", method = "mrmash") + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor(), + .twasDispatchJointSpecsMultiStudy = function(...) jointRes), + .tp_mockIndividualWeights(), .tp_mockSumstatWeights(), + list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings(twasWeightsPipeline( + mt, methods = c("mrmash", "lasso"), jointSpecification = "context", + cisWindow = 1000L, cvFolds = 0, ensemble = FALSE, estimatePi = FALSE, + verbose = 0))) + expect_s4_class(res, "TwasWeights") + expect_true("mrmash" %in% as.character(res$method)) + expect_true("lasso" %in% as.character(res$method)) +}) + +# =========================================================================== +# Mop-up: gate no-op, CV unwrap no-partition, QtlSumStats jointSpec + univariate +# combine, multivariate group/SNP-order edges, MultiStudy list-form methods, +# ensembleWeights sample-overlap / contextIndex / vector-weight paths, and the +# nnls / glmnet solver fallbacks. +# =========================================================================== + +test_that(".twasCheckFineMappingMethods: empty token list is a no-op", { + expect_null(pecotmr:::.twasCheckFineMappingMethods(character(0), NULL, + "QtlDataset")) +}) + +test_that(".twasCvResultFor: an entry whose nested CV carries no partition is skipped", { + nested <- list(region1 = list(prediction = list(susie_predicted = matrix(0, 1, 1)))) + entry <- FineMappingEntry(variantIds = "v1", susieFit = list(), + topLoci = data.frame(variant_id = "v1", pip = 0.9), cvResult = nested) + fmr <- QtlFineMappingResult(study = "S", context = "c1", trait = "t1", + method = "susie", entry = list(entry)) + expect_null(pecotmr:::.twasCvResultFor(fmr, "S", "c1", "t1")) # 616 -> next +}) + +test_that("twasWeightsPipeline(QtlSumStats): jointSpec mr.mash-only with no fits errors", { + ss <- .tp_makeQtlSumStats() + local_mocked_bindings( + .twasDispatchJointSpecsQtlSumStats = function(...) NULL, .package = "pecotmr") + expect_error( + suppressMessages(twasWeightsPipeline(ss, methods = "mrmash", + jointSpecification = "context")), + "no joint fits produced") # 1059 +}) + +test_that("twasWeightsPipeline(QtlSumStats): jointSpec mr.mash + univariate lasso combine", { + ss <- .tp_makeQtlSumStats() + jointRes <- .tp_tw(study = "s1", context = "c1", trait = "t1", method = "mrmash") + do.call(local_mocked_bindings, + c(list(extractBlockGenotypes = .tp_mockExtractor(), + .twasDispatchJointSpecsQtlSumStats = function(...) jointRes), + .tp_mockSumstatWeights(), list(.package = "pecotmr"))) + res <- suppressMessages(suppressWarnings(twasWeightsPipeline( + ss, methods = c("mrmash", "lasso"), jointSpecification = "context", + verbose = 0))) + expect_true("mrmash" %in% as.character(res$method)) # 1062-1063 + expect_true("lasso" %in% as.character(res$method)) # 1300-1301 rbind +}) + +test_that("twasWeightsPipeline(QtlSumStats): a single-context (study, trait) group is skipped for mr.mash", { + # t1 has 2 contexts (processed); t2 has 1 context (skipped at the < 2 guard). + entries <- lapply(1:3, function(.) .tp_makeSumstatsEntry()) + ss <- QtlSumStats(study = rep("s1", 3), context = c("c1", "c2", "c1"), + trait = c("t1", "t1", "t2"), entry = entries, genome = "hg19", + ldSketch = .tp_makeHandle(snp_n = 20L), + qcInfo = list(step1 = "ok")) + local_mocked_bindings( + extractBlockGenotypes = .tp_mockExtractor(), + mrmashRssWeights = function(stat, LD, ...) { + k <- if (is.matrix(stat$z)) ncol(stat$z) else 1L + matrix(0, nrow(LD), k, dimnames = list(rownames(LD), NULL)) + }, .package = "pecotmr") + res <- suppressMessages(suppressWarnings( + twasWeightsPipeline(ss, methods = "mrmash", verbose = 0))) + expect_setequal(getTraits(res), "t1") # only t1 (1189) +}) + +test_that("twasWeightsPipeline(QtlSumStats): a mismatched SNP order in a multivariate group errors", { + ss <- .tp_makeQtlSumStats(n_entries = 2L) # (s1, t1) x 2 ctx + calls <- 0L + local_mocked_bindings( + getSumstatDf = function(x, study, context, trait, require, ...) { + calls <<- calls + 1L + vid <- if (calls == 1L) c("v1", "v2") else c("v2", "v1") + data.frame(variant_id = vid, z = c(1, 2), N = c(1000, 1000), + stringsAsFactors = FALSE) + }, .package = "pecotmr") + expect_error( + suppressMessages(twasWeightsPipeline(ss, methods = "mrmash", verbose = 0)), + "identical SNP order") # 1212 +}) + +test_that("twasWeightsPipeline(MultiStudyQtlDataset): list-form methods, joint mr.mash only", { + mt <- .tp_makeMultiStudy() + jointRes <- .tp_tw(study = "s1", context = "c1", trait = "t1", method = "mrmash") + local_mocked_bindings( + .twasDispatchJointSpecsMultiStudy = function(...) jointRes, .package = "pecotmr") + res <- suppressMessages(suppressWarnings(twasWeightsPipeline( + mt, methods = list(mrmash_weights = list()), + jointSpecification = "context", cisWindow = 1000L, verbose = 0))) + expect_setequal(as.character(res$method), "mrmash") # 1360-1377 +}) + +test_that("ensembleWeights: partial sample overlap emits a message", { + samp <- paste0("s", 1:30) + pb <- list(a_predicted = matrix(rnorm(30), 30, 1, dimnames = list(samp, NULL)), + b_predicted = matrix(rnorm(30), 30, 1, dimnames = list(samp, NULL))) + y <- setNames(rnorm(25), paste0("s", 1:25)) # subset of samples + expect_message( + ensembleWeights(cvResults = list(prediction = pb), Y = y), + "common samples") # 1824 +}) + +test_that("ensembleWeights: contextIndex beyond Y matrix columns errors (named path)", { + samp <- paste0("s", 1:30) + pb <- list(a_predicted = matrix(rnorm(30), 30, 1, dimnames = list(samp, NULL)), + b_predicted = matrix(rnorm(30), 30, 1, dimnames = list(samp, NULL))) + Ymat <- matrix(rnorm(30), 30, 1, dimnames = list(samp, "ctx1")) + expect_error( + ensembleWeights(cvResults = list(prediction = pb), Y = Ymat, + contextIndex = 2), + "exceeds number of columns") # 1830 +}) + +test_that("ensembleWeights: vector (non-matrix) method weights are coerced to a column", { + samp <- paste0("s", 1:30) + y <- rnorm(30); names(y) <- samp + pb <- list(a_predicted = matrix(y + rnorm(30, sd = .3), 30, 1, + dimnames = list(samp, NULL)), + b_predicted = matrix(y + rnorm(30, sd = .5), 30, 1, + dimnames = list(samp, NULL))) + wl <- list(a_weights = setNames(rep(0.1, 3), paste0("v", 1:3)), # bare vectors + b_weights = setNames(rep(0.2, 3), paste0("v", 1:3))) + res <- ensembleWeights(cvResults = list(prediction = pb), Y = y, + twasWeightList = wl) + expect_length(res$ensembleTwasWeights, 3L) # 1937 / 1947 +}) + +test_that(".solveEnsembleNnls: solver failure and all-zero fall back to equal weights", { + skip_if_not_installed("nnls") + P <- matrix(rnorm(20), 10, 2); y <- rnorm(10) + local_mocked_bindings(nnls = function(...) stop("boom"), .package = "nnls") + expect_warning(z <- pecotmr:::.solveEnsembleNnls(P, y, 2L), "NNLS solver failed") + expect_equal(z, c(0.5, 0.5)) + local_mocked_bindings(nnls = function(...) list(x = c(0, 0)), .package = "nnls") + expect_warning(z2 <- pecotmr:::.solveEnsembleNnls(P, y, 2L), "all-zero") + expect_equal(z2, c(0.5, 0.5)) +}) + +test_that(".solveEnsembleGlmnet: solver failure falls back to equal weights", { skip_if_not_installed("glmnet") - set.seed(11) - n <- 50L; p <- 10L - X <- matrix(rnorm(n * p), n, p, - dimnames = list(paste0("s", seq_len(n)), paste0("v", seq_len(p)))) - y <- X[, 1] * 1.2 + rnorm(n, sd = 0.6) - y <- matrix(y, ncol = 1L, dimnames = list(rownames(X), "ENSG_A")) - - # Fine-mapping hands over a shared partition + out-of-fold "susie" predictions. - part <- pecotmr:::.fmMakeSamplePartition(rownames(X), fold = 3L) - susiePred <- matrix(X[, 1] * 0.9, ncol = 1L, - dimnames = list(rownames(X), "ENSG_A")) - susiePerf <- matrix(c(0.5, 0.25, 0.24, 0.01, 0.4, 0.3), nrow = 1L, - dimnames = list("ENSG_A", - c("corr", "rsq", "adj_rsq", "pval", "RMSE", "MAE"))) - fmCv <- list(samplePartition = part, - prediction = list(susie_predicted = susiePred), - performance = list(susie_performance = susiePerf)) - - res <- suppressMessages(pecotmr:::.twasWeightsPipelineMatrix( - X = X, y = y, study = "study1", context = "brain", trait = "ENSG_A", - weightMethods = list(lasso_weights = list()), - cvFolds = 3, fineMappingCv = fmCv, ensemble = FALSE, verbose = 0)) - - # lasso was refit here; susie came from the handoff (not refit). - expect_true("lasso_predicted" %in% names(res$twasCvResult$prediction)) - expect_true("susie_predicted" %in% names(res$twasCvResult$prediction)) - # The CV used fine-mapping's partition verbatim. - expect_identical(res$twasCvResult$samplePartition, part) - # The handed-over susie predictions were aligned to the pipeline samples. - expect_equal(rownames(res$twasCvResult$prediction$susie_predicted), - rownames(X)) + P <- matrix(rnorm(20), 10, 2); y <- rnorm(10) + local_mocked_bindings(cv.glmnet = function(...) stop("boom"), .package = "glmnet") + expect_warning(z <- pecotmr:::.solveEnsembleGlmnet(P, y, 2L), "glmnet solver failed") + expect_equal(z, c(0.5, 0.5)) }) diff --git a/tests/testthat/test_vcfWriter.R b/tests/testthat/test_vcfWriter.R index ad531230..1147c3d9 100644 --- a/tests/testthat/test_vcfWriter.R +++ b/tests/testthat/test_vcfWriter.R @@ -292,3 +292,166 @@ test_that("writeSumstatsVcf(FineMappingResult): multi-row without split flags re expect_error(writeSumstatsVcf(fmr, out), "2 matching rows") }) + +# ============================================================================= +# GwasSumStats: multi-study selection, MAF/AF field, .gz rename +# ============================================================================= + +.make_two_study_gwas_sumstats <- function(n = 3, withMaf = FALSE) { + mkGr <- function() { + gr <- GenomicRanges::GRanges( + "chr1", + IRanges::IRanges(start = seq(100, by = 100, length.out = n), width = 1)) + mc <- S4Vectors::DataFrame( + SNP = paste0("rs", seq_len(n)), A1 = rep("A", n), A2 = rep("T", n), + Z = seq(1.5, by = -0.5, length.out = n), N = rep(1000L, n)) + if (withMaf) mc$MAF <- rep(0.2, n) + S4Vectors::mcols(gr) <- mc + gr + } + GwasSumStats( + study = c("studyA", "studyB"), entry = list(mkGr(), mkGr()), + genome = "hg38", ldSketch = make_test_genotype_handle()) +} + +test_that("writeSumstatsVcf(GwasSumStats): multi-study without `study` selector errors", { + skip_if_not_installed("VariantAnnotation") + ss2 <- .make_two_study_gwas_sumstats() + out <- tempfile(fileext = ".vcf") + expect_error(writeSumstatsVcf(ss2, out), "2 studies") +}) + +test_that("writeSumstatsVcf(GwasSumStats): `study` selector writes the chosen study", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + ss2 <- .make_two_study_gwas_sumstats() + out <- tempfile(fileext = ".vcf") + on.exit(unlink(out), add = TRUE) + res <- writeSumstatsVcf(ss2, out, study = "studyB") + expect_equal(res, out) + expect_true(file.exists(out)) +}) + +test_that("writeSumstatsVcf(GwasSumStats): emits the AF genotype field when MAF present", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + ss <- GwasSumStats( + study = "t", + entry = list(local({ + gr <- GenomicRanges::GRanges( + "chr1", IRanges::IRanges(start = c(100, 200, 300, 400), width = 1)) + S4Vectors::mcols(gr) <- S4Vectors::DataFrame( + SNP = paste0("rs", 1:4), A1 = rep("A", 4), A2 = rep("T", 4), + Z = c(1.5, 1.0, 0.5, 0.0), N = rep(1000L, 4), MAF = rep(0.2, 4)) + gr + })), + genome = "hg38", ldSketch = make_test_genotype_handle()) + out <- tempfile(fileext = ".vcf") + on.exit(unlink(out), add = TRUE) + writeSumstatsVcf(ss, out) + expect_true(file.exists(out)) + # The AF FORMAT field is declared in the header when MAF is present. + expect_true(any(grepl("ID=AF", readLines(out)))) +}) + +test_that("writeSumstatsVcf(GwasSumStats): .vcf.gz output is renamed from writeVcf's .bgz", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + ss <- make_test_gwas_sumstats(5) + out <- tempfile(fileext = ".vcf.gz") + on.exit(unlink(c(out, paste0(out, ".tbi")), force = TRUE), add = TRUE) + res <- writeSumstatsVcf(ss, out) + expect_equal(res, out) + # writeVcf always emits .bgz; the writer renames it to the requested .gz. + expect_true(file.exists(out)) + expect_false(file.exists(sub("\\.gz$", ".bgz", out))) +}) + +# ============================================================================= +# FineMappingResult: explicit selectors, no-match error, composite/empty tags +# ============================================================================= + +test_that("writeSumstatsVcf(FineMappingResult): explicit selectors pick a single row", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + fmr <- .make_multi_tuple_qtl_fmr() + out <- tempfile(fileext = ".vcf") + on.exit(unlink(out), add = TRUE) + res <- writeSumstatsVcf(fmr, out, study = "study1", context = "brain", + trait = "ENSG_A", method = "susie") + expect_equal(res, out) + expect_true(file.exists(out)) +}) + +test_that("writeSumstatsVcf(FineMappingResult): no matching rows errors", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + fmr <- .make_multi_tuple_qtl_fmr() + out <- tempfile(fileext = ".vcf") + expect_error(writeSumstatsVcf(fmr, out, study = "does_not_exist"), + "no rows match") +}) + +test_that("writeSumstatsVcf(FineMappingResult): splitByContext decorates composite .vcf.bgz paths", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + fmr <- .make_multi_tuple_qtl_fmr() + baseOut <- tempfile(fileext = ".vcf.bgz") + stem <- tools::file_path_sans_ext(tools::file_path_sans_ext(baseOut)) + on.exit(unlink(list.files(dirname(baseOut), pattern = basename(stem), + full.names = TRUE), force = TRUE), add = TRUE) + paths <- writeSumstatsVcf(fmr, baseOut, splitByContext = TRUE) + expect_length(paths, 2L) + # Composite extension is preserved while the context tag is injected before it. + expect_true(all(grepl("\\.vcf\\.bgz$", paths))) + expect_true(any(grepl("\\.brain\\.vcf\\.bgz$", paths))) + expect_true(any(grepl("\\.blood\\.vcf\\.bgz$", paths))) +}) + +test_that("writeSumstatsVcf(FineMappingResult): splitByContext on a context-less result keeps the path", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + # GwasFineMappingResult has no context/trait axes, so the split tag set is + # empty and the original output path is used unchanged. + fm <- make_test_finemapping_result(5) + out <- tempfile(fileext = ".vcf") + on.exit(unlink(out), add = TRUE) + paths <- writeSumstatsVcf(fm, out, splitByContext = TRUE) + expect_length(paths, 1L) + expect_equal(paths[[1L]], out) + expect_true(file.exists(out)) +}) + +test_that("writeSumstatsVcf(FineMappingResult): emits AF from the topLoci `af` column", { + skip_if_not_installed("VariantAnnotation") + skip_if_not_installed("Biostrings") + # The marginal view projects allele frequency to a column named `af`, so a + # topLoci carrying `af` must surface as the AF genotype field. + n <- 3 + tl <- data.frame( + variant_id = paste0("chr1:", c(100, 200, 300), ":T:A"), + chrom = rep("1", n), + pos = c(100L, 200L, 300L), + A1 = rep("A", n), + A2 = rep("T", n), + N = rep(1000, n), + af = c(0.1, 0.2, 0.3), + marginal_beta = c(0.3, -0.2, 0.1), + marginal_se = rep(0.05, n), + marginal_z = c(6.0, -4.0, 2.0), + marginal_p = c(1e-9, 6e-5, 0.045), + pip = c(0.9, 0.7, 0.5), + posterior_mean = rep(0.05, n), + posterior_sd = rep(0.02, n), + cs_95 = paste0("susie_", c(1L, 0L, 2L)), + stringsAsFactors = FALSE) + entry <- FineMappingEntry(variantIds = tl$variant_id, susieFit = list(), + topLoci = tl) + fm <- GwasFineMappingResult(study = "s", method = "susie", + entry = list(entry)) + out <- tempfile(fileext = ".vcf") + on.exit(unlink(out), add = TRUE) + writeSumstatsVcf(fm, out) + expect_true(file.exists(out)) + expect_true(any(grepl("ID=AF", readLines(out)))) +})