From 1c231ff97a75635e8ec06c04253e34f0eac7db79 Mon Sep 17 00:00:00 2001
From: Yining97 <liyining1997@gmail.com>
Date: Thu, 18 Jun 2026 23:58:10 -0400
Subject: [PATCH] RSS: drive univariate_rss.R through the current pecotmr
 camelCase API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Port the modular RSS script off the removed snake API (rss_analysis_pipeline,
load_study_LD, ...) to rssAnalysisPipeline/loadStudyLd with camelCase arguments so
the protocol runs against current pecotmr. Move R_finite/R_mismatch into
finemappingOpts, drop the unsupported init_L/max_L/l_step, map qc_method to
zMismatchQc, expose --min-abs-corr / --median-abs-corr (threaded through the SoS
notebook), coerce unset n-samples to 0, and fix two latent optparse parse_options
bugs. Verified on chr21 AD_Bellenguez: runs end-to-end, matches a manual
rssAnalysisPipeline call (ΔPIP=0), reproduces the step-1 result, and both purity
thresholds reach susie_get_cs.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../mnm_methods/rss_analysis.ipynb            |  10 +-
 .../pecotmr_integration/univariate_rss.R      | 125 ++++++++++--------
 2 files changed, 80 insertions(+), 55 deletions(-)

diff --git a/code/SoS/mnm_analysis/mnm_methods/rss_analysis.ipynb b/code/SoS/mnm_analysis/mnm_methods/rss_analysis.ipynb
index 6cb46eb19..967b8ffdb 100644
--- a/code/SoS/mnm_analysis/mnm_methods/rss_analysis.ipynb
+++ b/code/SoS/mnm_analysis/mnm_methods/rss_analysis.ipynb
@@ -50,7 +50,7 @@
     "kernel": "SoS"
    },
    "source": [
-    "## Output\n\nAll results are written under `--cwd`, one set of files per analyzed LD-block region:\n\n- `{qc_method}_{imputed}.chr{N}_{start}_{end}.univariate_susie_rss.rds` \u2014 the SuSiE-RSS fine-mapping result for that block (e.g. `SLALOM_RAISS_imputed.chr22_49355984_50799822.univariate_susie_rss.rds`), holding the `single_effect_regression`, `noqc`, `qc_only`, `qc_impute` and (where applicable) `conditional_regression_*` objects plus the QC'd / imputed `sumstats_*` tables; an extra diagnostic table is added when `--diagnostics` is set.\n- `{prefix}.sumstats.tsv.gz` \u2014 a companion harmonized summary-statistics table accompanying each RDS for convenience.\n- `{step_name}/*.pip_plot.png`, plus per-region `.stderr` / `.stdout` logs \u2014 the posterior-inclusion-probability plot and run logs.\n"
+    "## Output\n\nAll results are written under `--cwd`, one set of files per analyzed LD-block region:\n\n- `{qc_method}_{imputed}.chr{N}_{start}_{end}.univariate_susie_rss.rds` — the SuSiE-RSS fine-mapping result for that block (e.g. `SLALOM_RAISS_imputed.chr22_49355984_50799822.univariate_susie_rss.rds`), holding the `single_effect_regression`, `noqc`, `qc_only`, `qc_impute` and (where applicable) `conditional_regression_*` objects plus the QC'd / imputed `sumstats_*` tables; an extra diagnostic table is added when `--diagnostics` is set.\n- `{prefix}.sumstats.tsv.gz` — a companion harmonized summary-statistics table accompanying each RDS for convenience.\n- `{step_name}/*.pip_plot.png`, plus per-region `.stderr` / `.stdout` logs — the posterior-inclusion-probability plot and run logs.\n"
    ]
   },
   {
@@ -385,6 +385,10 @@
     "parameter: pip_cutoff = 0.025\n",
     "parameter: skip_analysis_pip_cutoff = 0.025\n",
     "parameter: minimum_ld = 5\n",
+    "# credible-set purity thresholds, routed to susie_get_cs by pecotmr\n",
+    "parameter: min_abs_corr = 0.8\n",
+    "# median_abs_corr is OR-logic purity; unset = off (needs susieR >= 0.16.5)\n",
+    "parameter: median_abs_corr = \"\"\n",
     "parameter: coverage = [0.95, 0.7, 0.5]\n",
     "# Whether to impute the sumstat for all the snp in LD but not in sumstat.\n",
     "parameter: impute = False \n",
@@ -434,6 +438,8 @@
     "        --lamb ${lamb} \\\n",
     "        --r2-threshold ${R2_threshold} \\\n",
     "        --minimum-ld ${minimum_ld} \\\n",
+    "        --min-abs-corr ${min_abs_corr} \\\n",
+    "        ${\"--median-abs-corr \" + str(median_abs_corr) if str(median_abs_corr) != \"\" else \"\"} \\\n",
     "        ${\"--qc-method \" + qc_method if qc_method else \"\"} \\\n",
     "        ${\"--comment-string \" + comment_string if comment_string != \"NULL\" else \"\"} \\\n",
     "        ${\"--diagnostics\" if diagnostics else \"\"} \\\n",
@@ -521,4 +527,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
\ No newline at end of file
+}
diff --git a/code/script/pecotmr_integration/univariate_rss.R b/code/script/pecotmr_integration/univariate_rss.R
index b8593b11e..2bc728fda 100644
--- a/code/script/pecotmr_integration/univariate_rss.R
+++ b/code/script/pecotmr_integration/univariate_rss.R
@@ -59,7 +59,10 @@ option_specs <- list(
   option_spec("--lamb", type = "double", default = 0.01),
   option_spec("--r2-threshold", type = "double", default = 0.6),
   option_spec("--minimum-ld", type = "integer", default = 5),
+  option_spec("--min-abs-corr", type = "double", default = 0.8),
+  option_spec("--median-abs-corr", type = "character", default = ""),
   option_spec("--qc-method", type = "character", default = ""),
+  option_spec("--allele-flip-kriging", action = "store_true", default = FALSE),
   option_spec("--comment-string", type = "character", default = "NULL"),
   option_spec("--diagnostics", action = "store_true", default = FALSE),
   option_spec("--output-prefix", type = "character"),
@@ -128,9 +131,15 @@ parse_long_options <- function(specs) {
 parse_options <- function(specs) {
   if (requireNamespace("optparse", quietly = TRUE)) {
     option_list <- lapply(specs, function(spec) {
+      # store_true flags are logical in optparse; passing type = "character"
+      # (the option_spec default) makes optparse's validObject reject them.
+      opt_type <- if (identical(spec$action, "store_true")) "logical" else spec$type
       optparse::make_option(
-        spec$flag, type = spec$type, default = spec$default,
-        action = spec$action, help = spec$help
+        spec$flag, type = opt_type, default = spec$default,
+        action = spec$action,
+        # optparse requires a character help slot; option_specs without help
+        # default to NULL, which fails validObject.
+        help = if (is.null(spec$help)) "" else spec$help
       )
     })
     return(optparse::parse_args(optparse::OptionParser(option_list = option_list)))
@@ -168,6 +177,11 @@ parse_numeric_vec <- function(s, n) {
 n_samples <- parse_numeric_vec(opt[["n-samples"]], length(studies))
 n_cases <- parse_numeric_vec(opt[["n-cases"]], length(studies))
 n_controls <- parse_numeric_vec(opt[["n-controls"]], length(studies))
+# Unspecified counts (".") parse to NA; rssAnalysisPipeline expects 0 as the
+# "read from sumstats" sentinel and rejects NA, so coerce NA -> 0.
+n_samples[is.na(n_samples)] <- 0
+n_cases[is.na(n_cases)] <- 0
+n_controls[is.na(n_controls)] <- 0
 
 parse_character_vec <- function(s, n, default) {
   if (nchar(s) == 0) {
@@ -250,8 +264,8 @@ load_ld_for_region <- function(ld_meta_path, region_coord) {
       filter(`#chr` == region_chr, start == region_start, end == region_end) %>%
       pull(path) %>%
       stringr::str_replace(".bed", "")
-    LD_data <- pecotmr:::filter_X(
-      load_genotype_region(geno_path, region_coord),
+    LD_data <- pecotmr:::filterX(
+      loadGenotypeRegion(geno_path, region_coord),
       missing_rate_thresh = opt$imiss,
       maf_thresh = opt$maf
     ) %>%
@@ -264,11 +278,7 @@ load_ld_for_region <- function(ld_meta_path, region_coord) {
                 combined_LD_variants = rownames(LD_data)))
   }
 
-  if (exists("load_study_LD", mode = "function")) {
-    load_study_LD(ld_meta_path, region_coord)
-  } else {
-    load_LD_matrix(ld_meta_path, region_coord)
-  }
+  loadStudyLd(ld_meta_path, region_coord)
 }
 
 res <- setNames(replicate(length(studies), list(), simplify = FALSE), studies)
@@ -291,63 +301,72 @@ for (r in seq_along(res)) {
     }
     LD_data <- ld_cache[[ld_key]]
 
+    # Translate the fine-mapping method to the camelCase pecotmr value.
+    fm_method_map <- c(single_effect = "singleEffect", susie_rss = "susieRss",
+                       bayesian_conditional_regression = "bayesianConditionalRegression")
+    fm_method_raw <- opt[["finemapping-method"]]
+    finemapping_method <- if (nchar(fm_method_raw) == 0) {
+      NULL
+    } else if (fm_method_raw %in% names(fm_method_map)) {
+      unname(fm_method_map[[fm_method_raw]])
+    } else {
+      fm_method_raw
+    }
+
+    # Fine-mapping options are a free-form passthrough to susie_rss; the purity
+    # keys (min_abs_corr / median_abs_corr) are isolated by susieRssPipeline and
+    # routed to susie_get_cs. init_L / max_L / l_step are dropped (no susie_rss
+    # equivalent; L / L_greedy already carry the values). R_finite / R_mismatch
+    # ride here (no longer pipeline arguments).
+    finemapping_opts <- list(
+      L = finemapping_max_l,
+      L_greedy = finemapping_l,
+      coverage = coverage,
+      signal_cutoff = opt[["pip-cutoff"]],
+      min_abs_corr = opt[["min-abs-corr"]],
+      R_finite = opt[["ld-reference-size"]]
+    )
+    if (opt[["ld-mismatch-correction"]]) finemapping_opts$R_mismatch <- "eb"
+    if (nchar(opt[["median-abs-corr"]]) > 0) {
+      finemapping_opts$median_abs_corr <- as.numeric(opt[["median-abs-corr"]])
+    }
+
     rss_args <- list(
-      sumstat_path = sumstat_paths[r],
-      column_file_path = col_paths[r],
-      LD_data = LD_data,
-      extract_region_name = extract_region_name,
-      region_name_col = region_name_col,
-      n_sample = n_samples[r],
-      n_case = n_cases[r],
-      n_control = n_controls[r],
-      skip_region = if (length(skip_region_vec) == 0) NULL else skip_region_vec,
-      qc_method = if (nchar(opt[["qc-method"]]) == 0) NULL else opt[["qc-method"]],
+      sumstatPath = sumstat_paths[r],
+      columnFilePath = col_paths[r],
+      ldData = LD_data,
+      region = region_coord,
+      extractRegionName = extract_region_name,
+      regionNameCol = region_name_col,
+      nSample = n_samples[r],
+      nCase = n_cases[r],
+      nControl = n_controls[r],
+      skipRegion = if (length(skip_region_vec) == 0) NULL else skip_region_vec,
+      zMismatchQc = if (nchar(opt[["qc-method"]]) == 0) "none" else opt[["qc-method"]],
+      alleleFlipKriging = opt[["allele-flip-kriging"]],
       impute = opt$impute,
-      impute_opts = list(
+      imputeOpts = list(
         rcond = opt$rcond,
-        R2_threshold = opt[["r2-threshold"]],
-        minimum_ld = opt[["minimum-ld"]],
+        r2Threshold = opt[["r2-threshold"]],
+        minimumLd = opt[["minimum-ld"]],
         lamb = opt$lamb
       ),
-      finemapping_method = if (nchar(opt[["finemapping-method"]]) == 0) {
-        NULL
-      } else {
-        opt[["finemapping-method"]]
-      },
-      finemapping_opts = list(
-        L = finemapping_max_l,
-        L_greedy = finemapping_l,
-        init_L = finemapping_l,
-        max_L = finemapping_max_l,
-        l_step = finemapping_l_step,
-        coverage = coverage,
-        signal_cutoff = opt[["pip-cutoff"]],
-        min_abs_corr = 0.8
-      ),
-      pip_cutoff_to_skip = opt[["skip-analysis-pip-cutoff"]],
-      comment_string = if (opt[["comment-string"]] == "NULL") {
+      finemappingMethod = finemapping_method,
+      finemappingOpts = finemapping_opts,
+      pipCutoffToSkip = opt[["skip-analysis-pip-cutoff"]],
+      commentString = if (opt[["comment-string"]] == "NULL") {
         NULL
       } else {
         opt[["comment-string"]]
-      }
+      },
+      diagnostics = opt$diagnostics
     )
-    rss_formals <- names(formals(rss_analysis_pipeline))
-    accepts_extra_args <- "..." %in% rss_formals
-    if (accepts_extra_args || "diagnostics" %in% rss_formals) {
-      rss_args$diagnostics <- opt$diagnostics
-    }
-    if (accepts_extra_args || "R_finite" %in% rss_formals) {
-      rss_args$R_finite <- opt[["ld-reference-size"]]
-    }
-    if (accepts_extra_args || "R_mismatch" %in% rss_formals) {
-      rss_args$R_mismatch <- if (opt[["ld-mismatch-correction"]]) "eb" else NULL
-    }
-    res[[r]] <- do.call(rss_analysis_pipeline, rss_args)
+    res[[r]] <- do.call(rssAnalysisPipeline, rss_args)
     region_label <- paste0(
       opt[["output-prefix"]], ".",
       if (is.null(extract_region_name)) "" else paste0(extract_region_name, "."),
       studies[r], ".sumstats.tsv.gz")
-    fwrite(res[[r]]$rss_data_analyzed, file = region_label,
+    fwrite(res[[r]]$rssDataAnalyzed, file = region_label,
            sep = "\t", col.names = TRUE, row.names = FALSE,
            quote = FALSE, compress = "gzip")
     if (is.null(res[[r]][[1]])) {