From 9e3eb54944d3a6316c95c1775f926ddaaeb05cd0 Mon Sep 17 00:00:00 2001
From: Daniel Nachun <daniel.nachun@gmail.com>
Date: Fri, 26 Jun 2026 23:18:54 -0700
Subject: [PATCH] mvsusie fixes

---
 .../mnm_methods/mnm_regression.ipynb          | 557 +++++-------------
 .../script/pecotmr_integration/fine_mapping.R | 121 +++-
 .../pecotmr_integration/region_manifest.R     | 161 +++++
 .../script/pecotmr_integration/twas_weights.R |  25 +
 4 files changed, 429 insertions(+), 435 deletions(-)
 create mode 100644 code/script/pecotmr_integration/region_manifest.R

diff --git a/code/SoS/mnm_analysis/mnm_methods/mnm_regression.ipynb b/code/SoS/mnm_analysis/mnm_methods/mnm_regression.ipynb
index 075cd8a8..00f7449b 100644
--- a/code/SoS/mnm_analysis/mnm_methods/mnm_regression.ipynb
+++ b/code/SoS/mnm_analysis/mnm_methods/mnm_regression.ipynb
@@ -313,7 +313,7 @@
    "source": [
     "### Multivariate analysis: mvSuSiE and mr.mash (`mnm`)\n",
     "\n",
-    "Fine-maps multiple molecular phenotypes jointly across a shared window using mvSuSiE with mr.mash priors. This step needs **multiple contexts** (e.g. the same genes measured under two or more conditions). The toy set ships a single context, so for this MWE we add a **second, synthetic context** (`example/colocboost_ctx2.bed.gz`, derived from context 1 with added noise \u2014 *demo data only, not a real measurement*) and a multi-context phenotype manifest. With that in place the `mnm` step now runs end-to-end on the toy data and produces `multicontext_bvsr.rds`, `multicontext_twas_weights.rds`, and `multicontext_data.rds` per gene."
+    "Fine-maps multiple molecular phenotypes jointly across a shared window using mvSuSiE with mr.mash priors. This step needs **multiple contexts** (e.g. the same genes measured under two or more conditions). The toy set ships a single context, so for this MWE we add a **second, synthetic context** (`example/colocboost_ctx2.bed.gz`, derived from context 1 with added noise — *demo data only, not a real measurement*) and a multi-context phenotype manifest. With that in place the `mnm` step now runs end-to-end on the toy data and produces `multicontext_bvsr.rds`, `multicontext_twas_weights.rds`, and `multicontext_data.rds` per gene."
    ]
   },
   {
@@ -795,79 +795,45 @@
    "outputs": [],
    "source": [
     "[mnm]\n",
-    "# Prior model file generated from mashr. \n",
-    "# Default will be used if it does not exist.\n",
-    "parameter: mixture_prior = path()\n",
-    "parameter: mixture_prior_cv = path()\n",
-    "parameter: prior_weights_min = 5e-4\n",
-    "parameter: prior_canonical_matrices = False\n",
-    "parameter: sample_partition = path() \n",
+    "# Multivariate cross-context fine-mapping with mvSuSiE over the pre-built\n",
+    "# QtlDataset (from qtl_dataset_construct), one gene per fan-out unit. Each gene's\n",
+    "# contexts are fit jointly (jointSpecification=\"context\"). The canonical mixture\n",
+    "# prior is used by default; pass --prior-twas-weights (a TwasWeights RDS from a\n",
+    "# preceding `twas_weights.R --methods mrmash` run) for the mr.mash data-driven\n",
+    "# reweighted prior. Replaces the legacy mnm.R + multivariate_analysis_pipeline.\n",
+    "parameter: cis_window = 1000000\n",
     "parameter: mvsusie_max_iter = 200\n",
-    "parameter: mrmash_max_iter = 5000\n",
-    "depends: sos_variable(\"regional_data\")\n",
-    "# Check if both 'data' and 'meta_info' are empty lists\n",
-    "stop_if(len(regional_data['data']) == 0, f'Either genotype or phenotype data are not available for region {\", \".join(region_name)}.')\n",
-    "\n",
-    "meta_info = regional_data['meta_info']\n",
-    "\n",
-    "input: regional_data[\"data\"], group_by = lambda x: group_by_region(x, regional_data[\"data\"]), group_with = \"meta_info\"\n",
-    "if skip_fine_mapping and skip_twas_weights:\n",
-    "    save_data = True\n",
-    "    output_files = [f'{cwd:a}/data/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multicontext_data.rds']\n",
-    "elif not skip_fine_mapping and skip_twas_weights:\n",
-    "    output_files = [f'{cwd:a}/multivariate_fine_mapping/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multicontext_bvsr.rds']\n",
-    "elif skip_fine_mapping and not skip_twas_weights:\n",
-    "    # Warning that fine-mapping is required for TWAS weights\n",
-    "    if not ld_reference_meta_file.is_dir():\n",
-    "        print(\"WARNING: Fine-mapping is required to generate TWAS weights. Setting skip_fine_mapping to False. \"\n",
-    "              \"Fine-mapping will be performed and saved on variants listed in %s.\" % ld_reference_meta_file)\n",
-    "    else:\n",
-    "        print(\"WARNING: Fine-mapping is required to generate TWAS weights. Setting skip_fine_mapping to False.\")\n",
-    "    skip_fine_mapping = False\n",
-    "    output_files = [f'{cwd:a}/multivariate_fine_mapping/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multicontext_bvsr.rds',\n",
-    "                    f'{cwd:a}/multivariate_twas_weights/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multicontext_twas_weights.rds']\n",
-    "else:    \n",
-    "    output_files = [f'{cwd:a}/multivariate_fine_mapping/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multicontext_bvsr.rds',\n",
-    "                    f'{cwd:a}/multivariate_twas_weights/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multicontext_twas_weights.rds']   \n",
-    "output: output_files\n",
-    "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'\n",
-    "bash: expand= \"${ }\", stderr = f\"{_output[0]:nn}.mnm.stderr\", stdout = f\"{_output[0]:nn}.mnm.stdout\", container = container, entrypoint = entrypoint\n",
-    "    Rscript ${modular_script_dir}/mnm_analysis/mnm_methods/mnm.R \\\n",
-    "        --genotype ${_input[0]:a} \\\n",
-    "        --phenotype \"${\",\".join([str(x.absolute()) for x in _input[1:len(_input)//2+1]])}\" \\\n",
-    "        --covariate \"${\",\".join([str(x.absolute()) for x in _input[len(_input)//2+1:]])}\" \\\n",
-    "        --region \"${_meta_info[0]}\" \\\n",
-    "        --window \"${_meta_info[1]}\" \\\n",
-    "        --region-name \"${_meta_info[2]}\" \\\n",
-    "        --extract-region-names \"${\"|\".join([x if isinstance(x,str) else \",\".join(x) for x in _meta_info[3]])}\" \\\n",
-    "        --conditions \"${\",\".join(_meta_info[4:])}\" \\\n",
-    "        --skip-analysis-pip-cutoff \"${\",\".join(skip_analysis_pip_cutoff)}\" \\\n",
-    "        --maf ${maf} \\\n",
-    "        --mac ${mac} \\\n",
-    "        --imiss ${imiss} \\\n",
-    "        ${\"--indel\" if indel else \"\"} \\\n",
-    "        ${\"--keep-samples \" + str(keep_samples) if keep_samples.is_file() else \"\"} \\\n",
-    "        ${\"--keep-variants \" + str(keep_variants) if not keep_variants.is_dir() else \"\"} \\\n",
-    "        ${\"--save-data\" if save_data else \"\"} \\\n",
+    "parameter: pip_cutoff_to_skip = 0.0\n",
+    "# Comma-separated context names to restrict to; empty = all contexts.\n",
+    "parameter: contexts = \"\"\n",
+    "# Optional TwasWeights RDS (preceding mr.mash run) supplying the data-driven prior.\n",
+    "parameter: prior_twas_weights = path()\n",
+    "parameter: prior_weights_min = 1e-10\n",
+    "# Optional JSON of extra mvsusie kwargs (overrides the max_iter default), e.g. '{\"mvsusie\":{\"max_iter\":500}}'.\n",
+    "parameter: mvsusie_method_args = \"\"\n",
+    "fail_if(len(region_name) == 0, \"mnm: pass --region-name <gene-id> [<gene-id> ...] to choose which gene(s) to analyze.\")\n",
+    "qtl_dataset = path(f\"{cwd:a}/qtl_dataset/{name}.qtl_dataset.rds\")\n",
+    "mvsusie_args = mvsusie_method_args if mvsusie_method_args else f'{{\"mvsusie\":{{\"max_iter\":{mvsusie_max_iter}}}}}'\n",
+    "input: qtl_dataset, for_each = \"region_name\"\n",
+    "output: f\"{cwd:a}/multivariate_fine_mapping/{name}.{_region_name}.multicontext_bvsr.rds\"\n",
+    "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f\"{step_name}_{_output:bn}\"\n",
+    "bash: expand = \"${ }\", stderr = f\"{_output:n}.stderr\", stdout = f\"{_output:n}.stdout\", container = container, entrypoint = entrypoint\n",
+    "    Rscript ${modular_script_dir}/pecotmr_integration/fine_mapping.R \\\n",
+    "        --qtl-dataset ${_input} \\\n",
+    "        --gene-id ${_region_name} \\\n",
+    "        --cis-window ${cis_window} \\\n",
+    "        --methods mvsusie \\\n",
+    "        --joint-specification context \\\n",
+    "        --coverage ${coverage[0]} \\\n",
+    "        ${(\"--secondary-coverage \" + \",\".join([str(x) for x in coverage[1:]])) if len(coverage) > 1 else \"\"} \\\n",
     "        --pip-cutoff ${pip_cutoff} \\\n",
-    "        --coverage \"${\",\".join([str(x) for x in coverage])}\" \\\n",
+    "        --pip-cutoff-to-skip ${pip_cutoff_to_skip} \\\n",
     "        --seed ${seed} \\\n",
-    "        --cwd ${cwd:a} \\\n",
-    "        ${\"--skip-fine-mapping\" if skip_fine_mapping else \"\"} \\\n",
-    "        ${\"--skip-twas-weights\" if skip_twas_weights else \"\"} \\\n",
-    "        --xvar-cutoff ${xvar_cutoff} \\\n",
-    "        --mvsusie-max-iter ${mvsusie_max_iter} \\\n",
-    "        --mrmash-max-iter ${mrmash_max_iter} \\\n",
-    "        --max-cv-variants ${max_cv_variants} \\\n",
-    "        --twas-cv-folds ${twas_cv_folds} \\\n",
-    "        --twas-cv-threads ${twas_cv_threads} \\\n",
-    "        ${\"--ld-reference-meta-file \" + str(ld_reference_meta_file) if not ld_reference_meta_file.is_dir() else \"\"} \\\n",
-    "        ${\"--mixture-prior \" + str(mixture_prior) if mixture_prior.is_file() else \"\"} \\\n",
-    "        ${\"--mixture-prior-cv \" + str(mixture_prior_cv) if mixture_prior_cv.is_file() else \"\"} \\\n",
-    "        --prior-weights-min ${prior_weights_min} \\\n",
-    "        ${\"--prior-canonical-matrices\" if prior_canonical_matrices else \"\"} \\\n",
-    "        ${\"--sample-partition \" + str(sample_partition) if sample_partition.is_file() else \"\"} \\\n",
-    "        --output-prefix ${_output[0]:nn}\n"
+    "        ${(\"--contexts \" + contexts) if contexts else \"\"} \\\n",
+    "        --method-args '${mvsusie_args}' \\\n",
+    "        ${(\"--twas-weights \" + str(prior_twas_weights)) if prior_twas_weights.is_file() else \"\"} \\\n",
+    "        --data-driven-prior-weights-cutoff ${prior_weights_min} \\\n",
+    "        --output ${_output}\n"
    ]
   },
   {
@@ -878,295 +844,59 @@
    },
    "outputs": [],
    "source": [
-    "[mnm_genes]\n",
-    "depends: sos_variable(\"regional_data\")\n",
-    "# Check if both 'data' and 'meta_info' are empty lists\n",
-    "stop_if(len(regional_data['data']) == 0, f'Either genotype or phenotype data are not available for region {\", \".join(region_name)}.')\n",
-    " \n",
-    "meta_info = regional_data['meta_info']\n",
-    "genes = meta_info[0][3]\n",
-    "if isinstance(genes, tuple):\n",
-    "    genes = genes[0] if isinstance(genes[0], tuple) else list(genes)\n",
-    "else:\n",
-    "    genes = [genes]\n",
-    "\n",
-    "if len(skip_analysis_pip_cutoff) == 0:\n",
-    "    skip_analysis_pip_cutoff = [0.0] * len(genes)\n",
-    "\n",
-    "if len(skip_analysis_pip_cutoff) == 1:\n",
-    "    skip_analysis_pip_cutoff = skip_analysis_pip_cutoff * len(genes)\n",
-    "\n",
-    "skip_analysis_pip_cutoff = [\"'{}'={}\".format(genes[i], skip_analysis_pip_cutoff[i].split('=')[1]) for i in range(len(genes))]\n",
-    "\n",
-    "parameter: pheno_id_map_file = path\n",
-    "parameter: fine_mapping_meta = path\n",
-    "parameter: data_driven_prior = False\n",
-    "# Parameters below are relevant when we use data driven prior\n",
-    "# which is an experimental feature\n",
-    "parameter: n_random = 10\n",
-    "parameter: n_null = 10\n",
-    "parameter: independent_variant_list = path()\n",
-    "parameter: prior_weights_min = 5e-4\n",
-    "parameter: prior_canonical_matrices = False\n",
-    "# This is relevant to cross-validation\n",
-    "parameter: sample_partition = path() \n",
+    "[mnm_genes_1]\n",
+    "# Resolve per-locus regions (region_id -> chr:start-end) into a manifest via\n",
+    "# region_manifest.R for the cross-gene (jointSpecification=\"trait\") mvSuSiE fit.\n",
+    "# Loci come from --customized-association-windows (or --region-list); restrict\n",
+    "# with --region-name. Falls back to per-gene cis windows from the manifest.\n",
+    "input: None\n",
+    "output: f\"{cwd:a}/multivariate_fine_mapping/{name}.mnm_genes_region_manifest.tsv\"\n",
+    "task: trunk_workers = 1, trunk_size = 1, walltime = walltime, mem = mem, cores = numThreads, tags = f\"{step_name}_{_output:bn}\"\n",
+    "bash: expand = \"${ }\", stderr = f\"{_output}.stderr\", stdout = f\"{_output}.stdout\", container = container, entrypoint = entrypoint\n",
+    "    Rscript ${modular_script_dir}/pecotmr_integration/region_manifest.R \\\n",
+    "        --pheno-manifest ${phenoFile[0]} \\\n",
+    "        --cis-window ${cis_window if cis_window > 0 else 1000000} \\\n",
+    "        ${(\"--customized-association-windows \" + str(customized_association_windows)) if customized_association_windows.is_file() else \"\"} \\\n",
+    "        ${(\"--region-name \" + \",\".join(region_name)) if len(region_name) > 0 else \"\"} \\\n",
+    "        ${(\"--region-list \" + str(region_list)) if region_list.is_file() else \"\"} \\\n",
+    "        --output ${_output}\n",
+    "\n",
+    "[mnm_genes_2]\n",
+    "# Cross-gene (multi-trait) mvSuSiE per locus: region mode +\n",
+    "# jointSpecification=\"trait\" joins the genes whose coordinates overlap the\n",
+    "# locus. Optional mr.mash data-driven prior via --prior-twas-weights (canonical\n",
+    "# otherwise). Replaces the legacy multigene combine + multigene_udr machinery.\n",
     "parameter: mvsusie_max_iter = 200\n",
-    "parameter: mrmash_max_iter = 5000\n",
-    "\n",
-    "if not data_driven_prior:\n",
-    "     prior_canonical_matrices = True\n",
-    "input: regional_data[\"data\"],group_by = lambda x: group_by_region(x, regional_data[\"data\"]), group_with = \"meta_info\"\n",
-    "\n",
-    "if skip_fine_mapping and skip_twas_weights:\n",
-    "    save_data = True\n",
-    "    output_files = [f'{cwd:a}/data/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multigene_data.rds']\n",
-    "elif not skip_fine_mapping and skip_twas_weights:\n",
-    "    output_files = [f'{cwd:a}/multivariate_fine_mapping/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multigene_bvsr.rds']\n",
-    "elif skip_fine_mapping and not skip_twas_weights:\n",
-    "    output_files = [f'{cwd:a}/multivariate_twas_weights/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multigene_twas_weights.rds']\n",
-    "else:    \n",
-    "    output_files = [f'{cwd:a}/multivariate_fine_mapping/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multigene_bvsr.rds',\n",
-    "    f'{cwd:a}/multivariate_twas_weights/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.multigene_twas_weights.rds']    \n",
-    "output: output_files\n",
-    "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}'\n",
-    "R: expand = '${ }', stdout = f\"{_output[0]:nn}.mnm_genes.stdout\", stderr = f\"{_output[0]:nn}.mnm_genes.stderr\", container = container, entrypoint = entrypoint\n",
-    "\n",
-    "    library(data.table)\n",
-    "    library(dplyr)\n",
-    "    library(pecotmr)\n",
-    "    combine_result_list <- function(univariate_finemapping_metadata, condition_name, conditions = NULL, extracted_region){\n",
-    "        regional_window_combined_susie_result <- list()\n",
-    "        regional_window_combined_sumstats_result <- list()\n",
-    "        extracted_univariate_finemapping_metadata <- univariate_finemapping_metadata%>%filter(region_id%in%extracted_region)\n",
-    "        for(i in 1:dim(extracted_univariate_finemapping_metadata)[1]){\n",
-    "           gene_id <- extracted_univariate_finemapping_metadata$region_id[i]\n",
-    "           susie_result <- readRDS(extracted_univariate_finemapping_metadata$combined_data[i])\n",
-    "           sumstats_result <- readRDS(extracted_univariate_finemapping_metadata$combined_data_sumstats[i]) \n",
-    "           if (!is.null(conditions)) {\n",
-    "              if (any(names(susie_result[[gene_id]]) %in% conditions)) {\n",
-    "                  extracted_conditions = names(susie_result[[gene_id]])[names(susie_result[[gene_id]]) %in% conditions]\n",
-    "                 for (j in 1:length(extracted_conditions)){\n",
-    "                     regional_window_combined_susie_result[[condition_name]][[extracted_conditions[j]]] <- susie_result[[gene_id]][[extracted_conditions[j]]]\n",
-    "                     regional_window_combined_sumstats_result[[condition_name]][[extracted_conditions[j]]] <- sumstats_result[[gene_id]][[extracted_conditions[j]]]\n",
-    "                 }\n",
-    "              }\n",
-    "           } else {\n",
-    "                    regional_window_combined_susie_result[[condition_name]][[gene_id]] <- susie_result[[gene_id]][[condition_name]]\n",
-    "                    regional_window_combined_sumstats_result[[condition_name]][[gene_id]] <- sumstats_result[[gene_id]][[condition_name]]\n",
-    "                  }\n",
-    "          }       \n",
-    "           return(list(regional_window_combined_susie_result = regional_window_combined_susie_result, regional_window_combined_sumstats_result = regional_window_combined_sumstats_result))\n",
-    "     }\n",
-    "\n",
-    "  \n",
-    "    extract_non_null_cs_list <- function(combined_list, condition_name, extracted_region) {\n",
-    "        # List to store results for genes passing the first step\n",
-    "        extracted_regional_window_combined_susie_result <- list()\n",
-    "        extracted_regional_window_combined_sumstats_result <- list()\n",
-    "\n",
-    "        # Iterate over each gene's susie and sumstats results\n",
-    "        filtered_region <- extracted_region[extracted_region%in%names(combined_list[[\"regional_window_combined_susie_result\"]][[condition_name]])]\n",
-    "        for (gene_id in filtered_region) {\n",
-    "            susie_result <- get_nested_element(combined_list,c(\"regional_window_combined_susie_result\",condition_name,gene_id))\n",
-    "\n",
-    "           # Check if there exits top_loci.\n",
-    "           if (!is.null(susie_result[[\"top_loci\"]])&&nrow(susie_result[[\"top_loci\"]])[1]!=0) {\n",
-    "              # If not all zeros, add this gene's susie and sumstats results to the lists\n",
-    "              extracted_regional_window_combined_susie_result[[condition_name]][[gene_id]] <- get_nested_element(combined_list,c(\"regional_window_combined_susie_result\",condition_name,gene_id))\n",
-    "              extracted_regional_window_combined_sumstats_result[[condition_name]][[gene_id]] <- get_nested_element(combined_list,c(\"regional_window_combined_sumstats_result\",condition_name,gene_id))\n",
-    "           }\n",
-    "         }\n",
-    "         return(list(extracted_regional_window_combined_susie_result = extracted_regional_window_combined_susie_result, extracted_regional_window_combined_sumstats_result = extracted_regional_window_combined_sumstats_result))\n",
-    "     }\n",
-    "  \n",
-    "    process_and_compare_variants <- function(combined_list, condition_name) {\n",
-    "       extracted_regional_window_combined_susie_result <- list()\n",
-    "       extracted_regional_window_combined_sumstats_result <- list()\n",
-    "       compare_variants <- function(variants1, variants2) {\n",
-    "            return(any(variants1 %in% variants2) | any(variants2 %in% variants1))\n",
-    "       }\n",
-    "       subset_top_loci_variants <- function(df) {\n",
-    "          find_non_zero_rows <- function(df) {\n",
-    "                non_zero_indices <- which(rowSums(df!= 0) > 0)\n",
-    "                return(non_zero_indices)\n",
-    "           }\n",
-    "           non_zero_indices <- find_non_zero_rows(df)\n",
-    "\n",
-    "           if (length(non_zero_indices) > 0) {\n",
-    "              non_zero_index_variants <- df[non_zero_indices, , drop = FALSE]%>%select(variant_id)%>%pull(variant_id)\n",
-    "              zero_index_filtered_variants <- df[-non_zero_indices, ,drop = FALSE]%>%filter(pip>=0.01)%>%select(variant_id)%>%pull(variant_id)\n",
-    "              top_loci_variants <- c(non_zero_index_variants, zero_index_filtered_variants)\n",
-    "           } else {\n",
-    "              zero_index_filtered_variants <- df%>%filter(pip>=0.01)%>%select(variant_id)%>%pull(variant_id)\n",
-    "              top_loci_variants <- zero_index_filtered_variants\n",
-    "           }\n",
-    "             return(top_loci_variants)\n",
-    "       }\n",
-    "  \n",
-    "       for (i in 1:length(combined_list[[\"extracted_regional_window_combined_susie_result\"]][[condition_name]])) {\n",
-    "           susie_result_1 <- combined_list[[\"extracted_regional_window_combined_susie_result\"]][[condition_name]][[i]]\n",
-    "           gene_id <- names(combined_list[[\"extracted_regional_window_combined_susie_result\"]][[condition_name]])[i]\n",
-    "    \n",
-    "           # Extract top_loci variants\n",
-    "           variants1 <- subset_top_loci_variants(susie_result_1[[\"top_loci\"]])\n",
-    "           if (length(variants1) > 0) {\n",
-    "              different_in_all_genes <- TRUE\n",
-    "    \n",
-    "           for (j in 1:length(combined_list[[\"extracted_regional_window_combined_susie_result\"]][[condition_name]])) {\n",
-    "               if (i != j) {\n",
-    "                  susie_result_2 <- combined_list[[\"extracted_regional_window_combined_susie_result\"]][[condition_name]][[j]]\n",
-    "                  variants2 <- subset_top_loci_variants(susie_result_2[[\"top_loci\"]])\n",
-    "        \n",
-    "                  # Compare variants\n",
-    "                  if (length(variants2) > 0) {\n",
-    "                    if (compare_variants(variants1, variants2)) {\n",
-    "                        # If variants are not totally different across all genes, set the flag to FALSE\n",
-    "                       different_in_all_genes <- FALSE\n",
-    "                       break\n",
-    "                     }\n",
-    "                   }\n",
-    "                 }\n",
-    "               }\n",
-    "    \n",
-    "            # After the loop, check the flag\n",
-    "            if (!different_in_all_genes) {\n",
-    "               extracted_regional_window_combined_susie_result[[condition_name]][[gene_id]] <- get_nested_element(combined_list, c(\"extracted_regional_window_combined_susie_result\", condition_name, gene_id))\n",
-    "               extracted_regional_window_combined_sumstats_result[[condition_name]][[gene_id]] <- get_nested_element(combined_list, c(\"extracted_regional_window_combined_sumstats_result\", condition_name, gene_id))\n",
-    "            }\n",
-    "          }\n",
-    "        }\n",
-    "      return(list(extracted_regional_window_combined_susie_result = extracted_regional_window_combined_susie_result, extracted_regional_window_combined_sumstats_result = extracted_regional_window_combined_sumstats_result))\n",
-    "    }\n",
-    "\n",
-    "    fine_mapping_metadata = fread(${fine_mapping_meta:r})\n",
-    "    phenotype_files = c(${\",\".join(['\"%s\"' % x.absolute() for x in _input[1:len(_input)//2+1]])})\n",
-    "    covariate_files = c(${\",\".join(['\"%s\"' % x.absolute() for x in _input[len(_input)//2+1:]])})\n",
-    "    condition_name = ${(\"'%s'\" % _meta_info[-1])}\n",
-    "    region_names = ${\",\".join([(\"c('\"+x+\"')\") if isinstance(x, str) else (\"c\"+ str(x)) for x in _meta_info[3]])}\n",
-    "    if (any(region_names %in% fine_mapping_metadata$region_id)) {\n",
-    "       filtered_region <- fine_mapping_metadata%>%filter(region_id%in%region_names)%>%select(region_id)%>%pull(region_id)\n",
-    "       filtered_event_names <- filtered_region\n",
-    "    } else {\n",
-    "       pheno_id_map <- fread(${pheno_id_map_file:r})\n",
-    "       gene_ids <- pheno_id_map%>%filter(ID%in%region_names)%>%select(gene)%>%pull(gene)%>%{.[!duplicated(.)]}\n",
-    "       filtered_region <- fine_mapping_metadata%>%filter(region_id%in%gene_ids)%>%select(region_id)%>%pull(region_id)\n",
-    "       filtered_event_names <- pheno_id_map%>%filter(ID%in%region_names&gene%in%filtered_region)%>%select(ID)%>%pull(ID)\n",
-    "     }\n",
-    "    keep_samples = NULL\n",
-    "    if (${\"TRUE\" if keep_samples.is_file() else \"FALSE\"}) {\n",
-    "      keep_samples = unlist(strsplit(readLines(${keep_samples:ar}), \"\\\\s+\"))\n",
-    "      message(paste(length(keep_samples), \"samples are selected to be loaded for analysis\"))\n",
-    "    }\n",
-    "                                     \n",
-    "    fdat = load_regional_multivariate_data(genotype = ${_input[0]:anr},\n",
-    "                                          phenotype = phenotype_files,\n",
-    "                                          covariate = covariate_files,\n",
-    "                                          region = ${(\"'%s'\" % _meta_info[1]) if int(_meta_info[1].split('-')[-1])>0 else 'NULL'},\n",
-    "                                          maf_cutoff = ${maf},       \n",
-    "                                          mac_cutoff = ${mac},\n",
-    "                                          imiss_cutoff = ${imiss},\n",
-    "                                          conditions = NULL,\n",
-    "                                          association_window = ${(\"'%s'\" % _meta_info[1]) if int(_meta_info[1].split('-')[-1])>0 else 'NULL'},\n",
-    "                                          extract_region_name = list(filtered_event_names),\n",
-    "                                          region_name_col = ${\"4\" if int(_meta_info[1].split('-')[-1])>0 else \"1\"},\n",
-    "                                          keep_indel = ${\"TRUE\" if indel else \"FALSE\"},\n",
-    "                                          keep_samples = keep_samples,\n",
-    "                                          keep_variants = ${'\"%s\"' % keep_variants if not keep_variants.is_dir() else \"NULL\"},\n",
-    "                                          phenotype_header = ${\"4\" if int(_meta_info[1].split('-')[-1])>0 else \"1\"}, # skip first 4 rows of transposed phenotype for chr, start, end and ID\n",
-    "                                          scale_residuals = FALSE)\n",
-    "    if (${\"TRUE\" if save_data else \"FALSE\"}) {\n",
-    "        saveRDS(fdat, \"${_output[0]:ann}.multigene_data.rds\", compress='xz')\n",
-    "    }\n",
-    "    if (${\"FALSE\" if skip_fine_mapping and skip_twas_weights else \"TRUE\"}) {\n",
-    "        # Extract and consolidate univariate results to determine which genes to focus on\n",
-    "        if (grepl(\"sQTL\", condition_name)) {conditions <- paste(condition_name, filtered_event_names, sep = \"_\")} else {conditions <- NULL}\n",
-    "        combined_regional_window_list <- combine_result_list(fine_mapping_metadata, condition_name, conditions, filtered_region)\n",
-    "        if (grepl(\"sQTL\",condition_name)) { filtered_region <- paste(condition_name, filtered_event_names, sep = \"_\") }\n",
-    "        extracted_non_null_combined_susie_list <- extract_non_null_cs_list(combined_list = combined_regional_window_list, condition_name, filtered_region)\n",
-    "        if (length(extracted_non_null_combined_susie_list[[\"extracted_regional_window_combined_susie_result\"]])!=0) {\n",
-    "           extracted_combined_susie_list <- process_and_compare_variants(combined_list = extracted_non_null_combined_susie_list, condition_name)\n",
-    "        } else {\n",
-    "           extracted_combined_susie_list <- NULL\n",
-    "        }\n",
-    "        if (!is.null(extracted_combined_susie_list)&length(extracted_combined_susie_list[[\"extracted_regional_window_combined_susie_result\"]])!=0) {\n",
-    "           if (grepl(\"sQTL\", condition_name)) {\n",
-    "              filtered_event_names <- sub(paste0(\"^\", condition_name, \"_\"), \"\", names(get_nested_element(extracted_combined_susie_list,c(\"extracted_regional_window_combined_susie_result\", condition_name))))\n",
-    "           }  else if (grepl(\"pQTL\", condition_name)) {\n",
-    "              filtered_event_names <- pheno_id_map %>%\n",
-    "                              filter(ID%in%region_names)%>%\n",
-    "                              filter(gene %in% names(get_nested_element(extracted_combined_susie_list, c(\"extracted_regional_window_combined_susie_result\", condition_name)))) %>%\n",
-    "                              select(ID) %>% pull(ID)\n",
-    "          } else {\n",
-    "            filtered_event_names <- names(get_nested_element(extracted_combined_susie_list, c(\"extracted_regional_window_combined_susie_result\", condition_name)))\n",
-    "          }\n",
-    "        } else {\n",
-    "          filtered_event_names <- NULL\n",
-    "        }\n",
-    "\n",
-    "        if (\"${_meta_info[2]}\" != \"${_meta_info[3]}\") {\n",
-    "          region_name = c(\"${_meta_info[2]}\", c(${\",\".join([(\"c('\"+x+\"')\") if isinstance(x, str) else (\"c\"+ str(x)) for x in _meta_info[3]])}))\n",
-    "        } else {\n",
-    "          region_name = \"${_meta_info[2]}\"\n",
-    "        }\n",
-    "\n",
-    "        region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), grange=parse_region(\"${_meta_info[1]}\"), region_name=region_name)\n",
-    "            \n",
-    "        dd_prior <- NULL\n",
-    "        if (${\"TRUE\" if data_driven_prior else \"FALSE\"}) {\n",
-    "            # FIXME: please complete this function call\n",
-    "            dd_prior <- multigene_udr(extracted_combined_susie_list, coverage = paste0(\"cs_coverage_\",${coverage[0]},sep=\"\"), independent_variant_list = ${independent_variant_list:r}, n_random = ${n_random}, n_null = ${n_null}, seed = ${seed})\n",
-    "        }\n",
-    "        if (length(filtered_event_names)>=2) {\n",
-    "           pip_cutoff_to_skip = list(${\",\".join(skip_analysis_pip_cutoff)})\n",
-    "           pip_cutoff_to_skip = unlist(pip_cutoff_to_skip[filtered_event_names])\n",
-    "           dd_prior_cv <- dd_prior\n",
-    "           set.seed(${seed})\n",
-    "           result <- multivariate_analysis_pipeline(\n",
-    "                            X=fdat$X,\n",
-    "                            Y=fdat$residual_Y[,filtered_event_names],\n",
-    "                            maf=fdat$maf,\n",
-    "                            X_variance = fdat$X_variance,\n",
-    "                            other_quantities = list(dropped_samples = fdat$dropped_samples),\n",
-    "                            # filters\n",
-    "                            imiss_cutoff = ${imiss}, \n",
-    "                            maf_cutoff = ${maf},\n",
-    "                            xvar_cutoff = ${xvar_cutoff},\n",
-    "                            ld_reference_meta_file = ${('\"%s\"' % ld_reference_meta_file) if not ld_reference_meta_file.is_dir() else \"NULL\"},\n",
-    "                            pip_cutoff_to_skip = pip_cutoff_to_skip,\n",
-    "                            # methods parameter configuration\n",
-    "                            max_L = -1,\n",
-    "                            data_driven_prior_matrices = dd_prior, \n",
-    "                            data_driven_prior_matrices_cv = dd_prior_cv, \n",
-    "                            data_driven_prior_weights_cutoff = ${prior_weights_min}, \n",
-    "                            canonical_prior_matrices = ${\"TRUE\" if prior_canonical_matrices else \"FALSE\"},\n",
-    "                            mvsusie_max_iter = ${mvsusie_max_iter}, \n",
-    "                            estimate_residual_variance = ${'TRUE' if estimate_residual_variance else 'FALSE'},\n",
-    "                            mrmash_max_iter = ${mrmash_max_iter},\n",
-    "                            # fine-mapping results summary\n",
-    "                            signal_cutoff = ${pip_cutoff},\n",
-    "                            coverage = c(${\",\".join([str(x) for x in coverage])}),\n",
-    "                            # TWAS weights and CV for TWAS weights\n",
-    "                            twas_weights = ${\"TRUE\" if not skip_twas_weights else \"FALSE\"},\n",
-    "                            sample_partition = ${\"NULL\" if sample_partition=='.' or sample_partition=='' else sample_partition},\n",
-    "                            max_cv_variants = ${max_cv_variants}, \n",
-    "                            cv_folds = ${twas_cv_folds},\n",
-    "                            cv_threads = ${twas_cv_threads}            \n",
-    "                            )\n",
-    "          result$region_info <- region_info\n",
-    "          if (!is.null(result$twas_weights_result)) {\n",
-    "              for (r in names(result$twas_weights_result)) {\n",
-    "                  result$twas_weights_result[[r]]$region_info <- region_info\n",
-    "              }\n",
-    "              saveRDS(list(\"${_meta_info[2]}\" = result$twas_weights_result), \"${_output[0]:ann}.multigene_twas_weights.rds\", compress='xz')\n",
-    "              result$twas_weights_result <- NULL\n",
-    "          }\n",
-    "          saveRDS(list(\"${_meta_info[2]}\" = result), \"${_output[0]:ann}.multigene_bvsr.rds\", compress='xz')        \n",
-    "        } else {\n",
-    "          for (f in c(${_output:ar,})) {\n",
-    "            saveRDS(NULL, f)\n",
-    "          }\n",
-    "        }\n",
-    "    }"
+    "parameter: pip_cutoff_to_skip = 0.0\n",
+    "parameter: contexts = \"\"\n",
+    "parameter: prior_twas_weights = path()\n",
+    "parameter: prior_weights_min = 1e-10\n",
+    "parameter: mvsusie_method_args = \"\"\n",
+    "import csv\n",
+    "manifest = path(f\"{cwd:a}/multivariate_fine_mapping/{name}.mnm_genes_region_manifest.tsv\")\n",
+    "jobs = list(csv.DictReader(open(manifest), delimiter='\\t'))\n",
+    "stop_if(len(jobs) == 0, \"mnm_genes: empty region manifest; check --region-name / --customized-association-windows.\")\n",
+    "qtl_dataset = path(f\"{cwd:a}/qtl_dataset/{name}.qtl_dataset.rds\")\n",
+    "mvsusie_args = mvsusie_method_args if mvsusie_method_args else f'{{\"mvsusie\":{{\"max_iter\":{mvsusie_max_iter}}}}}'\n",
+    "input: qtl_dataset, for_each = \"jobs\"\n",
+    "output: f\"{cwd:a}/multivariate_fine_mapping/{name}.{_jobs['region_id']}.multigene_bvsr.rds\"\n",
+    "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f\"{step_name}_{_output:bn}\"\n",
+    "bash: expand = \"${ }\", stderr = f\"{_output:n}.stderr\", stdout = f\"{_output:n}.stdout\", container = container, entrypoint = entrypoint\n",
+    "    Rscript ${modular_script_dir}/pecotmr_integration/fine_mapping.R \\\n",
+    "        --qtl-dataset ${_input} \\\n",
+    "        --region ${_jobs['ld_block']} \\\n",
+    "        --methods mvsusie \\\n",
+    "        --joint-specification trait \\\n",
+    "        --coverage ${coverage[0]} \\\n",
+    "        ${(\"--secondary-coverage \" + \",\".join([str(x) for x in coverage[1:]])) if len(coverage) > 1 else \"\"} \\\n",
+    "        --pip-cutoff ${pip_cutoff} \\\n",
+    "        --pip-cutoff-to-skip ${pip_cutoff_to_skip} \\\n",
+    "        --seed ${seed} \\\n",
+    "        ${(\"--contexts \" + contexts) if contexts else \"\"} \\\n",
+    "        --method-args '${mvsusie_args}' \\\n",
+    "        ${(\"--twas-weights \" + str(prior_twas_weights)) if prior_twas_weights.is_file() else \"\"} \\\n",
+    "        --data-driven-prior-weights-cutoff ${prior_weights_min} \\\n",
+    "        --output ${_output}\n"
    ]
   },
   {
@@ -1177,64 +907,63 @@
    },
    "outputs": [],
    "source": [
-    "[fsusie]\n",
-    "# prior can be either of [\"mixture_normal\", \"mixture_normal_per_scale\"]\n",
+    "[fsusie_1]\n",
+    "# Resolve per-locus regions into a manifest via region_manifest.R for the\n",
+    "# functional SuSiE fit (one locus = one fsusie unit; fsusieR::susiF jointly fits\n",
+    "# the multi-trait function within each context). Loci come from\n",
+    "# --customized-association-windows (or --region-list); restrict with --region-name.\n",
+    "input: None\n",
+    "output: f\"{cwd:a}/fsusie/{name}.fsusie_region_manifest.tsv\"\n",
+    "task: trunk_workers = 1, trunk_size = 1, walltime = walltime, mem = mem, cores = numThreads, tags = f\"{step_name}_{_output:bn}\"\n",
+    "bash: expand = \"${ }\", stderr = f\"{_output}.stderr\", stdout = f\"{_output}.stdout\", container = container, entrypoint = entrypoint\n",
+    "    Rscript ${modular_script_dir}/pecotmr_integration/region_manifest.R \\\n",
+    "        --pheno-manifest ${phenoFile[0]} \\\n",
+    "        --cis-window ${cis_window if cis_window > 0 else 1000000} \\\n",
+    "        ${(\"--customized-association-windows \" + str(customized_association_windows)) if customized_association_windows.is_file() else \"\"} \\\n",
+    "        ${(\"--region-name \" + \",\".join(region_name)) if len(region_name) > 0 else \"\"} \\\n",
+    "        ${(\"--region-list \" + str(region_list)) if region_list.is_file() else \"\"} \\\n",
+    "        --output ${_output}\n",
+    "\n",
+    "[fsusie_2]\n",
+    "# Functional SuSiE per locus (fsusieR::susiF) over the pre-built QtlDataset.\n",
+    "# fsusie-specific knobs ride on --method-args; --susie-top-pc>0 additionally\n",
+    "# fine-maps each context's top principal components with univariate SuSiE\n",
+    "# (usePCA; ports the legacy fsusie.R susie_on_top_pc). Replaces fsusie.R.\n",
     "parameter: prior = \"mixture_normal\"\n",
+    "parameter: max_scale = 10\n",
+    "parameter: min_purity = 0.5\n",
+    "parameter: post_processing = \"TI\"\n",
     "parameter: max_SNP_EM = 100\n",
-    "# Max scale is such that 2^max_scale being the number of phenotypes in the transformed space. Default to 2^10  = 1024. Don't change it unless you know what you are doing. Max_scale should be at least larger than 5.\n",
-    "parameter:  max_scale = 10\n",
-    "# Purity and coverage used to call cs\n",
-    "parameter:  min_purity = 0.5\n",
-    "# Epigenetics mark filter\n",
-    "parameter: epigenetics_mark_treshold = 16\n",
-    "# Run susie for top pc of the fsusie input\n",
-    "parameter: susie_top_pc = 10\n",
-    "# Run fsusie with this post-processing option. Available options are TI, HMM, and none. TI is recommended as default. HMM performs particularly well when analyzing data with, say, few sampling points (i.e., ncol(Y)<30) or when the data are particularly noisy (low signal-to-noise ratio).\n",
-    "parameter: post_processing =  \"TI\"\n",
-    "# Run fsusie with small sample correction \n",
     "parameter: small_sample_correction = False\n",
-    "\n",
-    "depends: sos_variable(\"regional_data\")\n",
-    "# Check if both 'data' and 'meta_info' are empty lists\n",
-    "stop_if(len(regional_data['data']) == 0, f'Either genotype or phenotype data are not available for region {\", \".join(region_name)}.')\n",
-    "meta_info = regional_data['meta_info']\n",
-    "input: regional_data[\"data\"], group_by = lambda x: group_by_region(x, regional_data[\"data\"]), group_with = \"meta_info\"\n",
-    "output: f'{cwd:a}/{step_name[:-2]}/{name}.{_meta_info[0].replace(\":\", \"_\").replace(\"-\", \"_\")}.fsusie_{prior}_{post_processing}_{\"_top_pc_weights\" if not skip_twas_weights else \"\"}.rds'\n",
-    "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output:bn}'\n",
-    "bash: expand= \"${ }\", stderr = f\"{_output:n}.stderr\", stdout = f\"{_output:n}.stdout\", container = container, entrypoint = entrypoint\n",
-    "    Rscript ${modular_script_dir}/mnm_analysis/mnm_methods/fsusie.R \\\n",
-    "        --genotype ${_input[0]:a} \\\n",
-    "        --phenotype \"${\",\".join([str(x.absolute()) for x in _input[1:len(_input)//2+1]])}\" \\\n",
-    "        --covariate \"${\",\".join([str(x.absolute()) for x in _input[len(_input)//2+1:]])}\" \\\n",
-    "        --region \"${_meta_info[0]}\" \\\n",
-    "        --window \"${_meta_info[1]}\" \\\n",
-    "        --region-name \"${_meta_info[2]}\" \\\n",
-    "        --conditions \"${\",\".join(_meta_info[4:])}\" \\\n",
-    "        --maf ${maf} \\\n",
-    "        --mac ${mac} \\\n",
-    "        --imiss ${imiss} \\\n",
-    "        ${\"--indel\" if indel else \"\"} \\\n",
-    "        ${\"--keep-samples \" + str(keep_samples) if keep_samples.is_file() else \"\"} \\\n",
-    "        ${\"--keep-variants \" + str(keep_variants) if not keep_variants.is_dir() else \"\"} \\\n",
-    "        --prior \"${prior}\" \\\n",
-    "        --max-snp-em ${max_SNP_EM} \\\n",
-    "        --max-scale ${max_scale} \\\n",
-    "        --min-purity ${min_purity} \\\n",
-    "        --epigenetics-mark-threshold ${epigenetics_mark_treshold} \\\n",
-    "        --susie-top-pc ${susie_top_pc} \\\n",
-    "        --post-processing \"${post_processing}\" \\\n",
-    "        ${\"--small-sample-correction\" if small_sample_correction else \"\"} \\\n",
+    "# >0 -> also fine-map the top `susie_top_pc` PCs per context with univariate SuSiE.\n",
+    "parameter: susie_top_pc = 0\n",
+    "parameter: pip_cutoff_to_skip = 0.0\n",
+    "parameter: contexts = \"\"\n",
+    "# Optional JSON overriding the assembled fsusie kwargs, e.g. '{\"fsusie\":{\"prior\":\"mixture_normal_per_scale\"}}'.\n",
+    "parameter: fsusie_method_args = \"\"\n",
+    "import csv, json\n",
+    "manifest = path(f\"{cwd:a}/fsusie/{name}.fsusie_region_manifest.tsv\")\n",
+    "jobs = list(csv.DictReader(open(manifest), delimiter='\\t'))\n",
+    "stop_if(len(jobs) == 0, \"fsusie: empty region manifest; check --region-name / --customized-association-windows.\")\n",
+    "qtl_dataset = path(f\"{cwd:a}/qtl_dataset/{name}.qtl_dataset.rds\")\n",
+    "fsusie_args = fsusie_method_args if fsusie_method_args else json.dumps({\"fsusie\": {\"prior\": prior, \"max_scale\": max_scale, \"min_purity\": min_purity, \"post_processing\": post_processing, \"max_SNP_EM\": max_SNP_EM, \"cor_small\": bool(small_sample_correction)}})\n",
+    "input: qtl_dataset, for_each = \"jobs\"\n",
+    "output: f\"{cwd:a}/fsusie/{name}.{_jobs['region_id']}.fsusie.rds\"\n",
+    "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f\"{step_name}_{_output:bn}\"\n",
+    "bash: expand = \"${ }\", stderr = f\"{_output:n}.stderr\", stdout = f\"{_output:n}.stdout\", container = container, entrypoint = entrypoint\n",
+    "    Rscript ${modular_script_dir}/pecotmr_integration/fine_mapping.R \\\n",
+    "        --qtl-dataset ${_input} \\\n",
+    "        --region ${_jobs['ld_block']} \\\n",
+    "        --methods fsusie \\\n",
+    "        --coverage ${coverage[0]} \\\n",
+    "        ${(\"--secondary-coverage \" + \",\".join([str(x) for x in coverage[1:]])) if len(coverage) > 1 else \"\"} \\\n",
     "        --pip-cutoff ${pip_cutoff} \\\n",
-    "        --coverage \"${\",\".join([str(x) for x in coverage])}\" \\\n",
-    "        --L-greedy ${init_L} \\\n",
-    "        --L ${L} \\\n",
-    "        ${\"--skip-twas-weights\" if skip_twas_weights else \"\"} \\\n",
-    "        --max-cv-variants ${max_cv_variants} \\\n",
-    "        --twas-cv-folds ${twas_cv_folds} \\\n",
-    "        --twas-cv-threads ${twas_cv_threads} \\\n",
-    "        ${\"--save-data\" if save_data else \"\"} \\\n",
-    "        --output-prefix ${_output:n} \\\n",
-    "        --cwd ${cwd:a}\n"
+    "        --pip-cutoff-to-skip ${pip_cutoff_to_skip} \\\n",
+    "        --seed ${seed} \\\n",
+    "        ${(\"--contexts \" + contexts) if contexts else \"\"} \\\n",
+    "        --method-args '${fsusie_args}' \\\n",
+    "        ${(\"--use-pca --n-pcs \" + str(susie_top_pc)) if susie_top_pc > 0 else \"\"} \\\n",
+    "        --output ${_output}\n"
    ]
   },
   {
@@ -1330,4 +1059,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/code/script/pecotmr_integration/fine_mapping.R b/code/script/pecotmr_integration/fine_mapping.R
index 5baf570f..1a592fa0 100644
--- a/code/script/pecotmr_integration/fine_mapping.R
+++ b/code/script/pecotmr_integration/fine_mapping.R
@@ -29,6 +29,26 @@
 #                   forwarded to the underlying fitter (susieR::susie,
 #                   susieR::susie_rss, mvsusieR::mvsusie, fsusieR::susiF,
 #                   etc.). Example: '{"susie":{"L":1,"refine":false}}'.
+#                   For mvsusie / fsusie this is how you pass their kwargs,
+#                   e.g. '{"mvsusie":{"max_iter":200}}' or
+#                   '{"fsusie":{"prior":"mixture_normal","max_scale":10}}'.
+#
+# Multivariate / joint fits (QTL mode only; mvsusie / fsusie tokens):
+#   --joint-specification  Comma-separated joint-fit axes (subset of
+#                   context,trait,study). "context" joins a gene's contexts
+#                   (cross-context mvSuSiE); "trait" joins the genes in the
+#                   region (cross-trait / multi-gene). Empty (default) leaves
+#                   the implicit per-(context,trait) branch.
+#   --twas-weights  Optional TwasWeights RDS from a preceding mr.mash run
+#                   (twas_weights.R --methods mrmash) supplying the mvSuSiE
+#                   data-driven reweighted prior. Omit for the canonical
+#                   prior. Requires a pecotmr build with twasWeights support.
+#   --data-driven-prior-weights-cutoff  Prior-component weight floor for the
+#                   reweighted mvSuSiE prior (fineMappingPipeline
+#                   dataDrivenPriorWeightsCutoff). Only used with --twas-weights.
+#   --use-pca / --n-pcs  Fine-map each multi-trait context's top principal
+#                   components with univariate SuSiE (ports fsusie.R
+#                   susie_on_top_pc). Requires a pecotmr build with usePCA.
 #   --output        Output RDS path.
 
 suppressPackageStartupMessages({
@@ -91,6 +111,24 @@ parser <- add_argument(parser, "--seed",
 parser <- add_argument(parser, "--pip-cutoff-to-skip",
                        help = "Single-effect (SER) pre-screen cutoff (fineMappingPipeline pipCutoffToSkip), QTL mode; 0 = off, <0 = adaptive 3/nVariants",
                        type = "numeric", default = 0)
+# --- Multivariate / joint-fit knobs (QTL mode; mvsusie / fsusie). Each is
+# opt-in and omitted from the pipeline call when left at its default, so this
+# wrapper also runs against a pecotmr build that predates twasWeights / usePCA.
+parser <- add_argument(parser, "--joint-specification",
+                       help = "Comma-separated joint-fit axes (context,trait,study) for mvsusie/fsusie; empty = implicit per-(context,trait) branch",
+                       type = "character", default = "")
+parser <- add_argument(parser, "--twas-weights",
+                       help = "Optional TwasWeights RDS (preceding mr.mash run) supplying the mvSuSiE data-driven prior; omit for the canonical prior",
+                       type = "character", default = "")
+parser <- add_argument(parser, "--data-driven-prior-weights-cutoff",
+                       help = "Prior-component weight floor for the reweighted mvSuSiE prior (only used with --twas-weights)",
+                       type = "numeric", default = 1e-10)
+parser <- add_argument(parser, "--use-pca",
+                       help = "Fine-map each multi-trait context's top PCs with univariate SuSiE (usePCA; ports fsusie.R susie_on_top_pc)",
+                       flag = TRUE)
+parser <- add_argument(parser, "--n-pcs",
+                       help = "Cap on top principal components per context when --use-pca (nPCs)",
+                       type = "integer", default = 10L)
 # --- Per-analysis overrides of the QtlDataset's construct-time filters. Each is
 # opt-in (unset leaves the dataset's stored value); applied to the loaded object
 # before the pipeline runs (QTL mode only).
@@ -190,11 +228,18 @@ contexts_arg <- if (nzchar(argv$contexts) && argv$contexts != ".")
 
 methods <- trimws(strsplit(argv$methods, ",", fixed = TRUE)[[1L]])
 
-# Build the final `methods` argument for fineMappingPipeline as the named-list
-# form {token: kwargs}. The pipeline's SuSiE fit defaults (L = --L,
-# L_greedy = --L-greedy) seed every SuSiE-family token; any matching
-# --method-args entry overrides them per key (explicit > default). Keys in
-# --method-args must be among the --methods tokens (no silent typos).
+# Joint-fit axes for mvsusie/fsusie: "" -> NULL (implicit per-tuple branch).
+joint_spec <- if (nzchar(argv$joint_specification) && argv$joint_specification != ".")
+  trimws(strsplit(argv$joint_specification, ",", fixed = TRUE)[[1L]]) else NULL
+# Optional mr.mash data-driven prior (TwasWeights from a preceding mrmash run).
+twas_weights_obj <- if (nzchar(argv$twas_weights) && argv$twas_weights != "." &&
+                        file.exists(argv$twas_weights)) readRDS(argv$twas_weights) else NULL
+
+# Build the `methods` argument: the bare tokens, or the named-list {token: kwargs}
+# form when --method-args is given. SuSiE L / L_greedy defaults + susie-family
+# routing live in pecotmr (.fmNormalizeMethods); the wrapper only forwards the
+# --L / --L-greedy values as top-level args (see cs_args). --method-args keys
+# must be among the --methods tokens (no silent typos).
 if (!is.null(parsed_method_args)) {
   unknown <- setdiff(names(parsed_method_args), methods)
   if (length(unknown) > 0L)
@@ -202,19 +247,18 @@ if (!is.null(parsed_method_args)) {
          paste(unknown, collapse = ", "),
          "'; --methods = '", paste(methods, collapse = ", "), "').")
 }
-.susie_family <- c("susie", "susieInf", "susieAsh")
-.fit_defaults <- list(L = argv$L, L_greedy = argv[["L_greedy"]])
-methods_arg <- setNames(lapply(methods, function(tk) {
-  base <- if (tk %in% .susie_family) .fit_defaults else list()
-  user <- if (!is.null(parsed_method_args) && tk %in% names(parsed_method_args))
-            parsed_method_args[[tk]] else list()
-  modifyList(base, user)
-}), methods)
+methods_arg <- if (is.null(parsed_method_args)) methods else
+  setNames(lapply(methods, function(tk)
+    if (tk %in% names(parsed_method_args)) parsed_method_args[[tk]] else list()),
+    methods)
 
 # Credible-set / coverage knobs common to both modes. medianAbsCorr is added
 # only when set (NULL -> omitted), so the call also works against a pecotmr
-# that predates that argument.
+# that predates that argument. L / L_greedy are forwarded top-level; pecotmr
+# seeds the susie-family tokens and applies explicit --method-args overrides.
 cs_args <- list(methods           = methods_arg,
+                L                 = argv$L,
+                Lgreedy           = argv[["L_greedy"]],
                 coverage          = argv$coverage,
                 secondaryCoverage = secondary_cov,
                 signalCutoff      = argv$pip_cutoff,
@@ -238,20 +282,55 @@ if (has_gwas) {
   qd <- apply_qd_filter_overrides(qd, argv)
   # `contexts` and `pipCutoffToSkip` are QTL-mode only, so they ride on qtl_args
   # rather than the mode-shared cs_args.
+  # cisWindow is a gene-mode (traitId) knob — it expands each trait's own
+  # coordinates. In region mode the literal variant window comes from --region,
+  # and passing both is an error, so cisWindow rides on the traitId branch only.
   qtl_args <- c(list(qd), cs_args,
-                list(cisWindow       = argv$cis_window,
-                     contexts        = contexts_arg,
+                list(contexts        = contexts_arg,
                      pipCutoffToSkip = argv$pip_cutoff_to_skip))
-  res <- if (has_region) {
-    do.call(fineMappingPipeline, c(qtl_args, list(region = parse_region(argv$region))))
-  } else {
-    do.call(fineMappingPipeline, c(qtl_args, list(traitId = argv$gene_id)))
+  # Opt-in multivariate / joint knobs. Each is added only when the user set it,
+  # so the call stays compatible with a pecotmr that predates the argument
+  # (mirrors the medianAbsCorr handling above).
+  if (!is.null(joint_spec)) qtl_args$jointSpecification <- joint_spec
+  if (!is.null(twas_weights_obj)) {
+    qtl_args$twasWeights <- twas_weights_obj
+    qtl_args$dataDrivenPriorWeightsCutoff <- argv$data_driven_prior_weights_cutoff
+  }
+  if (isTRUE(argv$use_pca)) {
+    qtl_args$usePCA <- TRUE
+    qtl_args$nPCs   <- argv$n_pcs
   }
   label <- if (has_region) paste0("region '", argv$region, "'")
            else paste0("gene '", argv$gene_id, "'")
+  run_fm <- function() if (has_region) {
+    do.call(fineMappingPipeline, c(qtl_args, list(region = parse_region(argv$region))))
+  } else {
+    do.call(fineMappingPipeline, c(qtl_args,
+                                   list(traitId = argv$gene_id,
+                                        cisWindow = argv$cis_window)))
+  }
+  # A multivariate fan-out unit can legitimately have nothing to jointly fit:
+  # a locus with < 2 genes overlapping it (cross-trait / mnm_genes / fsusie), a
+  # single-context trait (cross-context), etc. fineMappingPipeline signals this
+  # in two ways that only the multivariate paths (explicit jointSpecification AND
+  # auto-detected mvsusie / fsusie) raise -- the univariate SuSiE family never
+  # does: the early multivariate guard ("mvsusie/fsusie requires multi-trait
+  # [or multi-context] input ...") and the late dispatch check ("no joint fits
+  # produced"). For a per-locus batch each is the honest NULL result, not a
+  # failure, so write NULL rather than aborting the substep; every other error
+  # propagates unchanged.
+  res <- tryCatch(run_fm(), error = function(e) {
+    if (grepl("no joint fits produced|requires multi-trait|requires multi-context",
+              conditionMessage(e))) {
+      message("fine_mapping.R: no multivariate fit for ", label,
+              " (scope yields < 2 joinable conditions); writing NULL.")
+      NULL
+    } else stop(e)
+  })
 }
 
 dir.create(dirname(argv$output), showWarnings = FALSE, recursive = TRUE)
 saveRDS(res, argv$output)
+n_rows <- if (is.null(res)) 0L else nrow(res)
 cat(sprintf("Wrote fineMapping result for %s (%d row(s)) to %s\n",
-            label, nrow(res), argv$output))
+            label, n_rows, argv$output))
diff --git a/code/script/pecotmr_integration/region_manifest.R b/code/script/pecotmr_integration/region_manifest.R
new file mode 100644
index 00000000..1b13b833
--- /dev/null
+++ b/code/script/pecotmr_integration/region_manifest.R
@@ -0,0 +1,161 @@
+#!/usr/bin/env Rscript
+# region_manifest.R
+#
+# Resolve mnm_regression's [mnm_genes] / [fsusie] per-locus analysis units into
+# a manifest TSV, so the downstream step can fan out region-mode
+# fineMappingPipeline over its rows with inline csv.DictReader (no notebook-local
+# Python parsing). One row per region (locus).
+#
+# A "region" here is a genomic window that a cross-trait (multi-gene) mvSuSiE or
+# a functional fSuSiE fit runs over; fineMappingPipeline auto-selects the traits
+# whose coordinates overlap the window. Region source priority:
+#   1. --customized-association-windows BED (#chr start end region_id): each row
+#      is a locus. Filtered to --region-name when given.
+#   2. --region-list: a file whose LAST column lists region IDs; resolved against
+#      the customized windows (else treated as gene IDs, see 3).
+#   3. A selected ID present only in --pheno-manifest (a gene): that gene's
+#      coordinates +/- --cis-window (single-gene fallback).
+#
+# Inputs:
+#   --pheno-manifest   QtlDataset phenotype manifest TSV (ID, #chr, start, end);
+#                      supplies gene coordinates for the single-gene fallback and
+#                      the default region set when no windows/list are given.
+#   --customized-association-windows  Optional BED (#chr start end region_id).
+#   --region-name      Optional comma-separated region/gene IDs to restrict to.
+#   --region-list      Optional file whose LAST column lists region IDs.
+#   --cis-window       bp added on each side of a gene for the single-gene
+#                      fallback window. Default 1000000.
+#   --output           Output manifest TSV path.
+#
+# Output TSV columns (one row per region): region_id, chr, start, end, ld_block.
+
+suppressPackageStartupMessages({
+  library(argparser)
+})
+
+parser <- arg_parser("Resolve per-locus regions into a manifest TSV for mnm_genes / fsusie")
+parser <- add_argument(parser, "--pheno-manifest",
+                       help = "QtlDataset phenotype manifest TSV (ID, #chr, start, end)",
+                       type = "character")
+parser <- add_argument(parser, "--customized-association-windows",
+                       help = "Optional BED (#chr start end region_id) of loci",
+                       type = "character", default = "")
+parser <- add_argument(parser, "--region-name",
+                       help = "Optional comma-separated region/gene IDs to restrict to",
+                       type = "character", default = "")
+parser <- add_argument(parser, "--region-list",
+                       help = "Optional file whose last column lists region IDs",
+                       type = "character", default = "")
+parser <- add_argument(parser, "--cis-window",
+                       help = "bp added on each side of a gene for the single-gene fallback window",
+                       type = "numeric", default = 1000000)
+parser <- add_argument(parser, "--output",
+                       help = "Output manifest TSV path", type = "character")
+argv <- parse_args(parser)
+
+norm_chr <- function(x) {
+  x <- as.character(x)
+  ifelse(is.na(x) | !nzchar(x), x,
+         ifelse(startsWith(x, "chr"), x, paste0("chr", x)))
+}
+
+# ----- Gene coordinates from the phenotype manifest -------------------------
+if (is.null(argv$pheno_manifest) || !nzchar(argv$pheno_manifest) ||
+    !file.exists(argv$pheno_manifest))
+  stop("--pheno-manifest is required and must exist: ", argv$pheno_manifest)
+pm <- read.table(argv$pheno_manifest, header = TRUE, sep = "\t",
+                 stringsAsFactors = FALSE, check.names = FALSE, comment.char = "")
+id_col    <- intersect(c("ID", "gene_id", "phenotype_id"), names(pm))[1L]
+chr_col   <- intersect(c("#chr", "chrom", "chr"),          names(pm))[1L]
+start_col <- intersect(c("start", "Start"),                names(pm))[1L]
+end_col   <- intersect(c("end", "End"),                    names(pm))[1L]
+if (any(is.na(c(id_col, chr_col, start_col, end_col))))
+  stop("--pheno-manifest needs ID, #chr/chrom, start, end columns (got: ",
+       paste(names(pm), collapse = ", "), ").")
+genes  <- as.character(pm[[id_col]])
+gchr   <- norm_chr(pm[[chr_col]])
+gstart <- suppressWarnings(as.integer(pm[[start_col]]))
+gend   <- suppressWarnings(as.integer(pm[[end_col]]))
+uniqGenes <- unique(genes)
+geneCoord <- lapply(uniqGenes, function(g) {
+  idx <- which(genes == g)
+  list(chr = gchr[idx][[1L]], start = min(gstart[idx], na.rm = TRUE),
+       end = max(gend[idx], na.rm = TRUE))
+})
+names(geneCoord) <- uniqGenes
+
+# ----- Customized association windows (loci) --------------------------------
+windows <- list()
+windowOrder <- character(0)
+if (nzchar(argv$customized_association_windows) &&
+    argv$customized_association_windows != "." &&
+    file.exists(argv$customized_association_windows)) {
+  caw <- read.table(argv$customized_association_windows, header = FALSE,
+                    sep = "", stringsAsFactors = FALSE, comment.char = "#")
+  for (i in seq_len(nrow(caw))) {
+    rid <- as.character(caw[[4L]][[i]])
+    windows[[rid]] <- list(chr = norm_chr(caw[[1L]][[i]]),
+                           start = as.integer(caw[[2L]][[i]]),
+                           end   = as.integer(caw[[3L]][[i]]))
+    windowOrder <- c(windowOrder, rid)
+  }
+}
+
+# ----- Resolve the selected region IDs --------------------------------------
+selected <- character(0)
+rn <- trimws(strsplit(argv$region_name, ",")[[1L]])
+rn <- rn[nzchar(rn)]
+if (length(rn) > 0L) {
+  selected <- rn
+} else if (nzchar(argv$region_list) && argv$region_list != "." &&
+           file.exists(argv$region_list)) {
+  for (line in readLines(argv$region_list)) {
+    line <- trimws(line)
+    if (!nzchar(line) || startsWith(line, "#")) next
+    parts <- strsplit(line, "\\s+")[[1L]]
+    selected <- c(selected, parts[[length(parts)]])
+  }
+  selected <- unique(selected)
+} else if (length(windowOrder) > 0L) {
+  selected <- windowOrder            # all loci from the windows file
+} else {
+  selected <- uniqGenes              # degenerate: per-gene (single-trait) fallback
+  message("NOTE: no --customized-association-windows / --region-list / ",
+          "--region-name given; emitting one region per gene from the ",
+          "phenotype manifest (single-trait regions are degenerate for ",
+          "cross-trait / fSuSiE fits).")
+}
+
+cis <- as.integer(argv$cis_window)
+resolve <- function(id) {
+  if (!is.null(windows[[id]])) {
+    w <- windows[[id]]
+    return(list(chr = w$chr, start = max(w$start, 0L), end = w$end))
+  }
+  if (!is.null(geneCoord[[id]])) {
+    c0 <- geneCoord[[id]]
+    return(list(chr = c0$chr, start = max(c0$start - cis, 0L), end = c0$end + cis))
+  }
+  stop("Region/gene '", id, "' not found in --customized-association-windows ",
+       "or --pheno-manifest; cannot determine its window.")
+}
+
+rows <- list()
+for (id in selected) {
+  r <- resolve(id)
+  rows[[length(rows) + 1L]] <- data.frame(
+    region_id = gsub("[^A-Za-z0-9_]", "_", id),
+    chr       = r$chr, start = r$start, end = r$end,
+    ld_block  = sprintf("%s:%d-%d", r$chr, r$start, r$end),
+    stringsAsFactors = FALSE)
+}
+if (length(rows) == 0L)
+  stop("No regions selected; check --region-name / --region-list / ",
+       "--customized-association-windows against the phenotype manifest.")
+out <- do.call(rbind, rows)
+
+dir.create(dirname(argv$output), showWarnings = FALSE, recursive = TRUE)
+write.table(out, file = argv$output, sep = "\t", quote = FALSE,
+            row.names = FALSE, na = "")
+cat(sprintf("Wrote region manifest with %d region(s) to %s\n",
+            nrow(out), argv$output))
diff --git a/code/script/pecotmr_integration/twas_weights.R b/code/script/pecotmr_integration/twas_weights.R
index fc6117da..13718c5b 100644
--- a/code/script/pecotmr_integration/twas_weights.R
+++ b/code/script/pecotmr_integration/twas_weights.R
@@ -30,6 +30,14 @@
 #                           values are kwargs lists forwarded to the
 #                           underlying per-method learner. Example:
 #                           '{"lasso":{"nfolds":10}}'.
+#   --mixture-prior         Optional RDS of mr.mash data-driven prior
+#                           covariance matrices (dataDrivenPriorMatrices)
+#                           for the 'mrmash' method. Omitted -> canonical
+#                           prior (canonicalPriorMatrices = TRUE). mr.mash
+#                           requires one of these; this wrapper supplies a
+#                           default so 'mrmash' runs out of the box, and is
+#                           also the producer of the mvSuSiE data-driven
+#                           prior consumed by fine_mapping.R --twas-weights.
 #   --output                Output RDS path (one TwasWeights)
 
 suppressPackageStartupMessages({
@@ -65,6 +73,13 @@ parser <- add_argument(parser, "--fine-mapping-result",
 parser <- add_argument(parser, "--method-args",
                        help = "JSON object {token: {kwarg: value, ...}, ...} for twasWeightsPipeline()",
                        type = "character", default = "")
+parser <- add_argument(parser, "--mixture-prior",
+                       help = paste0("Path to an RDS of mr.mash data-driven prior covariance matrices ",
+                                     "(dataDrivenPriorMatrices) for the 'mrmash' method. When omitted, ",
+                                     "mr.mash uses the canonical prior set (canonicalPriorMatrices = TRUE). ",
+                                     "Only consulted when 'mrmash' is in --methods and the prior is not ",
+                                     "already set via --method-args."),
+                       type = "character", default = "")
 parser <- add_argument(parser, "--min-twas-maf",
                        help = "Minimum MAF for the variants used to learn TWAS weights (twasWeightsPipeline minTwasMaf), applied on top of the dataset's construct-time mafCutoff",
                        type = "numeric", default = 0.01)
@@ -179,6 +194,15 @@ methods_arg <- if (is.null(parsed_method_args)) {
   }), methods)
 }
 
+# mr.mash data-driven prior override. The wrapper stays thin: it hands the prior
+# covariance set to pecotmr as a MashPrior object; twasWeightsPipeline unpacks it
+# (.unpackMashPrior) and injects it into the mr.mash fit. pecotmr's mrmash method
+# default supplies the canonical set, so the common case needs no prior here.
+mash_prior <- if (nzchar(argv$mixture_prior) && argv$mixture_prior != "." &&
+                  file.exists(argv$mixture_prior)) {
+  MashPrior(fullFit = readRDS(argv$mixture_prior))
+} else NULL
+
 parse_region <- function(s) {
   m <- regmatches(s, regexec("^([^:]+):([0-9]+)-([0-9]+)$", s))[[1L]]
   if (length(m) != 4L)
@@ -209,6 +233,7 @@ fmr <- if (nzchar(fmr_path) && fmr_path != "." && file.exists(fmr_path)) {
 # the weights (on top of the QtlDataset's construct-time cutoffs); the cv* knobs
 # control the cross-validated predictive-performance refits.
 tw_args <- list(methods           = methods_arg,
+                mashPrior         = mash_prior,
                 cisWindow         = argv$cis_window,
                 contexts          = contexts_arg,
                 minTwasMaf        = argv$min_twas_maf,