diff --git a/code/SoS/mnm_analysis/mnm_methods/mnm_regression.ipynb b/code/SoS/mnm_analysis/mnm_methods/mnm_regression.ipynb index fa3bc3d9..29394d6e 100644 --- a/code/SoS/mnm_analysis/mnm_methods/mnm_regression.ipynb +++ b/code/SoS/mnm_analysis/mnm_methods/mnm_regression.ipynb @@ -641,189 +641,39 @@ }, "outputs": [], "source": [ - "[get_analysis_regions: shared = \"regional_data\"]\n", - "# input is genoFile, phenoFile, covFile and optionally region_list. If region_list presents then we only analyze what's contained in the list.\n", - "# regional_data should be a dictionary like:\n", - "#{'data': [(\"genotype_1.bed\", \"phenotype_1.bed.gz\", \"covariate_1.gz\"), (\"genotype_2.bed\", \"phenotype_1.bed.gz\", \"phenotype_2.bed.gz\", \"covariate_1.gz\", \"covariate_2.gz\") ... ],\n", - "# 'meta_info': [(\"chr12:752578-752579\",\"chr12:752577-752580\", \"gene_1\", \"trait_1\"), (\"chr13:852580-852581\",\"chr13:852579-852580\", \"gene_2\", \"trait_1\", \"trait_2\") ... ]}\n", - "import numpy as np\n", - "\n", - "def preload_id_map(id_map_files):\n", - " id_maps = {}\n", - " for id_map_file in id_map_files:\n", - " if id_map_file is not None and os.path.isfile(id_map_file):\n", - " df = pd.read_csv(id_map_file, sep=r\"\\s+\", header=None, comment='#', names=['old_ID', 'new_ID'])\n", - " id_maps[id_map_file] = df.set_index('old_ID')['new_ID'].to_dict()\n", - " return id_maps\n", - "\n", - "def load_and_apply_id_map(pheno_path, id_map_path, preloaded_id_maps):\n", - " pheno_df = pd.read_csv(pheno_path, sep=r\"\\s+\", header=0)\n", - " pheno_df['Original_ID'] = pheno_df['ID']\n", - " if id_map_path in preloaded_id_maps:\n", - " id_map = preloaded_id_maps[id_map_path]\n", - " pheno_df['ID'] = pheno_df['ID'].map(id_map).fillna(pheno_df['ID'])\n", - " return pheno_df\n", - "\n", - "def filter_by_region_ids(data, region_ids):\n", - " if region_ids is not None and len(region_ids) > 0:\n", - " data = data[data['ID'].isin(region_ids)]\n", - " return data\n", - "\n", - "def custom_join(series):\n", - " # Initialize an empty list to hold the processed items\n", - " result = []\n", - " for item in series:\n", - " if ',' in item:\n", - " # If the item contains commas, split by comma and convert to tuple\n", - " result.append(tuple(item.split(',')))\n", - " else:\n", - " # If the item does not contain commas, add it directly\n", - " result.append(item)\n", - " # Convert the list of items to a tuple and return\n", - " return tuple(result)\n", - "\n", - "def aggregate_phenotype_data(accumulated_pheno_df):\n", - " if not accumulated_pheno_df.empty:\n", - " accumulated_pheno_df = accumulated_pheno_df.groupby(['#chr','ID','cond','path','cov_path'], as_index=False).agg({\n", - " '#chr': lambda x: np.unique(x).astype(str)[0],\n", - " 'ID': lambda x: np.unique(x).astype(str)[0],\n", - " 'Original_ID': ','.join,\n", - " 'start': 'min',\n", - " 'end': 'max'\n", - " }).groupby(['#chr','ID'], as_index=False).agg({\n", - " 'cond': ','.join,\n", - " 'path': ','.join,\n", - " 'Original_ID': custom_join,\n", - " 'cov_path': ','.join,\n", - " 'start': 'min',\n", - " 'end': 'max'\n", - " })\n", - " return accumulated_pheno_df\n", - "\n", - "def process_cis_files(pheno_files, cov_files, phenotype_names, pheno_id_files, region_ids, preloaded_id_maps):\n", - " '''\n", - " Example output:\n", - " #chr start end ID Original_ID path cov_path cond\n", - " chr12 752578 752579 ENSG00000060237 Q9H4A3,P62873 protocol_example.protein_1.bed.gz,protocol_example.protein_2.bed.gz covar_1.gz,covar_2.gz trait_A,trait_B\n", - " '''\n", - " accumulated_pheno_df = pd.DataFrame()\n", - " pheno_id_files = [None] * len(pheno_files) if len(pheno_id_files) == 0 else pheno_id_files\n", - "\n", - " for pheno_path, cov_path, phenotype_name, id_map_path in zip(pheno_files, cov_files, phenotype_names, pheno_id_files):\n", - " if not os.path.isfile(cov_path):\n", - " raise FileNotFoundError(f\"No valid path found for file: {cov_path}\")\n", - " pheno_df = load_and_apply_id_map(pheno_path, id_map_path, preloaded_id_maps)\n", - " pheno_df = filter_by_region_ids(pheno_df, region_ids)\n", - " \n", - " if not pheno_df.empty:\n", - " pheno_df.iloc[:, 4] = adapt_file_path_all(pheno_df, pheno_df.columns[4], f\"{pheno_path:a}\")\n", - " pheno_df = pheno_df.assign(cov_path=str(cov_path), cond=phenotype_name) \n", - " accumulated_pheno_df = pd.concat([accumulated_pheno_df, pheno_df], ignore_index=True)\n", - "\n", - " accumulated_pheno_df = aggregate_phenotype_data(accumulated_pheno_df)\n", - " return accumulated_pheno_df\n", - "\n", - "def process_trans_files(pheno_files, cov_files, phenotype_names, pheno_id_files, region_ids, customized_association_windows):\n", - " '''\n", - " Example output:\n", - " #chr start end ID Original_ID path cov_path cond\n", - " chr21 0 0 chr21_18133254_19330300 carnitine,benzoate,hippurate metabolon_1.bed.gz,metabolon_2.bed.gz covar_1.gz,covar_2.gz trait_A,trait_B\n", - " '''\n", - " \n", - " if not os.path.isfile(customized_association_windows):\n", - " raise ValueError(\"Customized association analysis window must be specified for trans analysis.\")\n", - " accumulated_pheno_df = pd.DataFrame()\n", - " pheno_id_files = [None] * len(pheno_files) if len(pheno_id_files) == 0 else pheno_id_files\n", - " genotype_windows = pd.read_csv(customized_association_windows, comment=\"#\", header=None, names=[\"#chr\",\"start\",\"end\",\"ID\"], sep=\"\\t\")\n", - " genotype_windows = filter_by_region_ids(genotype_windows, region_ids)\n", - " if genotype_windows.empty:\n", - " return accumulated_pheno_df\n", - " \n", - " for pheno_path, cov_path, phenotype_name, id_map_path in zip(pheno_files, cov_files, phenotype_names, pheno_id_files):\n", - " if not os.path.isfile(cov_path):\n", - " raise FileNotFoundError(f\"No valid path found for file: {cov_path}\")\n", - " pheno_df = pd.read_csv(pheno_path, sep=r\"\\s+\", header=0, names=['Original_ID', 'path'])\n", - " if not pheno_df.empty:\n", - " pheno_df.iloc[:, -1] = adapt_file_path_all(pheno_df, pheno_df.columns[-1], f\"{pheno_path:a}\")\n", - " pheno_df = pheno_df.assign(cov_path=str(cov_path), cond=phenotype_name)\n", - " # Here we combine genotype_windows which contains \"#chr\" and \"ID\" to pheno_df by creating a cartesian product\n", - " pheno_df = pd.merge(genotype_windows.assign(key=1), pheno_df.assign(key=1), on='key').drop('key', axis=1)\n", - " # Only set start and end columns to zero if they don't exist\n", - " if 'start' not in pheno_df.columns:\n", - " pheno_df['start'] = 0\n", - " if 'end' not in pheno_df.columns:\n", - " pheno_df['end'] = 0\n", - " if id_map_path is not None:\n", - " # Filter pheno_df by specific association-window and phenotype pairs\n", - " association_analysis_pair = pd.read_csv(id_map_path, sep=r\"\\s+\", header=None, comment='#', names=['ID', 'Original_ID'])\n", - " pheno_df = pd.merge(pheno_df, association_analysis_pair, on=['ID', 'Original_ID'])\n", - " accumulated_pheno_df = pd.concat([accumulated_pheno_df, pheno_df], ignore_index=True)\n", - "\n", - " accumulated_pheno_df = aggregate_phenotype_data(accumulated_pheno_df)\n", - " return accumulated_pheno_df\n", - "\n", - "# Load genotype meta data\n", - "if f\"{genoFile:x}\" == \".bed\":\n", - " geno_meta_data = pd.DataFrame([(\"chr\"+str(x), f\"{genoFile:a}\") for x in range(1,23)] + [(\"chrX\", f\"{genoFile:a}\")], columns=['#chr', 'geno_path'])\n", - "else:\n", - " geno_meta_data = pd.read_csv(f\"{genoFile:a}\", sep =r\"\\s+\", header=0)\n", - " geno_meta_data.iloc[:, 1] = adapt_file_path_all(geno_meta_data, geno_meta_data.columns[1], f\"{genoFile:a}\")\n", - " geno_meta_data.columns = ['#chr', 'geno_path']\n", - " geno_meta_data['#chr'] = geno_meta_data['#chr'].apply(lambda x: str(x) if str(x).startswith('chr') else f'chr{x}')\n", - "\n", - "# Checking the DataFrame\n", - "valid_chr_values = [f'chr{x}' for x in range(1, 23)] + ['chrX']\n", - "if not all(value in valid_chr_values for value in geno_meta_data['#chr']):\n", - " raise ValueError(\"Invalid chromosome values found. Allowed values are chr1 to chr22 and chrX.\")\n", - "\n", - "region_ids = []\n", - "# If region_list is provided, read the file and extract IDs\n", - "if region_list.is_file():\n", - " region_list_df = pd.read_csv(region_list, sep=r\"\\s+\", header=None, comment = \"#\")\n", - " region_ids = region_list_df.iloc[:, -1].unique() # Extracting the last column for IDs\n", - "\n", - "# If region_name is provided, include those IDs as well\n", - "# --region-name A B C will result in a list of [\"A\", \"B\", \"C\"] here\n", - "if len(region_name) > 0:\n", - " region_ids = list(set(region_ids).union(set(region_name)))\n", - "\n", - "if trans_analysis:\n", - " meta_data = process_trans_files(phenoFile, covFile, phenotype_names, phenoIDFile, region_ids, customized_association_windows)\n", - "else:\n", - " meta_data = process_cis_files(phenoFile, covFile, phenotype_names, phenoIDFile, region_ids, preload_id_map(phenoIDFile))\n", - " \n", - "if not meta_data.empty:\n", - " meta_data = meta_data.merge(geno_meta_data, on='#chr', how='inner')\n", - " # Adjust association-window\n", - " if os.path.isfile(customized_association_windows):\n", - " print(f\"Loading customized association analysis window from {customized_association_windows}\")\n", - " association_windows_list = pd.read_csv(customized_association_windows, comment=\"#\", header=None, names=[\"#chr\",\"start\",\"end\",\"ID\"], sep=\"\\t\")\n", - " meta_data = pd.merge(meta_data, association_windows_list, on=['#chr', 'ID'], how='left', suffixes=('', '_association'))\n", - " mismatches = meta_data[meta_data['start_association'].isna()]\n", - " if not mismatches.empty:\n", - " print(\"First 5 mismatches:\")\n", - " print(mismatches[['ID']].head())\n", - " raise ValueError(f\"{len(mismatches)} regions to analyze cannot be found in ``{customized_association_windows}``. Please check your ``{customized_association_windows}`` database to make sure it contains all association-window definitions. \")\n", - " else:\n", - " if cis_window < 0 :\n", - " raise ValueError(\"Please either input valid path to association-window file via ``--customized-association-windows``, or set ``--cis-window`` to a non-negative integer.\")\n", - " if cis_window == 0:\n", - " print(\"Warning: only variants within the range of start and end of molecular phenotype will be considered since cis_window is set to zero and no customized association window file was found. Please make sure this is by design.\")\n", - " meta_data['start_association'] = meta_data['start'].apply(lambda x: max(x - cis_window, 0))\n", - " meta_data['end_association'] = meta_data['end'] + cis_window\n", - "\n", - " # Example meta_data:\n", - " # #chr start end start_association end_association ID Original_ID path cov_path cond coordinate geno_path\n", - " # 0 chr12 752578 752579 652578 852579 ENSG00000060237 Q9H4A3,P62873 protocol_example.protein_1.bed.gz,protocol_example.protein_2.bed.gz covar_1.gz,covar_2.gz trait_A,trait_B chr12:752578-752579 protocol_example.genotype.chr21_22.bed \n", - " # Create the final dictionary\n", - " regional_data = {\n", - " 'data': [(row['geno_path'], *row['path'].split(','), *row['cov_path'].split(',')) for _, row in meta_data.iterrows()],\n", - " 'meta_info': [(f\"{row['#chr']}:{row['start']}-{row['end']}\", # this is the phenotypic region to extract data from\n", - " f\"{row['#chr']}:{row['start_association']}-{row['end_association']}\", # this is the association window region\n", - " row['ID'], row['Original_ID'], *row['cond'].split(',')) for _, row in meta_data.iterrows()]\n", - " }\n", - "else:\n", - " regional_data = {'data':[], 'meta_info':[]}" + "[qtl_dataset_construct]\n", + "# Build one pecotmr::QtlDataset from the phenotype manifest + shared genotype and\n", + "# serialize to RDS, replacing the legacy get_analysis_regions / regional_data\n", + "# loader. No fan-out: downstream per-gene fine-mapping / TWAS load this single\n", + "# RDS and select contexts/genes at analysis time.\n", + "#\n", + "# Manifest schema (qtl_dataset_construct.R): columns ID, path, cond and an\n", + "# optional cov_path; extra columns (e.g. #chr/start/end) are ignored. The toy\n", + "# MWE pheno_manifest_multicontext.tsv already matches -- a single-context\n", + "# analysis just points at a one-`cond` manifest (or selects the context\n", + "# downstream). QTLtools-format covariate TSVs (rows = covariates) need\n", + "# --transpose-covariates.\n", + "parameter: study = name\n", + "# Set for QTLtools-format covariate TSVs (rows = covariates, cols = samples);\n", + "# required for the toy MWE covariates.\n", + "parameter: transpose_covariates = False\n", + "# Optional genotype-derived covariates applied uniformly across contexts;\n", + "# per-context covariates come from the manifest cov_path column.\n", + "parameter: genotype_covariates = path('.')\n", + "parameter: maf_cutoff = 0.0\n", + "parameter: xvar_cutoff = 0.0\n", + "output: f\"{cwd:a}/qtl_dataset/{study}.qtl_dataset.rds\"\n", + "task: trunk_workers = 1, trunk_size = 1, walltime = walltime, mem = mem, cores = numThreads, tags = f\"{step_name}_{_output:bn}\"\n", + "bash: expand = '${ }', stderr = f\"{_output}.stderr\", stdout = f\"{_output}.stdout\", container = container\n", + " Rscript ${modular_script_dir}/pecotmr_integration/qtl_dataset_construct.R \\\n", + " --study ${study} \\\n", + " --genotype-prefix ${genoFile:n} \\\n", + " --phenotype-manifest ${phenoFile[0]} \\\n", + " --genotype-covariates ${genotype_covariates if genotype_covariates.is_file() else '\"\"'} \\\n", + " ${'--transpose-covariates' if transpose_covariates else ''} \\\n", + " --maf-cutoff ${maf_cutoff} \\\n", + " --xvar-cutoff ${xvar_cutoff} \\\n", + " --output ${_output}" ] }, { @@ -836,61 +686,51 @@ "outputs": [], "source": [ "[susie_twas]\n", - "# Further limit TWAS to only using selected variants\n", - "parameter: min_twas_maf = 0.01\n", - "parameter: min_twas_xvar = 0.01\n", - "depends: sos_variable(\"regional_data\")\n", - "# Check if both 'data' and 'meta_info' are empty lists\n", - "stop_if(len(regional_data['data']) == 0, f'Either genotype or phenotype data are not available for region {\", \".join(region_name)}.')\n", - "meta_info = regional_data['meta_info']\n", - "input: regional_data[\"data\"], group_by = lambda x: group_by_region(x, regional_data[\"data\"]), group_with = \"meta_info\"\n", - "\n", - "if skip_fine_mapping and skip_twas_weights:\n", - " save_data = True\n", - " output_files = [f'{cwd:a}/data/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.univariate_data.rds']\n", - "elif not skip_fine_mapping and skip_twas_weights:\n", - " output_files = [f'{cwd:a}/fine_mapping/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.univariate_bvsr.rds']\n", - "elif skip_fine_mapping and not skip_twas_weights:\n", - " output_files = [f'{cwd:a}/twas_weights/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.univariate_twas_weights.rds']\n", - "else:\n", - " output_files = [f'{cwd:a}/fine_mapping/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.univariate_bvsr.rds',\n", - " f'{cwd:a}/twas_weights/{name}.{_meta_info[0].split(\":\")[0]}_{_meta_info[2]}.univariate_twas_weights.rds']\n", - "output: output_files\n", - "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bnn}'\n", - "bash: expand= \"${ }\", stderr = f\"{_output[0]:nn}.susie_twas.stderr\", stdout = f\"{_output[0]:nn}.susie_twas.stdout\", container = container, entrypoint = entrypoint\n", - " Rscript ${modular_script_dir}/mnm_analysis/mnm_methods/susie_twas.R \\\n", - " --genotype ${_input[0]:a} \\\n", - " --phenotype \"${\",\".join([str(x.absolute()) for x in _input[1:len(_input)//2+1]])}\" \\\n", - " --covariate \"${\",\".join([str(x.absolute()) for x in _input[len(_input)//2+1:]])}\" \\\n", - " --region \"${_meta_info[0]}\" \\\n", - " --window \"${_meta_info[1]}\" \\\n", - " --region-name \"${_meta_info[2]}\" \\\n", - " --extract-region-names \"${\"|\".join([x if isinstance(x,str) else \",\".join(x) for x in _meta_info[3]])}\" \\\n", - " --conditions \"${\",\".join(_meta_info[4:])}\" \\\n", - " --skip-analysis-pip-cutoff \"${\",\".join(skip_analysis_pip_cutoff)}\" \\\n", - " --maf ${maf} \\\n", - " --mac ${mac} \\\n", - " --imiss ${imiss} \\\n", - " ${\"--indel\" if indel else \"\"} \\\n", - " ${\"--keep-samples \" + str(keep_samples) if keep_samples.is_file() else \"\"} \\\n", - " ${\"--keep-variants \" + str(keep_variants) if not keep_variants.is_dir() else \"\"} \\\n", - " ${\"--save-data\" if save_data else \"\"} \\\n", - " --pip-cutoff ${pip_cutoff} \\\n", - " --coverage \"${\",\".join([str(x) for x in coverage])}\" \\\n", - " --seed ${seed} \\\n", - " --cwd ${cwd:a} \\\n", - " ${\"--skip-fine-mapping\" if skip_fine_mapping else \"\"} \\\n", - " ${\"--skip-twas-weights\" if skip_twas_weights else \"\"} \\\n", - " ${\"--trans-analysis\" if trans_analysis else \"\"} \\\n", - " --L-greedy ${init_L} \\\n", - " --L ${L} \\\n", - " --max-cv-variants ${max_cv_variants} \\\n", - " --twas-cv-folds ${twas_cv_folds} \\\n", - " --twas-cv-threads ${twas_cv_threads} \\\n", - " --min-twas-maf ${min_twas_maf} \\\n", - " --min-twas-xvar ${min_twas_xvar} \\\n", - " ${\"--ld-reference-meta-file \" + str(ld_reference_meta_file) if not ld_reference_meta_file.is_dir() else \"\"} \\\n", - " --output-prefix ${_output[0]:nn}\n" + "# Univariate SuSiE fine-mapping + TWAS weights over the pre-built QtlDataset\n", + "# (from the qtl_dataset_construct step), one gene per fan-out unit. The gene(s)\n", + "# to analyze are passed on the command line via --region-name; the QtlDataset\n", + "# RDS already holds the genotype + per-context phenotypes, so this step neither\n", + "# re-reads the phenotype manifest nor rebuilds regional_data. Fine-mapping runs\n", + "# first (pecotmr_integration/fine_mapping.R -> fineMappingPipeline); the TWAS\n", + "# weights pass then reuses its SuSiE fits via --fine-mapping-result\n", + "# (pecotmr_integration/twas_weights.R -> twasWeightsPipeline). Replaces the\n", + "# legacy regional_data + susie_twas.R path.\n", + "parameter: cis_window = 1000000\n", + "parameter: fine_mapping_methods = \"susie\"\n", + "parameter: twas_methods = \"susie,susieInf,mrash,enet,lasso,mcp,scad,l0learn,bayes_r,bayes_c\"\n", + "parameter: fine_mapping_coverage = 0.95\n", + "# Comma-separated context names to restrict both passes to; empty = all contexts.\n", + "parameter: contexts = \"\"\n", + "# Optional JSON of per-method kwargs forwarded to the pipelines, e.g.\n", + "# '{\"susie\":{\"L\":10}}' for fine-mapping or '{\"lasso\":{\"nfolds\":10}}' for TWAS.\n", + "parameter: fine_mapping_method_args = \"\"\n", + "parameter: twas_method_args = \"\"\n", + "fail_if(len(region_name) == 0, \"susie_twas: pass --region-name [ ...] to choose which gene(s) to analyze.\")\n", + "qtl_dataset = path(f\"{cwd:a}/qtl_dataset/{name}.qtl_dataset.rds\")\n", + "input: qtl_dataset, for_each = \"region_name\"\n", + "output: fine_mapping = f\"{cwd:a}/fine_mapping/{name}.{_region_name}.univariate_bvsr.rds\",\n", + " twas_weights = f\"{cwd:a}/twas_weights/{name}.{_region_name}.univariate_twas_weights.rds\"\n", + "task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f\"{step_name}_{_output[0]:bnn}\"\n", + "bash: expand = \"${ }\", stderr = f\"{_output[0]:nn}.susie_twas.stderr\", stdout = f\"{_output[0]:nn}.susie_twas.stdout\", container = container, entrypoint = entrypoint\n", + " Rscript ${modular_script_dir}/pecotmr_integration/fine_mapping.R \\\n", + " --qtl-dataset ${_input} \\\n", + " --gene-id ${_region_name} \\\n", + " --cis-window ${cis_window} \\\n", + " --methods ${fine_mapping_methods} \\\n", + " --coverage ${fine_mapping_coverage} \\\n", + " ${(\"--contexts \" + contexts) if contexts else \"\"} \\\n", + " ${(\"--method-args '\" + fine_mapping_method_args + \"'\") if fine_mapping_method_args else \"\"} \\\n", + " --output ${_output[\"fine_mapping\"]}\n", + "\n", + " Rscript ${modular_script_dir}/pecotmr_integration/twas_weights.R \\\n", + " --qtl-dataset ${_input} \\\n", + " --gene-id ${_region_name} \\\n", + " --cis-window ${cis_window} \\\n", + " --methods ${twas_methods} \\\n", + " --fine-mapping-result ${_output[\"fine_mapping\"]} \\\n", + " ${(\"--contexts \" + contexts) if contexts else \"\"} \\\n", + " ${(\"--method-args '\" + twas_method_args + \"'\") if twas_method_args else \"\"} \\\n", + " --output ${_output[\"twas_weights\"]}" ] }, { diff --git a/code/script/pecotmr_integration/fine_mapping.R b/code/script/pecotmr_integration/fine_mapping.R index 8246a7c7..4c8144ba 100644 --- a/code/script/pecotmr_integration/fine_mapping.R +++ b/code/script/pecotmr_integration/fine_mapping.R @@ -55,6 +55,9 @@ parser <- add_argument(parser, "--cis-window", parser <- add_argument(parser, "--region", help = "Genomic region as chr:start-end (QTL region mode)", type = "character", default = "") +parser <- add_argument(parser, "--contexts", + help = "Comma-separated context names to restrict to (QTL mode); empty = all contexts", + type = "character", default = "") parser <- add_argument(parser, "--methods", help = "Comma-separated fine-mapping method tokens", type = "character", default = "susie") @@ -132,6 +135,10 @@ if (has_qtl && has_gwas) if (!has_qtl && !has_gwas) stop("Specify either --qtl-dataset (QTL mode) or --gwas-sumstats (GWAS mode).") +# Optional context restriction (QTL mode): NULL = all contexts in the dataset. +contexts_arg <- if (nzchar(argv$contexts) && argv$contexts != ".") + trimws(strsplit(argv$contexts, ",", fixed = TRUE)[[1L]]) else NULL + methods <- trimws(strsplit(argv$methods, ",", fixed = TRUE)[[1L]]) # Build the final `methods` argument for fineMappingPipeline as the named-list @@ -179,7 +186,10 @@ if (has_gwas) { if (!has_gene && !has_region) stop("QTL mode requires --gene-id (with --cis-window) or --region.") qd <- readRDS(argv$qtl_dataset) - qtl_args <- c(list(qd), cs_args, list(cisWindow = argv$cis_window)) + # `contexts` is QTL-mode only (NULL = all contexts), so it rides on qtl_args + # rather than the mode-shared cs_args. + qtl_args <- c(list(qd), cs_args, + list(cisWindow = argv$cis_window, contexts = contexts_arg)) res <- if (has_region) { do.call(fineMappingPipeline, c(qtl_args, list(region = parse_region(argv$region)))) } else { diff --git a/code/script/pecotmr_integration/twas_weights.R b/code/script/pecotmr_integration/twas_weights.R index 618d1b61..0225127b 100644 --- a/code/script/pecotmr_integration/twas_weights.R +++ b/code/script/pecotmr_integration/twas_weights.R @@ -53,6 +53,9 @@ parser <- add_argument(parser, "--cis-window", parser <- add_argument(parser, "--region", help = "Genomic region as chr:start-end (region mode); mutually exclusive with --gene-id", type = "character", default = "") +parser <- add_argument(parser, "--contexts", + help = "Comma-separated context names to restrict to; empty = all contexts", + type = "character", default = "") parser <- add_argument(parser, "--methods", help = "Comma-separated TWAS method tokens (default 'default')", type = "character", default = "default") @@ -92,6 +95,10 @@ parsed_method_args <- if (nzchar(argv$method_args) && argv$method_args != "." && # that twasWeightsPipeline accepts directly. The "default" preset can't # carry per-method kwargs (it expands inside pecotmr to a fixed token # set we can't see here) — explicit --methods is required in that case. +# Optional context restriction: NULL = all contexts in the dataset. +contexts_arg <- if (nzchar(argv$contexts) && argv$contexts != ".") + trimws(strsplit(argv$contexts, ",", fixed = TRUE)[[1L]]) else NULL + methods <- trimws(strsplit(argv$methods, ",", fixed = TRUE)[[1L]]) methods_arg <- if (is.null(parsed_method_args)) { if (length(methods) == 1L && methods == "default") "default" else methods @@ -140,11 +147,13 @@ res <- if (has_region) { twasWeightsPipeline(qd, methods = methods_arg, region = parse_region(argv$region), cisWindow = argv$cis_window, + contexts = contexts_arg, fineMappingResult = fmr) } else { twasWeightsPipeline(qd, methods = methods_arg, traitId = argv$gene_id, cisWindow = argv$cis_window, + contexts = contexts_arg, fineMappingResult = fmr) }