From 591b5ccb2c42f114d82529dda88d25fbad85ffb7 Mon Sep 17 00:00:00 2001 From: agatab Date: Wed, 3 Apr 2024 17:41:00 -0700 Subject: [PATCH 1/2] Updated orig_mean_and_std_for_zscore and spca_transform_new_data to use select_data_subset for data selection based on spca parameters. --- drcme/spca.py | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/drcme/spca.py b/drcme/spca.py index eeb1216..7f1517e 100755 --- a/drcme/spca.py +++ b/drcme/spca.py @@ -195,16 +195,13 @@ def orig_mean_and_std_for_zscore(spca_results, orig_data, spca_params, Standard deviations of sPCs """ Z_list = [] - for ds in orig_data: - data = ds["data"] - for k in ds["part_keys"]: - _, _, _, indices = spca_params[k] - d = data[:, indices] - above_thresh = spca_results[k]["pev"] >= pev_threshold - Z = d.dot(spca_results[k]["loadings"][:, above_thresh]) - if np.any(np.isnan(Z)): - print("NaNs found", k) - Z_list.append(Z) + subset_data = select_data_subset(orig_data, spca_params) + for k, d in subset_data.items(): + above_thresh = spca_results[k]["pev"] >= pev_threshold + Z = d.dot(spca_results[k]["loadings"][:, above_thresh]) + if np.any(np.isnan(Z)): + print("NaNs found", k) + Z_list.append(Z) combo_orig = np.hstack(Z_list) return combo_orig.mean(axis=0), combo_orig.std(axis=0) @@ -238,16 +235,13 @@ def spca_transform_new_data(spca_results, new_data, spca_zht_params, orig_mean, Transformed and z-scored sPC values """ Z_list = [] - for ds in new_data: - data = ds["data"] - for k in ds["part_keys"]: - _, _, _, indices = spca_zht_params[k] - d = data[:, indices] - above_thresh = spca_results[k]["pev"] >= pev_threshold - Z = d.dot(spca_results[k]["loadings"][:, above_thresh]) - if np.any(np.isnan(Z)): - print("NaNs found", k) - Z_list.append(Z) + subset_data = select_data_subset(new_data, spca_zht_params) + for k, d in subset_data.items(): + above_thresh = spca_results[k]["pev"] >= pev_threshold + Z = d.dot(spca_results[k]["loadings"][:, above_thresh]) + if np.any(np.isnan(Z)): + print("NaNs found", k) + Z_list.append(Z) combo_new = np.hstack(Z_list) combo = (combo_new - orig_mean) / orig_std From b70902ccdb5ebaa37f679cf55444b11d3a96d7b8 Mon Sep 17 00:00:00 2001 From: agatab Date: Wed, 3 Apr 2024 17:43:11 -0700 Subject: [PATCH 2/2] Updated to conform with hdf5 feature vector datasets. --- drcme/bin/run_existing_spca_on_new_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drcme/bin/run_existing_spca_on_new_data.py b/drcme/bin/run_existing_spca_on_new_data.py index 644f079..6446d48 100755 --- a/drcme/bin/run_existing_spca_on_new_data.py +++ b/drcme/bin/run_existing_spca_on_new_data.py @@ -98,7 +98,7 @@ def main(orig_transform_file, orig_datasets, new_datasets, params_file, params_file=params_file) orig_data_objects.append(data_for_spca) orig_specimen_ids_list.append(specimen_ids) - orig_data_for_spca = [] + orig_data_for_spca = {} for i, do in enumerate(orig_data_objects): for k in do: if k not in orig_data_for_spca: @@ -128,7 +128,7 @@ def main(orig_transform_file, orig_datasets, new_datasets, params_file, params_file=params_file) new_data_objects.append(data_for_spca) new_specimen_ids_list.append(specimen_ids) - data_for_spca = [] + data_for_spca = {} for i, do in enumerate(new_data_objects): for k in do: if k not in data_for_spca: