diff --git a/drcme/bin/run_existing_spca_on_new_data.py b/drcme/bin/run_existing_spca_on_new_data.py index 644f079..6446d48 100755 --- a/drcme/bin/run_existing_spca_on_new_data.py +++ b/drcme/bin/run_existing_spca_on_new_data.py @@ -98,7 +98,7 @@ def main(orig_transform_file, orig_datasets, new_datasets, params_file, params_file=params_file) orig_data_objects.append(data_for_spca) orig_specimen_ids_list.append(specimen_ids) - orig_data_for_spca = [] + orig_data_for_spca = {} for i, do in enumerate(orig_data_objects): for k in do: if k not in orig_data_for_spca: @@ -128,7 +128,7 @@ def main(orig_transform_file, orig_datasets, new_datasets, params_file, params_file=params_file) new_data_objects.append(data_for_spca) new_specimen_ids_list.append(specimen_ids) - data_for_spca = [] + data_for_spca = {} for i, do in enumerate(new_data_objects): for k in do: if k not in data_for_spca: diff --git a/drcme/spca.py b/drcme/spca.py index eeb1216..7f1517e 100755 --- a/drcme/spca.py +++ b/drcme/spca.py @@ -195,16 +195,13 @@ def orig_mean_and_std_for_zscore(spca_results, orig_data, spca_params, Standard deviations of sPCs """ Z_list = [] - for ds in orig_data: - data = ds["data"] - for k in ds["part_keys"]: - _, _, _, indices = spca_params[k] - d = data[:, indices] - above_thresh = spca_results[k]["pev"] >= pev_threshold - Z = d.dot(spca_results[k]["loadings"][:, above_thresh]) - if np.any(np.isnan(Z)): - print("NaNs found", k) - Z_list.append(Z) + subset_data = select_data_subset(orig_data, spca_params) + for k, d in subset_data.items(): + above_thresh = spca_results[k]["pev"] >= pev_threshold + Z = d.dot(spca_results[k]["loadings"][:, above_thresh]) + if np.any(np.isnan(Z)): + print("NaNs found", k) + Z_list.append(Z) combo_orig = np.hstack(Z_list) return combo_orig.mean(axis=0), combo_orig.std(axis=0) @@ -238,16 +235,13 @@ def spca_transform_new_data(spca_results, new_data, spca_zht_params, orig_mean, Transformed and z-scored sPC values """ Z_list = [] - for ds in new_data: - data = ds["data"] - for k in ds["part_keys"]: - _, _, _, indices = spca_zht_params[k] - d = data[:, indices] - above_thresh = spca_results[k]["pev"] >= pev_threshold - Z = d.dot(spca_results[k]["loadings"][:, above_thresh]) - if np.any(np.isnan(Z)): - print("NaNs found", k) - Z_list.append(Z) + subset_data = select_data_subset(new_data, spca_zht_params) + for k, d in subset_data.items(): + above_thresh = spca_results[k]["pev"] >= pev_threshold + Z = d.dot(spca_results[k]["loadings"][:, above_thresh]) + if np.any(np.isnan(Z)): + print("NaNs found", k) + Z_list.append(Z) combo_new = np.hstack(Z_list) combo = (combo_new - orig_mean) / orig_std