From 2a806765b795144caf43879c03d32055ce8da0a3 Mon Sep 17 00:00:00 2001 From: thomaswilliams23 Date: Thu, 20 Nov 2025 15:12:23 +1100 Subject: [PATCH 1/5] fix contemporaneous sampling for wartortle --- .gitignore | 4 +- config/simulation-charmeleon-alt.json | 6 +-- config/simulation-wartortle.json | 37 +++++++++++++++++++ ...aster-template-contemporaneous-lenient.xml | 32 ++++++++++++++++ src/remaster-template-contemporaneous.xml | 2 +- 5 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 config/simulation-wartortle.json create mode 100644 src/remaster-template-contemporaneous-lenient.xml diff --git a/.gitignore b/.gitignore index 14e1f5b..3c358cc 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ out/sim-*/plots out/sim-*/dataset-*.hdf5 lib venv -*~ \ No newline at end of file +*~ +out/*/.DS_Store +.DS_Store \ No newline at end of file diff --git a/config/simulation-charmeleon-alt.json b/config/simulation-charmeleon-alt.json index 40ba792..a422ddd 100644 --- a/config/simulation-charmeleon-alt.json +++ b/config/simulation-charmeleon-alt.json @@ -4,12 +4,12 @@ "seed": 42, "remaster_xml": "src/remaster-template.xml", "num_simulations": 2000, - "num_workers": 16, + "num_workers": 5, "simulation_hyperparameters": { "duration_range": { "dist": "uniform_int", - "lower_bound": 30, - "upper_bound": 50 + "lower_bound": 10, + "upper_bound": 30 }, "num_changes": { "dist": "uniform_int", diff --git a/config/simulation-wartortle.json b/config/simulation-wartortle.json new file mode 100644 index 0000000..ffeba01 --- /dev/null +++ b/config/simulation-wartortle.json @@ -0,0 +1,37 @@ +{ + "simulation_name": "sim-wartortle", + "output_hdf5": "dataset-wartortle.hdf5", + "seed": 42, + "remaster_xml": "src/remaster-template-contemporaneous-lenient.xml", + "num_simulations": 2000, + "num_workers": 5, + "simulation_hyperparameters": { + "duration_range": { + "dist": "uniform_int", + "lower_bound": 10, + "upper_bound": 30 + }, + "num_changes": { + "dist": "uniform_int", + "lower_bound": 1, + "upper_bound": 2 + }, + "r0": { + "dist": "lognormal", + "LN_mean": 1.17, + "LN_sigma": 0.38 + }, + "net_removal_rate": { + "dist": "lognormal", + "LN_mean": -1.81, + "LN_sigma": 0.2 + }, + "sampling_prop": { + "dist": "beta", + "alpha": 2, + "beta": 10 + }, + "num_temp_measurements": 10, + "contemporaneous_sample": true + } +} diff --git a/src/remaster-template-contemporaneous-lenient.xml b/src/remaster-template-contemporaneous-lenient.xml new file mode 100644 index 0000000..adcfe97 --- /dev/null +++ b/src/remaster-template-contemporaneous-lenient.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + X -> 2X + X -> Psi + X -> Mu + + + + + + + + + + + + + diff --git a/src/remaster-template-contemporaneous.xml b/src/remaster-template-contemporaneous.xml index baf16a2..5ed353e 100644 --- a/src/remaster-template-contemporaneous.xml +++ b/src/remaster-template-contemporaneous.xml @@ -15,7 +15,7 @@ X -> 2X - X -> Psi + X -> Psi X -> Mu From df4ef44e9558c6cd57cafdd8d65c64bb6fc484de Mon Sep 17 00:00:00 2001 From: thomaswilliams23 Date: Mon, 8 Dec 2025 12:56:01 +1100 Subject: [PATCH 2/5] Output rho in contemporaneous sampling --- main.py | 9 +++++ src/get_output_distributions.py | 65 +++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 src/get_output_distributions.py diff --git a/main.py b/main.py index 2281341..4b9b238 100644 --- a/main.py +++ b/main.py @@ -643,6 +643,15 @@ def create_database(pickle_files): "present_cumulative", data=sim["simulation_results"]["present_cumulative"], ) + + #put rho in database if doing contemporaneous sampling + if "rho" in sim["parameters"].keys(): + param_grp = params_grp.create_group("rho") + param_grp.create_dataset("values", data=sim["parameters"]["rho"]["values"]) + param_grp.create_dataset( + "change_times", data=sim["parameters"]["rho"]["change_times"] + ) + db_conn.attrs["num_simulations"] = num_sims db_conn.attrs["creation_date"] = datetime.datetime.now().isoformat() db_conn.close() diff --git a/src/get_output_distributions.py b/src/get_output_distributions.py new file mode 100644 index 0000000..059cd52 --- /dev/null +++ b/src/get_output_distributions.py @@ -0,0 +1,65 @@ +import h5py +import numpy as np +import scipy.stats as stats + +#look at an hdf5 file, pull out present prevalence and cumulative incidence + + + +#DB_FNAME = "out/sim-wartortle/dataset-wartortle.hdf5" +DB_FNAME = "out/sim-charmeleon-alt/dataset-charmeleon-alt.hdf5" + +print(f"\nReading from {DB_FNAME}\n") + + +#read in +db_conn = h5py.File(DB_FNAME, "r") + +present_times = np.array([db_conn[f"{key}/input/present"][()] for key in db_conn.keys()]) + +r0_values = np.array([db_conn[f"{key}/output/parameters/r0/values"][()] for key in db_conn.keys()]) +r0_ct = np.array([db_conn[f"{key}/output/parameters/r0/change_times"][()] for key in db_conn.keys()]) + +net_removal_rates = np.array([db_conn[f"{key}/output/parameters/net_removal_rate/values"][()] for key in db_conn.keys()]) +net_removal_ct = np.array([db_conn[f"{key}/output/parameters/net_removal_rate/change_times"][()] for key in db_conn.keys()]) + +present_prevalence = np.array([db_conn[f"{key}/output/present_prevalence"][()] for key in db_conn.keys()]) +present_cumulative = np.array([db_conn[f"{key}/output/present_cumulative"][()] for key in db_conn.keys()]) + + + + + +# + + + + + + + +#OUTPUT + +#prevalence +prevalence_mean = np.mean(present_prevalence) +prevalence_median = np.median(present_prevalence) +prevalence_ci = stats.t.interval( + 0.95, + len(present_prevalence)-1, + loc=prevalence_mean, + scale=stats.sem(present_prevalence) +) +print(f"Present Prevalence: Median={prevalence_median:.4f}, 95% CI=({prevalence_ci[0]:.4f}, {prevalence_ci[1]:.4f})") + + +#cumulative incidence +cumulative_mean = np.mean(present_cumulative) +cumulative_median = np.median(present_cumulative) +cumulative_ci = stats.t.interval( + 0.95, + len(present_cumulative)-1, + loc=cumulative_mean, + scale=stats.sem(present_cumulative) +) +cumulative_median = np.median(present_cumulative) +print(f"Present Cumulative Incidence: Median={cumulative_median:.4f}, 95% CI=({cumulative_ci[0]:.4f}, {cumulative_ci[1]:.4f})") \ No newline at end of file From 25e4434a93d6ce471afedfbec28e32e1f0a663bc Mon Sep 17 00:00:00 2001 From: thomaswilliams23 Date: Tue, 9 Dec 2025 09:24:59 +1100 Subject: [PATCH 3/5] bugfixing --- main.py | 10 ++++ src/get_output_distributions.py | 103 ++++++++++++++++++++++++-------- 2 files changed, 87 insertions(+), 26 deletions(-) diff --git a/main.py b/main.py index 4b9b238..dabe733 100644 --- a/main.py +++ b/main.py @@ -646,12 +646,22 @@ def create_database(pickle_files): #put rho in database if doing contemporaneous sampling if "rho" in sim["parameters"].keys(): + + print(sim["parameters"]["rho"]) + + print("about to make group") param_grp = params_grp.create_group("rho") + + print("about to pass values") param_grp.create_dataset("values", data=sim["parameters"]["rho"]["values"]) + + print("about to pass change times") param_grp.create_dataset( "change_times", data=sim["parameters"]["rho"]["change_times"] ) + print("successfully passed data") + db_conn.attrs["num_simulations"] = num_sims db_conn.attrs["creation_date"] = datetime.datetime.now().isoformat() db_conn.close() diff --git a/src/get_output_distributions.py b/src/get_output_distributions.py index 059cd52..45c05c1 100644 --- a/src/get_output_distributions.py +++ b/src/get_output_distributions.py @@ -1,13 +1,12 @@ import h5py import numpy as np -import scipy.stats as stats #look at an hdf5 file, pull out present prevalence and cumulative incidence -#DB_FNAME = "out/sim-wartortle/dataset-wartortle.hdf5" -DB_FNAME = "out/sim-charmeleon-alt/dataset-charmeleon-alt.hdf5" +DB_FNAME = "out/sim-wartortle/dataset-wartortle.hdf5" +#DB_FNAME = "out/sim-charmeleon-alt/dataset-charmeleon-alt.hdf5" print(f"\nReading from {DB_FNAME}\n") @@ -15,13 +14,19 @@ #read in db_conn = h5py.File(DB_FNAME, "r") -present_times = np.array([db_conn[f"{key}/input/present"][()] for key in db_conn.keys()]) +epidemic_durations = np.array([db_conn[f"{key}/output/parameters/epidemic_duration"][()] for key in db_conn.keys()]) -r0_values = np.array([db_conn[f"{key}/output/parameters/r0/values"][()] for key in db_conn.keys()]) -r0_ct = np.array([db_conn[f"{key}/output/parameters/r0/change_times"][()] for key in db_conn.keys()]) +r0_values = [db_conn[f"{key}/output/parameters/r0/values"][()] for key in db_conn.keys()] +r0_ct = [db_conn[f"{key}/output/parameters/r0/change_times"][()] for key in db_conn.keys()] -net_removal_rates = np.array([db_conn[f"{key}/output/parameters/net_removal_rate/values"][()] for key in db_conn.keys()]) -net_removal_ct = np.array([db_conn[f"{key}/output/parameters/net_removal_rate/change_times"][()] for key in db_conn.keys()]) +net_removal_rates = [db_conn[f"{key}/output/parameters/net_removal_rate/values"][()] for key in db_conn.keys()] +net_removal_ct = [db_conn[f"{key}/output/parameters/net_removal_rate/change_times"][()] for key in db_conn.keys()] + +sampling_prop_values = [db_conn[f"{key}/output/parameters/sampling_prop/values"][()] for key in db_conn.keys()] +sampling_prop_ct = [db_conn[f"{key}/output/parameters/sampling_prop/change_times"][()] for key in db_conn.keys()] + +num_param_changes = np.array([len(r0_ct[ix]) for ix in range(len(r0_ct))]) +all_change_times = np.concatenate(r0_ct) present_prevalence = np.array([db_conn[f"{key}/output/present_prevalence"][()] for key in db_conn.keys()]) present_cumulative = np.array([db_conn[f"{key}/output/present_cumulative"][()] for key in db_conn.keys()]) @@ -30,36 +35,82 @@ -# +#process parameter samples +all_r0_samples = np.concatenate(r0_values) +r0_weights = np.concatenate([ + np.diff(np.concatenate((np.array([0.0]), change_times, np.array([epidemic_durations[ix]])))) for (ix, change_times) in enumerate(r0_ct) +]) + +all_net_removal_samples = np.concatenate(net_removal_rates) +net_removal_weights = np.concatenate([ + np.diff(np.concatenate((np.array([0.0]), change_times, np.array([epidemic_durations[ix]])))) for (ix, change_times) in enumerate(net_removal_ct) +]) + +all_sampling_prop_samples = np.concatenate(sampling_prop_values) +sampling_prop_weights = np.concatenate([ + np.diff(np.concatenate((np.array([0.0]), change_times, np.array([epidemic_durations[ix]])))) for (ix, change_times) in enumerate(sampling_prop_ct) +]) + + + + + + +# MODEL PARAMS +print("\nModel Parameter Distributions:") +#r0 +r0_median = np.median(all_r0_samples) +r0_interval = np.percentile(all_r0_samples, [2.5, 97.5], weights=r0_weights, method='inverted_cdf') +print(f"R0: Median={r0_median:.4f}, 95% interval=({r0_interval[0]:.4f}, {r0_interval[1]:.4f})") + +#net removal rate +net_removal_median = np.median(all_net_removal_samples) +net_removal_interval = np.percentile(all_net_removal_samples, [2.5, 97.5], weights=net_removal_weights, method='inverted_cdf') +print(f"Net Removal Rate: Median={net_removal_median:.4f}, 95% interval=({net_removal_interval[0]:.4f}, {net_removal_interval[1]:.4f})") + +#sampling proportion +sampling_prop_median = np.median(all_sampling_prop_samples) +sampling_prop_interval = np.percentile(all_sampling_prop_samples, [2.5, 97.5], weights=sampling_prop_weights, method='inverted_cdf') +print(f"Sampling Proportion: Median={sampling_prop_median:.4f}, 95% interval=({sampling_prop_interval[0]:.4f}, {sampling_prop_interval[1]:.4f})") + + + + +#HYPERPARAMS +print("\nHyperparameter Distributions:") + +#epidemic duration +epidemic_duration_median = np.median(epidemic_durations) +epidemic_duration_interval = np.percentile(epidemic_durations, [2.5, 97.5]) +print(f"Epidemic Duration: Median={epidemic_duration_median:.4f}, 95% interval=({epidemic_duration_interval[0]:.4f}, {epidemic_duration_interval[1]:.4f})") + +#number of parameter changes +num_param_changes_median = np.median(num_param_changes) +num_param_changes_interval = np.percentile(num_param_changes, [2.5, 97.5]) +print(f"Number of Parameter Changes: Median={num_param_changes_median:.4f}, 95% interval=({num_param_changes_interval[0]:.4f}, {num_param_changes_interval[1]:.4f})") + +#change times +all_change_times_median = np.median(all_change_times) +all_change_times_interval = np.percentile(all_change_times, [2.5, 97.5]) +print(f"Change Times: Median={all_change_times_median:.4f}, 95% interval=({all_change_times_interval[0]:.4f}, {all_change_times_interval[1]:.4f})") #OUTPUT +print("\nOutput Distributions:") #prevalence -prevalence_mean = np.mean(present_prevalence) prevalence_median = np.median(present_prevalence) -prevalence_ci = stats.t.interval( - 0.95, - len(present_prevalence)-1, - loc=prevalence_mean, - scale=stats.sem(present_prevalence) -) -print(f"Present Prevalence: Median={prevalence_median:.4f}, 95% CI=({prevalence_ci[0]:.4f}, {prevalence_ci[1]:.4f})") +prevalence_interval = np.percentile(present_prevalence, [2.5, 97.5]) +print(f"Present Prevalence: Median={prevalence_median:.4f}, 95% interval=({prevalence_interval[0]:.4f}, {prevalence_interval[1]:.4f})") #cumulative incidence -cumulative_mean = np.mean(present_cumulative) -cumulative_median = np.median(present_cumulative) -cumulative_ci = stats.t.interval( - 0.95, - len(present_cumulative)-1, - loc=cumulative_mean, - scale=stats.sem(present_cumulative) -) cumulative_median = np.median(present_cumulative) -print(f"Present Cumulative Incidence: Median={cumulative_median:.4f}, 95% CI=({cumulative_ci[0]:.4f}, {cumulative_ci[1]:.4f})") \ No newline at end of file +cumulative_interval = np.percentile(present_cumulative, [2.5, 97.5]) +print(f"Present Cumulative Incidence: Median={cumulative_median:.4f}, 95% interval=({cumulative_interval[0]:.4f}, {cumulative_interval[1]:.4f})") +print("\n") \ No newline at end of file From e1e6566c51722d96878dd107469d02aab7aa057b Mon Sep 17 00:00:00 2001 From: thomaswilliams23 Date: Tue, 9 Dec 2025 09:27:18 +1100 Subject: [PATCH 4/5] bugfixing again --- main.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/main.py b/main.py index dabe733..d0fd451 100644 --- a/main.py +++ b/main.py @@ -646,21 +646,7 @@ def create_database(pickle_files): #put rho in database if doing contemporaneous sampling if "rho" in sim["parameters"].keys(): - - print(sim["parameters"]["rho"]) - - print("about to make group") - param_grp = params_grp.create_group("rho") - - print("about to pass values") - param_grp.create_dataset("values", data=sim["parameters"]["rho"]["values"]) - - print("about to pass change times") - param_grp.create_dataset( - "change_times", data=sim["parameters"]["rho"]["change_times"] - ) - - print("successfully passed data") + params_grp.create_dataset("rho", data=sim["parameters"]["rho"]["values"]) db_conn.attrs["num_simulations"] = num_sims db_conn.attrs["creation_date"] = datetime.datetime.now().isoformat() From d47ae070a2e2d76ac77f24915c62fe90cd090bec Mon Sep 17 00:00:00 2001 From: thomaswilliams23 Date: Tue, 9 Dec 2025 11:51:43 +1100 Subject: [PATCH 5/5] squash bug --- main.py | 6 +++--- src/get_output_distributions.py | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index d0fd451..b32d9c9 100644 --- a/main.py +++ b/main.py @@ -644,9 +644,9 @@ def create_database(pickle_files): data=sim["simulation_results"]["present_cumulative"], ) - #put rho in database if doing contemporaneous sampling - if "rho" in sim["parameters"].keys(): - params_grp.create_dataset("rho", data=sim["parameters"]["rho"]["values"]) + #put rho in database if doing contemporaneous sampling + if "rho" in sim["parameters"].keys(): + params_grp.create_dataset("rho", data=sim["parameters"]["rho"]["values"]) db_conn.attrs["num_simulations"] = num_sims db_conn.attrs["creation_date"] = datetime.datetime.now().isoformat() diff --git a/src/get_output_distributions.py b/src/get_output_distributions.py index 45c05c1..85446d9 100644 --- a/src/get_output_distributions.py +++ b/src/get_output_distributions.py @@ -25,6 +25,9 @@ sampling_prop_values = [db_conn[f"{key}/output/parameters/sampling_prop/values"][()] for key in db_conn.keys()] sampling_prop_ct = [db_conn[f"{key}/output/parameters/sampling_prop/change_times"][()] for key in db_conn.keys()] +if "rho" in db_conn[f"record_000001/output/parameters/"].keys(): + rho_values = np.array([db_conn[f"{key}/output/parameters/rho/values"][()] for key in db_conn.keys()]) + num_param_changes = np.array([len(r0_ct[ix]) for ix in range(len(r0_ct))]) all_change_times = np.concatenate(r0_ct) @@ -76,6 +79,12 @@ sampling_prop_interval = np.percentile(all_sampling_prop_samples, [2.5, 97.5], weights=sampling_prop_weights, method='inverted_cdf') print(f"Sampling Proportion: Median={sampling_prop_median:.4f}, 95% interval=({sampling_prop_interval[0]:.4f}, {sampling_prop_interval[1]:.4f})") +#rho if present +if "rho" in db_conn[f"record_000001/output/parameters/"].keys(): + rho_median = np.median(rho_values) + rho_interval = np.percentile(rho_values, [2.5, 97.5]) + print(f"Rho (Contemporaneous Sampling Proportion): Median={rho_median:.4f}, 95% interval=({rho_interval[0]:.4f}, {rho_interval[1]:.4f})") +