-
Notifications
You must be signed in to change notification settings - Fork 1
fix contemporaneous sampling for wartortle #49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2a80676
df4ef44
25e4434
e1e6566
d47ae07
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,4 +3,6 @@ out/sim-*/plots | |
| out/sim-*/dataset-*.hdf5 | ||
| lib | ||
| venv | ||
| *~ | ||
| *~ | ||
| out/*/.DS_Store | ||
| .DS_Store | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| { | ||
| "simulation_name": "sim-wartortle", | ||
| "output_hdf5": "dataset-wartortle.hdf5", | ||
| "seed": 42, | ||
| "remaster_xml": "src/remaster-template-contemporaneous-lenient.xml", | ||
| "num_simulations": 2000, | ||
| "num_workers": 5, | ||
| "simulation_hyperparameters": { | ||
| "duration_range": { | ||
| "dist": "uniform_int", | ||
| "lower_bound": 10, | ||
| "upper_bound": 30 | ||
| }, | ||
| "num_changes": { | ||
| "dist": "uniform_int", | ||
| "lower_bound": 1, | ||
| "upper_bound": 2 | ||
| }, | ||
| "r0": { | ||
| "dist": "lognormal", | ||
| "LN_mean": 1.17, | ||
| "LN_sigma": 0.38 | ||
| }, | ||
| "net_removal_rate": { | ||
| "dist": "lognormal", | ||
| "LN_mean": -1.81, | ||
| "LN_sigma": 0.2 | ||
| }, | ||
| "sampling_prop": { | ||
| "dist": "beta", | ||
| "alpha": 2, | ||
| "beta": 10 | ||
| }, | ||
| "num_temp_measurements": 10, | ||
| "contemporaneous_sample": true | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -643,6 +643,11 @@ def create_database(pickle_files): | |||||||||||||||||||
| "present_cumulative", | ||||||||||||||||||||
| data=sim["simulation_results"]["present_cumulative"], | ||||||||||||||||||||
| ) | ||||||||||||||||||||
|
|
||||||||||||||||||||
| #put rho in database if doing contemporaneous sampling | ||||||||||||||||||||
| if "rho" in sim["parameters"].keys(): | ||||||||||||||||||||
| params_grp.create_dataset("rho", data=sim["parameters"]["rho"]["values"]) | ||||||||||||||||||||
|
||||||||||||||||||||
| params_grp.create_dataset("rho", data=sim["parameters"]["rho"]["values"]) | |
| rho_grp = params_grp.create_group("rho") | |
| rho_grp.create_dataset("values", data=sim["parameters"]["rho"]["values"]) | |
| rho_change_times = sim["parameters"]["rho"].get("change_times") | |
| if rho_change_times is None: | |
| # Store an empty dataset to maintain consistent structure | |
| rho_grp.create_dataset("change_times", data=np.array([])) | |
| else: | |
| rho_grp.create_dataset("change_times", data=rho_change_times) |
Copilot
AI
Jan 5, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code block is incorrectly placed outside the 'with open(pf, "rb") as f:' context manager block (which ends at line 645). The variable 'sim' is defined inside that context, and 'params_grp' is created from data read inside that context. This code should be moved inside the with block before line 646, at the same indentation level as the other dataset creation code (lines 638-645).
| #put rho in database if doing contemporaneous sampling | |
| if "rho" in sim["parameters"].keys(): | |
| params_grp.create_dataset("rho", data=sim["parameters"]["rho"]["values"]) | |
| # put rho in database if doing contemporaneous sampling | |
| if "rho" in sim["parameters"]: | |
| params_grp.create_dataset( | |
| "rho", data=sim["parameters"]["rho"]["values"] | |
| ) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@thomaswilliams23 the other comment about how the rho is stored appears incorrect but I think copilot is correct on this one about the level of indentation. The sim value may not have the expected value outside of the context.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,125 @@ | ||
| import h5py | ||
| import numpy as np | ||
|
|
||
| #look at an hdf5 file, pull out present prevalence and cumulative incidence | ||
|
||
|
|
||
|
|
||
|
|
||
| DB_FNAME = "out/sim-wartortle/dataset-wartortle.hdf5" | ||
| #DB_FNAME = "out/sim-charmeleon-alt/dataset-charmeleon-alt.hdf5" | ||
|
||
|
|
||
| print(f"\nReading from {DB_FNAME}\n") | ||
|
|
||
|
|
||
| #read in | ||
|
||
| db_conn = h5py.File(DB_FNAME, "r") | ||
|
|
||
| epidemic_durations = np.array([db_conn[f"{key}/output/parameters/epidemic_duration"][()] for key in db_conn.keys()]) | ||
|
|
||
| r0_values = [db_conn[f"{key}/output/parameters/r0/values"][()] for key in db_conn.keys()] | ||
| r0_ct = [db_conn[f"{key}/output/parameters/r0/change_times"][()] for key in db_conn.keys()] | ||
|
|
||
| net_removal_rates = [db_conn[f"{key}/output/parameters/net_removal_rate/values"][()] for key in db_conn.keys()] | ||
| net_removal_ct = [db_conn[f"{key}/output/parameters/net_removal_rate/change_times"][()] for key in db_conn.keys()] | ||
|
|
||
| sampling_prop_values = [db_conn[f"{key}/output/parameters/sampling_prop/values"][()] for key in db_conn.keys()] | ||
| sampling_prop_ct = [db_conn[f"{key}/output/parameters/sampling_prop/change_times"][()] for key in db_conn.keys()] | ||
|
|
||
| if "rho" in db_conn[f"record_000001/output/parameters/"].keys(): | ||
| rho_values = np.array([db_conn[f"{key}/output/parameters/rho/values"][()] for key in db_conn.keys()]) | ||
|
|
||
| num_param_changes = np.array([len(r0_ct[ix]) for ix in range(len(r0_ct))]) | ||
| all_change_times = np.concatenate(r0_ct) | ||
|
|
||
| present_prevalence = np.array([db_conn[f"{key}/output/present_prevalence"][()] for key in db_conn.keys()]) | ||
| present_cumulative = np.array([db_conn[f"{key}/output/present_cumulative"][()] for key in db_conn.keys()]) | ||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
| #process parameter samples | ||
|
||
| all_r0_samples = np.concatenate(r0_values) | ||
| r0_weights = np.concatenate([ | ||
| np.diff(np.concatenate((np.array([0.0]), change_times, np.array([epidemic_durations[ix]])))) for (ix, change_times) in enumerate(r0_ct) | ||
| ]) | ||
|
|
||
| all_net_removal_samples = np.concatenate(net_removal_rates) | ||
| net_removal_weights = np.concatenate([ | ||
| np.diff(np.concatenate((np.array([0.0]), change_times, np.array([epidemic_durations[ix]])))) for (ix, change_times) in enumerate(net_removal_ct) | ||
| ]) | ||
|
|
||
| all_sampling_prop_samples = np.concatenate(sampling_prop_values) | ||
| sampling_prop_weights = np.concatenate([ | ||
| np.diff(np.concatenate((np.array([0.0]), change_times, np.array([epidemic_durations[ix]])))) for (ix, change_times) in enumerate(sampling_prop_ct) | ||
| ]) | ||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
| # MODEL PARAMS | ||
| print("\nModel Parameter Distributions:") | ||
|
|
||
| #r0 | ||
|
||
| r0_median = np.median(all_r0_samples) | ||
| r0_interval = np.percentile(all_r0_samples, [2.5, 97.5], weights=r0_weights, method='inverted_cdf') | ||
| print(f"R0: Median={r0_median:.4f}, 95% interval=({r0_interval[0]:.4f}, {r0_interval[1]:.4f})") | ||
|
|
||
| #net removal rate | ||
|
||
| net_removal_median = np.median(all_net_removal_samples) | ||
| net_removal_interval = np.percentile(all_net_removal_samples, [2.5, 97.5], weights=net_removal_weights, method='inverted_cdf') | ||
| print(f"Net Removal Rate: Median={net_removal_median:.4f}, 95% interval=({net_removal_interval[0]:.4f}, {net_removal_interval[1]:.4f})") | ||
|
|
||
| #sampling proportion | ||
|
||
| sampling_prop_median = np.median(all_sampling_prop_samples) | ||
| sampling_prop_interval = np.percentile(all_sampling_prop_samples, [2.5, 97.5], weights=sampling_prop_weights, method='inverted_cdf') | ||
| print(f"Sampling Proportion: Median={sampling_prop_median:.4f}, 95% interval=({sampling_prop_interval[0]:.4f}, {sampling_prop_interval[1]:.4f})") | ||
|
|
||
| #rho if present | ||
|
||
| if "rho" in db_conn[f"record_000001/output/parameters/"].keys(): | ||
| rho_median = np.median(rho_values) | ||
| rho_interval = np.percentile(rho_values, [2.5, 97.5]) | ||
| print(f"Rho (Contemporaneous Sampling Proportion): Median={rho_median:.4f}, 95% interval=({rho_interval[0]:.4f}, {rho_interval[1]:.4f})") | ||
|
|
||
|
|
||
|
|
||
|
|
||
| #HYPERPARAMS | ||
|
||
| print("\nHyperparameter Distributions:") | ||
|
|
||
| #epidemic duration | ||
|
||
| epidemic_duration_median = np.median(epidemic_durations) | ||
| epidemic_duration_interval = np.percentile(epidemic_durations, [2.5, 97.5]) | ||
| print(f"Epidemic Duration: Median={epidemic_duration_median:.4f}, 95% interval=({epidemic_duration_interval[0]:.4f}, {epidemic_duration_interval[1]:.4f})") | ||
|
|
||
| #number of parameter changes | ||
|
||
| num_param_changes_median = np.median(num_param_changes) | ||
| num_param_changes_interval = np.percentile(num_param_changes, [2.5, 97.5]) | ||
| print(f"Number of Parameter Changes: Median={num_param_changes_median:.4f}, 95% interval=({num_param_changes_interval[0]:.4f}, {num_param_changes_interval[1]:.4f})") | ||
|
|
||
| #change times | ||
|
||
| all_change_times_median = np.median(all_change_times) | ||
| all_change_times_interval = np.percentile(all_change_times, [2.5, 97.5]) | ||
| print(f"Change Times: Median={all_change_times_median:.4f}, 95% interval=({all_change_times_interval[0]:.4f}, {all_change_times_interval[1]:.4f})") | ||
|
|
||
|
|
||
|
|
||
|
|
||
| #OUTPUT | ||
|
||
| print("\nOutput Distributions:") | ||
|
|
||
| #prevalence | ||
|
||
| prevalence_median = np.median(present_prevalence) | ||
| prevalence_interval = np.percentile(present_prevalence, [2.5, 97.5]) | ||
| print(f"Present Prevalence: Median={prevalence_median:.4f}, 95% interval=({prevalence_interval[0]:.4f}, {prevalence_interval[1]:.4f})") | ||
|
|
||
|
|
||
| #cumulative incidence | ||
|
||
| cumulative_median = np.median(present_cumulative) | ||
| cumulative_interval = np.percentile(present_cumulative, [2.5, 97.5]) | ||
| print(f"Present Cumulative Incidence: Median={cumulative_median:.4f}, 95% interval=({cumulative_interval[0]:.4f}, {cumulative_interval[1]:.4f})") | ||
| print("\n") | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| <beast version='2.0' namespace='beast.base.inference.parameter:beast.base.inference:remaster'> | ||
| <run spec="Simulator" nSims="1"> | ||
|
|
||
| <simulate id="tree" | ||
| spec="SimulatedTree" | ||
| maxRetries="20"> | ||
| <trajectory id="trajectory" | ||
| spec="StochasticTrajectory" | ||
| maxTime="50" | ||
| endsWhen="X==500000" | ||
| mustHave="Psi<1001 && Psi>1 && X>1"> | ||
|
|
||
| <population id="X" spec="RealParameter" value="1" /> | ||
| <samplePopulation id="Psi" spec="RealParameter" value="0" /> | ||
| <population id="Mu" spec="RealParameter" value="0" /> | ||
|
|
||
| <reaction id="lambdaReaction" spec="Reaction" rate="0.2" >X -> 2X</reaction> | ||
| <reaction id="rhoReaction" spec="PunctualReaction" p="0.01" times="50">X -> Psi</reaction> | ||
| <reaction id="muReaction" spec="Reaction" rate="0.046">X -> Mu</reaction> | ||
|
|
||
| </trajectory> | ||
| </simulate> | ||
|
|
||
| <logger fileName="out/demo.tree" mode="tree"> | ||
| <log spec="TypedTreeLogger" tree="@tree" /> | ||
| </logger> | ||
|
|
||
| <logger fileName="out/demo.log"> | ||
| <log idref="trajectory" /> | ||
| </logger> | ||
| </run> | ||
| </beast> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment style is inconsistent with the rest of the codebase. Other comments in this file have a space after the hash character. This comment should follow the same convention.