CSV ADRIOs raise an error when the key column contains unrecognized geographic codes, even when those rows fall outside the requested scope.
from pathlib import Path
from datetime import date
import numpy as np
import pandas as pd
from epymorph.adrio import csv
from epymorph.geography.us_census import StateScope
# Reproduction script: build a small CSV whose key column mixes valid state
# abbreviations with codes that are not real states, then load it through a
# CSV ADRIO whose scope only asks for FL and GA.
rng = np.random.default_rng(42)  # fixed seed so the generated data is repeatable

data_df = pd.DataFrame(
    {
        "Date": date(2015, 1, 1),  # scalar is broadcast to every row
        "Node": ["AZ", "FL", "GA", "XX", "YY", "ZZ"],  # <-- UNSUPPORTED STATE CODES!
        "Value": rng.integers(0, 100_000, size=6),
    }
).sample(frac=1, random_state=rng)  # scramble order

# write to a file so we can load it back with CSV ADRIO
tmp_file = Path("scratch/population.csv")
# Make sure the output directory exists; otherwise `to_csv` fails with an
# OSError before the ADRIO is ever exercised, hiding the error being reported.
tmp_file.parent.mkdir(parents=True, exist_ok=True)
data_df.to_csv(
    tmp_file,
    header=False,  # no header row: columns are addressed by position below
    index=False,
)

# Columns are written in the order Date, Node, Value.
# NOTE(review): key_col/data_col are presumably 0-based positions (1 = Node,
# 2 = Value) -- confirm against the CSVFileN documentation.
(
    csv.CSVFileN(
        file_path=tmp_file,
        dtype=np.int64,
        key_col=1,
        key_type="state_abbrev",
        data_col=2,
    )
    # Only FL and GA are requested; XX, YY and ZZ are not part of this scope.
    .with_context(scope=StateScope.in_states(["FL", "GA"], year=2015))
    .evaluate()  # <-- RAISES ERROR
)
Error:
---------------------------------------------------------------------------
ADRIOProcessingError Traceback (most recent call last)
Cell In[6], line 36
11 data_df = (
12 pd.DataFrame(
13 {
(...)
18 ).sample(frac=1, random_state=rng) # scramble order
19 )
21 data_df.to_csv(
22 tmp_file := Path("scratch/population.csv"),
23 header=False,
24 index=False,
25 )
27 (
28 csv.CSVFileN(
29 file_path=tmp_file,
30 dtype=np.int64,
31 key_col=1,
32 key_type="state_abbrev",
33 data_col=2,
34 )
35 .with_context(scope=StateScope.in_states(["FL", "GA"], year=2015))
---> 36 .evaluate()
37 )
File ~/Workspaces/epymorph/epymorph/simulation.py:592, in SimulationFunctionClass.__new__.<locals>.evaluate(self, *args, **kwargs)
590 @functools.wraps(orig_evaluate)
591 def evaluate(self, *args, **kwargs):
--> 592 result = orig_evaluate(self, *args, **kwargs)
593 self.validate(result)
594 return result
File ~/Workspaces/epymorph/epymorph/adrio/adrio.py:523, in ADRIO.evaluate(self)
514 def evaluate(self) -> NDArray[ResultT]:
515 """
516 Evaluate the ADRIO in the current context.
517
(...)
521 The result value.
522 """
--> 523 return self.inspect().result
File ~/Workspaces/epymorph/epymorph/adrio/csv.py:180, in CSVFileN.inspect(self)
170 kwarg_options["skiprows"] = self.skiprows
171 csv_df = read_csv(
172 self.file_path,
173 header=None,
(...)
177 **kwarg_options,
178 )
--> 180 work_df = self.parse_geo_key(csv_df, ["key"])
181 work_df = work_df.sort_values(by="key")
182 # Filter to requested geo
File ~/Workspaces/epymorph/epymorph/adrio/csv.py:70, in _CSVMixin.parse_geo_key(self, csv_df, key_cols)
68 result_df = csv_df.copy()
69 for j in key_cols:
---> 70 result_df[j] = map_keys(csv_df[j])
71 return result_df
File ~/Workspaces/epymorph/epymorph/adrio/csv.py:83, in _CSVMixin.parse_state_abbrev(self, keys)
81 if new_keys.isna().any():
82 err = "Invalid state code in key column."
---> 83 raise ADRIOProcessingError(self, self.context, err)
84 return new_keys
ADRIOProcessingError: Error processing epymorph.adrio.csv.CSVFileN: Invalid state code in key column.
(While we're at it, it would be a good idea to test more broadly for other issues around imperfect data.)
CSV ADRIOs raise an error when the key column contains unrecognized geographic codes, even when those rows fall outside the requested scope.
Error:
(While we're at it, it would be a good idea to test more broadly for other issues around imperfect data.)