Skip to content

CSV ADRIOs: error when using state_abbrev and the data contains invalid/unsupported states #280

@JavadocMD

Description

@JavadocMD

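CSV ADRIOs raise an error when the key column contains unexpected geographic data, even when the offending rows fall outside the requested scope. As the traceback below shows, the key column is parsed before the data is filtered to the requested geography, so a single unrecognized code aborts the whole load.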

from pathlib import Path
from datetime import date
import numpy as np
import pandas as pd

from epymorph.adrio import csv
from epymorph.geography.us_census import StateScope


# Seeded generator so the reproduction is deterministic from run to run.
rng = np.random.default_rng(42)

# One row per node, all sharing a single date. Only "FL" and "GA" are requested
# in the scope further down; "AZ" is a real state outside that scope, while
# "XX", "YY", and "ZZ" are not state abbreviations at all.
data_df = (
    pd.DataFrame(
        {
            "Date": date(2015, 1, 1),
            "Node": ["AZ", "FL", "GA", "XX", "YY", "ZZ"],  # <-- UNSUPPORTED STATE CODES!
            "Value": rng.integers(0, 100_000, size=6),
        }
    ).sample(frac=1, random_state=rng)  # scramble order
)

# write to a file so we can load it back with CSV ADRIO
tmp_file = Path("scratch/population.csv")
# `to_csv` does not create missing directories, so make sure the target folder
# exists; otherwise the script fails here, before it ever reaches the ADRIO.
tmp_file.parent.mkdir(parents=True, exist_ok=True)
data_df.to_csv(
    tmp_file,
    header=False,
    index=False,
)

# The file was written without header or index, so columns are addressed by
# position: 0 = Date, 1 = Node (the geo key), 2 = Value (the data).
(
    csv.CSVFileN(
        file_path=tmp_file,
        dtype=np.int64,
        key_col=1,
        key_type="state_abbrev",
        data_col=2,
    )
    .with_context(scope=StateScope.in_states(["FL", "GA"], year=2015))
    .evaluate()  # <-- RAISES ERROR
)

Error:

---------------------------------------------------------------------------
ADRIOProcessingError                      Traceback (most recent call last)
Cell In[6], line 36
     11 data_df = (
     12     pd.DataFrame(
     13         {
   (...)
     18     ).sample(frac=1, random_state=rng)  # scramble order
     19 )
     21 data_df.to_csv(
     22     tmp_file := Path("scratch/population.csv"),
     23     header=False,
     24     index=False,
     25 )
     27 (
     28     csv.CSVFileN(
     29         file_path=tmp_file,
     30         dtype=np.int64,
     31         key_col=1,
     32         key_type="state_abbrev",
     33         data_col=2,
     34     )
     35     .with_context(scope=StateScope.in_states(["FL", "GA"], year=2015))
---> 36     .evaluate()
     37 )

File ~/Workspaces/epymorph/epymorph/simulation.py:592, in SimulationFunctionClass.__new__.<locals>.evaluate(self, *args, **kwargs)
    590 @functools.wraps(orig_evaluate)
    591 def evaluate(self, *args, **kwargs):
--> 592     result = orig_evaluate(self, *args, **kwargs)
    593     self.validate(result)
    594     return result

File ~/Workspaces/epymorph/epymorph/adrio/adrio.py:523, in ADRIO.evaluate(self)
    514 def evaluate(self) -> NDArray[ResultT]:
    515     """
    516     Evaluate the ADRIO in the current context.
    517 
   (...)
    521         The result value.
    522     """
--> 523     return self.inspect().result

File ~/Workspaces/epymorph/epymorph/adrio/csv.py:180, in CSVFileN.inspect(self)
    170     kwarg_options["skiprows"] = self.skiprows
    171 csv_df = read_csv(
    172     self.file_path,
    173     header=None,
   (...)
    177     **kwarg_options,
    178 )
--> 180 work_df = self.parse_geo_key(csv_df, ["key"])
    181 work_df = work_df.sort_values(by="key")
    182 # Filter to requested geo

File ~/Workspaces/epymorph/epymorph/adrio/csv.py:70, in _CSVMixin.parse_geo_key(self, csv_df, key_cols)
     68 result_df = csv_df.copy()
     69 for j in key_cols:
---> 70     result_df[j] = map_keys(csv_df[j])
     71 return result_df

File ~/Workspaces/epymorph/epymorph/adrio/csv.py:83, in _CSVMixin.parse_state_abbrev(self, keys)
     81 if new_keys.isna().any():
     82     err = "Invalid state code in key column."
---> 83     raise ADRIOProcessingError(self, self.context, err)
     84 return new_keys

ADRIOProcessingError: Error processing epymorph.adrio.csv.CSVFileN: Invalid state code in key column.

(While we're at it, it would be a good idea to test more broadly for other issues around imperfect data.)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions