From caa1ad47a5976ac23354768791ee0124bcda97f6 Mon Sep 17 00:00:00 2001 From: jakeross Date: Sat, 14 Feb 2026 20:30:34 -0700 Subject: [PATCH 1/2] chore: enhance data quality mapping and handling in water levels transfer --- transfers/waterlevels_transfer.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index dedd72a9..31de0387 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -41,6 +41,7 @@ filter_by_valid_measuring_agency, lexicon_mapper, get_transfers_data_path, + replace_nans, ) # constants @@ -94,6 +95,7 @@ def __init__(self, *args, **kw): def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]: input_df = read_csv(self.source_table, dtype={"MeasuredBy": str}) + input_df = replace_nans(input_df) cleaned_df = filter_to_valid_point_ids(input_df) cleaned_df = filter_by_valid_measuring_agency(cleaned_df) logger.info( @@ -314,7 +316,29 @@ def _make_observation( if dq_raw and pd.notna(dq_raw): dq_code = str(dq_raw).strip() try: - data_quality = lexicon_mapper.map_value(f"LU_DataQuality:{dq_code}") + mapped_quality = lexicon_mapper.map_value(f"LU_DataQuality:{dq_code}") + if pd.isna(mapped_quality): + logger.warning( + "%sMapped DataQuality '%s' to NaN for WaterLevels record %s; " + "storing NULL to satisfy FK", + SPACE_6, + dq_code, + row.GlobalID, + ) + data_quality = None + else: + mapped_quality_text = str(mapped_quality).strip() + if mapped_quality_text and mapped_quality_text.lower() != "nan": + data_quality = mapped_quality_text + else: + logger.warning( + "%sMapped DataQuality '%s' to empty value for WaterLevels " + "record %s; storing NULL to satisfy FK", + SPACE_6, + dq_code, + row.GlobalID, + ) + data_quality = None except KeyError: logger.warning( f"{SPACE_6}Unknown DataQuality code '{dq_code}' for WaterLevels record {row.GlobalID}" From 60ab1675882eff4c73e119f7a54c729b4368b149 Mon Sep 17 00:00:00 2001 From: jakeross Date: Sat, 14 Feb 2026 20:33:34 -0700 Subject: [PATCH 2/2] chore: enhance error capturing for DataQuality mapping in water levels transfer --- transfers/waterlevels_transfer.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py index 31de0387..43b66020 100644 --- a/transfers/waterlevels_transfer.py +++ b/transfers/waterlevels_transfer.py @@ -325,6 +325,11 @@ def _make_observation( dq_code, row.GlobalID, ) + self._capture_error( + row.PointID, + f"Mapped DataQuality '{dq_code}' to NaN; stored NULL", + "DataQuality", + ) data_quality = None else: mapped_quality_text = str(mapped_quality).strip() @@ -338,11 +343,21 @@ def _make_observation( dq_code, row.GlobalID, ) + self._capture_error( + row.PointID, + f"Mapped DataQuality '{dq_code}' to empty value; stored NULL", + "DataQuality", + ) data_quality = None except KeyError: logger.warning( f"{SPACE_6}Unknown DataQuality code '{dq_code}' for WaterLevels record {row.GlobalID}" ) + self._capture_error( + row.PointID, + f"Unknown DataQuality code '{dq_code}'", + "DataQuality", + ) # TODO: after sensors have been added to the database update sensor_id (or sensor) for waterlevels that come from db sensors (like e probes?) observation = Observation(