|
41 | 41 | filter_by_valid_measuring_agency, |
42 | 42 | lexicon_mapper, |
43 | 43 | get_transfers_data_path, |
| 44 | + replace_nans, |
44 | 45 | ) |
45 | 46 |
|
46 | 47 | # constants |
@@ -94,6 +95,7 @@ def __init__(self, *args, **kw): |
94 | 95 |
|
95 | 96 | def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]: |
96 | 97 | input_df = read_csv(self.source_table, dtype={"MeasuredBy": str}) |
| 98 | + input_df = replace_nans(input_df) |
97 | 99 | cleaned_df = filter_to_valid_point_ids(input_df) |
98 | 100 | cleaned_df = filter_by_valid_measuring_agency(cleaned_df) |
99 | 101 | logger.info( |
@@ -314,7 +316,29 @@ def _make_observation( |
314 | 316 | if dq_raw and pd.notna(dq_raw): |
315 | 317 | dq_code = str(dq_raw).strip() |
316 | 318 | try: |
317 | | - data_quality = lexicon_mapper.map_value(f"LU_DataQuality:{dq_code}") |
| 319 | + mapped_quality = lexicon_mapper.map_value(f"LU_DataQuality:{dq_code}") |
| 320 | + if pd.isna(mapped_quality): |
| 321 | + logger.warning( |
| 322 | + "%sMapped DataQuality '%s' to NaN for WaterLevels record %s; " |
| 323 | + "storing NULL to satisfy FK", |
| 324 | + SPACE_6, |
| 325 | + dq_code, |
| 326 | + row.GlobalID, |
| 327 | + ) |
| 328 | + data_quality = None |
| 329 | + else: |
| 330 | + mapped_quality_text = str(mapped_quality).strip() |
| 331 | + if mapped_quality_text and mapped_quality_text.lower() != "nan": |
| 332 | + data_quality = mapped_quality_text |
| 333 | + else: |
| 334 | + logger.warning( |
| 335 | + "%sMapped DataQuality '%s' to empty value for WaterLevels " |
| 336 | + "record %s; storing NULL to satisfy FK", |
| 337 | + SPACE_6, |
| 338 | + dq_code, |
| 339 | + row.GlobalID, |
| 340 | + ) |
| 341 | + data_quality = None |
318 | 342 | except KeyError: |
319 | 343 | logger.warning( |
320 | 344 | f"{SPACE_6}Unknown DataQuality code '{dq_code}' for WaterLevels record {row.GlobalID}" |
|
0 commit comments