|
41 | 41 | filter_by_valid_measuring_agency, |
42 | 42 | lexicon_mapper, |
43 | 43 | get_transfers_data_path, |
| 44 | + replace_nans, |
44 | 45 | ) |
45 | 46 |
|
46 | 47 | # constants |
@@ -94,6 +95,7 @@ def __init__(self, *args, **kw): |
94 | 95 |
|
95 | 96 | def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]: |
96 | 97 | input_df = read_csv(self.source_table, dtype={"MeasuredBy": str}) |
| 98 | + input_df = replace_nans(input_df) |
97 | 99 | cleaned_df = filter_to_valid_point_ids(input_df) |
98 | 100 | cleaned_df = filter_by_valid_measuring_agency(cleaned_df) |
99 | 101 | logger.info( |
@@ -314,11 +316,48 @@ def _make_observation( |
314 | 316 | if dq_raw and pd.notna(dq_raw): |
315 | 317 | dq_code = str(dq_raw).strip() |
316 | 318 | try: |
317 | | - data_quality = lexicon_mapper.map_value(f"LU_DataQuality:{dq_code}") |
| 319 | + mapped_quality = lexicon_mapper.map_value(f"LU_DataQuality:{dq_code}") |
| 320 | + if pd.isna(mapped_quality): |
| 321 | + logger.warning( |
| 322 | + "%sMapped DataQuality '%s' to NaN for WaterLevels record %s; " |
| 323 | + "storing NULL to satisfy FK", |
| 324 | + SPACE_6, |
| 325 | + dq_code, |
| 326 | + row.GlobalID, |
| 327 | + ) |
| 328 | + self._capture_error( |
| 329 | + row.PointID, |
| 330 | + f"Mapped DataQuality '{dq_code}' to NaN; stored NULL", |
| 331 | + "DataQuality", |
| 332 | + ) |
| 333 | + data_quality = None |
| 334 | + else: |
| 335 | + mapped_quality_text = str(mapped_quality).strip() |
| 336 | + if mapped_quality_text and mapped_quality_text.lower() != "nan": |
| 337 | + data_quality = mapped_quality_text |
| 338 | + else: |
| 339 | + logger.warning( |
| 340 | + "%sMapped DataQuality '%s' to empty value for WaterLevels " |
| 341 | + "record %s; storing NULL to satisfy FK", |
| 342 | + SPACE_6, |
| 343 | + dq_code, |
| 344 | + row.GlobalID, |
| 345 | + ) |
| 346 | + self._capture_error( |
| 347 | + row.PointID, |
| 348 | + f"Mapped DataQuality '{dq_code}' to empty value; stored NULL", |
| 349 | + "DataQuality", |
| 350 | + ) |
| 351 | + data_quality = None |
318 | 352 | except KeyError: |
319 | 353 | logger.warning( |
320 | 354 | f"{SPACE_6}Unknown DataQuality code '{dq_code}' for WaterLevels record {row.GlobalID}" |
321 | 355 | ) |
| 356 | + self._capture_error( |
| 357 | + row.PointID, |
| 358 | + f"Unknown DataQuality code '{dq_code}'", |
| 359 | + "DataQuality", |
| 360 | + ) |
322 | 361 |
|
323 | 362 | # TODO: after sensors have been added to the database update sensor_id (or sensor) for waterlevels that come from db sensors (like e probes?) |
324 | 363 | observation = Observation( |
|
0 commit comments