Skip to content

Commit 04c943e

Browse files
committed
fix: add new well status term and normalize completion date handling
1 parent 38771fc commit 04c943e

4 files changed

Lines changed: 89 additions & 120 deletions

File tree

core/lexicon.json

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2260,7 +2260,12 @@
22602260
"categories": ["status_value"],
22612261
"term": "Open",
22622262
"definition": "The well is open."
2263-
},
2263+
},
2264+
{
2265+
"categories": ["status_value"],
2266+
"term": "Open (unequipped)",
2267+
"definition": "The well is open and unequipped."
2268+
},
22642269
{
22652270
"categories": ["status_value"],
22662271
"term": "Closed",

tests/test_transfer_legacy_dates.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import pytest
2929

3030
from db import Sample
31+
from transfers.well_transfer import _normalize_completion_date
3132
from transfers.util import make_location
3233
from transfers.waterlevels_transfer import WaterLevelTransferer
3334

@@ -207,6 +208,18 @@ def test_make_observation_maps_data_quality():
207208
assert observation.nma_data_quality == "Mapped Quality"
208209

209210

211+
def test_normalize_completion_date_drops_time_from_datetime():
212+
value = datetime.datetime(2024, 7, 3, 14, 15, 16)
213+
assert _normalize_completion_date(value) == datetime.date(2024, 7, 3)
214+
215+
216+
def test_normalize_completion_date_drops_time_from_timestamp_and_string():
217+
ts_value = pd.Timestamp("2021-05-06 23:59:00")
218+
str_value = "2021-05-06 23:59:00.000"
219+
assert _normalize_completion_date(ts_value) == datetime.date(2021, 5, 6)
220+
assert _normalize_completion_date(str_value) == datetime.date(2021, 5, 6)
221+
222+
210223
def test_get_dt_utc_respects_time_datum():
211224
transfer = WaterLevelTransferer.__new__(WaterLevelTransferer)
212225
transfer.errors = []

transfers/util.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,38 @@
5757
}
5858

5959

60+
DEFINED_RECORDING_INTERVALS = {
61+
"SA-0174": (1, "hour"),
62+
"SO-0140": (0.25, "hour"),
63+
"SO-0145": (0.25, "hour"),
64+
"SO-0146": (0.25, "hour"),
65+
"SO-0148": (0.25, "hour"),
66+
"SO-0160": (0.25, "hour"),
67+
"SO-0163": (0.25, "hour"),
68+
"SO-0165": (0.25, "hour"),
69+
"SO-0166": (0.25, "hour"),
70+
"SO-0175": (0.25, "hour"),
71+
"SO-0177": (0.25, "hour"),
72+
"SO-0189": (0.25, "hour"),
73+
"SO-0191": (0.25, "hour"),
74+
"SO-0194": (0.25, "hour"),
75+
"SO-0200": (0.25, "hour"),
76+
"SO-0204": (0.25, "hour"),
77+
"SO-0224": (0.25, "hour"),
78+
"SO-0238": (0.25, "hour"),
79+
"SO-0247": (0.25, "hour"),
80+
"SO-0249": (0.25, "hour"),
81+
"SO-0261": (0.25, "hour"),
82+
"SM-0055": (6.0, "hour"),
83+
"SM-0259": (12, "hour"),
84+
"HS-038": (12, "hour"),
85+
"EB-220": (12, "hour"),
86+
"SO-0144": (0.25, "hour"),
87+
"SO-0142": (0.25, "hour"),
88+
"SO-0190": (0.25, "hour"),
89+
}
90+
91+
6092
class MeasuringPointEstimator:
6193
def __init__(self):
6294
df = read_csv("WaterLevels")
@@ -123,6 +155,12 @@ def estimate_measuring_point_height(
123155
return mphs, mph_descs, start_dates, end_dates
124156

125157

158+
def _get_defined_recording_interval(pointid: str) -> tuple[int, str] | None:
159+
if pointid in DEFINED_RECORDING_INTERVALS:
160+
return DEFINED_RECORDING_INTERVALS[pointid]
161+
return None
162+
163+
126164
class SensorParameterEstimator:
127165
def __init__(self, sensor_type: str):
128166
if sensor_type == "Pressure Transducer":
@@ -156,7 +194,16 @@ def estimate_recording_interval(
156194
installation_date: datetime = None,
157195
removal_date: datetime = None,
158196
) -> tuple[int | None, str | None, str | None]:
197+
"""
198+
return estimated recording interval, unit, and error message if applicable
199+
"""
159200
point_id = record.PointID
201+
202+
# get statically defined recording interval provided by Ethan
203+
ri = _get_defined_recording_interval(point_id)
204+
if ri is not None:
205+
return ri[0], ri[1], None
206+
160207
cdf = self._get_values(point_id)
161208
if len(cdf) == 0:
162209
return None, None, f"No measurements found for PointID: {point_id}"

transfers/well_transfer.py

Lines changed: 23 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import time
1919
import traceback
2020
from concurrent.futures import ThreadPoolExecutor, as_completed
21-
from datetime import datetime, UTC
21+
from datetime import date, datetime, UTC
2222
from zoneinfo import ZoneInfo
2323

2424
import pandas as pd
@@ -95,6 +95,27 @@
9595
]
9696

9797

98+
def _normalize_completion_date(value):
99+
if value is None or pd.isna(value):
100+
return None
101+
102+
if isinstance(value, pd.Timestamp):
103+
return value.date()
104+
105+
if isinstance(value, datetime):
106+
return value.date()
107+
108+
if isinstance(value, date):
109+
return value
110+
111+
if isinstance(value, str):
112+
parsed = pd.to_datetime(value.strip(), errors="coerce")
113+
if not pd.isna(parsed):
114+
return parsed.date()
115+
116+
return value
117+
118+
98119
class WellTransferer(Transferer):
99120
source_table = "WellData"
100121

@@ -279,123 +300,6 @@ def _get_dfs(self):
279300
cleaned_df = cleaned_df[cleaned_df["PointID"].isin(self.pointids)]
280301
return input_df, cleaned_df
281302

282-
# def _step(self, session: Session, df: pd.DataFrame, i: int, row: pd.Series):
283-
#
284-
# try:
285-
# first_visit_date = get_first_visit_date(row)
286-
# well_purposes = (
287-
# [] if isna(row.CurrentUse) else self._extract_well_purposes(row)
288-
# )
289-
# well_casing_materials = (
290-
# [] if isna(row.CasingDescription) else extract_casing_materials(row)
291-
# )
292-
# well_pump_type = extract_well_pump_type(row)
293-
#
294-
# wcm = None
295-
# if notna(row.ConstructionMethod):
296-
# wcm = self._get_lexicon_value(
297-
# row, f"LU_ConstructionMethod:{row.ConstructionMethod}", "Unknown"
298-
# )
299-
#
300-
# mpheight = row.MPHeight
301-
# mpheight_description = row.MeasuringPoint
302-
# if mpheight is None:
303-
# mphs = self._measuring_point_estimator.estimate_measuring_point_height(
304-
# row
305-
# )
306-
# if mphs:
307-
# try:
308-
# mpheight = mphs[0][0]
309-
# mpheight_description = mphs[1][0]
310-
# except IndexError:
311-
# if self.verbose:
312-
# logger.warning(
313-
# f"Measuring point height estimation failed for well {row.PointID}, {mphs}"
314-
# )
315-
#
316-
# data = CreateWell(
317-
# location_id=0,
318-
# name=row.PointID,
319-
# first_visit_date=first_visit_date,
320-
# hole_depth=row.HoleDepth,
321-
# well_depth=row.WellDepth,
322-
# well_casing_diameter=(
323-
# row.CasingDiameter * 12 if row.CasingDiameter else None
324-
# ),
325-
# well_casing_depth=row.CasingDepth,
326-
# release_status="public" if row.PublicRelease else "private",
327-
# measuring_point_height=mpheight,
328-
# measuring_point_description=mpheight_description,
329-
# notes=(
330-
# [{"content": row.Notes, "note_type": "General"}]
331-
# if row.Notes
332-
# else []
333-
# ),
334-
# well_completion_date=row.CompletionDate,
335-
# well_driller_name=row.DrillerName,
336-
# well_construction_method=wcm,
337-
# well_pump_type=well_pump_type,
338-
# )
339-
#
340-
# CreateWell.model_validate(data)
341-
# except ValidationError as e:
342-
# self._capture_validation_error(row.PointID, e)
343-
# return
344-
#
345-
# well = None
346-
# try:
347-
# well_data = data.model_dump(exclude=EXCLUDED_FIELDS)
348-
# well_data["thing_type"] = "water well"
349-
# well_data["nma_pk_welldata"] = row.WellID
350-
# well_data["nma_pk_location"] = row.LocationId
351-
#
352-
# well = Thing(**well_data)
353-
# session.add(well)
354-
#
355-
# if well_purposes:
356-
# for wp in well_purposes:
357-
# # TODO: add validation logic here
358-
# if wp in WellPurposeEnum:
359-
# wp_obj = WellPurpose(thing=well, purpose=wp)
360-
# session.add(wp_obj)
361-
# else:
362-
# logger.critical(f"{well.name}. Invalid well purpose: {wp}")
363-
#
364-
# if well_casing_materials:
365-
# for wcm in well_casing_materials:
366-
# # TODO: add validation logic here
367-
# if wcm in WellCasingMaterialEnum:
368-
# wcm_obj = WellCasingMaterial(thing=well, material=wcm)
369-
# session.add(wcm_obj)
370-
# else:
371-
# logger.critical(
372-
# f"{well.name}. Invalid well casing material: {wcm}"
373-
# )
374-
# except Exception as e:
375-
# if well is not None:
376-
# session.expunge(well)
377-
#
378-
# self._capture_error(row.PointID, str(e), "UnknownField")
379-
#
380-
# logger.critical(f"Error creating well for {row.PointID}: {e}")
381-
# return
382-
#
383-
# try:
384-
# location, elevation_method, notes = make_location(
385-
# row, self._cached_elevations
386-
# )
387-
# session.add(location)
388-
# # session.flush()
389-
# self._added_locations[row.PointID] = (elevation_method, notes)
390-
# except Exception as e:
391-
# import traceback
392-
#
393-
# traceback.print_exc()
394-
# self._capture_error(row.PointID, str(e), str(e), "Location")
395-
# logger.critical(f"Error making location for {row.PointID}: {e}")
396-
#
397-
# return
398-
#
399303
def _extract_well_purposes(self, row) -> list[str]:
400304
cu = row.CurrentUse
401305

@@ -659,7 +563,7 @@ def _build_well_payload(self, row) -> CreateWell | None:
659563
if row.Notes
660564
else []
661565
),
662-
well_completion_date=row.CompletionDate,
566+
well_completion_date=_normalize_completion_date(row.CompletionDate),
663567
well_driller_name=row.DrillerName,
664568
well_construction_method=wcm,
665569
well_pump_type=well_pump_type,

0 commit comments

Comments
 (0)