From b2df9ab6fcd49db9dd2e08d99a6358ca5d1c89fb Mon Sep 17 00:00:00 2001 From: Kelsey Smuczynski Date: Tue, 17 Mar 2026 11:14:06 -0600 Subject: [PATCH 1/2] fix(well-inventory): normalize blank contact values and add missing organization terms - Treat blank contact organization and well status values as missing instead of persisting empty strings - Prevent foreign key failures caused by empty organization and status lexicon references during import - Add newly encountered organization terms to the lexicon so valid contact records can persist successfully --- core/lexicon.json | 105 +++++++++++++++++++++++++++++++++++ schemas/well_inventory.py | 17 +++--- tests/test_well_inventory.py | 18 ++++++ 3 files changed, 132 insertions(+), 8 deletions(-) diff --git a/core/lexicon.json b/core/lexicon.json index 2b786190..82942c48 100644 --- a/core/lexicon.json +++ b/core/lexicon.json @@ -4452,6 +4452,111 @@ "term": "Zamora Accounting Services", "definition": "Zamora Accounting Services" }, + { + "categories": [ + "organization" + ], + "term": "Agua Sana MWCD", + "definition": "Agua Sana MWCD" + }, + { + "categories": [ + "organization" + ], + "term": "Canada Los Alamos MDWCA", + "definition": "Canada Los Alamos MDWCA" + }, + { + "categories": [ + "organization" + ], + "term": "Canjilon Mutual Domestic Water System", + "definition": "Canjilon Mutual Domestic Water System" + }, + { + "categories": [ + "organization" + ], + "term": "Cebolla Mutual Domestic", + "definition": "Cebolla Mutual Domestic" + }, + { + "categories": [ + "organization" + ], + "term": "Chihuahuan Desert Rangeland Research Center (CDRRC)", + "definition": "Chihuahuan Desert Rangeland Research Center (CDRRC)" + }, + { + "categories": [ + "organization" + ], + "term": "East Rio Arriba SWCD", + "definition": "East Rio Arriba SWCD" + }, + { + "categories": [ + "organization" + ], + "term": "El Prado Municipal Water", + "definition": "El Prado Municipal Water" + }, + { + "categories": [ + "organization" + ], + 
"term": "Hachita Mutual Domestic", + "definition": "Hachita Mutual Domestic" + }, + { + "categories": [ + "organization" + ], + "term": "Jornada Experimental Range (JER)", + "definition": "Jornada Experimental Range (JER)" + }, + { + "categories": [ + "organization" + ], + "term": "La Canada Way HOA", + "definition": "La Canada Way HOA" + }, + { + "categories": [ + "organization" + ], + "term": "Los Ojos Mutual Domestic", + "definition": "Los Ojos Mutual Domestic" + }, + { + "categories": [ + "organization" + ], + "term": "The Nature Conservancy (TNC)", + "definition": "The Nature Conservancy (TNC)" + }, + { + "categories": [ + "organization" + ], + "term": "Smith Ranch LLC", + "definition": "Smith Ranch LLC" + }, + { + "categories": [ + "organization" + ], + "term": "Zia Pueblo", + "definition": "Zia Pueblo" + }, + { + "categories": [ + "organization" + ], + "term": "Our Lady of Guadalupe (OLG)", + "definition": "Our Lady of Guadalupe (OLG)" + }, { "categories": [ "organization" diff --git a/schemas/well_inventory.py b/schemas/well_inventory.py index 75d3edc3..49c1fbb7 100644 --- a/schemas/well_inventory.py +++ b/schemas/well_inventory.py @@ -190,6 +190,7 @@ def validator(v): EmailField: TypeAlias = Annotated[ Optional[str], BeforeValidator(email_validator_function) ] +OptionalText: TypeAlias = Annotated[Optional[str], BeforeValidator(empty_str_to_none)] OptionalBool: TypeAlias = Annotated[Optional[bool], BeforeValidator(empty_str_to_none)] OptionalPastOrTodayDateTime: TypeAlias = Annotated[ @@ -215,18 +216,18 @@ class WellInventoryRow(BaseModel): utm_zone: str # Optional fields - site_name: Optional[str] = None + site_name: OptionalText = None elevation_ft: OptionalFloat = None elevation_method: Annotated[ Optional[ElevationMethod], BeforeValidator(flexible_lexicon_validator(ElevationMethod)), ] = None measuring_point_height_ft: OptionalFloat = None - field_staff_2: Optional[str] = None - field_staff_3: Optional[str] = None + field_staff_2: OptionalText = None + 
field_staff_3: OptionalText = None - contact_1_name: Optional[str] = None - contact_1_organization: Optional[str] = None + contact_1_name: OptionalText = None + contact_1_organization: OptionalText = None contact_1_role: ContactRoleField = None contact_1_type: ContactTypeField = None contact_1_phone_1: PhoneField = None @@ -250,8 +251,8 @@ class WellInventoryRow(BaseModel): contact_1_address_2_city: Optional[str] = None contact_1_address_2_postal_code: PostalCodeField = None - contact_2_name: Optional[str] = None - contact_2_organization: Optional[str] = None + contact_2_name: OptionalText = None + contact_2_organization: OptionalText = None contact_2_role: ContactRoleField = None contact_2_type: ContactTypeField = None contact_2_phone_1: PhoneField = None @@ -296,7 +297,7 @@ class WellInventoryRow(BaseModel): measuring_point_description: Optional[str] = None well_purpose: WellPurposeField = None well_purpose_2: WellPurposeField = None - well_status: Optional[str] = Field( + well_status: OptionalText = Field( default=None, validation_alias=AliasChoices("well_status", "well_hole_status"), ) diff --git a/tests/test_well_inventory.py b/tests/test_well_inventory.py index dd7ccdcc..d9d814d9 100644 --- a/tests/test_well_inventory.py +++ b/tests/test_well_inventory.py @@ -1095,6 +1095,24 @@ def test_blank_depth_to_water_is_treated_as_none(self): ) assert model.depth_to_water_ft is None + def test_blank_contact_organization_is_treated_as_none(self): + row = _minimal_valid_well_inventory_row() + row["contact_1_name"] = "Test Contact" + row["contact_1_organization"] = "" + + model = WellInventoryRow(**row) + + assert model.contact_1_name == "Test Contact" + assert model.contact_1_organization is None + + def test_blank_well_status_is_treated_as_none(self): + row = _minimal_valid_well_inventory_row() + row["well_hole_status"] = "" + + model = WellInventoryRow(**row) + + assert model.well_status is None + def test_canonical_name_wins_when_alias_and_canonical_present(self): row 
= _minimal_valid_well_inventory_row() row["well_status"] = "Abandoned" From 27e06954875c47bf660a7ca75e53f6d0beae2930 Mon Sep 17 00:00:00 2001 From: Kelsey Smuczynski Date: Tue, 17 Mar 2026 12:05:56 -0600 Subject: [PATCH 2/2] fix(well-inventory): make CSV import reruns idempotent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Detect previously imported well inventory rows before inserting related records - Skip recreating field activity water-level samples and observations when the same row is reprocessed - Return serializable existing-row results so CLI reruns report cleanly instead of crashing --- services/well_inventory_csv.py | 42 ++++++++++++++++++++++++++++++++++ tests/test_well_inventory.py | 32 ++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/services/well_inventory_csv.py b/services/well_inventory_csv.py index e5ab09ea..9c62a620 100644 --- a/services/well_inventory_csv.py +++ b/services/well_inventory_csv.py @@ -438,6 +438,44 @@ def _generate_autogen_well_id(session, prefix: str, offset: int = 0) -> tuple[st return f"{prefix}{new_number:04d}", new_number +def _find_existing_imported_well( + session: Session, model: WellInventoryRow +) -> Thing | None: + if model.measurement_date_time is not None: + sample_name = ( + f"{model.well_name_point_id}-WL-" + f"{model.measurement_date_time.strftime('%Y%m%d%H%M')}" + ) + existing = session.scalars( + select(Thing) + .join(FieldEvent, FieldEvent.thing_id == Thing.id) + .join(FieldActivity, FieldActivity.field_event_id == FieldEvent.id) + .join(Sample, Sample.field_activity_id == FieldActivity.id) + .where( + Thing.name == model.well_name_point_id, + Thing.thing_type == "water well", + FieldActivity.activity_type == "well inventory", + Sample.sample_name == sample_name, + ) + .order_by(Thing.id.asc()) + ).first() + if existing is not None: + return existing + + return session.scalars( + select(Thing) + 
.join(FieldEvent, FieldEvent.thing_id == Thing.id) + .join(FieldActivity, FieldActivity.field_event_id == FieldEvent.id) + .where( + Thing.name == model.well_name_point_id, + Thing.thing_type == "water well", + FieldEvent.event_date == model.date_time, + FieldActivity.activity_type == "well inventory", + ) + .order_by(Thing.id.asc()) + ).first() + + def _make_row_models(rows, session): models = [] validation_errors = [] @@ -542,6 +580,10 @@ def _add_csv_row(session: Session, group: Group, model: WellInventoryRow, user) name = model.well_name_point_id date_time = model.date_time + existing_well = _find_existing_imported_well(session, model) + if existing_well is not None: + return existing_well.name + # -------------------- # Location and associated tables # -------------------- diff --git a/tests/test_well_inventory.py b/tests/test_well_inventory.py index d9d814d9..b9dab138 100644 --- a/tests/test_well_inventory.py +++ b/tests/test_well_inventory.py @@ -494,6 +494,38 @@ def test_blank_depth_to_water_still_creates_water_level_records(tmp_path): assert observations[0].measuring_point_height == 2.5 +def test_rerunning_same_well_inventory_csv_is_idempotent(): + """Re-importing the same CSV should not create duplicate well inventory records.""" + file = Path("tests/features/data/well-inventory-valid.csv") + assert file.exists(), "Test data file does not exist." 
+ + first = well_inventory_csv(file) + assert first.exit_code == 0, first.stderr + + with session_ctx() as session: + counts_after_first = { + "things": session.query(Thing).count(), + "field_events": session.query(FieldEvent).count(), + "field_activities": session.query(FieldActivity).count(), + "samples": session.query(Sample).count(), + "observations": session.query(Observation).count(), + } + + second = well_inventory_csv(file) + assert second.exit_code == 0, second.stderr + + with session_ctx() as session: + counts_after_second = { + "things": session.query(Thing).count(), + "field_events": session.query(FieldEvent).count(), + "field_activities": session.query(FieldActivity).count(), + "samples": session.query(Sample).count(), + "observations": session.query(Observation).count(), + } + + assert counts_after_second == counts_after_first + + # ============================================================================= # Error Handling Tests - Cover API error paths # =============================================================================