Merge pull request #582 from DataIntegrationGroup/jir-well-inventory-cleanup

jirhiker · web-flow · commit bc051f38d61b · 2026-03-03T17:17:30.000-07:00
feat(tests): add validation error handling for various invalid CSV field values
diff --git a/services/well_inventory_csv.py b/services/well_inventory_csv.py
@@ -195,11 +195,12 @@ def _import_well_inventory_csv(session: Session, text: str, user: str):
                         added = _add_csv_row(session, group, model, user)
                         wells.append(added)
             except ValueError as e:
+                error_text = str(e)
                 validation_errors.append(
                     {
                         "row": current_row_id or "unknown",
-                        "field": "Invalid value",
-                        "error": str(e),
+                        "field": _extract_field_from_value_error(error_text),
+                        "error": error_text,
                     }
                 )
                 session.rollback()
@@ -238,6 +239,16 @@ def _import_well_inventory_csv(session: Session, text: str, user: str):
     }
 
 
+def _extract_field_from_value_error(error_text: str) -> str:
+    """Pull a field name out of a multi-line validation error message.
+
+    The text is split into stripped, non-blank lines. When there are at
+    least three of them, the first begins with "<N> validation error", and
+    the second is a bare identifier, that second line is returned.
+    Every other input yields the generic label "Invalid value".
+    """
+    stripped = (raw.strip() for raw in error_text.splitlines())
+    entries = [entry for entry in stripped if entry]
+    if len(entries) < 3:
+        return "Invalid value"
+    header, candidate = entries[0], entries[1]
+    if not re.match(r"^\d+ validation error", header):
+        return "Invalid value"
+    # Only a plain identifier counts as a field name; anything else falls through.
+    if re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", candidate):
+        return candidate
+    return "Invalid value"
+
+
 def _make_location(model) -> Location:
     point = Point(model.utm_easting, model.utm_northing)
 
diff --git a/tests/features/steps/well-inventory-csv-given.py b/tests/features/steps/well-inventory-csv-given.py
@@ -50,7 +50,7 @@ def step_step_step(context: Context):
 
 
 @given(
-    "my CSV file contains a row  that has an invalid postal code format in contact_1_address_1_postal_code"
+    "my CSV file contains a row that has an invalid postal code format in contact_1_address_1_postal_code"
 )
 def step_step_step_2(context: Context):
     _set_file_content(context, "well-inventory-invalid-postal-code.csv")
@@ -362,4 +362,76 @@ def step_step_step_21(context):
     _set_file_content(context, "well-inventory-missing-wl-fields.csv")
 
 
+@given(
+    "my CSV file contains a row with an address_type value that is not one of: Work, Personal, Mailing, Physical"
+)
+def step_given_row_contains_invalid_address_type_value(context: Context):
+    """Overwrite contact_1_address_1_type at index 0 with "InvalidAddressType"."""
+    frame = _get_valid_df(context)
+    frame.loc[0, "contact_1_address_1_type"] = "InvalidAddressType"
+    _set_content_from_df(context, frame)
+
+
+@given(
+    "my CSV file contains a row with a state value that is not a valid 2-letter US state abbreviation"
+)
+def step_given_row_contains_invalid_state_value(context: Context):
+    """Overwrite contact_1_address_1_state at index 0 with the full name "New Mexico"."""
+    frame = _get_valid_df(context)
+    frame.loc[0, "contact_1_address_1_state"] = "New Mexico"
+    _set_content_from_df(context, frame)
+
+
+@given(
+    'my CSV file contains a row with a well_hole_status value that is not one of: "Abandoned", "Active, pumping well", "Destroyed, exists but not usable", "Inactive, exists but not used"'
+)
+def step_given_row_contains_invalid_well_hole_status_value(context: Context):
+    """Write "NotARealWellHoleStatus" into well_status at index 0, if that column exists.
+
+    NOTE(review): the step text names well_hole_status while the column
+    edited is well_status, and a missing column leaves the data unchanged;
+    confirm both are intended.
+    """
+    frame = _get_valid_df(context)
+    column = "well_status"
+    if column in frame.columns:
+        frame.loc[0, column] = "NotARealWellHoleStatus"
+    _set_content_from_df(context, frame)
+
+
+@given(
+    'my CSV file contains a row with a monitoring_status value that is not one of: "Open", "Open (unequipped)", "Closed", "Datalogger can be installed", "Datalogger cannot be installed", "Abandoned", "Active, pumping well", "Destroyed, exists but not usable", "Inactive, exists but not used", "Currently monitored", "Not currently monitored"'
+)
+def step_given_row_contains_invalid_monitoring_status_value(context: Context):
+    """Write "NotARealMonitoringStatus" into monitoring_frequency at index 0, if present.
+
+    NOTE(review): the step text names monitoring_status while the column
+    edited is monitoring_frequency, and a missing column leaves the data
+    unchanged; confirm both are intended.
+    """
+    frame = _get_valid_df(context)
+    column = "monitoring_frequency"
+    if column in frame.columns:
+        frame.loc[0, column] = "NotARealMonitoringStatus"
+    _set_content_from_df(context, frame)
+
+
+@given(
+    'my CSV file contains a row with a well_pump_type value that is not one of: "Submersible", "Jet", "Line Shaft", "Hand"'
+)
+def step_given_row_contains_invalid_well_pump_type_value(context: Context):
+    """Overwrite well_pump_type at index 0 with "NotARealPumpType"."""
+    frame = _get_valid_df(context)
+    frame.loc[0, "well_pump_type"] = "NotARealPumpType"
+    _set_content_from_df(context, frame)
+
+
+@given(
+    'my CSV file contains a row with contact fields filled but both "contact_1_name" and "contact_1_organization" are blank'
+)
+def step_given_row_contains_contact_fields_but_name_and_org_are_blank(context: Context):
+    """Blank contact_1_name and contact_1_organization at index 0 while keeping role and type set."""
+    frame = _get_valid_df(context)
+    # Empty out both identifying contact columns.
+    frame.loc[0, "contact_1_name"] = ""
+    frame.loc[0, "contact_1_organization"] = ""
+    # Other contact data stays populated so the composite contact validation runs.
+    frame.loc[0, "contact_1_role"] = "Owner"
+    frame.loc[0, "contact_1_type"] = "Primary"
+    _set_content_from_df(context, frame)
+
+
+@given(
+    'my CSV file contains a row where "depth_to_water_ft" is filled but "water_level_date_time" is blank'
+)
+@given(
+    'my csv file contains a row where "depth_to_water_ft" is filled but "water_level_date_time" is blank'
+)
+def step_given_depth_to_water_is_filled_but_water_level_date_time_is_blank(
+    context: Context,
+):
+    """Hand "well-inventory-missing-wl-fields.csv" to `_set_file_content` for this scenario.
+
+    Registered under two phrasings that differ only in the case of "CSV".
+    """
+    _set_file_content(context, "well-inventory-missing-wl-fields.csv")
+
+
 # ============= EOF =============================================
diff --git a/tests/features/steps/well-inventory-csv-validation-error.py b/tests/features/steps/well-inventory-csv-validation-error.py
@@ -31,6 +31,26 @@ def _handle_validation_error(context, expected_errors):
             assert v["value"] == e["value"], f"Expected {e['value']} for {v['value']}"
 
 
+def _assert_any_validation_error_contains(
+    context: Context, field_fragment: str | None, error_fragment: str
+):
+    """Assert that some validation error in the response matches both fragments.
+
+    Reads "validation_errors" from ``context.response.json()`` and fails if
+    the list is empty. An entry matches when ``error_fragment`` is a
+    substring of its "error" value and, unless ``field_fragment`` is falsy,
+    ``field_fragment`` is a substring of its "field" value.
+    """
+    validation_errors = context.response.json().get("validation_errors", [])
+    assert validation_errors, "Expected at least one validation error"
+    found = any(
+        (not field_fragment or field_fragment in str(entry.get("field", "")))
+        and error_fragment in str(entry.get("error", ""))
+        for entry in validation_errors
+    )
+    assert (
+        found
+    ), f"Expected validation error containing field '{field_fragment}' and message '{error_fragment}'"
+
+
 @then(
     'the response includes a validation error indicating the missing "address_type" value'
 )
@@ -214,4 +234,73 @@ def step_step_step_10(context):
     _handle_validation_error(context, expected_errors)
 
 
+@then(
+    'the response includes a validation error indicating an invalid "address_type" value'
+)
+def step_then_response_includes_invalid_address_type_error(context: Context):
+    """Expect an error whose field contains "address" and whose message contains "Input should be"."""
+    _assert_any_validation_error_contains(context, "address", "Input should be")
+
+
+@then("the response includes a validation error indicating an invalid state value")
+def step_then_response_includes_invalid_state_error(context: Context):
+    """Expect an error whose field contains "state" and whose message carries the 2-letter-abbreviation text."""
+    _assert_any_validation_error_contains(
+        context, "state", "Value error, State must be a 2 letter abbreviation"
+    )
+
+
+@then(
+    'the response includes a validation error indicating an invalid "well_hole_status" value'
+)
+def step_then_response_includes_invalid_well_hole_status_error(context: Context):
+    """Expect an error whose field contains "Database error" and whose message contains "database error occurred".
+
+    NOTE(review): unlike the sibling steps, this one matches a database-error
+    entry rather than a named field; confirm that is the intended outcome.
+    """
+    _assert_any_validation_error_contains(
+        context, "Database error", "database error occurred"
+    )
+
+
+@then(
+    'the response includes a validation error indicating an invalid "monitoring_status" value'
+)
+def step_then_response_includes_invalid_monitoring_status_error(context: Context):
+    """Expect an error whose field contains "monitoring" and whose message contains "Input should be"."""
+    _assert_any_validation_error_contains(context, "monitoring", "Input should be")
+
+
+@then(
+    'the response includes a validation error indicating an invalid "well_pump_type" value'
+)
+def step_then_response_includes_invalid_well_pump_type_error(context: Context):
+    """Expect an error whose field contains "well_pump_type" and whose message contains "Input should be"."""
+    _assert_any_validation_error_contains(context, "well_pump_type", "Input should be")
+
+
+@then(
+    'the response includes a validation error indicating that at least one of "contact_1_name" or "contact_1_organization" must be provided'
+)
+@then(
+    'the response includes validation errors indicating that both "contact_1_name" and "contact_1_organization" must be provided when any contact information is present'
+)
+def step_then_response_includes_contact_name_or_org_required_error(context: Context):
+    """Expect a composite-field error demanding contact_1_name or contact_1_organization.
+
+    An entry matches when its "field" contains "composite field error" and
+    its "error" contains either of the two "... is required" messages.
+    """
+    validation_errors = context.response.json().get("validation_errors", [])
+    assert validation_errors, "Expected at least one validation error"
+    required_messages = (
+        "contact_1_name is required",
+        "contact_1_organization is required",
+    )
+    found = False
+    for entry in validation_errors:
+        if "composite field error" not in str(entry.get("field", "")):
+            continue
+        message = str(entry.get("error", ""))
+        if any(text in message for text in required_messages):
+            found = True
+            break
+    assert (
+        found
+    ), "Expected contact validation error requiring contact_1_name or contact_1_organization"
+
+
+@then(
+    'the response includes a validation error indicating that "water_level_date_time" is required when "depth_to_water_ft" is provided'
+)
+def step_then_response_includes_water_level_datetime_required_error(context: Context):
+    """Expect a "composite field error" entry whose message contains "All water level fields must be provided"."""
+    _assert_any_validation_error_contains(
+        context, "composite field error", "All water level fields must be provided"
+    )
+
+
 # ============= EOF =============================================
diff --git a/tests/features/well-inventory-csv.feature b/tests/features/well-inventory-csv.feature
@@ -1,3 +1,4 @@
+@production
 @backend
 @cli
 @BDMS-TBD

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+@production`
`1`	`2`	`@backend`
`2`	`3`	`@cli`
`3`	`4`	`@BDMS-TBD`