Skip to content

Commit 6599c42

Browse files
authored
Merge pull request #521 from DataIntegrationGroup/well-inventory-csv-fix
Improve well-inventory CLI feedback and validation handling; add real-user CSV feature coverage
2 parents 229e624 + f8ceb2c commit 6599c42

33 files changed

Lines changed: 1494 additions & 370 deletions

cli/cli.py

Lines changed: 436 additions & 7 deletions
Large diffs are not rendered by default.

cli/service_adapter.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,14 @@
2121
from dataclasses import dataclass
2222
from pathlib import Path
2323

24-
from fastapi import UploadFile
25-
from sqlalchemy import select
26-
2724
from db import Thing, Asset
2825
from db.engine import session_ctx
26+
from fastapi import UploadFile
2927
from services.asset_helper import upload_and_associate
3028
from services.gcs_helper import get_storage_bucket, make_blob_name_and_uri
3129
from services.water_level_csv import bulk_upload_water_levels
3230
from services.well_inventory_csv import import_well_inventory_csv
31+
from sqlalchemy import select
3332

3433

3534
@dataclass
@@ -73,7 +72,7 @@ def water_levels_csv(source_file: Path | str, *, pretty_json: bool = False):
7372
result = bulk_upload_water_levels(source_file, pretty_json=pretty_json)
7473
if result.stderr:
7574
print(result.stderr, file=sys.stderr)
76-
return result.exit_code
75+
return result
7776

7877

7978
def associate_assets(source_directory: Path | str) -> list[str]:

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ dependencies = [
6969
"pyasn1==0.6.2",
7070
"pyasn1-modules==0.4.2",
7171
"pycparser==2.23",
72-
"pydantic==2.11.7",
73-
"pydantic-core==2.33.2",
72+
"pydantic==2.12.5",
73+
"pydantic-core==2.41.5",
7474
"pygments==2.19.2",
7575
"pyjwt==2.11.0",
7676
"pyproj==3.7.2",

requirements.txt

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,32 +1152,57 @@ pycparser==2.23 \
11521152
# via
11531153
# cffi
11541154
# ocotilloapi
1155-
pydantic==2.11.7 \
1156-
--hash=sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db \
1157-
--hash=sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b
1155+
pydantic==2.12.5 \
1156+
--hash=sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49 \
1157+
--hash=sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d
11581158
# via
11591159
# fastapi
11601160
# fastapi-pagination
11611161
# ocotilloapi
1162-
pydantic-core==2.33.2 \
1163-
--hash=sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56 \
1164-
--hash=sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef \
1165-
--hash=sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a \
1166-
--hash=sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f \
1167-
--hash=sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916 \
1168-
--hash=sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a \
1169-
--hash=sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849 \
1170-
--hash=sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e \
1171-
--hash=sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac \
1172-
--hash=sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162 \
1173-
--hash=sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc \
1174-
--hash=sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5 \
1175-
--hash=sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d \
1176-
--hash=sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9 \
1177-
--hash=sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9 \
1178-
--hash=sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5 \
1179-
--hash=sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9 \
1180-
--hash=sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6
1162+
pydantic-core==2.41.5 \
1163+
--hash=sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90 \
1164+
--hash=sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740 \
1165+
--hash=sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33 \
1166+
--hash=sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e \
1167+
--hash=sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0 \
1168+
--hash=sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34 \
1169+
--hash=sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14 \
1170+
--hash=sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375 \
1171+
--hash=sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf \
1172+
--hash=sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1 \
1173+
--hash=sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553 \
1174+
--hash=sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470 \
1175+
--hash=sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2 \
1176+
--hash=sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660 \
1177+
--hash=sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c \
1178+
--hash=sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008 \
1179+
--hash=sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a \
1180+
--hash=sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd \
1181+
--hash=sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586 \
1182+
--hash=sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869 \
1183+
--hash=sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66 \
1184+
--hash=sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d \
1185+
--hash=sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07 \
1186+
--hash=sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36 \
1187+
--hash=sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e \
1188+
--hash=sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612 \
1189+
--hash=sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11 \
1190+
--hash=sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c \
1191+
--hash=sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a \
1192+
--hash=sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf \
1193+
--hash=sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858 \
1194+
--hash=sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9 \
1195+
--hash=sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2 \
1196+
--hash=sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3 \
1197+
--hash=sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23 \
1198+
--hash=sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa \
1199+
--hash=sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3 \
1200+
--hash=sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d \
1201+
--hash=sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9 \
1202+
--hash=sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9 \
1203+
--hash=sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e \
1204+
--hash=sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb \
1205+
--hash=sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0
11811206
# via
11821207
# ocotilloapi
11831208
# pydantic

schemas/__init__.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from datetime import datetime, timezone, date
1717
from typing import Annotated
1818

19+
from core.enums import ReleaseStatus
1920
from pydantic import (
2021
BaseModel,
2122
ConfigDict,
@@ -26,8 +27,6 @@
2627
from pydantic.json_schema import JsonSchemaValue
2728
from pydantic_core import core_schema
2829

29-
from core.enums import ReleaseStatus
30-
3130
DT_FMT = "%Y-%m-%dT%H:%M:%SZ"
3231

3332

@@ -53,7 +52,12 @@ class BaseUpdateModel(BaseCreateModel):
5352
release_status: ReleaseStatus | None = None
5453

5554

56-
def past_or_today_validator(value: date | datetime) -> date | datetime:
55+
def past_or_today_validator(
56+
value: date | datetime | None,
57+
) -> date | datetime | None:
58+
if value is None:
59+
return None
60+
5761
if isinstance(value, datetime):
5862
if value.tzinfo is None:
5963
if value > datetime.now():

schemas/well_inventory.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,6 @@
1919

2020
import phonenumbers
2121
import utm
22-
from pydantic import (
23-
BaseModel,
24-
model_validator,
25-
BeforeValidator,
26-
validate_email,
27-
AfterValidator,
28-
field_validator,
29-
)
30-
3122
from core.constants import STATE_CODES
3223
from core.enums import (
3324
ElevationMethod,
@@ -39,6 +30,15 @@
3930
WellPurpose as WellPurposeEnum,
4031
MonitoringFrequency,
4132
)
33+
from phonenumbers import NumberParseException
34+
from pydantic import (
35+
BaseModel,
36+
model_validator,
37+
BeforeValidator,
38+
validate_email,
39+
AfterValidator,
40+
field_validator,
41+
)
4242
from schemas import past_or_today_validator, PastOrTodayDatetime
4343
from services.util import convert_dt_tz_naive_to_tz_aware
4444

@@ -96,14 +96,21 @@ def phone_validator(phone_number_str):
9696

9797
phone_number_str = phone_number_str.strip()
9898
if phone_number_str:
99-
parsed_number = phonenumbers.parse(phone_number_str, "US")
99+
try:
100+
parsed_number = phonenumbers.parse(phone_number_str, "US")
101+
except NumberParseException as e:
102+
raise ValueError(f"Invalid phone number. {phone_number_str}") from e
103+
100104
if phonenumbers.is_valid_number(parsed_number):
101105
formatted_number = phonenumbers.format_number(
102106
parsed_number, phonenumbers.PhoneNumberFormat.E164
103107
)
104108
return formatted_number
105-
else:
106-
raise ValueError(f"Invalid phone number. {phone_number_str}")
109+
110+
raise ValueError(f"Invalid phone number. {phone_number_str}")
111+
112+
# Explicitly return None for empty strings after stripping.
113+
return None
107114

108115

109116
def email_validator_function(email_str):

services/water_level_csv.py

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,19 @@
1818
import csv
1919
import io
2020
import json
21+
import re
2122
import uuid
2223
from dataclasses import dataclass
2324
from datetime import datetime
2425
from pathlib import Path
2526
from typing import Any, BinaryIO, Iterable, List
2627

28+
from db import Thing, FieldEvent, FieldActivity, Sample, Observation, Parameter
29+
from db.engine import session_ctx
2730
from pydantic import BaseModel, ConfigDict, ValidationError, field_validator
2831
from sqlalchemy import select
2932
from sqlalchemy.orm import Session
3033

31-
from db import Thing, FieldEvent, FieldActivity, Sample, Observation, Parameter
32-
from db.engine import session_ctx
33-
3434
# Required CSV columns for the bulk upload
3535
REQUIRED_FIELDS: List[str] = [
3636
"field_staff",
@@ -45,6 +45,11 @@
4545
"data_quality",
4646
]
4747

48+
HEADER_ALIASES: dict[str, str] = {
49+
"measuring_person": "sampler",
50+
"water_level_date_time": "measurement_date_time",
51+
}
52+
4853
# Allow-list values for validation. These represent early MVP lexicon values.
4954
VALID_LEVEL_STATUSES = {"stable", "rising", "falling"}
5055
VALID_DATA_QUALITIES = {"approved", "provisional"}
@@ -173,7 +178,7 @@ def bulk_upload_water_levels(
173178
headers, csv_rows = _read_csv(source_file)
174179
except FileNotFoundError:
175180
msg = f"File not found: {source_file}"
176-
payload = _build_payload([], [], 0, 0, [msg])
181+
payload = _build_payload([], [], 0, 0, 1, errors=[msg])
177182
stdout = _serialize_payload(payload, pretty_json)
178183
return BulkUploadResult(exit_code=1, stdout=stdout, stderr=msg, payload=payload)
179184

@@ -205,7 +210,7 @@ def bulk_upload_water_levels(
205210
summary = {
206211
"total_rows_processed": len(csv_rows),
207212
"total_rows_imported": len(created_rows) if not validation_errors else 0,
208-
"validation_errors_or_warnings": len(validation_errors),
213+
"validation_errors_or_warnings": _count_rows_with_issues(validation_errors),
209214
}
210215
payload = _build_payload(
211216
csv_rows, created_rows, **summary, errors=validation_errors
@@ -222,6 +227,22 @@ def _serialize_payload(payload: dict[str, Any], pretty: bool) -> str:
222227
return json.dumps(payload, indent=2 if pretty else None)
223228

224229

230+
def _count_rows_with_issues(errors: list[str]) -> int:
231+
"""
232+
Count unique row numbers represented in validation errors.
233+
Falls back to total error count when row numbers are unavailable.
234+
"""
235+
row_ids: set[int] = set()
236+
for err in errors:
237+
match = re.match(r"^Row\s+(\d+):", str(err))
238+
if match:
239+
row_ids.add(int(match.group(1)))
240+
241+
if row_ids:
242+
return len(row_ids)
243+
return len(errors)
244+
245+
225246
def _build_payload(
226247
csv_rows: Iterable[dict[str, Any]],
227248
created_rows: list[dict[str, Any]],
@@ -261,14 +282,23 @@ def _read_csv(
261282

262283
stream = io.StringIO(text)
263284
reader = csv.DictReader(stream)
264-
rows = [
265-
{
266-
k.strip(): (v.strip() if isinstance(v, str) else v or "")
267-
for k, v in row.items()
268-
}
269-
for row in reader
285+
rows: list[dict[str, str]] = []
286+
for row in reader:
287+
normalized_row: dict[str, str] = {}
288+
for k, v in row.items():
289+
if k is None:
290+
continue
291+
key = HEADER_ALIASES.get(k.strip(), k.strip())
292+
value = v.strip() if isinstance(v, str) else v or ""
293+
# If both alias and canonical header are present, preserve first non-empty value.
294+
if key in normalized_row and normalized_row[key] and not value:
295+
continue
296+
normalized_row[key] = value
297+
rows.append(normalized_row)
298+
299+
headers = [
300+
HEADER_ALIASES.get(h.strip(), h.strip()) for h in (reader.fieldnames or [])
270301
]
271-
headers = [h.strip() for h in reader.fieldnames or []]
272302
return headers, rows
273303

274304

0 commit comments

Comments (0)