Skip to content

Commit d03b553

Browse files
committed
chore: update CSV validation scenarios and improve auto-generation logic for well_name_point_id
1 parent 23ce228 commit d03b553

7 files changed

Lines changed: 277 additions & 163 deletions

File tree

cli/cli.py

Lines changed: 76 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from collections import defaultdict
1717
from enum import Enum
1818
from pathlib import Path
19+
from textwrap import wrap
1920

2021
import typer
2122
from dotenv import load_dotenv
@@ -120,13 +121,20 @@ def _row_sort_key(row_value):
120121
except (TypeError, ValueError):
121122
return (1, str(row_value))
122123

123-
max_errors_to_show = 100
124+
max_errors_to_show = 1000
124125
shown = 0
126+
first_group = True
125127
for row in sorted(grouped_errors.keys(), key=_row_sort_key):
126128
if shown >= max_errors_to_show:
127129
break
128130

129131
row_errors = grouped_errors[row]
132+
if not first_group:
133+
typer.secho(
134+
" " + "-" * 56,
135+
fg=typer.colors.BRIGHT_BLACK,
136+
)
137+
first_group = False
130138
typer.secho(
131139
f" Row {row} ({len(row_errors)} issue{'s' if len(row_errors) != 1 else ''})",
132140
fg=typer.colors.CYAN,
@@ -138,11 +146,37 @@ def _row_sort_key(row_value):
138146
break
139147
field = err.get("field", "unknown")
140148
message = err.get("error") or err.get("msg") or "validation error"
141-
prefix = typer.style(" ! ", fg=typer.colors.BRIGHT_YELLOW)
142-
field_part = f"\033[1;38;5;208m{field}:\033[0m"
143-
message_part = typer.style(f" {message}", fg=typer.colors.BRIGHT_YELLOW)
144-
typer.echo(f"{prefix}{field_part}{message_part}")
149+
input_value = err.get("value")
150+
prefix_raw = " ! "
151+
field_raw = f"{field}:"
152+
msg_chunks = wrap(
153+
str(message),
154+
width=max(20, 200 - len(prefix_raw) - len(field_raw) - 1),
155+
) or [""]
156+
prefix = typer.style(prefix_raw, fg=typer.colors.BRIGHT_YELLOW)
157+
field_part = f"\033[1;38;5;208m{field_raw}\033[0m"
158+
first_msg_part = typer.style(
159+
msg_chunks[0], fg=typer.colors.BRIGHT_YELLOW
160+
)
161+
typer.echo(f"{prefix}{field_part} {first_msg_part}")
162+
msg_indent = " " * (len(prefix_raw) + len(field_raw) + 1)
163+
for chunk in msg_chunks[1:]:
164+
typer.secho(f"{msg_indent}{chunk}", fg=typer.colors.BRIGHT_YELLOW)
165+
if input_value is not None:
166+
input_prefix = " input="
167+
input_chunks = wrap(
168+
str(input_value), width=max(20, 200 - len(input_prefix))
169+
) or [""]
170+
typer.secho(
171+
f"{input_prefix}{input_chunks[0]}", fg=typer.colors.BRIGHT_WHITE
172+
)
173+
input_indent = " " * len(input_prefix)
174+
for chunk in input_chunks[1:]:
175+
typer.secho(
176+
f"{input_indent}{chunk}", fg=typer.colors.BRIGHT_WHITE
177+
)
145178
shown += 1
179+
typer.echo()
146180

147181
if len(validation_errors) > shown:
148182
typer.secho(
@@ -208,11 +242,18 @@ def _row_sort_key(row_value):
208242

209243
max_errors_to_show = 100
210244
shown = 0
245+
first_group = True
211246
for row in sorted(grouped_errors.keys(), key=_row_sort_key):
212247
if shown >= max_errors_to_show:
213248
break
214249

215250
row_errors = grouped_errors[row]
251+
if not first_group:
252+
typer.secho(
253+
" " + "-" * 56,
254+
fg=typer.colors.BRIGHT_BLACK,
255+
)
256+
first_group = False
216257
typer.secho(
217258
f" Row {row} ({len(row_errors)} issue{'s' if len(row_errors) != 1 else ''})",
218259
fg=typer.colors.CYAN,
@@ -224,13 +265,39 @@ def _row_sort_key(row_value):
224265
break
225266
field = err.get("field", "unknown")
226267
message = err.get("error") or err.get("msg") or "validation error"
227-
prefix = typer.style(" ! ", fg=typer.colors.BRIGHT_YELLOW)
268+
input_value = err.get("value")
269+
prefix_raw = " ! "
270+
field_raw = f"{field}:"
271+
msg_chunks = wrap(
272+
str(message),
273+
width=max(20, 200 - len(prefix_raw) - len(field_raw) - 1),
274+
) or [""]
275+
prefix = typer.style(prefix_raw, fg=typer.colors.BRIGHT_YELLOW)
228276
field_part = typer.style(
229-
f"{field}:", fg=typer.colors.BRIGHT_YELLOW, bold=True
277+
field_raw, fg=typer.colors.BRIGHT_YELLOW, bold=True
278+
)
279+
first_msg_part = typer.style(
280+
msg_chunks[0], fg=typer.colors.BRIGHT_YELLOW
230281
)
231-
message_part = typer.style(f" {message}", fg=typer.colors.BRIGHT_YELLOW)
232-
typer.echo(f"{prefix}{field_part}{message_part}")
282+
typer.echo(f"{prefix}{field_part} {first_msg_part}")
283+
msg_indent = " " * (len(prefix_raw) + len(field_raw) + 1)
284+
for chunk in msg_chunks[1:]:
285+
typer.secho(f"{msg_indent}{chunk}", fg=typer.colors.BRIGHT_YELLOW)
286+
if input_value is not None:
287+
input_prefix = " input="
288+
input_chunks = wrap(
289+
str(input_value), width=max(20, 200 - len(input_prefix))
290+
) or [""]
291+
typer.secho(
292+
f"{input_prefix}{input_chunks[0]}", fg=typer.colors.BRIGHT_WHITE
293+
)
294+
input_indent = " " * len(input_prefix)
295+
for chunk in input_chunks[1:]:
296+
typer.secho(
297+
f"{input_indent}{chunk}", fg=typer.colors.BRIGHT_WHITE
298+
)
233299
shown += 1
300+
typer.echo()
234301

235302
if len(validation_errors) > shown:
236303
typer.secho(

services/well_inventory_csv.py

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,6 @@
2323
from itertools import groupby
2424
from typing import Set
2525

26-
from pydantic import ValidationError
27-
from shapely import Point
28-
from sqlalchemy import select, and_
29-
from sqlalchemy.exc import DatabaseError
30-
from sqlalchemy.orm import Session
31-
from starlette.status import HTTP_400_BAD_REQUEST
32-
3326
from core.constants import SRID_UTM_ZONE_13N, SRID_UTM_ZONE_12N, SRID_WGS84
3427
from db import (
3528
Group,
@@ -43,14 +36,45 @@
4336
Thing,
4437
)
4538
from db.engine import session_ctx
39+
from pydantic import ValidationError
4640
from schemas.thing import CreateWell
4741
from schemas.well_inventory import WellInventoryRow
4842
from services.contact_helper import add_contact
4943
from services.exceptions_helper import PydanticStyleException
5044
from services.thing_helper import add_thing
5145
from services.util import transform_srid, convert_ft_to_m
46+
from shapely import Point
47+
from sqlalchemy import select, and_
48+
from sqlalchemy.exc import DatabaseError
49+
from sqlalchemy.orm import Session
50+
from starlette.status import HTTP_400_BAD_REQUEST
5251

53-
AUTOGEN_REGEX = re.compile(r"^[A-Za-z]{2}-$")
52+
AUTOGEN_DEFAULT_PREFIX = "NM-"
53+
AUTOGEN_PREFIX_REGEX = re.compile(r"^[A-Z]{2}-$")
54+
AUTOGEN_TOKEN_REGEX = re.compile(r"^(?P<prefix>[A-Z]{2,3})\s*-\s*(?:x{4}|X{4})$")
55+
56+
57+
def _extract_autogen_prefix(well_id: str) -> str | None:
    """
    Resolve the auto-generation prefix encoded in a ``well_name_point_id`` value.

    The incoming value is stripped of surrounding whitespace (a falsy value
    such as ``None`` is treated as an empty string) and then classified:

    - blank value: ``AUTOGEN_DEFAULT_PREFIX`` is returned
    - value matching ``AUTOGEN_PREFIX_REGEX`` (e.g. ``XY-``): the stripped
      value itself is returned
    - value matching ``AUTOGEN_TOKEN_REGEX`` (e.g. ``WL-XXXX``): the captured
      ``prefix`` group followed by ``-`` is returned

    Returns ``None`` when the value is not one of the placeholder forms, i.e.
    it should be treated as an explicit identifier by the caller.
    """
    candidate = well_id.strip() if well_id else ""

    # Nothing supplied at all: fall back to the module-wide default prefix.
    if not candidate:
        return AUTOGEN_DEFAULT_PREFIX

    # Bare prefix form is already in the shape callers expect.
    if AUTOGEN_PREFIX_REGEX.match(candidate) is not None:
        return candidate

    # Placeholder token form: keep only the letters and re-attach the dash,
    # which discards any whitespace that surrounded the dash in the input.
    token = AUTOGEN_TOKEN_REGEX.match(candidate)
    if token is None:
        return None
    return token.group("prefix") + "-"
5478

5579

5680
def import_well_inventory_csv(*args, **kw) -> dict:
@@ -127,6 +151,7 @@ def _import_well_inventory_csv(session: Session, text: str, user: str):
127151
"row": 0,
128152
"field": f"{duplicates}",
129153
"error": "Duplicate columns found",
154+
"value": duplicates,
130155
}
131156
]
132157

@@ -161,6 +186,7 @@ def _import_well_inventory_csv(session: Session, text: str, user: str):
161186
"row": current_row_id or "unknown",
162187
"field": "Invalid value",
163188
"error": str(e),
189+
"value": current_row_id,
164190
}
165191
)
166192
session.rollback()
@@ -174,6 +200,7 @@ def _import_well_inventory_csv(session: Session, text: str, user: str):
174200
"row": current_row_id or "unknown",
175201
"field": "Database error",
176202
"error": "A database error occurred while importing this row.",
203+
"value": current_row_id,
177204
}
178205
)
179206
session.rollback()
@@ -354,11 +381,14 @@ def _make_row_models(rows, session):
354381
raise ValueError("Duplicate header row")
355382

356383
well_id = row.get("well_name_point_id")
357-
if not well_id:
358-
raise ValueError("Field required")
359-
if AUTOGEN_REGEX.match(well_id):
360-
well_id, offset = _generate_autogen_well_id(session, well_id, offset)
384+
autogen_prefix = _extract_autogen_prefix(well_id)
385+
if autogen_prefix:
386+
well_id, offset = _generate_autogen_well_id(
387+
session, autogen_prefix, offset
388+
)
361389
row["well_name_point_id"] = well_id
390+
elif not well_id:
391+
raise ValueError("Field required")
362392

363393
if well_id in seen_ids:
364394
raise ValueError("Duplicate value for well_name_point_id")
@@ -394,8 +424,13 @@ def _make_row_models(rows, session):
394424
else:
395425
error_msg = "Invalid value"
396426

427+
if field == "header":
428+
value = ",".join(row.keys())
429+
else:
430+
value = row.get(field)
431+
397432
validation_errors.append(
398-
{"row": idx + 1, "field": field, "error": error_msg}
433+
{"row": idx + 1, "field": field, "error": error_msg, "value": value}
399434
)
400435
return models, validation_errors
401436

0 commit comments

Comments
 (0)