From 1195f1a15adf15703c6a8a6ef857aaed8ca84952 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Thu, 19 Feb 2026 09:15:00 -0700
Subject: [PATCH 01/14] feat: add WellTransferResultsBuilder for summarizing
 well transfer outcomes

---
 transfers/well_transfer_results.py | 332 +++++++++++++++++++++++++++++
 1 file changed, 332 insertions(+)
 create mode 100644 transfers/well_transfer_results.py

diff --git a/transfers/well_transfer_results.py b/transfers/well_transfer_results.py
new file mode 100644
index 00000000..555ab9f7
--- /dev/null
+++ b/transfers/well_transfer_results.py
@@ -0,0 +1,332 @@
+# ===============================================================================
+# Copyright 2026 ross
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+from __future__ import annotations
+
+import argparse
+import csv
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+
+import pandas as pd
+from sqlalchemy import select
+
+from db import Thing
+from db.engine import session_ctx
+from transfers.util import (
+    filter_non_transferred_wells,
+    get_transferable_wells,
+    read_csv,
+    replace_nans,
+)
+
+
+@dataclass
+class ValidationIssue:
+    pointid: str
+    table: str
+    field: str
+    error: str
+
+
+@dataclass
+class WellTransferResults:
+    source_count: int
+    committed_count: int
+    transferred_count: int
+    skipped_by_decision: list[str]
+    validation_issue_wells: list[str]
+    validation_issues: list[ValidationIssue]
+    metrics_file: Path | None
+    skipped_by_existing_destination: list[str]
+
+
+class WellTransferResultsBuilder:
+    """Build well transfer outcome summaries by comparing source and destination."""
+
+    def __init__(
+        self,
+        pointids: list[str] | None = None,
+        metrics_file: Path | None = None,
+        output_dir: Path | None = None,
+    ):
+        self.pointids = set(pointids or [])
+        self.metrics_file = metrics_file
+        self.output_dir = output_dir or (Path("transfers") / "metrics")
+
+    def build(self) -> WellTransferResults:
+        source_df = self._load_source_wells()
+        committed_df = self._load_committed_wells(source_df)
+        committed_without_existing_df = filter_non_transferred_wells(committed_df)
+
+        source_ids = self._point_ids(source_df)
+        committed_ids = self._point_ids(committed_df)
+        committed_without_existing_ids = self._point_ids(committed_without_existing_df)
+        destination_ids = self._load_destination_ids()
+
+        skipped_by_decision = sorted(source_ids - committed_ids)
+        skipped_by_existing_destination = sorted(
+            committed_ids - committed_without_existing_ids
+        )
+        transferred_ids = committed_ids & destination_ids
+        missing_committed_ids = committed_ids - transferred_ids
+
+        validation_issues = self._load_well_validation_issues(
+            self._resolve_metrics_file()
+        )
+        validation_issue_ids = {
+            issue.pointid for issue in validation_issues if issue.pointid in source_ids
+        }
+        validation_issue_wells = sorted(validation_issue_ids & missing_committed_ids)
+
+        return WellTransferResults(
+            source_count=len(source_ids),
+            committed_count=len(committed_ids),
+            transferred_count=len(transferred_ids),
+            skipped_by_decision=skipped_by_decision,
+            validation_issue_wells=validation_issue_wells,
+            validation_issues=validation_issues,
+            metrics_file=self._resolve_metrics_file(),
+            skipped_by_existing_destination=skipped_by_existing_destination,
+        )
+
+    def write_reports(self, results: WellTransferResults) -> dict[str, Path]:
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        stamp = datetime.now().strftime("%Y-%m-%dT%H_%M_%S")
+
+        summary_path = self.output_dir / f"well_transfer_results_{stamp}.txt"
+        not_migrated_path = self.output_dir / f"wells_not_migrated_{stamp}.csv"
+        validation_path = self.output_dir / f"wells_validation_issues_{stamp}.csv"
+        already_exists_path = (
+            self.output_dir / f"wells_already_in_destination_{stamp}.csv"
+        )
+
+        summary_lines = [
+            "Well Transfer Results",
+            f"source_count={results.source_count}",
+            f"committed_count={results.committed_count}",
+            f"transferred_count={results.transferred_count}",
+            f"not_transferred_by_decision_count={len(results.skipped_by_decision)}",
+            f"not_transferred_validation_count={len(results.validation_issue_wells)}",
+            (
+                f"already_in_destination_count="
+                f"{len(results.skipped_by_existing_destination)}"
+            ),
+            (
+                f"metrics_file={results.metrics_file}"
+                if results.metrics_file
+                else "metrics_file=None"
+            ),
+        ]
+        summary_path.write_text("\n".join(summary_lines) + "\n")
+
+        self._write_pointids(not_migrated_path, "pointid", results.skipped_by_decision)
+        self._write_pointids(
+            already_exists_path, "pointid", results.skipped_by_existing_destination
+        )
+        self._write_validation_issues(
+            validation_path,
+            [
+                issue
+                for issue in results.validation_issues
+                if issue.pointid in set(results.validation_issue_wells)
+            ],
+        )
+
+        return {
+            "summary": summary_path,
+            "not_migrated": not_migrated_path,
+            "validation_issues": validation_path,
+            "already_in_destination": already_exists_path,
+        }
+
+    def _load_source_wells(self) -> pd.DataFrame:
+        wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
+        ldf = read_csv("Location")
+        ldf = ldf.drop(columns=["PointID", "SSMA_TimeStamp"], errors="ignore")
+        wdf = wdf.join(ldf.set_index("LocationId"), on="LocationId")
+
+        wdf = wdf[wdf["SiteType"] == "GW"]
+        wdf = wdf[wdf["Easting"].notna() & wdf["Northing"].notna()]
+        wdf = replace_nans(wdf)
+
+        if self.pointids:
+            wdf = wdf[wdf["PointID"].isin(self.pointids)]
+
+        return wdf
+
+    def _load_committed_wells(self, source_df: pd.DataFrame) -> pd.DataFrame:
+        committed_df = get_transferable_wells(source_df)
+        if self.pointids:
+            committed_df = committed_df[committed_df["PointID"].isin(self.pointids)]
+
+        duplicates = committed_df["PointID"].duplicated(keep=False)
+        if duplicates.any():
+            duplicate_ids = set(committed_df.loc[duplicates, "PointID"].tolist())
+            committed_df = committed_df[~committed_df["PointID"].isin(duplicate_ids)]
+
+        return committed_df.sort_values("PointID")
+
+    @staticmethod
+    def _point_ids(df: pd.DataFrame) -> set[str]:
+        if df.empty:
+            return set()
+        return set(df["PointID"].dropna().astype(str).unique().tolist())
+
+    def _load_destination_ids(self) -> set[str]:
+        with session_ctx() as session:
+            ids = session.execute(
+                select(Thing.name).where(Thing.thing_type == "water well")
+            ).scalars()
+            thing_names = {str(name) for name in ids if name}
+
+        if self.pointids:
+            thing_names = thing_names & self.pointids
+
+        return thing_names
+
+    def _resolve_metrics_file(self) -> Path | None:
+        if self.metrics_file:
+            return self.metrics_file
+
+        metrics_dir = Path("transfers") / "metrics"
+        candidates = sorted(
+            metrics_dir.glob("metrics_*.csv"), key=lambda p: p.stat().st_mtime
+        )
+        if not candidates:
+            return None
+        return candidates[-1]
+
+    @staticmethod
+    def _load_well_validation_issues(
+        metrics_file: Path | None,
+    ) -> list[ValidationIssue]:
+        if metrics_file is None or not metrics_file.exists():
+            return []
+
+        issues: list[ValidationIssue] = []
+        current_model: str | None = None
+        with metrics_file.open(newline="") as f:
+            reader = csv.reader(f, delimiter="|")
+            for row in reader:
+                if not row:
+                    continue
+
+                if len(row) >= 5 and row[0] not in {"model", "PointID"}:
+                    current_model = row[0]
+                    continue
+
+                if row[0] == "PointID":
+                    continue
+
+                if len(row) < 4:
+                    continue
+
+                if current_model != "Well":
+                    continue
+
+                pointid, table, field, error = row[0], row[1], row[2], row[3]
+                if table != "WellData":
+                    continue
+                if "Validation Error" not in error:
+                    continue
+                issues.append(
+                    ValidationIssue(
+                        pointid=pointid,
+                        table=table,
+                        field=field,
+                        error=error,
+                    )
+                )
+        return issues
+
+    @staticmethod
+    def _write_pointids(path: Path, header: str, pointids: list[str]) -> None:
+        with path.open("w", newline="") as f:
+            writer = csv.writer(f)
+            writer.writerow([header])
+            for pointid in pointids:
+                writer.writerow([pointid])
+
+    @staticmethod
+    def _write_validation_issues(path: Path, issues: list[ValidationIssue]) -> None:
+        with path.open("w", newline="") as f:
+            writer = csv.writer(f)
+            writer.writerow(["pointid", "table", "field", "error"])
+            for issue in issues:
+                writer.writerow([issue.pointid, issue.table, issue.field, issue.error])
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Build transfer results for wells.")
+    parser.add_argument(
+        "--metrics-file",
+        type=Path,
+        default=None,
+        help="Optional metrics CSV to use for validation issue extraction.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path("transfers") / "metrics",
+        help="Directory where result files are written.",
+    )
+    parser.add_argument(
+        "--pointids",
+        default=None,
+        help="Optional comma-separated list of PointID values to scope the report.",
+    )
+    return parser.parse_args()
+
+
+def main() -> None:
+    args = _parse_args()
+    pointids = args.pointids.split(",") if args.pointids else None
+    builder = WellTransferResultsBuilder(
+        pointids=pointids,
+        metrics_file=args.metrics_file,
+        output_dir=args.output_dir,
+    )
+    results = builder.build()
+    outputs = builder.write_reports(results)
+
+    print(f"Source wells: {results.source_count}")
+    print(f"Committed to migrate: {results.committed_count}")
+    print(f"Successfully transferred: {results.transferred_count}")
+    print(
+        f"Not transferred (decided not to migrate): {len(results.skipped_by_decision)}"
+    )
+    print(f"Not transferred (validation issues): {len(results.validation_issue_wells)}")
+    print(
+        f"Already in destination before migration filter: "
+        f"{len(results.skipped_by_existing_destination)}"
+    )
+    print(f"Summary file: {outputs['summary']}")
+    print(f"Not migrated wells file: {outputs['not_migrated']}")
+    print(f"Validation issue wells file: {outputs['validation_issues']}")
+    print(f"Already-in-destination wells file: {outputs['already_in_destination']}")
+
+    print("\nWells not transferred (decided not to migrate):")
+    for pointid in results.skipped_by_decision:
+        print(pointid)
+
+    print("\nWells not transferred (data validation issues):")
+    for pointid in results.validation_issue_wells:
+        print(pointid)
+
+
+if __name__ == "__main__":
+    main()

From e8d8bf35cdd937d97fea9dc4150c5d7d33a7ae16 Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Thu, 19 Feb 2026 17:04:48 -0700
Subject: [PATCH 02/14] feat: implement TransferResultsBuilder and comparison
 specs for transfer input validation

---
 transfers/transfer_results.py         |  51 +++
 transfers/transfer_results_builder.py | 153 ++++++++
 transfers/transfer_results_specs.py   | 485 ++++++++++++++++++++++++++
 transfers/transfer_results_types.py   |  81 +++++
 transfers/well_transfer_results.py    | 332 ------------------
 5 files changed, 770 insertions(+), 332 deletions(-)
 create mode 100644 transfers/transfer_results.py
 create mode 100644 transfers/transfer_results_builder.py
 create mode 100644 transfers/transfer_results_specs.py
 create mode 100644 transfers/transfer_results_types.py
 delete mode 100644 transfers/well_transfer_results.py

diff --git a/transfers/transfer_results.py b/transfers/transfer_results.py
new file mode 100644
index 00000000..0483e7fd
--- /dev/null
+++ b/transfers/transfer_results.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+from transfers.transfer_results_builder import TransferResultsBuilder
+from transfers.transfer_results_specs import (
+    TRANSFER_COMPARISON_SPECS,
+    TransferComparisonSpec,
+)
+from transfers.transfer_results_types import *  # noqa: F401,F403
+
+
+__all__ = [
+    "TransferResultsBuilder",
+    "TransferComparisonSpec",
+    "TRANSFER_COMPARISON_SPECS",
+]
+
+
+def _parse_args() -> argparse.Namespace:
+    """Parse the command-line options for the transfer comparison report."""
+    default_summary = Path("transfers") / "metrics" / "transfer_results_summary.md"
+    cli = argparse.ArgumentParser(
+        description="Compare each transfer input CSV against destination Postgres rows."
+    )
+    cli.add_argument(
+        "--summary-path",
+        type=Path,
+        default=default_summary,
+        help="Output path for markdown summary table.",
+    )
+
+    sample_help = "Max missing/extra key samples stored per transfer."
+    cli.add_argument("--sample-limit", type=int, default=25, help=sample_help)
+
+    return cli.parse_args()
+
+
+def main() -> None:
+    """CLI entry point: build every comparison and write the markdown summary."""
+    args = _parse_args()
+    comparison = TransferResultsBuilder(sample_limit=args.sample_limit).build()
+    args.summary_path.parent.mkdir(parents=True, exist_ok=True)
+    TransferResultsBuilder.write_summary(args.summary_path, comparison)
+    print(f"Wrote comparison summary: {args.summary_path}")
+    print(f"Transfer comparisons: {len(comparison.results)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/transfers/transfer_results_builder.py b/transfers/transfer_results_builder.py
new file mode 100644
index 00000000..a8e384a7
--- /dev/null
+++ b/transfers/transfer_results_builder.py
@@ -0,0 +1,153 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pandas as pd
+from sqlalchemy import select, func
+
+from db.engine import session_ctx
+from transfers.transfer_results_specs import (
+    TRANSFER_COMPARISON_SPECS,
+    TransferComparisonSpec,
+)
+from transfers.transfer_results_types import (
+    TransferComparisonResults,
+    TransferResult,
+)
+from transfers.util import read_csv
+
+
+def _normalize_key(value: Any) -> str | None:
+    """Return value as a stripped, lower-cased string; None for null/NA/blank."""
+    if value is None:
+        return None
+    try:
+        is_missing = bool(pd.isna(value))
+    except TypeError:
+        is_missing = False
+    if is_missing:
+        return None
+    text = str(value).strip().lower()
+    return text or None
+
+
+def _source_keys(df: pd.DataFrame, key_col: str) -> set[str]:
+    """Return the distinct normalized keys in df[key_col]; empty set if absent."""
+    if key_col not in df.columns:
+        return set()
+    normalized = map(_normalize_key, df[key_col].tolist())
+    distinct = set(normalized)
+    distinct.discard(None)  # null / blank values normalize to None
+    return distinct
+
+
+def _normalized_series(df: pd.DataFrame, key_col: str) -> pd.Series:
+    """Return the non-null normalized keys of df[key_col] as a string Series."""
+    blank = pd.Series([], dtype=object)
+    if key_col not in df.columns:
+        return blank
+    keys = df[key_col].map(_normalize_key).dropna()
+    return blank if keys.empty else keys.astype(str)
+
+
+class TransferResultsBuilder:
+    """Compare transfer input CSV keys to destination database keys per transfer."""
+
+    def __init__(self, sample_limit: int = 25):
+        self.sample_limit = max(sample_limit, 0)  # negative would slice from the end
+
+    def build(self) -> TransferComparisonResults:
+        """Run every registered comparison spec and key the results by name."""
+        results: dict[str, TransferResult] = {
+            spec.transfer_name: self._build_one(spec)
+            for spec in TRANSFER_COMPARISON_SPECS
+        }
+        stamp = pd.Timestamp.now("UTC").isoformat()  # utcnow() is deprecated
+        return TransferComparisonResults(generated_at=stamp, results=results)
+
+    def _build_one(self, spec: TransferComparisonSpec) -> TransferResult:
+        """Compare one spec's normalized source CSV keys with its destination keys."""
+        source_df = read_csv(spec.source_csv)
+        if spec.source_filter:
+            source_df = spec.source_filter(source_df)
+        source_series = _normalized_series(source_df, spec.source_key_column)
+        source_keys = set(source_series.unique().tolist())
+        source_keyed_row_count = int(source_series.shape[0])
+        source_duplicate_key_row_count = source_keyed_row_count - len(source_keys)
+        # Default to the filtered source row count; a spec may override it.
+        agreed_transfer_row_count = int(len(source_df))
+        if spec.agreed_row_counter is not None:
+            try:
+                agreed_transfer_row_count = int(spec.agreed_row_counter())
+            except Exception as exc:
+                # Best-effort: keep the source row count, but report the failure.
+                import logging
+
+                logging.getLogger(__name__).warning(
+                    "agreed_row_counter failed for %s: %r", spec.transfer_name, exc
+                )
+
+        model = spec.destination_model
+        key_col = getattr(model, spec.destination_key_column)
+        with session_ctx() as session:
+            key_sql = select(key_col).where(key_col.is_not(None))
+            count_sql = select(func.count()).select_from(model)
+
+            if spec.destination_where:
+                where_clause = spec.destination_where(model)
+                key_sql = key_sql.where(where_clause)
+                count_sql = count_sql.where(where_clause)
+
+            raw_dest_keys = session.execute(key_sql).scalars().all()
+            destination_row_count = int(session.execute(count_sql).scalar_one())
+
+        # Plain Python is enough here; no need to round-trip through a Series.
+        dest_norm = [k for k in map(_normalize_key, raw_dest_keys) if k is not None]
+        destination_keys = set(dest_norm)
+        destination_keyed_row_count = len(dest_norm)
+        destination_duplicate_key_row_count = len(dest_norm) - len(destination_keys)
+
+        # missing: keys only in the source; extra: keys only in the destination.
+        missing = sorted(source_keys - destination_keys)
+        extra = sorted(destination_keys - source_keys)
+
+        return spec.result_cls(
+            transfer_name=spec.transfer_name,
+            source_csv=spec.source_csv,
+            source_key_column=spec.source_key_column,
+            destination_model=model.__name__,
+            destination_key_column=spec.destination_key_column,
+            source_row_count=len(source_df),
+            agreed_transfer_row_count=agreed_transfer_row_count,
+            source_keyed_row_count=source_keyed_row_count,
+            source_key_count=len(source_keys),
+            source_duplicate_key_row_count=source_duplicate_key_row_count,
+            destination_row_count=destination_row_count,
+            destination_keyed_row_count=destination_keyed_row_count,
+            destination_key_count=len(destination_keys),
+            destination_duplicate_key_row_count=destination_duplicate_key_row_count,
+            matched_key_count=len(source_keys & destination_keys),
+            missing_in_destination_count=len(missing),
+            extra_in_destination_count=len(extra),
+            missing_in_destination_sample=missing[: self.sample_limit],
+            extra_in_destination_sample=extra[: self.sample_limit],
+        )
+
+    @staticmethod
+    def write_summary(path: Path, comparison: TransferComparisonResults) -> None:
+        """Write one markdown table row per transfer, sorted by name, to path."""
+        lines = [
+            f"generated_at={comparison.generated_at}",
+            "",
+            "| Transfer | Source CSV | Source Rows | Agreed Rows | Dest Model | Dest Rows | Missing Agreed | Matched | Missing | Extra |",
+            "|---|---|---:|---:|---|---:|---:|---:|---:|---:|",
+        ]
+        for name, r in sorted(comparison.results.items(), key=lambda kv: kv[0]):
+            missing_agreed = r.agreed_transfer_row_count - r.destination_row_count
+            lines.append(
+                f"| {name} | {r.source_csv} | {r.source_row_count} | {r.agreed_transfer_row_count} | "
+                f"{r.destination_model} | {r.destination_row_count} | {missing_agreed} | "
+                f"{r.matched_key_count} | {r.missing_in_destination_count} | {r.extra_in_destination_count} |"
+            )
+        path.write_text("\n".join(lines) + "\n", encoding="utf-8")
diff --git a/transfers/transfer_results_specs.py b/transfers/transfer_results_specs.py
new file mode 100644
index 00000000..f86e13b7
--- /dev/null
+++ b/transfers/transfer_results_specs.py
@@ -0,0 +1,485 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Callable
+
+import pandas as pd
+
+from transfers.associated_data import AssociatedDataTransferer
+from transfers.chemistry_sampleinfo import ChemistrySampleInfoTransferer
+from transfers.contact_transfer import ContactTransfer
+from transfers.field_parameters_transfer import FieldParametersTransferer
+from transfers.group_transfer import ProjectGroupTransferer
+from transfers.hydraulicsdata import HydraulicsDataTransferer
+from transfers.major_chemistry import MajorChemistryTransferer
+from transfers.minor_trace_chemistry_transfer import MinorTraceChemistryTransferer
+from transfers.ngwmn_views import (
+    NGWMNLithologyTransferer,
+    NGWMNWaterLevelsTransferer,
+    NGWMNWellConstructionTransferer,
+)
+from transfers.radionuclides import RadionuclidesTransferer
+from transfers.sensor_transfer import SensorTransferer
+from transfers.soil_rock_results import SoilRockResultsTransferer
+from transfers.stratigraphy_legacy import StratigraphyLegacyTransferer
+from transfers.surface_water_data import SurfaceWaterDataTransferer
+from transfers.surface_water_photos import SurfaceWaterPhotosTransferer
+from transfers.util import read_csv
+from transfers.waterlevels_transfer import WaterLevelTransferer
+from transfers.waterlevelscontinuous_pressure_daily import (
+    NMA_WaterLevelsContinuous_Pressure_DailyTransferer,
+)
+from transfers.weather_data import WeatherDataTransferer
+from transfers.weather_photos import WeatherPhotosTransferer
+from transfers.well_transfer import WellScreenTransferer, WellTransferer
+from db import (
+    Contact,
+    Group,
+    NMA_AssociatedData,
+    NMA_Chemistry_SampleInfo,
+    NMA_FieldParameters,
+    NMA_HydraulicsData,
+    NMA_MajorChemistry,
+    NMA_MinorTraceChemistry,
+    NMA_Radionuclides,
+    NMA_Soil_Rock_Results,
+    NMA_Stratigraphy,
+    NMA_SurfaceWaterData,
+    NMA_SurfaceWaterPhotos,
+    NMA_WaterLevelsContinuous_Pressure_Daily,
+    NMA_WeatherData,
+    NMA_WeatherPhotos,
+    NMA_view_NGWMN_Lithology,
+    NMA_view_NGWMN_WaterLevels,
+    NMA_view_NGWMN_WellConstruction,
+    Observation,
+    Sensor,
+    Thing,
+    WellScreen,
+)
+from transfers.transfer_results_types import (
+    AssociatedDataTransferResult,
+    ChemistrySampleInfoTransferResult,
+    DiversionOfSurfaceWaterTransferResult,
+    EphemeralStreamsTransferResult,
+    EquipmentTransferResult,
+    FieldParametersTransferResult,
+    HydraulicsDataTransferResult,
+    LakePondReservoirTransferResult,
+    MajorChemistryTransferResult,
+    MetStationsTransferResult,
+    MinorTraceChemistryTransferResult,
+    NGWMNLithologyTransferResult,
+    NGWMNWaterLevelsTransferResult,
+    NGWMNWellConstructionTransferResult,
+    OtherSiteTypesTransferResult,
+    OutfallWastewaterReturnFlowTransferResult,
+    OwnersDataTransferResult,
+    PerennialStreamsTransferResult,
+    PressureDailyTransferResult,
+    ProjectsTransferResult,
+    RadionuclidesTransferResult,
+    RockSampleLocationsTransferResult,
+    SoilGasSampleLocationsTransferResult,
+    SoilRockResultsTransferResult,
+    SpringsTransferResult,
+    StratigraphyTransferResult,
+    SurfaceWaterDataTransferResult,
+    SurfaceWaterPhotosTransferResult,
+    TransferResult,
+    WaterLevelsTransferResult,
+    WeatherDataTransferResult,
+    WeatherPhotosTransferResult,
+    WellDataTransferResult,
+    WellScreensTransferResult,
+)
+
+
+@dataclass(frozen=True)
+class TransferComparisonSpec:
+    transfer_name: str  # key for this comparison in the results mapping
+    result_cls: type[TransferResult]  # result class the builder instantiates
+    source_csv: str  # name handed to read_csv
+    source_key_column: str  # CSV column holding the comparison key
+    destination_model: Any  # model queried for destination rows
+    destination_key_column: str  # attribute on destination_model holding the key
+    source_filter: Callable[[pd.DataFrame], pd.DataFrame] | None = None  # pre-filter
+    destination_where: Callable[[Any], Any] | None = None  # model -> WHERE clause
+    agreed_row_counter: Callable[[], int] | None = None  # overrides agreed row count
+
+
+def _location_site_filter(site_type: str) -> Callable[[pd.DataFrame], pd.DataFrame]:
+    """Return a filter keeping only rows whose SiteType equals ``site_type``."""
+    def _keep_site(frame: pd.DataFrame) -> pd.DataFrame:
+        has_col = "SiteType" in frame.columns  # no column -> empty frame
+        return frame[frame["SiteType"] == site_type] if has_col else frame.iloc[0:0]
+
+    return _keep_site
+
+
+def _agreed_rows_from_transferer(transferer_cls) -> int:
+    """Count rows in the second frame returned by the transferer's _get_dfs()."""
+    _raw_df, cleaned_df = transferer_cls()._get_dfs()
+    return int(len(cleaned_df))
+
+
+def _agreed_rows_location(site_type: str) -> int:
+    """Count Location rows of ``site_type`` that have both Easting and Northing."""
+    loc = read_csv("Location")
+    has_xy = loc["Easting"].notna() & loc["Northing"].notna()
+    return int(((loc["SiteType"] == site_type) & has_xy).sum())
+
+
+TRANSFER_COMPARISON_SPECS: list[TransferComparisonSpec] = [
+    TransferComparisonSpec(
+        "WellData",
+        WellDataTransferResult,
+        "WellData",
+        "WellID",
+        Thing,
+        "nma_pk_welldata",
+        destination_where=lambda m: m.thing_type == "water well",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(WellTransferer),
+    ),
+    TransferComparisonSpec(
+        "WellScreens",
+        WellScreensTransferResult,
+        "WellScreens",
+        "GlobalID",
+        WellScreen,
+        "nma_pk_wellscreens",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(WellScreenTransferer),
+    ),
+    TransferComparisonSpec(
+        "OwnersData",
+        OwnersDataTransferResult,
+        "OwnersData",
+        "OwnerKey",
+        Contact,
+        "nma_pk_owners",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(ContactTransfer),
+    ),
+    TransferComparisonSpec(
+        "WaterLevels",
+        WaterLevelsTransferResult,
+        "WaterLevels",
+        "GlobalID",
+        Observation,
+        "nma_pk_waterlevels",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(WaterLevelTransferer),
+    ),
+    TransferComparisonSpec(
+        "Equipment",
+        EquipmentTransferResult,
+        "Equipment",
+        "GlobalID",
+        Sensor,
+        "nma_pk_equipment",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(SensorTransferer),
+    ),
+    TransferComparisonSpec(
+        "Projects",
+        ProjectsTransferResult,
+        "Projects",
+        "Project",
+        Group,
+        "name",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(ProjectGroupTransferer),
+    ),
+    TransferComparisonSpec(
+        "SurfaceWaterPhotos",
+        SurfaceWaterPhotosTransferResult,
+        "SurfaceWaterPhotos",
+        "GlobalID",
+        NMA_SurfaceWaterPhotos,
+        "global_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            SurfaceWaterPhotosTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "Soil_Rock_Results",
+        SoilRockResultsTransferResult,
+        "Soil_Rock_Results",
+        "Point_ID",
+        NMA_Soil_Rock_Results,
+        "nma_point_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            SoilRockResultsTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "WeatherPhotos",
+        WeatherPhotosTransferResult,
+        "WeatherPhotos",
+        "GlobalID",
+        NMA_WeatherPhotos,
+        "global_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            WeatherPhotosTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "AssociatedData",
+        AssociatedDataTransferResult,
+        "AssociatedData",
+        "AssocID",
+        NMA_AssociatedData,
+        "nma_assoc_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            AssociatedDataTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "SurfaceWaterData",
+        SurfaceWaterDataTransferResult,
+        "SurfaceWaterData",
+        "OBJECTID",
+        NMA_SurfaceWaterData,
+        "object_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            SurfaceWaterDataTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "HydraulicsData",
+        HydraulicsDataTransferResult,
+        "HydraulicsData",
+        "GlobalID",
+        NMA_HydraulicsData,
+        "nma_global_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            HydraulicsDataTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "Chemistry_SampleInfo",
+        ChemistrySampleInfoTransferResult,
+        "Chemistry_SampleInfo",
+        "SamplePtID",
+        NMA_Chemistry_SampleInfo,
+        "nma_sample_pt_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            ChemistrySampleInfoTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "view_NGWMN_WellConstruction",
+        NGWMNWellConstructionTransferResult,
+        "view_NGWMN_WellConstruction",
+        "PointID",
+        NMA_view_NGWMN_WellConstruction,
+        "point_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            NGWMNWellConstructionTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "view_NGWMN_WaterLevels",
+        NGWMNWaterLevelsTransferResult,
+        "view_NGWMN_WaterLevels",
+        "PointID",
+        NMA_view_NGWMN_WaterLevels,
+        "point_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            NGWMNWaterLevelsTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "view_NGWMN_Lithology",
+        NGWMNLithologyTransferResult,
+        "view_NGWMN_Lithology",
+        "PointID",
+        NMA_view_NGWMN_Lithology,
+        "point_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            NGWMNLithologyTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "WaterLevelsContinuous_Pressure_Daily",
+        PressureDailyTransferResult,
+        "WaterLevelsContinuous_Pressure_Daily",
+        "GlobalID",
+        NMA_WaterLevelsContinuous_Pressure_Daily,
+        "global_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            NMA_WaterLevelsContinuous_Pressure_DailyTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "WeatherData",
+        WeatherDataTransferResult,
+        "WeatherData",
+        "OBJECTID",
+        NMA_WeatherData,
+        "object_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(WeatherDataTransferer),
+    ),
+    TransferComparisonSpec(
+        "Stratigraphy",
+        StratigraphyTransferResult,
+        "Stratigraphy",
+        "GlobalID",
+        NMA_Stratigraphy,
+        "nma_global_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            StratigraphyLegacyTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "MajorChemistry",
+        MajorChemistryTransferResult,
+        "MajorChemistry",
+        "GlobalID",
+        NMA_MajorChemistry,
+        "nma_global_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            MajorChemistryTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "Radionuclides",
+        RadionuclidesTransferResult,
+        "Radionuclides",
+        "GlobalID",
+        NMA_Radionuclides,
+        "nma_global_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            RadionuclidesTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "MinorandTraceChemistry",
+        MinorTraceChemistryTransferResult,
+        "MinorandTraceChemistry",
+        "GlobalID",
+        NMA_MinorTraceChemistry,
+        "nma_global_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            MinorTraceChemistryTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "FieldParameters",
+        FieldParametersTransferResult,
+        "FieldParameters",
+        "GlobalID",
+        NMA_FieldParameters,
+        "nma_global_id",
+        agreed_row_counter=lambda: _agreed_rows_from_transferer(
+            FieldParametersTransferer
+        ),
+    ),
+    TransferComparisonSpec(
+        "Springs",
+        SpringsTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("SP"),
+        destination_where=lambda m: m.thing_type == "spring",
+        agreed_row_counter=lambda: _agreed_rows_location("SP"),
+    ),
+    TransferComparisonSpec(
+        "PerennialStreams",
+        PerennialStreamsTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("PS"),
+        destination_where=lambda m: m.thing_type == "perennial stream",
+        agreed_row_counter=lambda: _agreed_rows_location("PS"),
+    ),
+    TransferComparisonSpec(
+        "EphemeralStreams",
+        EphemeralStreamsTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("ES"),
+        destination_where=lambda m: m.thing_type == "ephemeral stream",
+        agreed_row_counter=lambda: _agreed_rows_location("ES"),
+    ),
+    TransferComparisonSpec(
+        "MetStations",
+        MetStationsTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("M"),
+        destination_where=lambda m: m.thing_type == "meteorological station",
+        agreed_row_counter=lambda: _agreed_rows_location("M"),
+    ),
+    TransferComparisonSpec(
+        "RockSampleLocations",
+        RockSampleLocationsTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("R"),
+        destination_where=lambda m: m.thing_type == "rock sample location",
+        agreed_row_counter=lambda: _agreed_rows_location("R"),
+    ),
+    TransferComparisonSpec(
+        "DiversionOfSurfaceWater",
+        DiversionOfSurfaceWaterTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("D"),
+        destination_where=lambda m: m.thing_type == "diversion of surface water, etc.",
+        agreed_row_counter=lambda: _agreed_rows_location("D"),
+    ),
+    TransferComparisonSpec(
+        "LakePondReservoir",
+        LakePondReservoirTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("L"),
+        destination_where=lambda m: m.thing_type == "lake, pond or reservoir",
+        agreed_row_counter=lambda: _agreed_rows_location("L"),
+    ),
+    TransferComparisonSpec(
+        "SoilGasSampleLocations",
+        SoilGasSampleLocationsTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("S"),
+        destination_where=lambda m: m.thing_type == "soil gas sample location",
+        agreed_row_counter=lambda: _agreed_rows_location("S"),
+    ),
+    TransferComparisonSpec(
+        "OtherSiteTypes",
+        OtherSiteTypesTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("OT"),
+        destination_where=lambda m: m.thing_type == "other",
+        agreed_row_counter=lambda: _agreed_rows_location("OT"),
+    ),
+    TransferComparisonSpec(
+        "OutfallWastewaterReturnFlow",
+        OutfallWastewaterReturnFlowTransferResult,
+        "Location",
+        "LocationId",
+        Thing,
+        "nma_pk_location",
+        source_filter=_location_site_filter("O"),
+        destination_where=lambda m: m.thing_type
+        == "outfall of wastewater or return flow",
+        agreed_row_counter=lambda: _agreed_rows_location("O"),
+    ),
+]
diff --git a/transfers/transfer_results_types.py b/transfers/transfer_results_types.py
new file mode 100644
index 00000000..dc58238a
--- /dev/null
+++ b/transfers/transfer_results_types.py
@@ -0,0 +1,81 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class TransferResult:
+    transfer_name: str
+    source_csv: str
+    source_key_column: str
+    destination_model: str
+    destination_key_column: str
+    source_row_count: int = 0
+    agreed_transfer_row_count: int = 0
+    source_keyed_row_count: int = 0
+    source_key_count: int = 0
+    source_duplicate_key_row_count: int = 0
+    destination_row_count: int = 0
+    destination_keyed_row_count: int = 0
+    destination_key_count: int = 0
+    destination_duplicate_key_row_count: int = 0
+    matched_key_count: int = 0
+    missing_in_destination_count: int = 0
+    extra_in_destination_count: int = 0
+    missing_in_destination_sample: list[str] = field(default_factory=list)
+    extra_in_destination_sample: list[str] = field(default_factory=list)
+
+
+@dataclass
+class TransferComparisonResults:
+    generated_at: str
+    results: dict[str, TransferResult]
+
+
+_RESULT_CLASS_NAMES = [
+    "WellData",
+    "WellScreens",
+    "OwnersData",
+    "WaterLevels",
+    "Equipment",
+    "Projects",
+    "SurfaceWaterPhotos",
+    "SoilRockResults",
+    "WeatherPhotos",
+    "AssociatedData",
+    "SurfaceWaterData",
+    "HydraulicsData",
+    "ChemistrySampleInfo",
+    "NGWMNWellConstruction",
+    "NGWMNWaterLevels",
+    "NGWMNLithology",
+    "PressureDaily",
+    "WeatherData",
+    "Stratigraphy",
+    "MajorChemistry",
+    "Radionuclides",
+    "MinorTraceChemistry",
+    "FieldParameters",
+    "Springs",
+    "PerennialStreams",
+    "EphemeralStreams",
+    "MetStations",
+    "RockSampleLocations",
+    "DiversionOfSurfaceWater",
+    "LakePondReservoir",
+    "SoilGasSampleLocations",
+    "OtherSiteTypes",
+    "OutfallWastewaterReturnFlow",
+]
+
+for _name in _RESULT_CLASS_NAMES:
+    globals()[f"{_name}TransferResult"] = type(
+        f"{_name}TransferResult", (TransferResult,), {}
+    )
+
+
+__all__ = [
+    "TransferResult",
+    "TransferComparisonResults",
+    *[f"{name}TransferResult" for name in _RESULT_CLASS_NAMES],
+]
diff --git a/transfers/well_transfer_results.py b/transfers/well_transfer_results.py
deleted file mode 100644
index 555ab9f7..00000000
--- a/transfers/well_transfer_results.py
+++ /dev/null
@@ -1,332 +0,0 @@
-# ===============================================================================
-# Copyright 2026 ross
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ===============================================================================
-from __future__ import annotations
-
-import argparse
-import csv
-from dataclasses import dataclass
-from datetime import datetime
-from pathlib import Path
-
-import pandas as pd
-from sqlalchemy import select
-
-from db import Thing
-from db.engine import session_ctx
-from transfers.util import (
-    filter_non_transferred_wells,
-    get_transferable_wells,
-    read_csv,
-    replace_nans,
-)
-
-
-@dataclass
-class ValidationIssue:
-    pointid: str
-    table: str
-    field: str
-    error: str
-
-
-@dataclass
-class WellTransferResults:
-    source_count: int
-    committed_count: int
-    transferred_count: int
-    skipped_by_decision: list[str]
-    validation_issue_wells: list[str]
-    validation_issues: list[ValidationIssue]
-    metrics_file: Path | None
-    skipped_by_existing_destination: list[str]
-
-
-class WellTransferResultsBuilder:
-    """Build well transfer outcome summaries by comparing source and destination."""
-
-    def __init__(
-        self,
-        pointids: list[str] | None = None,
-        metrics_file: Path | None = None,
-        output_dir: Path | None = None,
-    ):
-        self.pointids = set(pointids or [])
-        self.metrics_file = metrics_file
-        self.output_dir = output_dir or (Path("transfers") / "metrics")
-
-    def build(self) -> WellTransferResults:
-        source_df = self._load_source_wells()
-        committed_df = self._load_committed_wells(source_df)
-        committed_without_existing_df = filter_non_transferred_wells(committed_df)
-
-        source_ids = self._point_ids(source_df)
-        committed_ids = self._point_ids(committed_df)
-        committed_without_existing_ids = self._point_ids(committed_without_existing_df)
-        destination_ids = self._load_destination_ids()
-
-        skipped_by_decision = sorted(source_ids - committed_ids)
-        skipped_by_existing_destination = sorted(
-            committed_ids - committed_without_existing_ids
-        )
-        transferred_ids = committed_ids & destination_ids
-        missing_committed_ids = committed_ids - transferred_ids
-
-        validation_issues = self._load_well_validation_issues(
-            self._resolve_metrics_file()
-        )
-        validation_issue_ids = {
-            issue.pointid for issue in validation_issues if issue.pointid in source_ids
-        }
-        validation_issue_wells = sorted(validation_issue_ids & missing_committed_ids)
-
-        return WellTransferResults(
-            source_count=len(source_ids),
-            committed_count=len(committed_ids),
-            transferred_count=len(transferred_ids),
-            skipped_by_decision=skipped_by_decision,
-            validation_issue_wells=validation_issue_wells,
-            validation_issues=validation_issues,
-            metrics_file=self._resolve_metrics_file(),
-            skipped_by_existing_destination=skipped_by_existing_destination,
-        )
-
-    def write_reports(self, results: WellTransferResults) -> dict[str, Path]:
-        self.output_dir.mkdir(parents=True, exist_ok=True)
-        stamp = datetime.now().strftime("%Y-%m-%dT%H_%M_%S")
-
-        summary_path = self.output_dir / f"well_transfer_results_{stamp}.txt"
-        not_migrated_path = self.output_dir / f"wells_not_migrated_{stamp}.csv"
-        validation_path = self.output_dir / f"wells_validation_issues_{stamp}.csv"
-        already_exists_path = (
-            self.output_dir / f"wells_already_in_destination_{stamp}.csv"
-        )
-
-        summary_lines = [
-            "Well Transfer Results",
-            f"source_count={results.source_count}",
-            f"committed_count={results.committed_count}",
-            f"transferred_count={results.transferred_count}",
-            f"not_transferred_by_decision_count={len(results.skipped_by_decision)}",
-            f"not_transferred_validation_count={len(results.validation_issue_wells)}",
-            (
-                f"already_in_destination_count="
-                f"{len(results.skipped_by_existing_destination)}"
-            ),
-            (
-                f"metrics_file={results.metrics_file}"
-                if results.metrics_file
-                else "metrics_file=None"
-            ),
-        ]
-        summary_path.write_text("\n".join(summary_lines) + "\n")
-
-        self._write_pointids(not_migrated_path, "pointid", results.skipped_by_decision)
-        self._write_pointids(
-            already_exists_path, "pointid", results.skipped_by_existing_destination
-        )
-        self._write_validation_issues(
-            validation_path,
-            [
-                issue
-                for issue in results.validation_issues
-                if issue.pointid in set(results.validation_issue_wells)
-            ],
-        )
-
-        return {
-            "summary": summary_path,
-            "not_migrated": not_migrated_path,
-            "validation_issues": validation_path,
-            "already_in_destination": already_exists_path,
-        }
-
-    def _load_source_wells(self) -> pd.DataFrame:
-        wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
-        ldf = read_csv("Location")
-        ldf = ldf.drop(columns=["PointID", "SSMA_TimeStamp"], errors="ignore")
-        wdf = wdf.join(ldf.set_index("LocationId"), on="LocationId")
-
-        wdf = wdf[wdf["SiteType"] == "GW"]
-        wdf = wdf[wdf["Easting"].notna() & wdf["Northing"].notna()]
-        wdf = replace_nans(wdf)
-
-        if self.pointids:
-            wdf = wdf[wdf["PointID"].isin(self.pointids)]
-
-        return wdf
-
-    def _load_committed_wells(self, source_df: pd.DataFrame) -> pd.DataFrame:
-        committed_df = get_transferable_wells(source_df)
-        if self.pointids:
-            committed_df = committed_df[committed_df["PointID"].isin(self.pointids)]
-
-        duplicates = committed_df["PointID"].duplicated(keep=False)
-        if duplicates.any():
-            duplicate_ids = set(committed_df.loc[duplicates, "PointID"].tolist())
-            committed_df = committed_df[~committed_df["PointID"].isin(duplicate_ids)]
-
-        return committed_df.sort_values("PointID")
-
-    @staticmethod
-    def _point_ids(df: pd.DataFrame) -> set[str]:
-        if df.empty:
-            return set()
-        return set(df["PointID"].dropna().astype(str).unique().tolist())
-
-    def _load_destination_ids(self) -> set[str]:
-        with session_ctx() as session:
-            ids = session.execute(
-                select(Thing.name).where(Thing.thing_type == "water well")
-            ).scalars()
-            thing_names = {str(name) for name in ids if name}
-
-        if self.pointids:
-            thing_names = thing_names & self.pointids
-
-        return thing_names
-
-    def _resolve_metrics_file(self) -> Path | None:
-        if self.metrics_file:
-            return self.metrics_file
-
-        metrics_dir = Path("transfers") / "metrics"
-        candidates = sorted(
-            metrics_dir.glob("metrics_*.csv"), key=lambda p: p.stat().st_mtime
-        )
-        if not candidates:
-            return None
-        return candidates[-1]
-
-    @staticmethod
-    def _load_well_validation_issues(
-        metrics_file: Path | None,
-    ) -> list[ValidationIssue]:
-        if metrics_file is None or not metrics_file.exists():
-            return []
-
-        issues: list[ValidationIssue] = []
-        current_model: str | None = None
-        with metrics_file.open(newline="") as f:
-            reader = csv.reader(f, delimiter="|")
-            for row in reader:
-                if not row:
-                    continue
-
-                if len(row) >= 5 and row[0] not in {"model", "PointID"}:
-                    current_model = row[0]
-                    continue
-
-                if row[0] == "PointID":
-                    continue
-
-                if len(row) < 4:
-                    continue
-
-                if current_model != "Well":
-                    continue
-
-                pointid, table, field, error = row[0], row[1], row[2], row[3]
-                if table != "WellData":
-                    continue
-                if "Validation Error" not in error:
-                    continue
-                issues.append(
-                    ValidationIssue(
-                        pointid=pointid,
-                        table=table,
-                        field=field,
-                        error=error,
-                    )
-                )
-        return issues
-
-    @staticmethod
-    def _write_pointids(path: Path, header: str, pointids: list[str]) -> None:
-        with path.open("w", newline="") as f:
-            writer = csv.writer(f)
-            writer.writerow([header])
-            for pointid in pointids:
-                writer.writerow([pointid])
-
-    @staticmethod
-    def _write_validation_issues(path: Path, issues: list[ValidationIssue]) -> None:
-        with path.open("w", newline="") as f:
-            writer = csv.writer(f)
-            writer.writerow(["pointid", "table", "field", "error"])
-            for issue in issues:
-                writer.writerow([issue.pointid, issue.table, issue.field, issue.error])
-
-
-def _parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(description="Build transfer results for wells.")
-    parser.add_argument(
-        "--metrics-file",
-        type=Path,
-        default=None,
-        help="Optional metrics CSV to use for validation issue extraction.",
-    )
-    parser.add_argument(
-        "--output-dir",
-        type=Path,
-        default=Path("transfers") / "metrics",
-        help="Directory where result files are written.",
-    )
-    parser.add_argument(
-        "--pointids",
-        default=None,
-        help="Optional comma-separated list of PointID values to scope the report.",
-    )
-    return parser.parse_args()
-
-
-def main() -> None:
-    args = _parse_args()
-    pointids = args.pointids.split(",") if args.pointids else None
-    builder = WellTransferResultsBuilder(
-        pointids=pointids,
-        metrics_file=args.metrics_file,
-        output_dir=args.output_dir,
-    )
-    results = builder.build()
-    outputs = builder.write_reports(results)
-
-    print(f"Source wells: {results.source_count}")
-    print(f"Committed to migrate: {results.committed_count}")
-    print(f"Successfully transferred: {results.transferred_count}")
-    print(
-        f"Not transferred (decided not to migrate): {len(results.skipped_by_decision)}"
-    )
-    print(f"Not transferred (validation issues): {len(results.validation_issue_wells)}")
-    print(
-        f"Already in destination before migration filter: "
-        f"{len(results.skipped_by_existing_destination)}"
-    )
-    print(f"Summary file: {outputs['summary']}")
-    print(f"Not migrated wells file: {outputs['not_migrated']}")
-    print(f"Validation issue wells file: {outputs['validation_issues']}")
-    print(f"Already-in-destination wells file: {outputs['already_in_destination']}")
-
-    print("\nWells not transferred (decided not to migrate):")
-    for pointid in results.skipped_by_decision:
-        print(pointid)
-
-    print("\nWells not transferred (data validation issues):")
-    for pointid in results.validation_issue_wells:
-        print(pointid)
-
-
-if __name__ == "__main__":
-    main()

From cfb576e226bdab534c09de0c7d5d358044f0d1ef Mon Sep 17 00:00:00 2001
From: jirhiker <2035568+jirhiker@users.noreply.github.com>
Date: Fri, 20 Feb 2026 00:05:14 +0000
Subject: [PATCH 03/14] Formatting changes

---
 transfers/transfer_results.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/transfers/transfer_results.py b/transfers/transfer_results.py
index 0483e7fd..36337d52 100644
--- a/transfers/transfer_results.py
+++ b/transfers/transfer_results.py
@@ -10,7 +10,6 @@
 )
 from transfers.transfer_results_types import *  # noqa: F401,F403
 
-
 __all__ = [
     "TransferResultsBuilder",
     "TransferComparisonSpec",

From 2d4d8ff185690ef10e79ca2b9715511d47ef5e30 Mon Sep 17 00:00:00 2001
From: jross <jake.ross@nmt.edu>
Date: Thu, 19 Feb 2026 17:27:48 -0700
Subject: [PATCH 04/14] feat(migrations): make NMA_SurfaceWaterData.thing_id
 nullable

---
 ...ke_surface_water_data_thing_id_nullable.py |  57 +++++++
 db/nma_legacy.py                              |  19 +--
 transfers/surface_water_data.py               |  18 +--
 transfers/transfer_results_builder.py         |  58 ++++++--
 transfers/transfer_results_specs.py           | 139 +++++-------------
 5 files changed, 143 insertions(+), 148 deletions(-)
 create mode 100644 alembic/versions/i2c3d4e5f6a7_make_surface_water_data_thing_id_nullable.py

diff --git a/alembic/versions/i2c3d4e5f6a7_make_surface_water_data_thing_id_nullable.py b/alembic/versions/i2c3d4e5f6a7_make_surface_water_data_thing_id_nullable.py
new file mode 100644
index 00000000..0b0f00a2
--- /dev/null
+++ b/alembic/versions/i2c3d4e5f6a7_make_surface_water_data_thing_id_nullable.py
@@ -0,0 +1,57 @@
+"""Make NMA_SurfaceWaterData.thing_id nullable.
+
+Revision ID: i2c3d4e5f6a7
+Revises: f1a2b3c4d5e6
+Create Date: 2026-02-20 17:40:00.000000
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy import inspect
+
+# revision identifiers, used by Alembic.
+revision: str = "i2c3d4e5f6a7"
+down_revision: Union[str, Sequence[str], None] = "f1a2b3c4d5e6"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Allow orphan legacy SurfaceWaterData rows without a mapped Thing."""
+    bind = op.get_bind()
+    inspector = inspect(bind)
+    if not inspector.has_table("NMA_SurfaceWaterData"):
+        return
+
+    columns = {col["name"] for col in inspector.get_columns("NMA_SurfaceWaterData")}
+    if "thing_id" not in columns:
+        return
+
+    op.alter_column(
+        "NMA_SurfaceWaterData",
+        "thing_id",
+        existing_type=sa.Integer(),
+        nullable=True,
+    )
+
+
+def downgrade() -> None:
+    """Delete rows with a NULL thing_id, then restore the NOT NULL constraint."""
+    bind = op.get_bind()
+    inspector = inspect(bind)
+    if not inspector.has_table("NMA_SurfaceWaterData"):
+        return
+
+    columns = {col["name"] for col in inspector.get_columns("NMA_SurfaceWaterData")}
+    if "thing_id" not in columns:
+        return
+
+    op.execute('DELETE FROM "NMA_SurfaceWaterData" WHERE thing_id IS NULL')
+    op.alter_column(
+        "NMA_SurfaceWaterData",
+        "thing_id",
+        existing_type=sa.Integer(),
+        nullable=False,
+    )
diff --git a/db/nma_legacy.py b/db/nma_legacy.py
index cab2014e..8c01eae6 100644
--- a/db/nma_legacy.py
+++ b/db/nma_legacy.py
@@ -578,9 +578,9 @@ class NMA_SurfaceWaterData(Base):
     object_id: Mapped[int] = mapped_column("OBJECTID", Integer, primary_key=True)
 
     # FK
-    # FK to Thing - required for all SurfaceWaterData records
-    thing_id: Mapped[int] = mapped_column(
-        Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False
+    # FK to Thing - optional when legacy rows cannot be mapped to a Thing.
+    thing_id: Mapped[Optional[int]] = mapped_column(
+        Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=True
     )
 
     # Legacy PK (for audit)
@@ -615,16 +615,9 @@ class NMA_SurfaceWaterData(Base):
     data_source: Mapped[Optional[str]] = mapped_column("DataSource", String(255))
 
     # Relationships
-    thing: Mapped["Thing"] = relationship("Thing", back_populates="surface_water_data")
-
-    @validates("thing_id")
-    def validate_thing_id(self, key, value):
-        """Prevent orphan NMA_SurfaceWaterData - must have a parent Thing."""
-        if value is None:
-            raise ValueError(
-                "NMA_SurfaceWaterData requires a parent Thing (thing_id cannot be None)"
-            )
-        return value
+    thing: Mapped[Optional["Thing"]] = relationship(
+        "Thing", back_populates="surface_water_data"
+    )
 
 
 class NMA_SurfaceWaterPhotos(Base):
diff --git a/transfers/surface_water_data.py b/transfers/surface_water_data.py
index 9b4a6e32..e4e8a908 100644
--- a/transfers/surface_water_data.py
+++ b/transfers/surface_water_data.py
@@ -62,22 +62,12 @@ def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]:
 
     def _transfer_hook(self, session: Session) -> None:
         rows: list[dict[str, Any]] = []
-        skipped_missing_thing = 0
         for raw in self.cleaned_df.to_dict("records"):
             record = self._row_dict(raw)
-            if record is None:
-                skipped_missing_thing += 1
-                continue
             rows.append(record)
 
         rows = self._dedupe_rows(rows, key="OBJECTID", include_missing=True)
 
-        if skipped_missing_thing:
-            logger.warning(
-                "Skipped %s SurfaceWaterData rows without matching Thing",
-                skipped_missing_thing,
-            )
-
         insert_stmt = insert(NMA_SurfaceWaterData)
         excluded = insert_stmt.excluded
 
@@ -111,7 +101,7 @@ def _transfer_hook(self, session: Session) -> None:
             session.commit()
             session.expunge_all()
 
-    def _row_dict(self, row: dict[str, Any]) -> Optional[dict[str, Any]]:
+    def _row_dict(self, row: dict[str, Any]) -> dict[str, Any]:
         def val(key: str) -> Optional[Any]:
             v = row.get(key)
             if pd.isna(v):
@@ -133,12 +123,6 @@ def to_uuid(v: Any) -> Optional[uuid.UUID]:
 
         location_id = to_uuid(val("LocationId"))
         thing_id = self._resolve_thing_id(location_id)
-        if thing_id is None:
-            logger.warning(
-                "Skipping SurfaceWaterData LocationId=%s - Thing not found",
-                location_id,
-            )
-            return None
 
         return {
             "LocationId": location_id,
diff --git a/transfers/transfer_results_builder.py b/transfers/transfer_results_builder.py
index a8e384a7..15ba47c8 100644
--- a/transfers/transfer_results_builder.py
+++ b/transfers/transfer_results_builder.py
@@ -7,6 +7,7 @@
 from sqlalchemy import select, func
 
 from db.engine import session_ctx
+from transfers.transfer import load_transfer_options
 from transfers.transfer_results_specs import (
     TRANSFER_COMPARISON_SPECS,
     TransferComparisonSpec,
@@ -15,7 +16,12 @@
     TransferComparisonResults,
     TransferResult,
 )
-from transfers.util import read_csv
+from transfers.util import (
+    read_csv,
+    replace_nans,
+    get_transferable_wells,
+)
+import os
 
 
 def _normalize_key(value: Any) -> str | None:
@@ -56,6 +62,8 @@ class TransferResultsBuilder:
 
     def __init__(self, sample_limit: int = 25):
         self.sample_limit = sample_limit
+        self.transfer_options = load_transfer_options()
+        self.transfer_limit = int(os.getenv("TRANSFER_LIMIT", "1000"))
 
     def build(self) -> TransferComparisonResults:
         results: dict[str, TransferResult] = {}
@@ -70,16 +78,18 @@ def _build_one(self, spec: TransferComparisonSpec) -> TransferResult:
         source_df = read_csv(spec.source_csv)
         if spec.source_filter:
             source_df = spec.source_filter(source_df)
-        source_series = _normalized_series(source_df, spec.source_key_column)
+        comparison_df = source_df
+        enabled = self._is_enabled(spec)
+        if not enabled:
+            comparison_df = source_df.iloc[0:0]
+        elif spec.transfer_name == "WellData":
+            comparison_df = self._agreed_welldata_df()
+
+        source_series = _normalized_series(comparison_df, spec.source_key_column)
         source_keys = set(source_series.unique().tolist())
         source_keyed_row_count = int(source_series.shape[0])
         source_duplicate_key_row_count = source_keyed_row_count - len(source_keys)
-        agreed_transfer_row_count = int(len(source_df))
-        if spec.agreed_row_counter is not None:
-            try:
-                agreed_transfer_row_count = int(spec.agreed_row_counter())
-            except Exception:
-                agreed_transfer_row_count = int(len(source_df))
+        agreed_transfer_row_count = int(len(comparison_df))
 
         model = spec.destination_model
         key_col = getattr(model, spec.destination_key_column)
@@ -134,20 +144,44 @@ def _build_one(self, spec: TransferComparisonSpec) -> TransferResult:
             extra_in_destination_sample=extra[: self.sample_limit],
         )
 
+    def _is_enabled(self, spec: TransferComparisonSpec) -> bool:
+        if not spec.option_field:
+            return True
+        return bool(getattr(self.transfer_options, spec.option_field, True))
+
+    def _agreed_welldata_df(self) -> pd.DataFrame:
+        wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
+        ldf = read_csv("Location")
+        ldf = ldf.drop(["PointID", "SSMA_TimeStamp"], axis=1, errors="ignore")
+        wdf = wdf.join(ldf.set_index("LocationId"), on="LocationId")
+        wdf = wdf[wdf["SiteType"] == "GW"]
+        wdf = wdf[wdf["Easting"].notna() & wdf["Northing"].notna()]
+        wdf = replace_nans(wdf)
+
+        cleaned_df = get_transferable_wells(wdf)
+
+        dupes = cleaned_df["PointID"].duplicated(keep=False)
+        if dupes.any():
+            dup_ids = set(cleaned_df.loc[dupes, "PointID"])
+            cleaned_df = cleaned_df[~cleaned_df["PointID"].isin(dup_ids)]
+
+        if self.transfer_limit > 0:
+            cleaned_df = cleaned_df.head(self.transfer_limit)
+        return cleaned_df
+
     @staticmethod
     def write_summary(path: Path, comparison: TransferComparisonResults) -> None:
         lines = [
             f"generated_at={comparison.generated_at}",
             "",
-            "| Transfer | Source CSV | Source Rows | Agreed Rows | Dest Model | Dest Rows | Missing Agreed | Matched | Missing | Extra |",
-            "|---|---|---:|---:|---|---:|---:|---:|---:|---:|",
+            "| Transfer | Source CSV | Source Rows | Agreed Rows | Dest Model | Dest Rows | Missing Agreed |",
+            "|---|---|---:|---:|---|---:|---:|",
         ]
         for name in sorted(comparison.results.keys()):
             r = comparison.results[name]
             missing_agreed = r.agreed_transfer_row_count - r.destination_row_count
             lines.append(
                 f"| {name} | {r.source_csv} | {r.source_row_count} | {r.agreed_transfer_row_count} | "
-                f"{r.destination_model} | {r.destination_row_count} | {missing_agreed} | "
-                f"{r.matched_key_count} | {r.missing_in_destination_count} | {r.extra_in_destination_count} |"
+                f"{r.destination_model} | {r.destination_row_count} | {missing_agreed} |"
             )
         path.write_text("\n".join(lines) + "\n")
diff --git a/transfers/transfer_results_specs.py b/transfers/transfer_results_specs.py
index f86e13b7..3cfd7c05 100644
--- a/transfers/transfer_results_specs.py
+++ b/transfers/transfer_results_specs.py
@@ -5,33 +5,6 @@
 
 import pandas as pd
 
-from transfers.associated_data import AssociatedDataTransferer
-from transfers.chemistry_sampleinfo import ChemistrySampleInfoTransferer
-from transfers.contact_transfer import ContactTransfer
-from transfers.field_parameters_transfer import FieldParametersTransferer
-from transfers.group_transfer import ProjectGroupTransferer
-from transfers.hydraulicsdata import HydraulicsDataTransferer
-from transfers.major_chemistry import MajorChemistryTransferer
-from transfers.minor_trace_chemistry_transfer import MinorTraceChemistryTransferer
-from transfers.ngwmn_views import (
-    NGWMNLithologyTransferer,
-    NGWMNWaterLevelsTransferer,
-    NGWMNWellConstructionTransferer,
-)
-from transfers.radionuclides import RadionuclidesTransferer
-from transfers.sensor_transfer import SensorTransferer
-from transfers.soil_rock_results import SoilRockResultsTransferer
-from transfers.stratigraphy_legacy import StratigraphyLegacyTransferer
-from transfers.surface_water_data import SurfaceWaterDataTransferer
-from transfers.surface_water_photos import SurfaceWaterPhotosTransferer
-from transfers.util import read_csv
-from transfers.waterlevels_transfer import WaterLevelTransferer
-from transfers.waterlevelscontinuous_pressure_daily import (
-    NMA_WaterLevelsContinuous_Pressure_DailyTransferer,
-)
-from transfers.weather_data import WeatherDataTransferer
-from transfers.weather_photos import WeatherPhotosTransferer
-from transfers.well_transfer import WellScreenTransferer, WellTransferer
 from db import (
     Contact,
     Group,
@@ -105,7 +78,7 @@ class TransferComparisonSpec:
     destination_key_column: str
     source_filter: Callable[[pd.DataFrame], pd.DataFrame] | None = None
     destination_where: Callable[[Any], Any] | None = None
-    agreed_row_counter: Callable[[], int] | None = None
+    option_field: str | None = None
 
 
 def _location_site_filter(site_type: str) -> Callable[[pd.DataFrame], pd.DataFrame]:
@@ -117,19 +90,6 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
     return _f
 
 
-def _agreed_rows_from_transferer(transferer_cls) -> int:
-    transferer = transferer_cls()
-    _, cleaned_df = transferer._get_dfs()
-    return int(len(cleaned_df))
-
-
-def _agreed_rows_location(site_type: str) -> int:
-    df = read_csv("Location")
-    df = df[df["SiteType"] == site_type]
-    df = df[df["Easting"].notna() & df["Northing"].notna()]
-    return int(len(df))
-
-
 TRANSFER_COMPARISON_SPECS: list[TransferComparisonSpec] = [
     TransferComparisonSpec(
         "WellData",
@@ -139,7 +99,6 @@ def _agreed_rows_location(site_type: str) -> int:
         Thing,
         "nma_pk_welldata",
         destination_where=lambda m: m.thing_type == "water well",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(WellTransferer),
     ),
     TransferComparisonSpec(
         "WellScreens",
@@ -148,7 +107,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         WellScreen,
         "nma_pk_wellscreens",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(WellScreenTransferer),
+        option_field="transfer_screens",
     ),
     TransferComparisonSpec(
         "OwnersData",
@@ -157,7 +116,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "OwnerKey",
         Contact,
         "nma_pk_owners",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(ContactTransfer),
+        option_field="transfer_contacts",
     ),
     TransferComparisonSpec(
         "WaterLevels",
@@ -166,7 +125,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         Observation,
         "nma_pk_waterlevels",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(WaterLevelTransferer),
+        option_field="transfer_waterlevels",
     ),
     TransferComparisonSpec(
         "Equipment",
@@ -175,7 +134,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         Sensor,
         "nma_pk_equipment",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(SensorTransferer),
+        option_field="transfer_sensors",
     ),
     TransferComparisonSpec(
         "Projects",
@@ -184,7 +143,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "Project",
         Group,
         "name",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(ProjectGroupTransferer),
+        option_field="transfer_groups",
     ),
     TransferComparisonSpec(
         "SurfaceWaterPhotos",
@@ -193,9 +152,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         NMA_SurfaceWaterPhotos,
         "global_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            SurfaceWaterPhotosTransferer
-        ),
+        option_field="transfer_surface_water_photos",
     ),
     TransferComparisonSpec(
         "Soil_Rock_Results",
@@ -204,9 +161,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "Point_ID",
         NMA_Soil_Rock_Results,
         "nma_point_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            SoilRockResultsTransferer
-        ),
+        option_field="transfer_soil_rock_results",
     ),
     TransferComparisonSpec(
         "WeatherPhotos",
@@ -215,9 +170,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         NMA_WeatherPhotos,
         "global_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            WeatherPhotosTransferer
-        ),
+        option_field="transfer_weather_photos",
     ),
     TransferComparisonSpec(
         "AssociatedData",
@@ -226,9 +179,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "AssocID",
         NMA_AssociatedData,
         "nma_assoc_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            AssociatedDataTransferer
-        ),
+        option_field="transfer_associated_data",
     ),
     TransferComparisonSpec(
         "SurfaceWaterData",
@@ -237,9 +188,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "OBJECTID",
         NMA_SurfaceWaterData,
         "object_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            SurfaceWaterDataTransferer
-        ),
+        option_field="transfer_surface_water_data",
     ),
     TransferComparisonSpec(
         "HydraulicsData",
@@ -248,9 +197,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         NMA_HydraulicsData,
         "nma_global_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            HydraulicsDataTransferer
-        ),
+        option_field="transfer_hydraulics_data",
     ),
     TransferComparisonSpec(
         "Chemistry_SampleInfo",
@@ -259,9 +206,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "SamplePtID",
         NMA_Chemistry_SampleInfo,
         "nma_sample_pt_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            ChemistrySampleInfoTransferer
-        ),
+        option_field="transfer_chemistry_sampleinfo",
     ),
     TransferComparisonSpec(
         "view_NGWMN_WellConstruction",
@@ -270,9 +215,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "PointID",
         NMA_view_NGWMN_WellConstruction,
         "point_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            NGWMNWellConstructionTransferer
-        ),
+        option_field="transfer_ngwmn_views",
     ),
     TransferComparisonSpec(
         "view_NGWMN_WaterLevels",
@@ -281,9 +224,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "PointID",
         NMA_view_NGWMN_WaterLevels,
         "point_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            NGWMNWaterLevelsTransferer
-        ),
+        option_field="transfer_ngwmn_views",
     ),
     TransferComparisonSpec(
         "view_NGWMN_Lithology",
@@ -292,9 +233,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "PointID",
         NMA_view_NGWMN_Lithology,
         "point_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            NGWMNLithologyTransferer
-        ),
+        option_field="transfer_ngwmn_views",
     ),
     TransferComparisonSpec(
         "WaterLevelsContinuous_Pressure_Daily",
@@ -303,9 +242,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         NMA_WaterLevelsContinuous_Pressure_Daily,
         "global_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            NMA_WaterLevelsContinuous_Pressure_DailyTransferer
-        ),
+        option_field="transfer_pressure_daily",
     ),
     TransferComparisonSpec(
         "WeatherData",
@@ -314,7 +251,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "OBJECTID",
         NMA_WeatherData,
         "object_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(WeatherDataTransferer),
+        option_field="transfer_weather_data",
     ),
     TransferComparisonSpec(
         "Stratigraphy",
@@ -323,9 +260,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         NMA_Stratigraphy,
         "nma_global_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            StratigraphyLegacyTransferer
-        ),
+        option_field="transfer_nma_stratigraphy",
     ),
     TransferComparisonSpec(
         "MajorChemistry",
@@ -334,9 +269,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         NMA_MajorChemistry,
         "nma_global_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            MajorChemistryTransferer
-        ),
+        option_field="transfer_major_chemistry",
     ),
     TransferComparisonSpec(
         "Radionuclides",
@@ -345,9 +278,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         NMA_Radionuclides,
         "nma_global_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            RadionuclidesTransferer
-        ),
+        option_field="transfer_radionuclides",
     ),
     TransferComparisonSpec(
         "MinorandTraceChemistry",
@@ -356,9 +287,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         NMA_MinorTraceChemistry,
         "nma_global_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            MinorTraceChemistryTransferer
-        ),
+        option_field="transfer_minor_trace_chemistry",
     ),
     TransferComparisonSpec(
         "FieldParameters",
@@ -367,9 +296,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "GlobalID",
         NMA_FieldParameters,
         "nma_global_id",
-        agreed_row_counter=lambda: _agreed_rows_from_transferer(
-            FieldParametersTransferer
-        ),
+        option_field="transfer_field_parameters",
     ),
     TransferComparisonSpec(
         "Springs",
@@ -380,7 +307,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "nma_pk_location",
         source_filter=_location_site_filter("SP"),
         destination_where=lambda m: m.thing_type == "spring",
-        agreed_row_counter=lambda: _agreed_rows_location("SP"),
+        option_field="transfer_springs",
     ),
     TransferComparisonSpec(
         "PerennialStreams",
@@ -391,7 +318,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "nma_pk_location",
         source_filter=_location_site_filter("PS"),
         destination_where=lambda m: m.thing_type == "perennial stream",
-        agreed_row_counter=lambda: _agreed_rows_location("PS"),
+        option_field="transfer_perennial_streams",
     ),
     TransferComparisonSpec(
         "EphemeralStreams",
@@ -402,7 +329,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "nma_pk_location",
         source_filter=_location_site_filter("ES"),
         destination_where=lambda m: m.thing_type == "ephemeral stream",
-        agreed_row_counter=lambda: _agreed_rows_location("ES"),
+        option_field="transfer_ephemeral_streams",
     ),
     TransferComparisonSpec(
         "MetStations",
@@ -413,7 +340,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "nma_pk_location",
         source_filter=_location_site_filter("M"),
         destination_where=lambda m: m.thing_type == "meteorological station",
-        agreed_row_counter=lambda: _agreed_rows_location("M"),
+        option_field="transfer_met_stations",
     ),
     TransferComparisonSpec(
         "RockSampleLocations",
@@ -424,7 +351,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "nma_pk_location",
         source_filter=_location_site_filter("R"),
         destination_where=lambda m: m.thing_type == "rock sample location",
-        agreed_row_counter=lambda: _agreed_rows_location("R"),
+        option_field="transfer_rock_sample_locations",
     ),
     TransferComparisonSpec(
         "DiversionOfSurfaceWater",
@@ -435,7 +362,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "nma_pk_location",
         source_filter=_location_site_filter("D"),
         destination_where=lambda m: m.thing_type == "diversion of surface water, etc.",
-        agreed_row_counter=lambda: _agreed_rows_location("D"),
+        option_field="transfer_diversion_of_surface_water",
     ),
     TransferComparisonSpec(
         "LakePondReservoir",
@@ -446,7 +373,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "nma_pk_location",
         source_filter=_location_site_filter("L"),
         destination_where=lambda m: m.thing_type == "lake, pond or reservoir",
-        agreed_row_counter=lambda: _agreed_rows_location("L"),
+        option_field="transfer_lake_pond_reservoir",
     ),
     TransferComparisonSpec(
         "SoilGasSampleLocations",
@@ -457,7 +384,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "nma_pk_location",
         source_filter=_location_site_filter("S"),
         destination_where=lambda m: m.thing_type == "soil gas sample location",
-        agreed_row_counter=lambda: _agreed_rows_location("S"),
+        option_field="transfer_soil_gas_sample_locations",
     ),
     TransferComparisonSpec(
         "OtherSiteTypes",
@@ -468,7 +395,7 @@ def _agreed_rows_location(site_type: str) -> int:
         "nma_pk_location",
         source_filter=_location_site_filter("OT"),
         destination_where=lambda m: m.thing_type == "other",
-        agreed_row_counter=lambda: _agreed_rows_location("OT"),
+        option_field="transfer_other_site_types",
     ),
     TransferComparisonSpec(
         "OutfallWastewaterReturnFlow",
@@ -480,6 +407,6 @@ def _agreed_rows_location(site_type: str) -> int:
         source_filter=_location_site_filter("O"),
         destination_where=lambda m: m.thing_type
         == "outfall of wastewater or return flow",
-        agreed_row_counter=lambda: _agreed_rows_location("O"),
+        option_field="transfer_outfall_wastewater_return_flow",
     ),
 ]

From ba7881bccf444a643ac5aae17a38c5e2597e5d63 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Thu, 19 Feb 2026 18:27:02 -0700
Subject: [PATCH 05/14] fix: enforce required thing_id for NMA_SurfaceWaterData
 and add validation

---
 ...ke_surface_water_data_thing_id_nullable.py | 57 -------------------
 db/nma_legacy.py                              | 19 +++++--
 2 files changed, 13 insertions(+), 63 deletions(-)
 delete mode 100644 alembic/versions/i2c3d4e5f6a7_make_surface_water_data_thing_id_nullable.py

diff --git a/alembic/versions/i2c3d4e5f6a7_make_surface_water_data_thing_id_nullable.py b/alembic/versions/i2c3d4e5f6a7_make_surface_water_data_thing_id_nullable.py
deleted file mode 100644
index 0b0f00a2..00000000
--- a/alembic/versions/i2c3d4e5f6a7_make_surface_water_data_thing_id_nullable.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""Make NMA_SurfaceWaterData.thing_id nullable.
-
-Revision ID: i2c3d4e5f6a7
-Revises: f1a2b3c4d5e6
-Create Date: 2026-02-20 17:40:00.000000
-"""
-
-from typing import Sequence, Union
-
-import sqlalchemy as sa
-from alembic import op
-from sqlalchemy import inspect
-
-# revision identifiers, used by Alembic.
-revision: str = "i2c3d4e5f6a7"
-down_revision: Union[str, Sequence[str], None] = "f1a2b3c4d5e6"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def upgrade() -> None:
-    """Allow orphan legacy SurfaceWaterData rows without a mapped Thing."""
-    bind = op.get_bind()
-    inspector = inspect(bind)
-    if not inspector.has_table("NMA_SurfaceWaterData"):
-        return
-
-    columns = {col["name"] for col in inspector.get_columns("NMA_SurfaceWaterData")}
-    if "thing_id" not in columns:
-        return
-
-    op.alter_column(
-        "NMA_SurfaceWaterData",
-        "thing_id",
-        existing_type=sa.Integer(),
-        nullable=True,
-    )
-
-
-def downgrade() -> None:
-    """Revert to NOT NULL only when no null thing_id values exist."""
-    bind = op.get_bind()
-    inspector = inspect(bind)
-    if not inspector.has_table("NMA_SurfaceWaterData"):
-        return
-
-    columns = {col["name"] for col in inspector.get_columns("NMA_SurfaceWaterData")}
-    if "thing_id" not in columns:
-        return
-
-    op.execute('DELETE FROM "NMA_SurfaceWaterData" WHERE thing_id IS NULL')
-    op.alter_column(
-        "NMA_SurfaceWaterData",
-        "thing_id",
-        existing_type=sa.Integer(),
-        nullable=False,
-    )
diff --git a/db/nma_legacy.py b/db/nma_legacy.py
index 8c01eae6..cab2014e 100644
--- a/db/nma_legacy.py
+++ b/db/nma_legacy.py
@@ -578,9 +578,9 @@ class NMA_SurfaceWaterData(Base):
     object_id: Mapped[int] = mapped_column("OBJECTID", Integer, primary_key=True)
 
     # FK
-    # FK to Thing - optional when legacy rows cannot be mapped to a Thing.
-    thing_id: Mapped[Optional[int]] = mapped_column(
-        Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=True
+    # FK to Thing - required for all SurfaceWaterData records
+    thing_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("thing.id", ondelete="CASCADE"), nullable=False
     )
 
     # Legacy PK (for audit)
@@ -615,9 +615,16 @@ class NMA_SurfaceWaterData(Base):
     data_source: Mapped[Optional[str]] = mapped_column("DataSource", String(255))
 
     # Relationships
-    thing: Mapped[Optional["Thing"]] = relationship(
-        "Thing", back_populates="surface_water_data"
-    )
+    thing: Mapped["Thing"] = relationship("Thing", back_populates="surface_water_data")
+
+    @validates("thing_id")
+    def validate_thing_id(self, key, value):
+        """Reject orphan rows: raise ValueError if thing_id is None, else return it."""
+        if value is None:
+            raise ValueError(
+                "NMA_SurfaceWaterData requires a parent Thing (thing_id cannot be None)"
+            )
+        return value
 
 
 class NMA_SurfaceWaterPhotos(Base):

From b4764b2e9e06d93fdf536b8e38a3bf058f8ee215 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Thu, 19 Feb 2026 20:30:57 -0700
Subject: [PATCH 06/14] feat: add transfer-results command for generating
 transfer results summary

---
 ...e6_merge_migrations_after_staging_merge.py |  25 --
 ...add_unique_index_ngwmn_wellconstruction.py |   4 +-
 cli/cli.py                                    |  27 ++
 pyproject.toml                                |   2 +-
 tests/test_cli_commands.py                    |  56 ++-
 transfers/transfer.py                         |   8 +-
 transfers/transfer_results.py                 |  50 ---
 transfers/transfer_results_builder.py         |   9 +-
 transfers/transfer_results_specs.py           | 322 ++++++++++++++++++
 9 files changed, 414 insertions(+), 89 deletions(-)
 delete mode 100644 alembic/versions/43bc34504ee6_merge_migrations_after_staging_merge.py
 delete mode 100644 transfers/transfer_results.py

diff --git a/alembic/versions/43bc34504ee6_merge_migrations_after_staging_merge.py b/alembic/versions/43bc34504ee6_merge_migrations_after_staging_merge.py
deleted file mode 100644
index 86943385..00000000
--- a/alembic/versions/43bc34504ee6_merge_migrations_after_staging_merge.py
+++ /dev/null
@@ -1,25 +0,0 @@
-"""merge_migrations_after_staging_merge
-
-Revision ID: 43bc34504ee6
-Revises: 3cb924ca51fd
-Create Date: 2026-01-30 11:52:41.932306
-
-"""
-
-from typing import Sequence, Union
-
-# revision identifiers, used by Alembic.
-revision: str = "43bc34504ee6"
-down_revision: Union[str, Sequence[str], None] = "3cb924ca51fd"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def upgrade() -> None:
-    """Upgrade schema."""
-    pass
-
-
-def downgrade() -> None:
-    """Downgrade schema."""
-    pass
diff --git a/alembic/versions/50d1c2a3b4c5_add_unique_index_ngwmn_wellconstruction.py b/alembic/versions/50d1c2a3b4c5_add_unique_index_ngwmn_wellconstruction.py
index ceffbdaa..edf6fb8e 100644
--- a/alembic/versions/50d1c2a3b4c5_add_unique_index_ngwmn_wellconstruction.py
+++ b/alembic/versions/50d1c2a3b4c5_add_unique_index_ngwmn_wellconstruction.py
@@ -1,7 +1,7 @@
 """Add unique index for NGWMN well construction
 
 Revision ID: 50d1c2a3b4c5
-Revises: 43bc34504ee6
+Revises: 3cb924ca51fd
 Create Date: 2026-01-31 00:27:12.204176
 
 """
@@ -12,7 +12,7 @@
 
 # revision identifiers, used by Alembic.
 revision: str = "50d1c2a3b4c5"
-down_revision: Union[str, Sequence[str], None] = "43bc34504ee6"
+down_revision: Union[str, Sequence[str], None] = "3cb924ca51fd"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
diff --git a/cli/cli.py b/cli/cli.py
index 6be0e16e..c84c862a 100644
--- a/cli/cli.py
+++ b/cli/cli.py
@@ -109,6 +109,33 @@ def associate_assets_command(
     associate_assets(root_directory)
 
 
+@cli.command("transfer-results")
+def transfer_results(  # build the transfer comparison and write its markdown summary
+    summary_path: Path = typer.Option(
+        Path("transfers") / "metrics" / "transfer_results_summary.md",
+        "--summary-path",
+        help="Output path for markdown summary table.",
+    ),
+    sample_limit: int = typer.Option(
+        25,
+        "--sample-limit",
+        min=1,
+        help="Max missing/extra key samples stored per transfer.",
+    ),
+    theme: ThemeMode = typer.Option(  # NOTE(review): accepted but not read in the body below
+        ThemeMode.auto, "--theme", help="Color theme: auto, light, dark."
+    ),
+):
+    from transfers.transfer_results_builder import TransferResultsBuilder  # imported when the command runs
+
+    builder = TransferResultsBuilder(sample_limit=sample_limit)
+    results = builder.build()
+    summary_path.parent.mkdir(parents=True, exist_ok=True)  # ensure the output directory exists
+    TransferResultsBuilder.write_summary(summary_path, results)
+    typer.echo(f"Wrote comparison summary: {summary_path}")
+    typer.echo(f"Transfer comparisons: {len(results.results)}")
+
+
 @cli.command("well-inventory-csv")
 def well_inventory_csv(
     file_path: str = typer.Argument(
diff --git a/pyproject.toml b/pyproject.toml
index 70d4bae8..45f81453 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -108,7 +108,7 @@ dependencies = [
 package = true
 
 [tool.setuptools]
-packages = ["alembic", "cli", "core", "db", "schemas", "services"]
+packages = ["alembic", "cli", "core", "db", "schemas", "services", "transfers"]
 
 [project.scripts]
 oco = "cli.cli:cli"
diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py
index f70d8613..8bdc2f9c 100644
--- a/tests/test_cli_commands.py
+++ b/tests/test_cli_commands.py
@@ -18,13 +18,15 @@
 import textwrap
 import uuid
 from pathlib import Path
+from types import SimpleNamespace
+
+from sqlalchemy import select
+from typer.testing import CliRunner
 
 from cli.cli import cli
 from cli.service_adapter import WellInventoryResult
 from db import FieldActivity, FieldEvent, Observation, Sample
 from db.engine import session_ctx
-from sqlalchemy import select
-from typer.testing import CliRunner
 
 
 def test_initialize_lexicon_invokes_initializer(monkeypatch):
@@ -95,6 +97,50 @@ def fake_well_inventory(file_path):
     assert "[WELL INVENTORY IMPORT] SUCCESS" in result.output
 
 
+def test_transfer_results_command_writes_summary(monkeypatch, tmp_path):
+    captured: dict[str, object] = {}  # values recorded by FakeBuilder for the assertions below
+
+    class FakeBuilder:  # stand-in for TransferResultsBuilder; records how it is called
+        def __init__(self, sample_limit: int = 25):
+            captured["sample_limit"] = sample_limit
+
+        def build(self):
+            captured["built"] = True
+            return SimpleNamespace(
+                results={"WellData": object(), "WaterLevels": object()}
+            )
+
+        @staticmethod
+        def write_summary(path, comparison):  # records its arguments; writes no file
+            captured["summary_path"] = Path(path)
+            captured["result_count"] = len(comparison.results)
+
+    monkeypatch.setattr(  # replace the class on the module that defines it
+        "transfers.transfer_results_builder.TransferResultsBuilder", FakeBuilder
+    )
+
+    summary_path = tmp_path / "metrics" / "summary.md"  # "metrics" is not created by this test
+    runner = CliRunner()
+    result = runner.invoke(
+        cli,
+        [
+            "transfer-results",
+            "--summary-path",
+            str(summary_path),
+            "--sample-limit",
+            "11",
+        ],
+    )
+
+    assert result.exit_code == 0, result.output
+    assert captured["sample_limit"] == 11
+    assert captured["built"] is True
+    assert captured["summary_path"] == summary_path
+    assert captured["result_count"] == 2
+    assert f"Wrote comparison summary: {summary_path}" in result.output
+    assert "Transfer comparisons: 2" in result.output
+
+
 def test_well_inventory_csv_command_reports_validation_errors(monkeypatch, tmp_path):
     inventory_file = tmp_path / "inventory.csv"
     inventory_file.write_text("header\nvalue\n")
@@ -198,10 +244,12 @@ def test_water_levels_cli_persists_observations(tmp_path, water_well_thing):
     """
 
     def _write_csv(path: Path, *, well_name: str, notes: str):
-        csv_text = textwrap.dedent(f"""\
+        csv_text = textwrap.dedent(
+            f"""\
             field_staff,well_name_point_id,field_event_date_time,measurement_date_time,sampler,sample_method,mp_height,level_status,depth_to_water_ft,data_quality,water_level_notes
             CLI Tester,{well_name},2025-02-15T08:00:00-07:00,2025-02-15T10:30:00-07:00,Groundwater Team,electric tape,1.5,stable,42.5,approved,{notes}
-            """)
+            """
+        )
         path.write_text(csv_text)
 
     unique_notes = f"pytest-{uuid.uuid4()}"
diff --git a/transfers/transfer.py b/transfers/transfer.py
index 1e50accb..83b8df3b 100644
--- a/transfers/transfer.py
+++ b/transfers/transfer.py
@@ -20,6 +20,7 @@
 from dataclasses import dataclass
 
 from dotenv import load_dotenv
+
 from transfers.thing_transfer import (
     transfer_rock_sample_locations,
     transfer_springs,
@@ -698,9 +699,10 @@ def main():
     profile_artifacts = transfer_all(metrics)
 
     metrics.close()
-    metrics.save_to_storage_bucket()
-    save_log_to_bucket()
-    upload_profile_artifacts(profile_artifacts)
+    if get_bool_env("SAVE_TO_BUCKET", False):
+        metrics.save_to_storage_bucket()
+        save_log_to_bucket()
+        upload_profile_artifacts(profile_artifacts)
     message("END--------------------------------------")
 
 
diff --git a/transfers/transfer_results.py b/transfers/transfer_results.py
deleted file mode 100644
index 36337d52..00000000
--- a/transfers/transfer_results.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from __future__ import annotations
-
-import argparse
-from pathlib import Path
-
-from transfers.transfer_results_builder import TransferResultsBuilder
-from transfers.transfer_results_specs import (
-    TRANSFER_COMPARISON_SPECS,
-    TransferComparisonSpec,
-)
-from transfers.transfer_results_types import *  # noqa: F401,F403
-
-__all__ = [
-    "TransferResultsBuilder",
-    "TransferComparisonSpec",
-    "TRANSFER_COMPARISON_SPECS",
-]
-
-
-def _parse_args() -> argparse.Namespace:
-    parser = argparse.ArgumentParser(
-        description="Compare each transfer input CSV against destination Postgres rows."
-    )
-    parser.add_argument(
-        "--summary-path",
-        type=Path,
-        default=Path("transfers") / "metrics" / "transfer_results_summary.md",
-        help="Output path for markdown summary table.",
-    )
-    parser.add_argument(
-        "--sample-limit",
-        type=int,
-        default=25,
-        help="Max missing/extra key samples stored per transfer.",
-    )
-    return parser.parse_args()
-
-
-def main() -> None:
-    args = _parse_args()
-    builder = TransferResultsBuilder(sample_limit=args.sample_limit)
-    results = builder.build()
-    args.summary_path.parent.mkdir(parents=True, exist_ok=True)
-    TransferResultsBuilder.write_summary(args.summary_path, results)
-    print(f"Wrote comparison summary: {args.summary_path}")
-    print(f"Transfer comparisons: {len(results.results)}")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/transfers/transfer_results_builder.py b/transfers/transfer_results_builder.py
index 15ba47c8..1a2392c0 100644
--- a/transfers/transfer_results_builder.py
+++ b/transfers/transfer_results_builder.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import os
 from pathlib import Path
 from typing import Any
 
@@ -21,7 +22,6 @@
     replace_nans,
     get_transferable_wells,
 )
-import os
 
 
 def _normalize_key(value: Any) -> str | None:
@@ -79,9 +79,11 @@ def _build_one(self, spec: TransferComparisonSpec) -> TransferResult:
         if spec.source_filter:
             source_df = spec.source_filter(source_df)
         comparison_df = source_df
+        if spec.agreed_filter:
+            comparison_df = spec.agreed_filter(comparison_df)
         enabled = self._is_enabled(spec)
         if not enabled:
-            comparison_df = source_df.iloc[0:0]
+            comparison_df = comparison_df.iloc[0:0]
         elif spec.transfer_name == "WellData":
             comparison_df = self._agreed_welldata_df()
 
@@ -179,9 +181,8 @@ def write_summary(path: Path, comparison: TransferComparisonResults) -> None:
         ]
         for name in sorted(comparison.results.keys()):
             r = comparison.results[name]
-            missing_agreed = r.agreed_transfer_row_count - r.destination_row_count
             lines.append(
                 f"| {name} | {r.source_csv} | {r.source_row_count} | {r.agreed_transfer_row_count} | "
-                f"{r.destination_model} | {r.destination_row_count} | {missing_agreed} |"
+                f"{r.destination_model} | {r.destination_row_count} | {r.missing_in_destination_count} |"
             )
         path.write_text("\n".join(lines) + "\n")
diff --git a/transfers/transfer_results_specs.py b/transfers/transfer_results_specs.py
index 3cfd7c05..449ffa89 100644
--- a/transfers/transfer_results_specs.py
+++ b/transfers/transfer_results_specs.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import json
 from dataclasses import dataclass
 from typing import Any, Callable
+from uuid import UUID
 
 import pandas as pd
 
@@ -29,6 +31,15 @@
     Sensor,
     Thing,
     WellScreen,
+    Location,
+    LocationThingAssociation,
+)
+from db.engine import session_ctx
+from transfers.contact_transfer import (
+    _get_organization,
+    _make_name,
+    _safe_make_name,
+    _select_ownerkey_col,
 )
 from transfers.transfer_results_types import (
     AssociatedDataTransferResult,
@@ -66,6 +77,13 @@
     WellDataTransferResult,
     WellScreensTransferResult,
 )
+from transfers.util import (
+    filter_by_valid_measuring_agency,
+    filter_to_valid_point_ids,
+    get_transfers_data_path,
+    read_csv,
+    replace_nans,
+)
 
 
 @dataclass(frozen=True)
@@ -77,6 +95,7 @@ class TransferComparisonSpec:
     destination_model: Any
     destination_key_column: str
     source_filter: Callable[[pd.DataFrame], pd.DataFrame] | None = None
+    agreed_filter: Callable[[pd.DataFrame], pd.DataFrame] | None = None
     destination_where: Callable[[Any], Any] | None = None
     option_field: str | None = None
 
@@ -90,6 +109,297 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
     return _f
 
 
+def _chemistry_sampleinfo_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Keep the rows the ChemistrySampleInfoTransferer filters are meant to keep:
+    LocationId matches (case/whitespace-insensitively) a Location.nma_pk_location that
+    has a LocationThingAssociation, and SamplePtID parses as a UUID."""
+    if "LocationId" not in df.columns or "SamplePtID" not in df.columns:
+        return df.iloc[0:0]  # required column missing: empty frame, same columns
+
+    with session_ctx() as session:
+        rows = (
+            session.query(Location.nma_pk_location)
+            .join(
+                LocationThingAssociation,
+                Location.id == LocationThingAssociation.location_id,
+            )
+            .filter(Location.nma_pk_location.isnot(None))
+            .all()
+        )
+        valid_location_ids = {
+            str(nma_pk_location).strip().lower() for (nma_pk_location,) in rows
+        }
+
+    def _normalize_location(value: Any) -> str | None:
+        if pd.isna(value):
+            return None
+        text = str(value).strip().lower()  # same normalization as valid_location_ids
+        return text or None
+
+    def _is_valid_uuid(value: Any) -> bool:
+        if pd.isna(value):
+            return False
+        try:
+            UUID(str(value))
+        except (TypeError, ValueError):
+            return False
+        return True
+
+    location_mask = df["LocationId"].apply(_normalize_location).isin(valid_location_ids)
+    sample_pt_mask = df["SamplePtID"].apply(_is_valid_uuid)
+    return df[location_mask & sample_pt_mask].copy()
+
+
+def _chemistry_child_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Keep rows whose SamplePtID parses to a UUID present in
+    NMA_Chemistry_SampleInfo.nma_sample_pt_id (cf. ChemistryTransferer._filter_to_valid_sample_infos)."""
+    if "SamplePtID" not in df.columns:
+        return df.iloc[0:0]  # required column missing: empty frame, same columns
+
+    with session_ctx() as session:
+        rows = (
+            session.query(NMA_Chemistry_SampleInfo.nma_sample_pt_id)
+            .filter(NMA_Chemistry_SampleInfo.nma_sample_pt_id.isnot(None))
+            .all()
+        )
+        # NOTE(review): the isin() below assumes these are uuid.UUID objects — confirm.
+        valid_sample_pt_ids = {sample_pt_id for (sample_pt_id,) in rows}
+
+    def _uuid_or_none(value: Any) -> UUID | None:
+        if pd.isna(value):
+            return None
+        try:
+            return UUID(str(value))
+        except (TypeError, ValueError):
+            return None
+
+    sample_pt_mask = df["SamplePtID"].map(_uuid_or_none).isin(valid_sample_pt_ids)
+    return df[sample_pt_mask].copy()
+
+
+def _waterlevels_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Run a copy of df through the filters meant to mirror WaterLevelTransferer._get_dfs."""
+    cleaned_df = replace_nans(df.copy())  # the helpers receive a copy, not df itself
+    cleaned_df = filter_to_valid_point_ids(cleaned_df)
+    cleaned_df = filter_by_valid_measuring_agency(cleaned_df)
+    return cleaned_df
+
+
+def _stratigraphy_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Run a copy of df through the filters meant to mirror StratigraphyLegacyTransferer._get_dfs."""
+    cleaned_df = replace_nans(df.copy())  # the helpers receive a copy, not df itself
+    cleaned_df = filter_to_valid_point_ids(cleaned_df)
+    return cleaned_df
+
+
+def _hydraulics_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Keep rows whose PointID equals an existing, non-null Thing.name
+    (intended to mirror HydraulicsDataTransferer._filter_to_valid_things)."""
+    if "PointID" not in df.columns:
+        return df.iloc[0:0]  # required column missing: empty frame, same columns
+
+    with session_ctx() as session:
+        thing_names = {
+            name
+            for (name,) in session.query(Thing.name)
+            .filter(Thing.name.isnot(None))
+            .all()
+        }
+
+    return df[df["PointID"].isin(thing_names)].copy()  # exact-value match, no normalization
+
+
+def _ngwmn_waterlevels_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Dedupe NGWMN water-level rows on (PointID, DateMeasured), later rows winning.
+
+    Mirrors the NGWMNWaterLevelsTransferer conflict key. Returns an empty frame when
+    a key column is absent; rows with a missing key part are kept without deduping
+    (cf. transfer _dedupe_rows(..., include_missing=True)).
+    """
+    if "PointID" not in df.columns or "DateMeasured" not in df.columns:
+        return df.iloc[0:0]
+
+    dedupe_df = df.copy()
+    # astype(str) alone renders NaN/None as "nan"/"None"; mask them back to missing.
+    dedupe_df["_pointid_norm"] = df["PointID"].astype(str).where(df["PointID"].notna())
+    parsed_dates = pd.to_datetime(dedupe_df["DateMeasured"], errors="coerce")
+    dedupe_df["_date_measured_norm"] = parsed_dates.dt.date
+    key_cols = ["_pointid_norm", "_date_measured_norm"]
+    missing_key_mask = dedupe_df[key_cols].isna().any(axis=1)
+    non_missing = dedupe_df.loc[~missing_key_mask].drop_duplicates(
+        subset=key_cols, keep="last"
+    )
+    out = pd.concat([non_missing, dedupe_df.loc[missing_key_mask]], axis=0)
+    return out.drop(columns=key_cols)
+
+
+def _ngwmn_wellconstruction_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Dedupe NGWMN well-construction rows, with later rows winning.
+
+    Mirrors the NGWMNWellConstructionTransferer conflict key (PointID, CasingTop,
+    ScreenTop). Empty frame when a key column is absent; rows with a missing key
+    part are kept undeduped (cf. transfer _dedupe_rows(..., include_missing=True)).
+    """
+    import re
+
+    required = {"PointID", "CasingTop", "ScreenTop"}
+    if not required.issubset(df.columns):
+        return df.iloc[0:0]
+
+    # Compiled once rather than per cell; finds the first number in a string.
+    number_re = re.compile(r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?")
+
+    def _float_or_none(value: Any) -> float | None:
+        if value is None or pd.isna(value):
+            return None
+        if isinstance(value, (int, float)):
+            return float(value)
+        match = number_re.search(value) if isinstance(value, str) else None
+        if match:
+            try:
+                return float(match.group(0))
+            except ValueError:
+                return None
+        return None
+
+    dedupe_df = df.copy()
+    # astype(str) alone renders NaN/None as "nan"/"None"; mask them back to missing.
+    dedupe_df["_pointid_norm"] = df["PointID"].astype(str).where(df["PointID"].notna())
+    dedupe_df["_casing_top_norm"] = dedupe_df["CasingTop"].map(_float_or_none)
+    dedupe_df["_screen_top_norm"] = dedupe_df["ScreenTop"].map(_float_or_none)
+    key_cols = ["_pointid_norm", "_casing_top_norm", "_screen_top_norm"]
+    missing_key_mask = dedupe_df[key_cols].isna().any(axis=1)
+    non_missing = dedupe_df.loc[~missing_key_mask].drop_duplicates(
+        subset=key_cols, keep="last"
+    )
+    out = pd.concat([non_missing, dedupe_df.loc[missing_key_mask]], axis=0)
+    return out.drop(columns=key_cols)
+
+
+def _load_json_mapping(path: str) -> dict[str, str]:  # returns {} when the file is absent
+    try:
+        with open(path, encoding="utf-8") as f:  # JSON is UTF-8; don't rely on the locale
+            return json.load(f)
+    except FileNotFoundError:
+        return {}
+
+
+def _ownersdata_agreed_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Return one "OwnerKey" row per Contact this emulation of ContactTransfer would create;
+    one OwnersData source row can produce 0/1/2 Contact rows (Primary and/or Secondary)."""
+    odf = df.drop(["OBJECTID", "GlobalID"], axis=1, errors="ignore")
+    ldf = read_csv("OwnerLink").drop(["OBJECTID", "GlobalID"], axis=1, errors="ignore")
+    locdf = read_csv("Location")
+    ldf = ldf.join(locdf.set_index("LocationId"), on="LocationId")  # join() defaults to left
+
+    owner_key_col = _select_ownerkey_col(odf, "OwnersData")
+    link_owner_key_col = _select_ownerkey_col(ldf, "OwnerLink")
+
+    ownerkey_mapper = _load_json_mapping(
+        str(get_transfers_data_path("owners_ownerkey_mapper.json"))
+    )
+    org_mapper = _load_json_mapping(
+        str(get_transfers_data_path("owners_organization_mapper.json"))
+    )
+
+    if ownerkey_mapper:
+        odf["ownerkey_canonical"] = odf[owner_key_col].replace(ownerkey_mapper)
+        ldf["ownerkey_canonical"] = ldf[link_owner_key_col].replace(ownerkey_mapper)
+    else:
+        odf["ownerkey_canonical"] = odf[owner_key_col]
+        ldf["ownerkey_canonical"] = ldf[link_owner_key_col]
+
+    odf["ownerkey_norm"] = (  # trimmed, casefolded key; blank/missing becomes pd.NA
+        odf["ownerkey_canonical"]
+        .fillna("")
+        .astype(str)
+        .str.strip()
+        .str.casefold()
+        .replace({"": pd.NA})
+    )
+    ldf["ownerkey_norm"] = (  # same normalization so both sides join on equal keys
+        ldf["ownerkey_canonical"]
+        .fillna("")
+        .astype(str)
+        .str.strip()
+        .str.casefold()
+        .replace({"": pd.NA})
+    )
+
+    ldf_join = ldf.set_index("ownerkey_norm")
+    overlap_cols = [col for col in ldf_join.columns if col in odf.columns]
+    if overlap_cols:  # drop link columns that would collide with OwnersData columns
+        ldf_join = ldf_join.drop(columns=overlap_cols, errors="ignore")
+    odf = odf.join(ldf_join, on="ownerkey_norm")  # join() defaults to a left join
+
+    odf = replace_nans(odf)
+    odf = filter_to_valid_point_ids(odf)
+
+    # Emulate ContactTransfer + _make_contact_and_assoc semantics:
+    # 1) dedupe by (OwnerKey, ContactType)
+    # 2) then dedupe by (name, organization) via in-memory "added" list
+    # 3) only successful CreateContact payloads count as agreed.
+    agreed_rows: list[dict[str, Any]] = []
+    created_owner_type: set[tuple[str, str]] = set()
+    added_name_org: set[tuple[str | None, str | None]] = set()
+
+    ordered = odf.sort_values(by=["PointID"], kind="stable")  # stable: ties keep row order
+
+    def _record_new_contact(
+        owner_key: Any,
+        contact_type: str,
+        name: str | None,
+        organization: str | None,
+    ) -> bool:
+        if name is None and organization is None:  # nothing to identify the contact by
+            return False
+
+        owner_key_text = None if owner_key is None else str(owner_key)
+        owner_type_key = None  # stays None when the owner key is missing or empty
+        if owner_key_text:
+            owner_type_key = (owner_key_text, contact_type)
+
+        if owner_type_key and owner_type_key in created_owner_type:
+            return False
+
+        name_org_key = (name, organization)
+        if name_org_key in added_name_org:
+            return False
+
+        if owner_type_key:
+            created_owner_type.add(owner_type_key)
+        added_name_org.add(name_org_key)
+        agreed_rows.append({"OwnerKey": owner_key})  # one output row per recorded contact
+        return True
+
+    for row in ordered.itertuples():
+        owner_key = getattr(row, owner_key_col, None)
+        organization = _get_organization(row, org_mapper)
+
+        primary_name = _safe_make_name(
+            getattr(row, "FirstName", None),
+            getattr(row, "LastName", None),
+            owner_key,
+            organization,
+        )
+        _record_new_contact(owner_key, "Primary", primary_name, organization)
+
+        has_secondary_input = not all(  # true when any Second* field is not None
+            [
+                getattr(row, "SecondFirstName", None) is None,
+                getattr(row, "SecondLastName", None) is None,
+                getattr(row, "SecondCtctEmail", None) is None,
+                getattr(row, "SecondCtctPhone", None) is None,
+            ]
+        )
+        if has_secondary_input:
+            secondary_name = _make_name(
+                getattr(row, "SecondFirstName", None),
+                getattr(row, "SecondLastName", None),
+            )
+            _record_new_contact(owner_key, "Secondary", secondary_name, organization)
+
+    return pd.DataFrame(agreed_rows, columns=["OwnerKey"])
+
+
 TRANSFER_COMPARISON_SPECS: list[TransferComparisonSpec] = [
     TransferComparisonSpec(
         "WellData",
@@ -116,6 +426,8 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "OwnerKey",
         Contact,
         "nma_pk_owners",
+        agreed_filter=_ownersdata_agreed_filter,
+        destination_where=lambda m: m.nma_pk_owners.is_not(None),
         option_field="transfer_contacts",
     ),
     TransferComparisonSpec(
@@ -125,6 +437,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "GlobalID",
         Observation,
         "nma_pk_waterlevels",
+        agreed_filter=_waterlevels_filter,
         option_field="transfer_waterlevels",
     ),
     TransferComparisonSpec(
@@ -197,6 +510,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "GlobalID",
         NMA_HydraulicsData,
         "nma_global_id",
+        agreed_filter=_hydraulics_filter,
         option_field="transfer_hydraulics_data",
     ),
     TransferComparisonSpec(
@@ -206,6 +520,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "SamplePtID",
         NMA_Chemistry_SampleInfo,
         "nma_sample_pt_id",
+        agreed_filter=_chemistry_sampleinfo_filter,
         option_field="transfer_chemistry_sampleinfo",
     ),
     TransferComparisonSpec(
@@ -215,6 +530,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "PointID",
         NMA_view_NGWMN_WellConstruction,
         "point_id",
+        agreed_filter=_ngwmn_wellconstruction_filter,
         option_field="transfer_ngwmn_views",
     ),
     TransferComparisonSpec(
@@ -224,6 +540,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "PointID",
         NMA_view_NGWMN_WaterLevels,
         "point_id",
+        agreed_filter=_ngwmn_waterlevels_filter,
         option_field="transfer_ngwmn_views",
     ),
     TransferComparisonSpec(
@@ -260,6 +577,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "GlobalID",
         NMA_Stratigraphy,
         "nma_global_id",
+        agreed_filter=_stratigraphy_filter,
         option_field="transfer_nma_stratigraphy",
     ),
     TransferComparisonSpec(
@@ -269,6 +587,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "GlobalID",
         NMA_MajorChemistry,
         "nma_global_id",
+        agreed_filter=_chemistry_child_filter,
         option_field="transfer_major_chemistry",
     ),
     TransferComparisonSpec(
@@ -278,6 +597,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "GlobalID",
         NMA_Radionuclides,
         "nma_global_id",
+        agreed_filter=_chemistry_child_filter,
         option_field="transfer_radionuclides",
     ),
     TransferComparisonSpec(
@@ -287,6 +607,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "GlobalID",
         NMA_MinorTraceChemistry,
         "nma_global_id",
+        agreed_filter=_chemistry_child_filter,
         option_field="transfer_minor_trace_chemistry",
     ),
     TransferComparisonSpec(
@@ -296,6 +617,7 @@ def _f(df: pd.DataFrame) -> pd.DataFrame:
         "GlobalID",
         NMA_FieldParameters,
         "nma_global_id",
+        agreed_filter=_chemistry_child_filter,
         option_field="transfer_field_parameters",
     ),
     TransferComparisonSpec(

From 35287180aa4a02c5ebceedcfd8804d7a12a4f256 Mon Sep 17 00:00:00 2001
From: jirhiker <2035568+jirhiker@users.noreply.github.com>
Date: Fri, 20 Feb 2026 03:31:20 +0000
Subject: [PATCH 07/14] Formatting changes

---
 tests/test_cli_commands.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py
index 8bdc2f9c..412ebea3 100644
--- a/tests/test_cli_commands.py
+++ b/tests/test_cli_commands.py
@@ -244,12 +244,10 @@ def test_water_levels_cli_persists_observations(tmp_path, water_well_thing):
     """
 
     def _write_csv(path: Path, *, well_name: str, notes: str):
-        csv_text = textwrap.dedent(
-            f"""\
+        csv_text = textwrap.dedent(f"""\
             field_staff,well_name_point_id,field_event_date_time,measurement_date_time,sampler,sample_method,mp_height,level_status,depth_to_water_ft,data_quality,water_level_notes
             CLI Tester,{well_name},2025-02-15T08:00:00-07:00,2025-02-15T10:30:00-07:00,Groundwater Team,electric tape,1.5,stable,42.5,approved,{notes}
-            """
-        )
+            """)
         path.write_text(csv_text)
 
     unique_notes = f"pytest-{uuid.uuid4()}"

From fd7e2430c8f51eed6dcdb9d71799f532bf656bd1 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Sun, 22 Feb 2026 14:24:18 -0700
Subject: [PATCH 08/14] feat: make various fields nullable and enhance data
 transfer handling

---
 ...3b_make_measuring_point_height_nullable.py |  36 +++
 ...d3e4f_make_address_postal_code_nullable.py |  36 +++
 ...e_deployment_installation_date_nullable.py |  36 +++
 ...5e6f7a8_make_wellscreen_depths_nullable.py |  48 ++++
 ...f7a8b9_make_address_city_state_nullable.py |  48 ++++
 api/README.md                                 |  18 ++
 cli/README.md                                 |  25 ++
 cli/cli.py                                    | 142 ++++++++++
 core/lexicon.json                             |   1 +
 db/README.md                                  |  22 ++
 db/contact.py                                 |   6 +-
 db/deployment.py                              |   2 +-
 db/measuring_point_history.py                 |   2 +-
 db/thing.py                                   |   4 +-
 schemas/contact.py                            |  15 +-
 schemas/deployment.py                         |   2 +-
 schemas/sample.py                             |   4 +-
 schemas/thing.py                              |  53 ++--
 tests/README.md                               |  31 +++
 tests/features/environment.py                 |  15 +-
 tests/test_cli_commands.py                    |   6 +-
 tests/test_util.py                            |  26 +-
 .../unit/test_contact_transfer_email_utils.py |  19 ++
 transfers/README.md                           |  27 ++
 transfers/contact_transfer.py                 | 258 ++++++++++++++----
 transfers/geologic_formation_transfer.py      | 105 +++----
 transfers/link_ids_transfer.py                | 189 ++++++++-----
 transfers/logger.py                           |  23 +-
 transfers/relaxed_constraints.md              |  10 +
 transfers/sensor_transfer.py                  |  20 +-
 transfers/thing_transfer.py                   | 180 ++++++++++--
 transfers/transfer_results_builder.py         | 161 ++++++++++-
 transfers/transfer_results_specs.py           |  91 +++++-
 transfers/transfer_results_types.py           |   2 +
 transfers/transferer.py                       |  10 -
 transfers/util.py                             |  23 ++
 transfers/waterlevels_transfer.py             |  46 +---
 transfers/well_transfer.py                    |  62 +++--
 38 files changed, 1449 insertions(+), 355 deletions(-)
 create mode 100644 alembic/versions/8c9d0e1f2a3b_make_measuring_point_height_nullable.py
 create mode 100644 alembic/versions/9a0b1c2d3e4f_make_address_postal_code_nullable.py
 create mode 100644 alembic/versions/a1b2c3d4e5f7_make_deployment_installation_date_nullable.py
 create mode 100644 alembic/versions/b3c4d5e6f7a8_make_wellscreen_depths_nullable.py
 create mode 100644 alembic/versions/c4d5e6f7a8b9_make_address_city_state_nullable.py
 create mode 100644 api/README.md
 create mode 100644 cli/README.md
 create mode 100644 db/README.md
 create mode 100644 tests/README.md
 create mode 100644 tests/unit/test_contact_transfer_email_utils.py
 create mode 100644 transfers/README.md
 create mode 100644 transfers/relaxed_constraints.md

diff --git a/alembic/versions/8c9d0e1f2a3b_make_measuring_point_height_nullable.py b/alembic/versions/8c9d0e1f2a3b_make_measuring_point_height_nullable.py
new file mode 100644
index 00000000..58a3050c
--- /dev/null
+++ b/alembic/versions/8c9d0e1f2a3b_make_measuring_point_height_nullable.py
@@ -0,0 +1,36 @@
+"""make measuring_point_history.measuring_point_height nullable
+
+Revision ID: 8c9d0e1f2a3b
+Revises: 5336a52336df
+Create Date: 2026-02-21 12:00:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "8c9d0e1f2a3b"
+down_revision: Union[str, Sequence[str], None] = "5336a52336df"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:  # allow NULL in measuring_point_history.measuring_point_height
+    op.alter_column(
+        "measuring_point_history",
+        "measuring_point_height",
+        existing_type=sa.Numeric(),
+        nullable=True,
+    )
+
+
+def downgrade() -> None:  # restore NOT NULL on measuring_point_history.measuring_point_height
+    op.alter_column(
+        "measuring_point_history",
+        "measuring_point_height",
+        existing_type=sa.Numeric(),
+        nullable=False,  # NOTE(review): presumably fails if NULL rows exist by then — confirm
+    )
diff --git a/alembic/versions/9a0b1c2d3e4f_make_address_postal_code_nullable.py b/alembic/versions/9a0b1c2d3e4f_make_address_postal_code_nullable.py
new file mode 100644
index 00000000..05138add
--- /dev/null
+++ b/alembic/versions/9a0b1c2d3e4f_make_address_postal_code_nullable.py
@@ -0,0 +1,36 @@
+"""make address.postal_code nullable
+
+Revision ID: 9a0b1c2d3e4f
+Revises: 8c9d0e1f2a3b
+Create Date: 2026-02-21 13:00:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "9a0b1c2d3e4f"
+down_revision: Union[str, Sequence[str], None] = "8c9d0e1f2a3b"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:  # allow NULL in address.postal_code
+    op.alter_column(
+        "address",
+        "postal_code",
+        existing_type=sa.String(length=20),
+        nullable=True,
+    )
+
+
+def downgrade() -> None:  # restore NOT NULL on address.postal_code
+    op.alter_column(
+        "address",
+        "postal_code",
+        existing_type=sa.String(length=20),
+        nullable=False,  # NOTE(review): presumably fails if NULL rows exist by then — confirm
+    )
diff --git a/alembic/versions/a1b2c3d4e5f7_make_deployment_installation_date_nullable.py b/alembic/versions/a1b2c3d4e5f7_make_deployment_installation_date_nullable.py
new file mode 100644
index 00000000..59f899a6
--- /dev/null
+++ b/alembic/versions/a1b2c3d4e5f7_make_deployment_installation_date_nullable.py
@@ -0,0 +1,36 @@
+"""make deployment installation_date nullable
+
+Revision ID: a1b2c3d4e5f7
+Revises: 9a0b1c2d3e4f
+Create Date: 2026-02-21 14:32:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "a1b2c3d4e5f7"
+down_revision: Union[str, Sequence[str], None] = "9a0b1c2d3e4f"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:  # allow NULL in deployment.installation_date
+    op.alter_column(
+        "deployment",
+        "installation_date",
+        existing_type=sa.Date(),
+        nullable=True,
+    )
+
+
+def downgrade() -> None:  # restore NOT NULL on deployment.installation_date
+    op.alter_column(
+        "deployment",
+        "installation_date",
+        existing_type=sa.Date(),
+        nullable=False,  # NOTE(review): presumably fails if NULL rows exist by then — confirm
+    )
diff --git a/alembic/versions/b3c4d5e6f7a8_make_wellscreen_depths_nullable.py b/alembic/versions/b3c4d5e6f7a8_make_wellscreen_depths_nullable.py
new file mode 100644
index 00000000..7e1bca3a
--- /dev/null
+++ b/alembic/versions/b3c4d5e6f7a8_make_wellscreen_depths_nullable.py
@@ -0,0 +1,48 @@
+"""make wellscreen depth fields nullable
+
+Revision ID: b3c4d5e6f7a8
+Revises: a1b2c3d4e5f7
+Create Date: 2026-02-21 15:20:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "b3c4d5e6f7a8"
+down_revision: Union[str, Sequence[str], None] = "a1b2c3d4e5f7"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:  # allow NULL in well_screen.screen_depth_top and screen_depth_bottom
+    op.alter_column(
+        "well_screen",
+        "screen_depth_top",
+        existing_type=sa.Float(),
+        nullable=True,
+    )
+    op.alter_column(
+        "well_screen",
+        "screen_depth_bottom",
+        existing_type=sa.Float(),
+        nullable=True,
+    )
+
+
+def downgrade() -> None:  # restore NOT NULL on both depth columns, in reverse upgrade order
+    op.alter_column(
+        "well_screen",
+        "screen_depth_bottom",
+        existing_type=sa.Float(),
+        nullable=False,  # NOTE(review): presumably fails if NULL rows exist by then — confirm
+    )
+    op.alter_column(
+        "well_screen",
+        "screen_depth_top",
+        existing_type=sa.Float(),
+        nullable=False,  # NOTE(review): same caveat as above — confirm
+    )
diff --git a/alembic/versions/c4d5e6f7a8b9_make_address_city_state_nullable.py b/alembic/versions/c4d5e6f7a8b9_make_address_city_state_nullable.py
new file mode 100644
index 00000000..fb55e860
--- /dev/null
+++ b/alembic/versions/c4d5e6f7a8b9_make_address_city_state_nullable.py
@@ -0,0 +1,48 @@
+"""make address.city and address.state nullable
+
+Revision ID: c4d5e6f7a8b9
+Revises: b3c4d5e6f7a8
+Create Date: 2026-02-21 16:30:00.000000
+
+"""
+
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "c4d5e6f7a8b9"
+down_revision: Union[str, Sequence[str], None] = "b3c4d5e6f7a8"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:  # allow NULL in address.city and address.state
+    op.alter_column(
+        "address",
+        "city",
+        existing_type=sa.String(length=100),
+        nullable=True,
+    )
+    op.alter_column(
+        "address",
+        "state",
+        existing_type=sa.String(length=50),
+        nullable=True,
+    )
+
+
+def downgrade() -> None:  # restore NOT NULL on address.city and address.state
+    op.alter_column(
+        "address",
+        "city",
+        existing_type=sa.String(length=100),
+        nullable=False,  # NOTE(review): presumably fails if NULL rows exist by then — confirm
+    )
+    op.alter_column(
+        "address",
+        "state",
+        existing_type=sa.String(length=50),
+        nullable=False,  # NOTE(review): same caveat as above — confirm
+    )
diff --git a/api/README.md b/api/README.md
new file mode 100644
index 00000000..fd6767de
--- /dev/null
+++ b/api/README.md
@@ -0,0 +1,18 @@
+# API
+
+This directory contains FastAPI route modules grouped by resource/domain.
+
+## Structure
+
+- One module per domain (for example `thing.py`, `contact.py`, `observation.py`)
+- `api/ogc/` contains OGC-specific endpoints
+
+## Guidelines
+
+- Keep endpoints focused on transport concerns (request/response, status codes).
+- Put transfer/business logic in service or transfer modules.
+- Ensure response schemas match `schemas/` definitions.
+
+## Running locally
+
+Use the project entrypoint from the repo root (see the top-level README for full setup).
diff --git a/cli/README.md b/cli/README.md
new file mode 100644
index 00000000..42d557c8
--- /dev/null
+++ b/cli/README.md
@@ -0,0 +1,25 @@
+# CLI
+
+This directory contains Typer-based command entrypoints for operational and migration workflows.
+
+## Main entrypoint
+
+- `cli/cli.py`
+
+Run commands from repo root:
+
+```bash
+source .venv/bin/activate
+python -m cli.cli --help
+```
+
+## Common commands
+
+- `python -m cli.cli transfer-results`
+- `python -m cli.cli compare-duplicated-welldata`
+- `python -m cli.cli alembic-upgrade-and-data`
+
+## Notes
+
+- CLI logging is written to `cli/logs/`.
+- Keep CLI commands thin; move heavy logic into service/transfer modules.
diff --git a/cli/cli.py b/cli/cli.py
index c84c862a..cb29338e 100644
--- a/cli/cli.py
+++ b/cli/cli.py
@@ -20,10 +20,12 @@
 from pathlib import Path
 from textwrap import shorten, wrap
 
+import pandas as pd
 import typer
 from dotenv import load_dotenv
 
 load_dotenv()
+os.environ.setdefault("OCO_LOG_CONTEXT", "cli")
 
 cli = typer.Typer(help="Command line interface for managing the application.")
 water_levels = typer.Typer(help="Water-level utilities")
@@ -136,6 +138,146 @@ def transfer_results(
     typer.echo(f"Transfer comparisons: {len(results.results)}")
 
 
+@cli.command("compare-duplicated-welldata")
+def compare_duplicated_welldata(
+    pointid: list[str] = typer.Option(
+        None,
+        "--pointid",
+        help="Optional PointID filter. Repeat --pointid for multiple values.",
+    ),
+    apply_transfer_filters: bool = typer.Option(
+        True,
+        "--apply-transfer-filters/--no-apply-transfer-filters",
+        help=(
+            "Apply WellTransferer-like pre-filters (GW + coordinates + transferable), "
+            "excluding DB-dependent non-transferred filtering."
+        ),
+    ),
+    summary_path: Path = typer.Option(
+        Path("transfers") / "metrics" / "welldata_duplicate_comparison_summary.csv",
+        "--summary-path",
+        help="Output CSV path for duplicate PointID summary.",
+    ),
+    detail_path: Path = typer.Option(
+        Path("transfers") / "metrics" / "welldata_duplicate_comparison_detail.csv",
+        "--detail-path",
+        help="Output CSV path for row x differing-column detail values.",
+    ),
+    theme: ThemeMode = typer.Option(
+        ThemeMode.auto, "--theme", help="Color theme: auto, light, dark."
+    ),  # accepted as an option but not read anywhere in this command body
+):
+    """Report WellData PointIDs that appear on multiple rows and which columns differ.
+
+    Writes a per-PointID summary CSV and a per-row, per-differing-column detail CSV."""
+    from transfers.util import get_transferable_wells, read_csv, replace_nans
+
+    df = read_csv("WellData", dtype={"OSEWelltagID": str})
+
+    if apply_transfer_filters:
+        if "LocationId" in df.columns:
+            ldf = read_csv("Location")
+            ldf = ldf.drop(["PointID", "SSMA_TimeStamp"], axis=1, errors="ignore")
+            df = df.join(ldf.set_index("LocationId"), on="LocationId")
+
+        if "SiteType" in df.columns:
+            df = df[df["SiteType"] == "GW"]
+
+        if "Easting" in df.columns and "Northing" in df.columns:
+            df = df[df["Easting"].notna() & df["Northing"].notna()]
+
+        df = replace_nans(df)
+        df = get_transferable_wells(df)
+    else:
+        df = replace_nans(df)
+
+    if "PointID" not in df.columns:  # must precede the --pointid filter, which indexes it
+        typer.echo("WellData has no PointID column after filtering.")
+        raise typer.Exit(code=1)
+
+    if pointid:
+        requested = {pid.strip() for pid in pointid if pid and pid.strip()}
+        df = df[df["PointID"].isin(requested)]
+
+    dup_mask = df["PointID"].duplicated(keep=False)
+    dup_df = df.loc[dup_mask].copy()
+
+    summary_rows: list[dict] = []
+    detail_rows: list[dict] = []
+
+    if not dup_df.empty:
+        for pid, group in dup_df.groupby("PointID", sort=True):
+            diff_cols: list[str] = []
+            for col in group.columns:
+                series = group[col]
+                non_null = series[~series.isna()]
+                if non_null.empty:
+                    continue
+                if len({str(v) for v in non_null}) > 1:  # compared by string form
+                    diff_cols.append(col)
+
+            summary_rows.append(
+                {
+                    "pointid": pid,
+                    "duplicate_row_count": int(len(group)),
+                    "differing_column_count": int(len(diff_cols)),
+                    "differing_columns": "|".join(diff_cols),
+                }
+            )
+
+            normalized = group.reset_index(drop=False).rename(
+                columns={"index": "source_row_index"}
+            )
+            for row_num, row in normalized.iterrows():
+                for col in diff_cols:
+                    detail_rows.append(
+                        {
+                            "pointid": pid,
+                            "row_number": int(row_num),
+                            "source_row_index": int(row["source_row_index"]),
+                            "column": col,
+                            "value": row.get(col, None),
+                        }
+                    )
+
+    summary_df = pd.DataFrame(summary_rows)
+    if not summary_df.empty:
+        summary_df = summary_df.sort_values(
+            by=["duplicate_row_count", "pointid"], ascending=[False, True]
+        )
+
+    detail_df = pd.DataFrame(detail_rows)
+    if not detail_df.empty:
+        detail_df = detail_df.sort_values(by=["pointid", "row_number", "column"])
+
+    summary_path.parent.mkdir(parents=True, exist_ok=True)
+    detail_path.parent.mkdir(parents=True, exist_ok=True)
+    summary_df.to_csv(summary_path, index=False)
+    detail_df.to_csv(detail_path, index=False)
+
+    if summary_df.empty:
+        typer.echo("No duplicated WellData PointIDs found for current filters.")
+        typer.echo(f"Wrote empty summary: {summary_path}")
+        typer.echo(f"Wrote empty detail: {detail_path}")
+        return
+
+    total_dup_rows = int(len(dup_df))
+    total_dup_pointids = int(summary_df["pointid"].nunique())
+    typer.echo(
+        f"Found {total_dup_pointids} duplicated PointIDs across {total_dup_rows} rows."
+    )
+    typer.echo(f"Wrote summary: {summary_path}")
+    typer.echo(f"Wrote detail: {detail_path}")
+
+    preview = summary_df.head(20)
+    typer.echo("\nTop duplicate PointIDs:")
+    for row in preview.itertuples(index=False):
+        typer.echo(
+            f"- {row.pointid}: rows={row.duplicate_row_count}, "
+            f"differing_columns={row.differing_column_count}"
+        )
+
+
 @cli.command("well-inventory-csv")
 def well_inventory_csv(
     file_path: str = typer.Argument(
diff --git a/core/lexicon.json b/core/lexicon.json
index 9da523f9..07b32c30 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -421,6 +421,7 @@
         "elevation_method",
         "sample_method",
         "coordinate_method",
+        "well_construction_method",
         "well_purpose",
         "status",
         "organization",
diff --git a/db/README.md b/db/README.md
new file mode 100644
index 00000000..02556c22
--- /dev/null
+++ b/db/README.md
@@ -0,0 +1,22 @@
+# DB
+
+This directory contains SQLAlchemy models, engine/session setup, and database initialization helpers.
+
+## Key files
+
+- `db/base.py`: shared ORM base mixins and common fields
+- `db/engine.py`: engine/session configuration
+- `db/initialization.py`: schema/bootstrap utilities
+
+## Schema changes
+
+- Use Alembic migrations under `alembic/versions/` for all DDL changes.
+- Keep model nullability/defaults aligned with migrations.
+- Prefer idempotent data migrations and safe re-runs.
+
+## Local usage
+
+```bash
+source .venv/bin/activate
+alembic upgrade head
+```
diff --git a/db/contact.py b/db/contact.py
index fa3146df..0fb59473 100644
--- a/db/contact.py
+++ b/db/contact.py
@@ -188,9 +188,9 @@ class Address(Base, AutoBaseMixin, ReleaseMixin):
     )
     address_line_1: Mapped[str] = mapped_column(String(255), nullable=False)
     address_line_2: Mapped[str | None] = mapped_column(String(255), nullable=True)
-    city: Mapped[str] = mapped_column(String(100), nullable=False)
-    state: Mapped[str] = mapped_column(String(50), nullable=False)
-    postal_code: Mapped[str] = mapped_column(String(20), nullable=False)
+    city: Mapped[str | None] = mapped_column(String(100), nullable=True)
+    state: Mapped[str | None] = mapped_column(String(50), nullable=True)
+    postal_code: Mapped[str | None] = mapped_column(String(20), nullable=True)
     country: Mapped[str] = mapped_column(
         String(50), default="United States", nullable=False
     )
diff --git a/db/deployment.py b/db/deployment.py
index 6f07830a..60377c4d 100644
--- a/db/deployment.py
+++ b/db/deployment.py
@@ -33,7 +33,7 @@ class Deployment(Base, AutoBaseMixin, ReleaseMixin):
     )
 
     # --- Columns ---
-    installation_date: Mapped[Date] = mapped_column(Date, nullable=False)
+    installation_date: Mapped[Date | None] = mapped_column(Date, nullable=True)
     removal_date: Mapped[Date] = mapped_column(Date, nullable=True)
     recording_interval: Mapped[int] = mapped_column(Integer, nullable=True)
     recording_interval_units: Mapped[str] = lexicon_term(nullable=True)
diff --git a/db/measuring_point_history.py b/db/measuring_point_history.py
index 7d23518a..16857a23 100644
--- a/db/measuring_point_history.py
+++ b/db/measuring_point_history.py
@@ -37,7 +37,7 @@ class MeasuringPointHistory(Base, AutoBaseMixin, ReleaseMixin):
     # --- Columns ---
     measuring_point_height: Mapped[float] = mapped_column(
         Numeric,
-        nullable=False,
+        nullable=True,
         comment="The official, surveyed height of the measuring point relative to ground surface (in feet).",
     )
     measuring_point_description: Mapped[str] = mapped_column(
diff --git a/db/thing.py b/db/thing.py
index a0f3db3b..f5fbff5b 100644
--- a/db/thing.py
+++ b/db/thing.py
@@ -594,10 +594,10 @@ class WellScreen(Base, AutoBaseMixin, ReleaseMixin):
     geologic_formation_id: Mapped[int] = mapped_column(
         ForeignKey("geologic_formation.id", ondelete="SET NULL"), nullable=True
     )
-    screen_depth_top: Mapped[float] = mapped_column(
+    screen_depth_top: Mapped[float | None] = mapped_column(
         info={"unit": "feet below ground surface"}, nullable=True
     )
-    screen_depth_bottom: Mapped[float] = mapped_column(
+    screen_depth_bottom: Mapped[float | None] = mapped_column(
         info={"unit": "feet below ground surface"}, nullable=True
     )
     screen_type: Mapped[str] = lexicon_term(nullable=True)  # e.g., "PVC", "Steel", etc.
diff --git a/schemas/contact.py b/schemas/contact.py
index a9302daa..248ff173 100644
--- a/schemas/contact.py
+++ b/schemas/contact.py
@@ -24,6 +24,7 @@
 from schemas import BaseResponseModel, BaseCreateModel, BaseUpdateModel
 from schemas.notes import CreateNote, NoteResponse
 
+
 # -------- VALIDATORS ----------
 
 
@@ -123,10 +124,12 @@ class CreateAddress(BaseCreateModel):
     # todo: use a postal API to validate address and suggest corrections
     address_line_1: str  # Required (e.g., "123 Main St")
     address_line_2: str | None = None  # Optional (e.g., "Apt 4B", "Suite 200")
-    city: str
+    city: str | None = None
     # todo: add validation.  Should state be required? what about foreign addresses?
-    state: str = "NM"  # Default to New Mexico
-    postal_code: str
+    state: str | None = "NM"  # Default to New Mexico
+
+    # todo: make postal code required?
+    postal_code: str | None = None
     country: str = "United States"  # Default to United States
     address_type: AddressType = "Primary"
 
@@ -193,9 +196,9 @@ class AddressResponse(BaseItemResponse):
 
     address_line_1: str
     address_line_2: str | None = None
-    city: str
-    state: str
-    postal_code: str
+    city: str | None = None
+    state: str | None = None
+    postal_code: str | None = None
     country: str
     address_type: AddressType
 
diff --git a/schemas/deployment.py b/schemas/deployment.py
index 5bd05014..2e7df9f8 100644
--- a/schemas/deployment.py
+++ b/schemas/deployment.py
@@ -7,7 +7,7 @@
 class DeploymentResponse(BaseResponseModel):
     thing_id: int
     sensor: SensorResponse
-    installation_date: date
+    installation_date: date | None
     removal_date: date | None
     recording_interval: int | None
     recording_interval_units: str | None
diff --git a/schemas/sample.py b/schemas/sample.py
index 4d821e57..8dce646b 100644
--- a/schemas/sample.py
+++ b/schemas/sample.py
@@ -91,7 +91,7 @@ def convert_sample_date_to_utc(sample_date: AwareDatetime) -> AwareDatetime:
 # -------- CREATE ----------
 class CreateSample(BaseCreateModel, ValidateSample):
     field_activity_id: int
-    field_event_participant_id: int
+    field_event_participant_id: int | None = None
     sample_date: Annotated[AwareDatetime, PastDatetime()]
     sample_name: str
     sample_matrix: SampleMatrix
@@ -130,7 +130,7 @@ class SampleResponse(BaseResponseModel):
     thing: ThingResponse
     field_event: FieldEventResponse
     field_activity: FieldActivityResponse
-    contact: ContactResponse
+    contact: ContactResponse | None
     sample_date: UTCAwareDatetime
     sample_name: str
     sample_matrix: SampleMatrix
diff --git a/schemas/thing.py b/schemas/thing.py
index 60dfce42..a6080923 100644
--- a/schemas/thing.py
+++ b/schemas/thing.py
@@ -35,6 +35,7 @@
 from schemas.notes import NoteResponse, CreateNote
 from schemas.permission_history import PermissionHistoryResponse
 
+
 # -------- VALIDATE ----------
 
 
@@ -47,6 +48,9 @@ class ValidateWell(BaseModel):
 
     @model_validator(mode="after")
     def validate_values(self):
+        # todo: reenable depth validation. removed for transfer
+        return self
+
         if self.hole_depth is not None:
             if self.well_depth is not None and self.well_depth > self.hole_depth:
                 raise ValueError(
@@ -66,25 +70,6 @@ def validate_values(self):
             elif self.hole_depth is not None and self.well_pump_depth > self.hole_depth:
                 raise ValueError("well pump depth must be less than hole depth")
 
-        # if self.measuring_point_height is not None:
-        #     if (
-        #         self.hole_depth is not None
-        #         and self.measuring_point_height >= self.hole_depth
-        #     ):
-        #         raise ValueError("measuring point height must be less than hole depth")
-        #     elif (
-        #         self.well_casing_depth is not None
-        #         and self.measuring_point_height >= self.well_casing_depth
-        #     ):
-        #         raise ValueError(
-        #             "measuring point height must be less than well casing depth"
-        #         )
-        #     elif (
-        #         self.well_depth is not None
-        #         and self.measuring_point_height >= self.well_depth
-        #     ):
-        #         raise ValueError("measuring point height must be less than well depth")
-
         return self
 
 
@@ -145,7 +130,9 @@ class CreateWell(CreateBaseThing, ValidateWell):
         default=None, gt=0, description="Well casing depth in feet"
     )
     well_casing_materials: list[CasingMaterial] | None = None
-    measuring_point_height: float = Field(description="Measuring point height in feet")
+    measuring_point_height: float | None = Field(
+        default=None, description="Measuring point height in feet"
+    )
     measuring_point_description: str | None = None
     well_completion_date: PastOrTodayDate | None = None
     well_completion_date_source: str | None = None
@@ -177,18 +164,26 @@ class CreateWellScreen(BaseCreateModel):
     thing_id: int
     aquifer_system_id: int | None = None
     geologic_formation_id: int | None = None
-    screen_depth_bottom: float = Field(gt=0, description="Screen depth bottom in feet")
-    screen_depth_top: float = Field(gt=0, description="Screen depth top in feet")
+    screen_depth_bottom: float | None = Field(
+        default=None, ge=0, description="Screen depth bottom in feet"
+    )
+    screen_depth_top: float | None = Field(
+        default=None, ge=0, description="Screen depth top in feet"
+    )
     screen_type: ScreenType | None = None
     screen_description: str | None = None
 
     # validate that screen depth bottom is greater than top
     @model_validator(mode="after")
     def check_depths(self):
-        if self.screen_depth_bottom < self.screen_depth_top:
-            raise ValueError(
-                "screen_depth_bottom must be greater than screen_depth_top"
-            )
+        # todo: reenable depth validation. removed for transfer
+        return self
+
+        if self.screen_depth_bottom is not None and self.screen_depth_top is not None:
+            if self.screen_depth_bottom < self.screen_depth_top:
+                raise ValueError(
+                    "screen_depth_bottom must be greater than screen_depth_top"
+                )
         return self
 
 
@@ -260,7 +255,7 @@ class WellResponse(BaseThingResponse):
     well_status: str | None
     open_status: str | None
     datalogger_suitability_status: str | None
-    measuring_point_height: float
+    measuring_point_height: float | None
     measuring_point_height_unit: str = "ft"
     measuring_point_description: str | None
     aquifers: list[dict] = []
@@ -352,9 +347,9 @@ class WellScreenResponse(BaseResponseModel):
     aquifer_type: str | None = None
     geologic_formation_id: int | None = None
     geologic_formation: str | None = None
-    screen_depth_bottom: float
+    screen_depth_bottom: float | None = None
     screen_depth_bottom_unit: str = "ft"
-    screen_depth_top: float
+    screen_depth_top: float | None = None
     screen_depth_top_unit: str = "ft"
     screen_type: str | None = None
     screen_description: str | None = None
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 00000000..2593c593
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,31 @@
+# Tests
+
+This directory contains automated tests (unit, integration, transfer, and API behavior).
+
+## Layout
+
+- `tests/unit/`: focused unit tests
+- `tests/integration/`: cross-component tests
+- `tests/transfers/`: transfer-focused tests
+- `tests/features/`: BDD-style feature tests
+
+## Running tests
+
+From repo root:
+
+```bash
+source .venv/bin/activate
+set -a; source .env; set +a
+pytest -q
+```
+
+Run a subset:
+
+```bash
+pytest -q tests/transfers
+```
+
+## Notes
+
+- Many tests depend on database settings from `.env`.
+- Keep tests deterministic and idempotent where possible.
diff --git a/tests/features/environment.py b/tests/features/environment.py
index 266df26f..4f3a6d2b 100644
--- a/tests/features/environment.py
+++ b/tests/features/environment.py
@@ -19,6 +19,8 @@
 
 from alembic import command
 from alembic.config import Config
+from sqlalchemy import select
+
 from core.initializers import init_lexicon, init_parameter
 from db import (
     Location,
@@ -51,7 +53,7 @@
 )
 from db.engine import session_ctx
 from db.initialization import recreate_public_schema, sync_search_vector_triggers
-from sqlalchemy import select
+from services.util import get_bool_env
 
 
 def add_context_object_container(name):
@@ -521,6 +523,10 @@ def _initialize_test_schema() -> None:
 
 def before_all(context):
     context.objects = {}
+
+    if not get_bool_env("DROP_AND_REBUILD_DB"):
+        return
+
     _initialize_test_schema()
 
     with session_ctx() as session:
@@ -711,6 +717,9 @@ def before_all(context):
 
 
 def after_all(context):
+    if not get_bool_env("DROP_AND_REBUILD_DB"):
+        return
+
     with session_ctx() as session:
         for table in reversed(Base.metadata.sorted_tables):
             if table.name in ("alembic_version", "parameter"):
@@ -731,6 +740,10 @@ def before_scenario(context, scenario):
 
 
 def after_scenario(context, scenario):
+
+    if not get_bool_env("DROP_AND_REBUILD_DB"):
+        return
+
     # runs after EVERY scenario
     # e.g. clean up temp files, close db sessions
     if scenario.name.startswith(
diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py
index 412ebea3..8bdc2f9c 100644
--- a/tests/test_cli_commands.py
+++ b/tests/test_cli_commands.py
@@ -244,10 +244,12 @@ def test_water_levels_cli_persists_observations(tmp_path, water_well_thing):
     """
 
     def _write_csv(path: Path, *, well_name: str, notes: str):
-        csv_text = textwrap.dedent(f"""\
+        csv_text = textwrap.dedent(
+            f"""\
             field_staff,well_name_point_id,field_event_date_time,measurement_date_time,sampler,sample_method,mp_height,level_status,depth_to_water_ft,data_quality,water_level_notes
             CLI Tester,{well_name},2025-02-15T08:00:00-07:00,2025-02-15T10:30:00-07:00,Groundwater Team,electric tape,1.5,stable,42.5,approved,{notes}
-            """)
+            """
+        )
         path.write_text(csv_text)
 
     unique_notes = f"pytest-{uuid.uuid4()}"
diff --git a/tests/test_util.py b/tests/test_util.py
index dea033ee..8a637b6d 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -54,6 +54,30 @@ def test_measuring_point_estimator_handles_missing_point(monkeypatch):
     assert mph_descs == []
 
 
+def test_measuring_point_estimator_rounds_estimated_height_to_two_sig_figs(monkeypatch):
+    monkeypatch.setattr(
+        "transfers.util.read_csv", lambda name: _mock_waterlevels_df().copy()
+    )
+    estimator = MeasuringPointEstimator()
+    row = SimpleNamespace(PointID="A", MPHeight=None, MeasuringPoint=None)
+
+    mphs, _, _, _ = estimator.estimate_measuring_point_height(row)
+
+    assert mphs[0] == 1.2
+
+
+def test_measuring_point_estimator_keeps_explicit_height_unrounded(monkeypatch):
+    monkeypatch.setattr(
+        "transfers.util.read_csv", lambda name: _mock_waterlevels_df().copy()
+    )
+    estimator = MeasuringPointEstimator()
+    row = SimpleNamespace(PointID="A", MPHeight=1.234, MeasuringPoint="top of casing")
+
+    mphs, _, _, _ = estimator.estimate_measuring_point_height(row)
+
+    assert mphs == [1.234]
+
+
 def _mock_waterlevels_df():
     return pd.DataFrame(
         {
@@ -63,7 +87,7 @@ def _mock_waterlevels_df():
                 "2024-01-01",
                 "2023-12-01",
             ],
-            "DepthToWater": [10.0, 11.0, 5.0],
+            "DepthToWater": [10.0, 11.234, 5.0],
             "DepthToWaterBGS": [9.0, 10.0, 4.5],
         }
     )
diff --git a/tests/unit/test_contact_transfer_email_utils.py b/tests/unit/test_contact_transfer_email_utils.py
new file mode 100644
index 00000000..65ab9d03
--- /dev/null
+++ b/tests/unit/test_contact_transfer_email_utils.py
@@ -0,0 +1,19 @@
+from transfers.contact_transfer import _looks_like_phone_in_email_field, _make_email
+
+
+def test_make_email_strips_email_prefix_and_trailing_punctuation():
+    email = _make_email(
+        "first",
+        "owner",
+        email="Email: dlglnd@verizon.net.",
+        email_type="Primary",
+        release_status="private",
+    )
+    assert email is not None
+    assert email.email == "dlglnd@verizon.net"
+
+
+def test_phone_like_email_field_detection():
+    assert _looks_like_phone_in_email_field("(505)-470-5877") is True
+    assert _looks_like_phone_in_email_field("(505) 259-1757") is True
+    assert _looks_like_phone_in_email_field("francisco_rael@hotmail.com") is False
diff --git a/transfers/README.md b/transfers/README.md
new file mode 100644
index 00000000..48a5743a
--- /dev/null
+++ b/transfers/README.md
@@ -0,0 +1,27 @@
+# Transfers
+
+This directory contains legacy-to-target ETL transfer logic.
+
+## Main orchestration
+
+- `transfers/transfer.py`
+
+## Important supporting modules
+
+- `transfers/transferer.py`: base transfer patterns
+- `transfers/util.py`: shared parsing/mapping helpers
+- `transfers/logger.py`: transfer logging
+- `transfers/metrics.py`: metrics capture
+
+## Performance rules
+
+For high-volume tables, prefer Core batch inserts:
+
+- `session.execute(insert(Model), rows)`
+
+Avoid ORM-heavy per-row object construction for bulk workloads.
+
+## Outputs
+
+- Logs: `transfers/logs/`
+- Metrics: `transfers/metrics/`
diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py
index dc649fc0..1e99d88b 100644
--- a/transfers/contact_transfer.py
+++ b/transfers/contact_transfer.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 # ===============================================================================
 import json
+import re
 
 import pandas as pd
 from pandas import DataFrame
@@ -93,7 +94,26 @@ def __init__(self, *args, **kw):
             )
             self._ownerkey_mapper = {}
 
-        self._added = []
+        self._added: set[tuple[str | None, str | None]] = set()
+        self._contact_by_owner_type: dict[tuple[str, str], Contact] = {}
+        self._contact_by_name_org: dict[tuple[str | None, str | None], Contact] = {}
+        self._commit_step = 500
+
+    def _build_contact_caches(self, session: Session) -> None:
+        contacts = session.query(Contact).all()
+        owner_type: dict[tuple[str, str], Contact] = {}
+        name_org: dict[tuple[str | None, str | None], Contact] = {}
+        for contact in contacts:
+            if contact.nma_pk_owners and contact.contact_type:
+                owner_type[(contact.nma_pk_owners, contact.contact_type)] = contact
+            name_org[(contact.name, contact.organization)] = contact
+        self._contact_by_owner_type = owner_type
+        self._contact_by_name_org = name_org
+        logger.info(
+            "Built contact caches: owner_type=%s name_org=%s",
+            len(self._contact_by_owner_type),
+            len(self._contact_by_name_org),
+        )
 
     def calculate_missing_organizations(self):
         input_df, cleaned_df = self._get_dfs()
@@ -184,6 +204,47 @@ def _get_dfs(self):
     def _get_prepped_group(self, group) -> DataFrame:
         return group.sort_values(by=["PointID"])
 
+    def _transfer_hook(self, session: Session):
+        self._build_contact_caches(session)
+
+        groups = self._get_group()
+        pointids = [
+            idx[0] if isinstance(idx, tuple) else idx for idx in groups.groups.keys()
+        ]
+        things = session.query(Thing).filter(Thing.name.in_(pointids)).all()
+        thing_by_name = {thing.name: thing for thing in things}
+        logger.info(
+            "Prepared ContactTransfer caches: %s grouped PointIDs, %s matching Things",
+            len(pointids),
+            len(thing_by_name),
+        )
+
+        processed_groups = 0
+        for index, group in groups:
+            pointid = index[0] if isinstance(index, tuple) else index
+            db_item = thing_by_name.get(pointid)
+            if db_item is None:
+                logger.warning(f"Thing with PointID {pointid} not found in database.")
+                continue
+
+            prepped_group = self._get_prepped_group(group)
+            for row in prepped_group.itertuples():
+                try:
+                    self._group_step(session, row, db_item)
+                except Exception as e:
+                    logger.critical(
+                        f"Could not add contact(s) for PointID {pointid}: {e}"
+                    )
+                    self._capture_error(pointid, str(e), "UnknownField")
+
+            processed_groups += 1
+            if processed_groups % self._commit_step == 0:
+                session.commit()
+                logger.info(
+                    "Committed ContactTransfer progress: %s groups processed",
+                    processed_groups,
+                )
+
     def _group_step(self, session: Session, row: pd.Series, db_item: Base):
         organization = _get_organization(row, self._co_to_org_mapper)
         for adder, tag in (_add_first_contact, "first"), (
@@ -197,6 +258,8 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base):
                     db_item,
                     organization,
                     self._added,
+                    self._contact_by_owner_type,
+                    self._contact_by_name_org,
                 )
                 if contact is not None:
                     session.flush([contact])
@@ -209,7 +272,6 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base):
                 ):
                     note = contact.add_note(row.OwnerComment, "OwnerComment")
                     session.add(note)
-                session.commit()
                 logger.info(f"added {tag} contact for PointID {row.PointID}")
             except ValidationError as e:
                 logger.critical(
@@ -225,14 +287,26 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base):
 
 
 def _add_first_contact(
-    session: Session, row: pd.Series, thing: Thing, organization: str, added: list
+    session: Session,
+    row: pd.Series,
+    thing: Thing,
+    organization: str,
+    added: set[tuple[str | None, str | None]],
+    contact_by_owner_type: dict[tuple[str, str], Contact],
+    contact_by_name_org: dict[tuple[str | None, str | None], Contact],
 ) -> Contact | None:
     # TODO: extract role from OwnerComment
     # role = extract_owner_role(row.OwnerComment)
     role = "Owner"
     release_status = "private"
 
-    name = _safe_make_name(row.FirstName, row.LastName, row.OwnerKey, organization)
+    name = _safe_make_name(
+        row.FirstName,
+        row.LastName,
+        row.OwnerKey,
+        organization,
+        fallback_suffix="primary",
+    )
 
     contact_data = {
         "thing_id": thing.id,
@@ -247,23 +321,47 @@ def _add_first_contact(
         "phones": [],
     }
 
-    contact, new = _make_contact_and_assoc(session, contact_data, thing, added)
+    contact, new = _make_contact_and_assoc(
+        session,
+        contact_data,
+        thing,
+        added,
+        contact_by_owner_type,
+        contact_by_name_org,
+    )
 
     if not new:
         return None
-    else:
-        added.append((name, organization))
 
     if row.Email:
-        email = _make_email(
-            "first",
-            row.OwnerKey,
-            email=row.Email.strip(),
-            email_type="Primary",
-            release_status=release_status,
-        )
-        if email:
-            contact.emails.append(email)
+        raw_email = str(row.Email).strip()
+        if _looks_like_phone_in_email_field(raw_email):
+            logger.warning(
+                "first '%s' Email field looked like a phone number; storing as phone instead.",
+                row.OwnerKey,
+            )
+            phone, complete = _make_phone(
+                "first",
+                row.OwnerKey,
+                phone_number=raw_email,
+                phone_type="Primary",
+                release_status=release_status,
+            )
+            if phone:
+                if complete:
+                    contact.phones.append(phone)
+                else:
+                    contact.incomplete_nma_phones.append(phone)
+        else:
+            email = _make_email(
+                "first",
+                row.OwnerKey,
+                email=raw_email,
+                email_type="Primary",
+                release_status=release_status,
+            )
+            if email:
+                contact.emails.append(email)
 
     if row.Phone:
         phone, complete = _make_phone(
@@ -327,20 +425,33 @@ def _add_first_contact(
 
 
 def _safe_make_name(
-    first: str | None, last: str | None, ownerkey: str, organization: str | None
+    first: str | None,
+    last: str | None,
+    ownerkey: str,
+    organization: str | None,
+    fallback_suffix: str | None = None,
 ) -> str | None:
     name = _make_name(first, last)
     if name is None and organization is None:
+        fallback = str(ownerkey) if ownerkey is not None else None
+        if fallback and fallback_suffix:
+            fallback = f"{fallback}-{fallback_suffix}"
         logger.warning(
             f"Missing both first and last name and organization for OwnerKey {ownerkey}; "
-            f"using OwnerKey as fallback name."
+            f"using OwnerKey fallback name '{fallback}'."
         )
-        return ownerkey
+        return fallback
     return name
 
 
 def _add_second_contact(
-    session: Session, row: pd.Series, thing: Thing, organization: str, added: list
+    session: Session,
+    row: pd.Series,
+    thing: Thing,
+    organization: str,
+    added: set[tuple[str | None, str | None]],
+    contact_by_owner_type: dict[tuple[str, str], Contact],
+    contact_by_name_org: dict[tuple[str | None, str | None], Contact],
 ) -> None:
     if all(
         [
@@ -352,7 +463,13 @@ def _add_second_contact(
         return
 
     release_status = "private"
-    name = _make_name(row.SecondFirstName, row.SecondLastName)
+    name = _safe_make_name(
+        row.SecondFirstName,
+        row.SecondLastName,
+        row.OwnerKey,
+        organization,
+        fallback_suffix="secondary",
+    )
 
     contact_data = {
         "thing_id": thing.id,
@@ -367,22 +484,46 @@ def _add_second_contact(
         "phones": [],
     }
 
-    contact, new = _make_contact_and_assoc(session, contact_data, thing, added)
+    contact, new = _make_contact_and_assoc(
+        session,
+        contact_data,
+        thing,
+        added,
+        contact_by_owner_type,
+        contact_by_name_org,
+    )
     if not new:
         return
-    else:
-        added.append((name, organization))
 
     if row.SecondCtctEmail:
-        email = _make_email(
-            "second",
-            row.OwnerKey,
-            email=row.SecondCtctEmail,
-            email_type="Primary",
-            release_status=release_status,
-        )
-        if email:
-            contact.emails.append(email)
+        raw_email = str(row.SecondCtctEmail).strip()
+        if _looks_like_phone_in_email_field(raw_email):
+            logger.warning(
+                "second '%s' Email field looked like a phone number; storing as phone instead.",
+                row.OwnerKey,
+            )
+            phone, complete = _make_phone(
+                "second",
+                row.OwnerKey,
+                phone_number=raw_email,
+                phone_type="Primary",
+                release_status=release_status,
+            )
+            if phone:
+                if complete:
+                    contact.phones.append(phone)
+                else:
+                    contact.incomplete_nma_phones.append(phone)
+        else:
+            email = _make_email(
+                "second",
+                row.OwnerKey,
+                email=raw_email,
+                email_type="Primary",
+                release_status=release_status,
+            )
+            if email:
+                contact.emails.append(email)
 
     if row.SecondCtctPhone:
         phone, complete = _make_phone(
@@ -428,7 +569,12 @@ def _make_email(first_second: str, ownerkey: str, **kw) -> Email | None:
 
     try:
         if "email" in kw:
-            kw["email"] = kw["email"].strip()
+            email = kw["email"].strip()
+            # Normalize legacy values like "Email: user@example.com"
+            email = re.sub(r"^\s*email\s*:\s*", "", email, flags=re.IGNORECASE)
+            # Normalize trailing punctuation from data-entry notes (e.g., "user@aol.com.")
+            email = re.sub(r"[.,;:]+$", "", email)
+            kw["email"] = email
 
         email = CreateEmail(**kw)
         return Email(**email.model_dump())
@@ -438,6 +584,21 @@ def _make_email(first_second: str, ownerkey: str, **kw) -> Email | None:
         )
 
 
+def _looks_like_phone_in_email_field(value: str | None) -> bool:
+    if not value:
+        return False
+
+    text = value.strip()
+    if "@" in text:
+        return False
+
+    # Accept common phone formatting chars, require enough digits to be a phone number.
+    if not re.fullmatch(r"[\d\s().+\-]+", text):
+        return False
+    digits = re.sub(r"\D", "", text)
+    return len(digits) >= 7
+
+
 def _make_phone(first_second: str, ownerkey: str, **kw) -> tuple[Phone | None, bool]:
     from schemas.contact import CreatePhone
 
@@ -473,41 +634,40 @@ def _make_address(first_second: str, ownerkey: str, kind: str, **kw) -> Address
 
 
 def _make_contact_and_assoc(
-    session: Session, data: dict, thing: Thing, added: list
+    session: Session,
+    data: dict,
+    thing: Thing,
+    added: set[tuple[str | None, str | None]],
+    contact_by_owner_type: dict[tuple[str, str], Contact],
+    contact_by_name_org: dict[tuple[str | None, str | None], Contact],
 ) -> tuple[Contact, bool]:
     new_contact = True
     contact = None
 
-    # Prefer OwnerKey-based dedupe so fallback names don't split the same owner
-    # into multiple contacts when some rows have real names and others do not.
     owner_key = data.get("nma_pk_owners")
     contact_type = data.get("contact_type")
     if owner_key and contact_type:
-        contact = (
-            session.query(Contact)
-            .filter_by(nma_pk_owners=owner_key, contact_type=contact_type)
-            .first()
-        )
+        contact = contact_by_owner_type.get((owner_key, contact_type))
         if contact is not None:
             new_contact = False
 
-    if contact is None and (data["name"], data["organization"]) in added:
-        contact = (
-            session.query(Contact)
-            .filter_by(name=data["name"], organization=data["organization"])
-            .first()
-        )
+    name_org_key = (data["name"], data["organization"])
+    if contact is None and name_org_key in added:
+        contact = contact_by_name_org.get(name_org_key)
         if contact is not None:
             new_contact = False
 
     if contact is None:
-
         from schemas.contact import CreateContact
 
         contact = CreateContact(**data)
         contact_data = contact.model_dump(exclude=["thing_id", "notes"])
         contact = Contact(**contact_data)
         session.add(contact)
+        if owner_key and contact_type:
+            contact_by_owner_type[(owner_key, contact_type)] = contact
+        contact_by_name_org[name_org_key] = contact
+        added.add(name_org_key)
 
     assoc = ThingContactAssociation()
     assoc.thing = thing
diff --git a/transfers/geologic_formation_transfer.py b/transfers/geologic_formation_transfer.py
index 4b8250c7..9d633682 100644
--- a/transfers/geologic_formation_transfer.py
+++ b/transfers/geologic_formation_transfer.py
@@ -1,6 +1,5 @@
-import time
-
 from pydantic import ValidationError
+from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlalchemy.orm import Session
 
 from db import GeologicFormation
@@ -27,12 +26,13 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple:
     # 2. Replace NaNs with None
     cleaned_df = replace_nans(input_df)
 
+    if limit is not None:
+        cleaned_df = cleaned_df.head(limit)
+
     # 3. Initialize tracking variables for logging
     n = len(cleaned_df)
-    step = 25
-    start_time = time.time()
     errors = []
-    created_count = 0
+    prepared_count = 0
     skipped_count = 0
 
     logger.info(
@@ -40,46 +40,34 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple:
         n,
     )
 
-    # 4. Process each row
-    for i, row in enumerate(cleaned_df.itertuples()):
-        # Log progress every 'step' rows
-        if i and not i % step:
-            logger.info(
-                f"Processing row {i} of {n}. Avg rows per second: {step / (time.time() - start_time):.2f}"
-            )
-            start_time = time.time()
+    # 4. Build a deduplicated, validated payload for a set-based insert.
+    rows_to_insert: list[dict] = []
+    seen_codes: set[str] = set()
+    for i, row in enumerate(cleaned_df.itertuples(index=False), start=1):
+        if i % 1000 == 0:
+            logger.info("Prepared %s/%s geologic formation rows", i, n)
 
-            # Commit progress periodically
-            try:
-                session.commit()
-            except Exception as e:
-                logger.critical(f"Error committing geologic formations: {e}")
-                session.rollback()
-                continue
+        # 5. Extract and normalize formation code
+        formation_code = getattr(row, "Code", None)
 
-        # 5. Extract formation code and description
-        formation_code = row.Code
+        if not formation_code:
+            logger.warning("Skipping row %s: Missing formation code", i)
+            skipped_count += 1
+            continue
 
+        formation_code = str(formation_code).strip().upper()
         if not formation_code:
-            logger.warning(f"Skipping row {i}: Missing formation code")
+            logger.warning("Skipping row %s: Blank formation code", i)
+            skipped_count += 1
+            continue
+
+        if formation_code in seen_codes:
+            # Duplicate code in source payload; keep first one only.
             skipped_count += 1
             continue
+        seen_codes.add(formation_code)
 
-        # Check if this formation already exists
-        # existing = (
-        #     session.query(GeologicFormation)
-        #     .filter(GeologicFormation.formation_code == formation_code)
-        #     .first()
-        # )
-        #
-        # if existing:
-        #     logger.info(
-        #         f"Skipping row {i}: Formation code {formation_code} already exists"
-        #     )
-        #     skipped_count += 1
-        #     continue
-
-        # 6. Prepare data for creation
+        # 6. Validate and prepare payload
         # Note: We only store the formation_code. Formation names will be mapped by the API using a
         # formations.json file from authoritative sources (e.g., USGS).
         # The description field is left as None and can be populated later if needed.
@@ -105,33 +93,30 @@ def transfer_geologic_formations(session: Session, limit: int = None) -> tuple:
             logger.critical(f"Error preparing data for {formation_code}: {e}")
             continue
 
-        # 7. Create database object
-        geologic_formation = None
-        try:
-            formation_data = data.model_dump()
-            geologic_formation = GeologicFormation(**formation_data)
-            session.add(geologic_formation)
-            created_count += 1
+        rows_to_insert.append(data.model_dump())
+        prepared_count += 1
 
-            logger.info(
-                f"Created geologic formation: {geologic_formation.formation_code}"
-            )
-
-        except Exception as e:
-            if geologic_formation is not None:
-                session.expunge(geologic_formation)
-            errors.append({"code": formation_code, "error": str(e)})
-            logger.critical(
-                f"Error creating geologic formation for {formation_code}: {e}"
+    # 7. Bulk insert; ON CONFLICT DO NOTHING skips codes that already exist, so re-runs are idempotent.
+    created_count = 0
+    try:
+        if rows_to_insert:
+            stmt = (
+                pg_insert(GeologicFormation)
+                .values(rows_to_insert)
+                .on_conflict_do_nothing(index_elements=["formation_code"])
+                .returning(GeologicFormation.formation_code)
             )
-            continue
+            inserted_codes = session.execute(stmt).scalars().all()
+            created_count = len(inserted_codes)
 
-    # 8. Final commit
-    try:
         session.commit()
         logger.info(
-            f"Successfully transferred {created_count} geologic formations, skipped {skipped_count}. "
-            f"Note: lithology is None and will be updated during stratigraphy transfer."
+            "Successfully transferred geologic formations. prepared=%s created=%s skipped=%s "
+            "existing_or_duplicate=%s. Note: lithology is None and will be updated during stratigraphy transfer.",
+            prepared_count,
+            created_count,
+            skipped_count,
+            max(prepared_count - created_count, 0),
         )
     except Exception as e:
         logger.critical(f"Error during final commit of geologic formations: {e}")
diff --git a/transfers/link_ids_transfer.py b/transfers/link_ids_transfer.py
index c32fd0b8..462f6de7 100644
--- a/transfers/link_ids_transfer.py
+++ b/transfers/link_ids_transfer.py
@@ -16,8 +16,10 @@
 import re
 
 import pandas as pd
+from sqlalchemy import insert
 
 from db import Thing, ThingIdLink
+from transfers.transferer import chunk_by_size
 from transfers.util import (
     filter_to_valid_point_ids,
     logger,
@@ -31,47 +33,78 @@
 class LinkIdsWellDataTransferer(WellChunkTransferer):
     source_table = "WellData"
     source_dtypes = {"OSEWellID": str, "OSEWelltagID": str}
+    _ose_wellid_regex = re.compile(r"^[A-Z]{1,3}-\d{3,6}$")
+
+    def _transfer_hook(self, session):
+        df = self._get_df_to_iterate()
+        for ci, chunk in enumerate(chunk_by_size(df, self.chunk_size)):
+            thing_id_by_pointid = {
+                name: thing_id
+                for name, thing_id in session.query(Thing.name, Thing.id)
+                .filter(Thing.name.in_(chunk.PointID.tolist()))
+                .all()
+            }
+            logger.info(
+                "Processing LinkIdsWellData chunk %s, %s rows, %s db items",
+                ci,
+                len(chunk),
+                len(thing_id_by_pointid),
+            )
 
-    def _chunk_step(self, session, dr, i, row, db_item):
-        if pd.isna(row.OSEWellID) and pd.isna(row.OSEWelltagID):
-            return
-
-        for aid, klass, regex in (
-            (row.OSEWellID, "OSEPOD", r"^[A-Z]{1,3}-\d{3,6}"),
-            (
-                row.OSEWelltagID,
-                "OSEWellTagID",
-                r"",
-            ),  # TODO: need to figure out regex for this field
-        ):
-            if pd.isna(aid):
-                # logger.warning(f"{klass} is null for {row.PointID}")
-                continue
-
-            # RULE: exclude any id that == 'X', '?'
-            if aid.strip().lower() in ("x", "?", "exempt"):
-                logger.critical(
-                    f'{klass} is "X", "?", or "exempt", id={aid} for {row.PointID}'
-                )
-                continue
-
-            if regex and not re.match(regex, aid):
-                logger.critical(
-                    f"{klass} id does not match regex {regex}, id={aid} for {row.PointID}"
-                )
-                continue
-
-            # TODO: add guards for null values
-            link_id = ThingIdLink()
-            link_id.thing = db_item
-            link_id.relation = klass
-            link_id.alternate_id = aid
-            link_id.alternate_organization = "NMOSE"
-
-            # does link_id need a class  e.g.
-            # link_id.alternate_id_class = klass
-
-            session.add(link_id)
+            rows_to_insert: list[dict] = []
+            for row in chunk.itertuples(index=False):
+                thing_id = thing_id_by_pointid.get(row.PointID)
+                if thing_id is None:
+                    self._missing_db_item_warning(row)
+                    continue
+
+                if pd.isna(row.OSEWellID) and pd.isna(row.OSEWelltagID):
+                    continue
+
+                for aid, relation, regex in (
+                    (row.OSEWellID, "OSEPOD", self._ose_wellid_regex),
+                    (row.OSEWelltagID, "OSEWellTagID", None),
+                ):
+                    if pd.isna(aid):
+                        continue
+
+                    aid_text = str(aid).strip()
+                    if not aid_text:
+                        continue
+
+                    # RULE: exclude any id equal to 'X', '?', or 'exempt' (case-insensitive)
+                    if aid_text.casefold() in ("x", "?", "exempt"):
+                        logger.critical(
+                            '%s is "X", "?", or "exempt", id=%s for %s',
+                            relation,
+                            aid_text,
+                            row.PointID,
+                        )
+                        continue
+
+                    if regex and not regex.match(aid_text):
+                        logger.critical(
+                            "%s id does not match regex %s, id=%s for %s",
+                            relation,
+                            regex.pattern,
+                            aid_text,
+                            row.PointID,
+                        )
+                        continue
+
+                    rows_to_insert.append(
+                        {
+                            "thing_id": thing_id,
+                            "relation": relation,
+                            "alternate_id": aid_text,
+                            "alternate_organization": "NMOSE",
+                        }
+                    )
+
+            if rows_to_insert:
+                session.execute(insert(ThingIdLink), rows_to_insert)
+            session.commit()
+            session.expunge_all()
 
 
 class LinkIdsLocationDataTransferer(WellChunkTransferer):
@@ -105,31 +138,65 @@ def _get_dfs(self):
         cleaned_df = filter_to_valid_point_ids(ldf)
         return input_df, cleaned_df
 
+    def _transfer_hook(self, session):
+        df = self._get_df_to_iterate()
+        for ci, chunk in enumerate(chunk_by_size(df, self.chunk_size)):
+            thing_id_by_pointid = {
+                name: thing_id
+                for name, thing_id in session.query(Thing.name, Thing.id)
+                .filter(Thing.name.in_(chunk.PointID.tolist()))
+                .all()
+            }
+            logger.info(
+                "Processing LinkIdsLocationData chunk %s, %s rows, %s db items",
+                ci,
+                len(chunk),
+                len(thing_id_by_pointid),
+            )
+
+            rows_to_insert: list[dict] = []
+            for row in chunk.itertuples(index=False):
+                thing_id = thing_id_by_pointid.get(row.PointID)
+                if thing_id is None:
+                    self._missing_db_item_warning(row)
+                    continue
+
+                for func in (
+                    self._add_link_alternate_site_id,
+                    self._add_link_site_id,
+                    self._add_link_plss,
+                ):
+                    link_row = func(row, thing_id)
+                    if link_row:
+                        rows_to_insert.append(link_row)
+
+            if rows_to_insert:
+                session.execute(insert(ThingIdLink), rows_to_insert)
+            session.commit()
+            session.expunge_all()
+
     def _chunk_step(self, session, df, i, row, db_item):
-        logger.info(
-            f"Processing PointID: {row.PointID}, "
-            f"Thing ID: {db_item.id}, "
-            f"AlternateSiteID={row.AlternateSiteID}, "
-            f"AlternateSiteID2={row.AlternateSiteID2}"
-        )
+        # Kept for compatibility; bulk path uses _transfer_hook.
         for func in (
             self._add_link_alternate_site_id,
             self._add_link_site_id,
             self._add_link_plss,
         ):
-            link = func(row, db_item)
+            link = func(row, db_item.id)
             if link:
-                session.add(link)
+                session.execute(insert(ThingIdLink), [link])
 
-    def _add_link_alternate_site_id(self, row: pd.Series, thing: Thing):
+    def _add_link_alternate_site_id(self, row: pd.Series, thing_id: int):
         if not row.AlternateSiteID:
             return
 
         return _make_thing_id_link(
-            thing, row.AlternateSiteID, extract_organization(str(row.AlternateSiteID))
+            thing_id,
+            row.AlternateSiteID,
+            extract_organization(str(row.AlternateSiteID)),
         )
 
-    def _add_link_site_id(self, row, thing):
+    def _add_link_site_id(self, row, thing_id: int):
         if not row.SiteID:
             return
 
@@ -143,9 +210,9 @@ def _add_link_site_id(self, row, thing):
             )
             return
 
-        return _make_thing_id_link(thing, row.SiteID, "USGS")
+        return _make_thing_id_link(thing_id, row.SiteID, "USGS")
 
-    def _add_link_plss(self, row, thing):
+    def _add_link_plss(self, row, thing_id: int):
         township = row.Township
         township_direction = row.TownshipDirection
         _range = row.Range
@@ -167,18 +234,18 @@ def _add_link_plss(self, row, thing):
             logger.critical(f"alternate id {alternate_id} is not a valid PLSS")
             return
 
-        return _make_thing_id_link(thing, alternate_id, "PLSS")
+        return _make_thing_id_link(thing_id, alternate_id, "PLSS")
 
 
 def _make_thing_id_link(
-    thing, alternate_id, alternate_organization, relation="same_as"
+    thing_id: int, alternate_id, alternate_organization, relation="same_as"
 ):
-    return ThingIdLink(
-        thing=thing,
-        relation=relation,
-        alternate_id=alternate_id,
-        alternate_organization=alternate_organization,
-    )
+    return {
+        "thing_id": thing_id,
+        "relation": relation,
+        "alternate_id": alternate_id,
+        "alternate_organization": alternate_organization,
+    }
 
 
 # ============= EOF =============================================
diff --git a/transfers/logger.py b/transfers/logger.py
index decf34d0..57a78f8f 100644
--- a/transfers/logger.py
+++ b/transfers/logger.py
@@ -21,14 +21,20 @@
 
 from services.gcs_helper import get_storage_bucket
 
-root = Path("logs")
-if not os.getcwd().endswith("transfers"):
-    root = Path("transfers") / root
+_context = os.environ.get("OCO_LOG_CONTEXT", "transfer").strip().lower() or "transfer"
 
-if not os.path.exists(root):
-    os.mkdir(root)
+if _context == "cli":
+    root = Path("cli") / "logs"
+    _prefix = "cli"
+else:
+    root = Path("logs")
+    if not os.getcwd().endswith("transfers"):
+        root = Path("transfers") / root
+    _prefix = "transfer"
 
-log_filename = f"transfer_{datetime.now():%Y-%m-%dT%H_%M_%S}.log"
+root.mkdir(parents=True, exist_ok=True)
+
+log_filename = f"{_prefix}_{datetime.now():%Y-%m-%dT%H_%M_%S}.log"
 log_path = root / log_filename
 
 
@@ -53,9 +59,10 @@
 
 def save_log_to_bucket():
     bucket = get_storage_bucket()
-    blob = bucket.blob(f"transfer_logs/{log_filename}")
+    bucket_folder = "transfer_logs" if _context != "cli" else "cli_logs"
+    blob = bucket.blob(f"{bucket_folder}/{log_filename}")
     blob.upload_from_filename(log_path)
-    logger.info(f"Uploaded log to gs://{bucket.name}/transfer_logs/{log_filename}")
+    logger.info(f"Uploaded log to gs://{bucket.name}/{bucket_folder}/{log_filename}")
 
 
 # ============= EOF =============================================
diff --git a/transfers/relaxed_constraints.md b/transfers/relaxed_constraints.md
new file mode 100644
index 00000000..1ab097a0
--- /dev/null
+++ b/transfers/relaxed_constraints.md
@@ -0,0 +1,10 @@
+- Address.postal_code is nullable
+- Thing.measuring_point_height is nullable
+- ValidateWell: depth validation removed
+- Deployment.installation_date is nullable
+- CreateWellScreen: depth validation removed
+- FieldEventParticipants are not required
+- screen_depth_bottom is nullable
+- screen_depth_top is nullable
+- city is nullable
+- state is nullable
\ No newline at end of file
diff --git a/transfers/sensor_transfer.py b/transfers/sensor_transfer.py
index 61aea732..a1c65b27 100644
--- a/transfers/sensor_transfer.py
+++ b/transfers/sensor_transfer.py
@@ -166,16 +166,10 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base):
             estimator = self._get_estimator(sensor_type)
             installation_date = estimator.estimate_installation_date(row)
             if not installation_date:
-                logger.critical(
-                    f"Installation Date cannot be None. Skipping deployment. Sensor: {row.ID}, "
-                    f"SerialNo: {row.SerialNo} PointID: {pointid}"
-                )
-                self._capture_error(
-                    pointid,
-                    f"row.SerialNo={row.SerialNo}. Installation Date cannot be None",
-                    "DateInstalled",
+                logger.warning(
+                    f"Installation Date is None. Proceeding with NULL deployment installation date. "
+                    f"Sensor: {row.ID}, SerialNo: {row.SerialNo} PointID: {pointid}"
                 )
-                return
             else:
                 logger.warning(
                     f"Estimated installation date={installation_date} for {pointid}"
@@ -204,10 +198,6 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base):
 
             if recording_interval is not None:
                 recording_interval_unit = unit
-                logger.info(
-                    f"name={sensor.name}, serial_no={sensor.serial_no}. "
-                    f"estimated recording interval: {recording_interval} {unit}"
-                )
                 self._capture_error(
                     pointid,
                     f"Estimated recording interval={recording_interval} {unit}. Is this correct?",
@@ -215,10 +205,6 @@ def _group_step(self, session: Session, row: pd.Series, db_item: Base):
                 )
 
             else:
-                logger.critical(
-                    f"name={sensor.name}, serial_no={sensor.serial_no} error={error}"
-                )
-
                 self._capture_error(
                     pointid,
                     f"name={sensor.name}, row.SerialNo={row.SerialNo}. "
diff --git a/transfers/thing_transfer.py b/transfers/thing_transfer.py
index 6c78cc8e..e0603b8a 100644
--- a/transfers/thing_transfer.py
+++ b/transfers/thing_transfer.py
@@ -14,13 +14,15 @@
 # limitations under the License.
 # ===============================================================================
 import time
+from threading import Lock
+from types import SimpleNamespace
 
 from pandas import isna
 from pydantic import ValidationError
+from sqlalchemy import insert
 from sqlalchemy.orm import Session
 
-from db import LocationThingAssociation
-from services.thing_helper import add_thing
+from db import LocationThingAssociation, Location, Thing, Notes, DataProvenance
 from transfers.logger import logger
 from transfers.util import (
     make_location,
@@ -29,23 +31,49 @@
     replace_nans,
 )
 
+_LOCATION_DF_CACHE = None
+_LOCATION_DF_LOCK = Lock()
+
+
+def _get_location_df():
+    global _LOCATION_DF_CACHE
+    if _LOCATION_DF_CACHE is None:
+        with _LOCATION_DF_LOCK:
+            if _LOCATION_DF_CACHE is None:
+                df = read_csv("Location")
+                _LOCATION_DF_CACHE = replace_nans(df)
+    return _LOCATION_DF_CACHE
 
-def transfer_thing(session: Session, site_type: str, make_payload, limit=None) -> None:
 
-    ldf = read_csv("Location")
+def transfer_thing(session: Session, site_type: str, make_payload, limit=None) -> None:
+    ldf = _get_location_df()
     ldf = ldf[ldf["SiteType"] == site_type]
     ldf = ldf[ldf["Easting"].notna() & ldf["Northing"].notna()]
-    ldf = replace_nans(ldf)
+
+    # Pre-compute duplicate PointIDs once to avoid O(n^2) filtering in the loop.
+    duplicate_mask = ldf["PointID"].duplicated(keep=False)
+    duplicate_pointids = set(ldf.loc[duplicate_mask, "PointID"])
+    if duplicate_pointids:
+        logger.warning(
+            "Found %s duplicate PointID values for site type %s; these will be skipped.",
+            len(duplicate_pointids),
+            site_type,
+        )
+
     n = len(ldf)
     start_time = time.time()
+    batch_size = 500
 
     logger.info("Starting transfer: Things (%s) [%s rows]", site_type, n)
     cached_elevations = {}
+    prepared_rows: list[dict] = []
+    skipped_count = 0
 
-    for i, row in enumerate(ldf.itertuples()):
+    for i, row in enumerate(ldf.itertuples(index=False)):
         pointid = row.PointID
-        if ldf[ldf["PointID"] == pointid].shape[0] > 1:
-            logger.critical(f"PointID {pointid} has duplicate records. Skipping.")
+        if pointid in duplicate_pointids:
+            logger.critical("PointID %s has duplicate records. Skipping.", pointid)
+            skipped_count += 1
             continue
 
         if limit is not None and limit > 0 and i >= limit:
@@ -56,42 +84,136 @@ def transfer_thing(session: Session, site_type: str, make_payload, limit=None) -
             logger.info(
                 f"Processing row {i} of {n}. {row.PointID},  avg rows per second: {i / (time.time() - start_time):.2f}"
             )
-            session.commit()
 
         try:
             location, elevation_method, location_notes = make_location(
                 row, cached_elevations
             )
-            session.add(location)
-            session.flush()
-            for note_type, note_content in location_notes.items():
-                if not isna(note_content):
-                    location_note = location.add_note(note_content, note_type)
-                    session.add(location_note)
-
-            data_provenances = make_location_data_provenance(
-                row, location, elevation_method
-            )
-            for dp in data_provenances:
-                session.add(dp)
-
             payload = make_payload(row)
-            thing_type = payload.pop("thing_type")
-            payload["nma_pk_location"] = row.LocationId
-            thing = add_thing(session, payload, thing_type=thing_type)
-            assoc = LocationThingAssociation()
-            assoc.location = location
-            assoc.thing = thing
-            session.add(assoc)
+            prepared_rows.append(
+                {
+                    "row": row,
+                    "location_row": {
+                        "nma_pk_location": location.nma_pk_location,
+                        "description": location.description,
+                        "point": location.point,
+                        "elevation": location.elevation,
+                        "release_status": location.release_status,
+                        "nma_date_created": location.nma_date_created,
+                        "nma_site_date": location.nma_site_date,
+                        "nma_location_notes": location.nma_location_notes,
+                        "nma_coordinate_notes": location.nma_coordinate_notes,
+                        "nma_data_reliability": location.nma_data_reliability,
+                    },
+                    "location_notes": location_notes,
+                    "elevation_method": elevation_method,
+                    "thing_row": {
+                        "name": payload["name"],
+                        "thing_type": payload["thing_type"],
+                        "release_status": payload["release_status"],
+                        "nma_pk_location": row.LocationId,
+                    },
+                }
+            )
         except ValidationError as e:
             logger.critical(
                 f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
             )
+            skipped_count += 1
         except Exception as e:
             logger.critical(f"Error creating location for {row.PointID}: {e}")
+            skipped_count += 1
+            continue
+
+    created_count = 0
+    for start in range(0, len(prepared_rows), batch_size):
+        chunk = prepared_rows[start : start + batch_size]
+        if not chunk:
             continue
 
+        location_rows = [item["location_row"] for item in chunk]
+        inserted_locations = session.execute(
+            insert(Location).returning(Location.id, Location.nma_pk_location),
+            location_rows,
+        ).all()
+        location_id_by_nma_pk = {
+            nma_pk: loc_id for loc_id, nma_pk in inserted_locations
+        }
+
+        thing_rows = [item["thing_row"] for item in chunk]
+        inserted_things = session.execute(
+            insert(Thing).returning(Thing.id, Thing.nma_pk_location),
+            thing_rows,
+        ).all()
+        thing_id_by_nma_pk = {nma_pk: thing_id for thing_id, nma_pk in inserted_things}
+
+        notes_rows: list[dict] = []
+        provenance_rows: list[dict] = []
+        assoc_rows: list[dict] = []
+
+        for item in chunk:
+            nma_pk_location = item["thing_row"]["nma_pk_location"]
+            location_id = location_id_by_nma_pk.get(nma_pk_location)
+            thing_id = thing_id_by_nma_pk.get(nma_pk_location)
+
+            if location_id is None or thing_id is None:
+                logger.critical(
+                    "Failed to resolve inserted IDs for nma_pk_location=%s; skipping associations",
+                    nma_pk_location,
+                )
+                skipped_count += 1
+                continue
+
+            assoc_rows.append({"location_id": location_id, "thing_id": thing_id})
+
+            for note_type, note_content in item["location_notes"].items():
+                if not isna(note_content):
+                    notes_rows.append(
+                        {
+                            "target_id": location_id,
+                            "target_table": "location",
+                            "note_type": note_type,
+                            "content": note_content,
+                            "release_status": "draft",
+                        }
+                    )
+
+            # Reuse existing provenance mapper by passing an object with .id.
+            location_stub = SimpleNamespace(id=location_id)
+            data_provenances = make_location_data_provenance(
+                item["row"], location_stub, item["elevation_method"]
+            )
+            for dp in data_provenances:
+                provenance_rows.append(
+                    {
+                        "target_id": dp.target_id,
+                        "target_table": dp.target_table,
+                        "field_name": dp.field_name,
+                        "origin_type": dp.origin_type,
+                        "origin_source": dp.origin_source,
+                        "collection_method": dp.collection_method,
+                        "accuracy_value": dp.accuracy_value,
+                        "accuracy_unit": dp.accuracy_unit,
+                        "release_status": dp.release_status or "draft",
+                    }
+                )
+
+        if notes_rows:
+            session.execute(insert(Notes), notes_rows)
+        if provenance_rows:
+            session.execute(insert(DataProvenance), provenance_rows)
+        if assoc_rows:
+            session.execute(insert(LocationThingAssociation), assoc_rows)
+            created_count += len(assoc_rows)
+
     session.commit()
+    logger.info(
+        "Things transfer summary (%s): created=%s skipped=%s total_candidates=%s",
+        site_type,
+        created_count,
+        skipped_count,
+        n,
+    )
     logger.info("Completed transfer: Things (%s)", site_type)
 
 
diff --git a/transfers/transfer_results_builder.py b/transfers/transfer_results_builder.py
index 1a2392c0..296529cd 100644
--- a/transfers/transfer_results_builder.py
+++ b/transfers/transfer_results_builder.py
@@ -7,7 +7,11 @@
 import pandas as pd
 from sqlalchemy import select, func
 
+from db import Deployment, Sensor, Thing
 from db.engine import session_ctx
+from transfers.sensor_transfer import (
+    EQUIPMENT_TO_SENSOR_TYPE_MAP,
+)
 from transfers.transfer import load_transfer_options
 from transfers.transfer_results_specs import (
     TRANSFER_COMPARISON_SPECS,
@@ -18,12 +22,26 @@
     TransferResult,
 )
 from transfers.util import (
+    SensorParameterEstimator,
     read_csv,
     replace_nans,
     get_transferable_wells,
 )
 
 
+def _model_column(model: Any, token: str) -> Any:
+    if hasattr(model, token):
+        return getattr(model, token)
+    table = model.__table__
+    if token in table.c:
+        return table.c[token]
+    token_norm = token.casefold()
+    for col in table.c:
+        if col.key.casefold() == token_norm or col.name.casefold() == token_norm:
+            return col
+    raise AttributeError(f"{model.__name__} has no column '{token}'")
+
+
 def _normalize_key(value: Any) -> str | None:
     if value is None:
         return None
@@ -57,6 +75,96 @@ def _normalized_series(df: pd.DataFrame, key_col: str) -> pd.Series:
     return s.astype(str)
 
 
+def _normalize_date_like(value: Any) -> str:
+    if value is None:
+        return ""
+    try:
+        if pd.isna(value):
+            return ""
+    except TypeError:
+        pass
+    dt = pd.to_datetime(value, errors="coerce")
+    if pd.isna(dt):
+        return ""
+    return dt.date().isoformat()
+
+
+def _parse_legacy_datetime_date(value: Any) -> str | None:
+    if value is None:
+        return None
+    try:
+        if pd.isna(value):
+            return None
+    except TypeError:
+        pass
+    text = str(value).strip()
+    if not text:
+        return None
+    try:
+        return pd.to_datetime(text, format="%Y-%m-%d %H:%M:%S.%f").date().isoformat()
+    except (TypeError, ValueError):
+        return None
+
+
+def _equipment_source_series(df: pd.DataFrame) -> pd.Series:
+    required = {"PointID", "SerialNo", "DateInstalled", "DateRemoved"}
+    if not required.issubset(df.columns):
+        return pd.Series([], dtype=object)
+
+    estimators: dict[str, SensorParameterEstimator] = {}
+    keys: list[str] = []
+    for row in df.itertuples(index=False):
+        pointid = _normalize_key(getattr(row, "PointID", None)) or ""
+        serial = _normalize_key(getattr(row, "SerialNo", None)) or ""
+
+        installed = _parse_legacy_datetime_date(getattr(row, "DateInstalled", None))
+        if installed is None:
+            equipment_type = getattr(row, "EquipmentType", None)
+            sensor_type = EQUIPMENT_TO_SENSOR_TYPE_MAP.get(equipment_type)
+            if sensor_type:
+                estimator = estimators.get(sensor_type)
+                if estimator is None:
+                    estimator = SensorParameterEstimator(sensor_type)
+                    estimators[sensor_type] = estimator
+                estimated = estimator.estimate_installation_date(row)
+                installed = _normalize_date_like(estimated)
+            else:
+                installed = ""
+
+        removed = _parse_legacy_datetime_date(getattr(row, "DateRemoved", None))
+        if removed is None:
+            removed = ""
+
+        keys.append(f"{pointid}|{serial}|{installed}|{removed}")
+    return pd.Series(keys, dtype=object)
+
+
+def _equipment_destination_series(session) -> pd.Series:
+    sql = (
+        select(
+            Thing.name.label("point_id"),
+            Sensor.serial_no.label("serial_no"),
+            Deployment.installation_date.label("installed"),
+            Deployment.removal_date.label("removed"),
+        )
+        .select_from(Deployment)
+        .join(Thing, Deployment.thing_id == Thing.id)
+        .join(Sensor, Deployment.sensor_id == Sensor.id)
+        .where(Thing.name.is_not(None))
+        .where(Sensor.serial_no.is_not(None))
+    )
+    rows = session.execute(sql).all()
+    if not rows:
+        return pd.Series([], dtype=object)
+    pointid = pd.Series([_normalize_key(r.point_id) or "" for r in rows], dtype=object)
+    serial = pd.Series([_normalize_key(r.serial_no) or "" for r in rows], dtype=object)
+    installed = pd.Series(
+        [_normalize_date_like(r.installed) for r in rows], dtype=object
+    )
+    removed = pd.Series([_normalize_date_like(r.removed) for r in rows], dtype=object)
+    return pointid + "|" + serial + "|" + installed + "|" + removed
+
+
 class TransferResultsBuilder:
     """Compare transfer input CSV keys to destination database keys per transfer."""
 
@@ -87,29 +195,45 @@ def _build_one(self, spec: TransferComparisonSpec) -> TransferResult:
         elif spec.transfer_name == "WellData":
             comparison_df = self._agreed_welldata_df()
 
-        source_series = _normalized_series(comparison_df, spec.source_key_column)
+        if spec.transfer_name == "Equipment":
+            source_series = _equipment_source_series(comparison_df)
+        else:
+            source_series = _normalized_series(comparison_df, spec.source_key_column)
         source_keys = set(source_series.unique().tolist())
         source_keyed_row_count = int(source_series.shape[0])
         source_duplicate_key_row_count = source_keyed_row_count - len(source_keys)
         agreed_transfer_row_count = int(len(comparison_df))
 
         model = spec.destination_model
-        key_col = getattr(model, spec.destination_key_column)
+        destination_model_name = model.__name__
+        destination_key_column = spec.destination_key_column
         with session_ctx() as session:
-            key_sql = select(key_col).where(key_col.is_not(None))
-            count_sql = select(func.count()).select_from(model)
+            if spec.transfer_name == "Equipment":
+                count_sql = select(func.count()).select_from(Deployment)
+                count_sql = count_sql.join(Thing, Deployment.thing_id == Thing.id)
+                count_sql = count_sql.join(Sensor, Deployment.sensor_id == Sensor.id)
+                count_sql = count_sql.where(Thing.name.is_not(None))
+                count_sql = count_sql.where(Sensor.serial_no.is_not(None))
+                destination_series = _equipment_destination_series(session)
+                destination_row_count = int(session.execute(count_sql).scalar_one())
+                destination_model_name = "Deployment"
+                destination_key_column = "thing.name|sensor.serial_no|deployment.installation_date|deployment.removal_date"
+            else:
+                key_col = _model_column(model, spec.destination_key_column)
+                key_sql = select(key_col).where(key_col.is_not(None))
+                count_sql = select(func.count()).select_from(model)
 
-            if spec.destination_where:
-                where_clause = spec.destination_where(model)
-                key_sql = key_sql.where(where_clause)
-                count_sql = count_sql.where(where_clause)
+                if spec.destination_where:
+                    where_clause = spec.destination_where(model)
+                    key_sql = key_sql.where(where_clause)
+                    count_sql = count_sql.where(where_clause)
 
-            raw_dest_keys = session.execute(key_sql).scalars().all()
-            destination_row_count = int(session.execute(count_sql).scalar_one())
+                raw_dest_keys = session.execute(key_sql).scalars().all()
+                destination_series = pd.Series(
+                    [_normalize_key(v) for v in raw_dest_keys], dtype=object
+                ).dropna()
+                destination_row_count = int(session.execute(count_sql).scalar_one())
 
-        destination_series = pd.Series(
-            [_normalize_key(v) for v in raw_dest_keys], dtype=object
-        ).dropna()
         if destination_series.empty:
             destination_series = pd.Series([], dtype=object)
         else:
@@ -123,13 +247,18 @@ def _build_one(self, spec: TransferComparisonSpec) -> TransferResult:
 
         missing = sorted(source_keys - destination_keys)
         extra = sorted(destination_keys - source_keys)
+        transferred_agreed_row_count = int(source_series.isin(destination_keys).sum())
+        missing_agreed_row_count = max(
+            agreed_transfer_row_count - transferred_agreed_row_count,
+            0,
+        )
 
         return spec.result_cls(
             transfer_name=spec.transfer_name,
             source_csv=spec.source_csv,
             source_key_column=spec.source_key_column,
-            destination_model=model.__name__,
-            destination_key_column=spec.destination_key_column,
+            destination_model=destination_model_name,
+            destination_key_column=destination_key_column,
             source_row_count=len(source_df),
             agreed_transfer_row_count=agreed_transfer_row_count,
             source_keyed_row_count=source_keyed_row_count,
@@ -142,6 +271,8 @@ def _build_one(self, spec: TransferComparisonSpec) -> TransferResult:
             matched_key_count=len(source_keys & destination_keys),
             missing_in_destination_count=len(missing),
             extra_in_destination_count=len(extra),
+            transferred_agreed_row_count=transferred_agreed_row_count,
+            missing_agreed_row_count=missing_agreed_row_count,
             missing_in_destination_sample=missing[: self.sample_limit],
             extra_in_destination_sample=extra[: self.sample_limit],
         )
diff --git a/transfers/transfer_results_specs.py b/transfers/transfer_results_specs.py
index 449ffa89..c117e7b3 100644
--- a/transfers/transfer_results_specs.py
+++ b/transfers/transfer_results_specs.py
@@ -37,7 +37,6 @@
 from db.engine import session_ctx
 from transfers.contact_transfer import (
     _get_organization,
-    _make_name,
     _safe_make_name,
     _select_ownerkey_col,
 )
@@ -78,9 +77,12 @@
     WellScreensTransferResult,
 )
 from transfers.util import (
+    filter_non_transferred_wells,
     filter_by_valid_measuring_agency,
     filter_to_valid_point_ids,
+    get_transferable_wells,
     get_transfers_data_path,
+    lexicon_mapper,
     read_csv,
     replace_nans,
 )
@@ -181,9 +183,87 @@ def _waterlevels_filter(df: pd.DataFrame) -> pd.DataFrame:
     cleaned_df = replace_nans(df.copy())
     cleaned_df = filter_to_valid_point_ids(cleaned_df)
     cleaned_df = filter_by_valid_measuring_agency(cleaned_df)
+
+    # Mirror WaterLevelTransferer behavior for observation creation:
+    # rows whose mapped LevelStatus indicates a destroyed well only create
+    # FieldEvent notes and intentionally do not create observations.
+    def _is_destroyed(level_status: Any) -> bool:
+        if pd.isna(level_status):
+            return False
+
+        value = level_status
+        if value == "X?":
+            value = "X"
+        mapped = lexicon_mapper.map_value(f"LU_LevelStatus:{value}")
+        return (
+            mapped
+            == "Well was destroyed (no subsequent water levels should be recorded)"
+        )
+
+    if "LevelStatus" in cleaned_df.columns:
+        cleaned_df = cleaned_df[~cleaned_df["LevelStatus"].map(_is_destroyed)]
+
+    return cleaned_df
+
+
+def _equipment_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Mirror the SensorTransferer._get_dfs filtering stage.
+
+    Returns no rows when the (space-normalized) columns lack SerialNo.
+    """
+    frame = df.copy()
+    frame.columns = frame.columns.str.replace(" ", "_")
+    if "SerialNo" not in frame.columns:
+        return frame.iloc[0:0]
+    with_serial = frame[frame["SerialNo"].notna()]
+    return replace_nans(filter_to_valid_point_ids(with_serial))
+
+
+def _wellscreens_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Mirror WellChunkTransferer._get_dfs as used by WellScreenTransferer."""
+    cleaned_df = replace_nans(df.copy())
+    cleaned_df = filter_to_valid_point_ids(cleaned_df)
     return cleaned_df
 
 
+def _welldata_filter(df: pd.DataFrame) -> pd.DataFrame:
+    """Mirror the WellTransferer._get_dfs filtering stage.
+
+    Joins the Location CSV on LocationId, keeps "GW" sites that have both
+    Easting and Northing, runs the transfers.util well filters, drops every
+    row whose PointID is duplicated and sorts by PointID. Each early exit
+    for a missing column yields a frame with no rows.
+    """
+    if "LocationId" not in df.columns:
+        return df.iloc[0:0]
+
+    locations = read_csv("Location").drop(
+        ["PointID", "SSMA_TimeStamp"], axis=1, errors="ignore"
+    )
+    wells = df.copy().join(locations.set_index("LocationId"), on="LocationId")
+
+    if "SiteType" not in wells.columns:
+        return wells.iloc[0:0]
+    wells = wells[wells["SiteType"] == "GW"]
+    if not {"Easting", "Northing"}.issubset(wells.columns):
+        return wells.iloc[0:0]
+    wells = wells[wells["Easting"].notna() & wells["Northing"].notna()]
+
+    wells = replace_nans(wells)
+    wells = get_transferable_wells(wells)
+    wells = filter_non_transferred_wells(wells)
+    if "PointID" not in wells.columns:
+        return wells.iloc[0:0]
+
+    # Match WellTransferer behavior: skip every duplicated PointID.
+    repeated = wells["PointID"].duplicated(keep=False)
+    if repeated.any():
+        repeated_ids = set(wells.loc[repeated, "PointID"])
+        wells = wells[~wells["PointID"].isin(repeated_ids)]
+
+    return wells.sort_values(by=["PointID"])
+
+
 def _stratigraphy_filter(df: pd.DataFrame) -> pd.DataFrame:
     # Mirror StratigraphyLegacyTransferer._get_dfs filtering stage.
     cleaned_df = replace_nans(df.copy())
@@ -379,6 +459,7 @@ def _record_new_contact(
             getattr(row, "LastName", None),
             owner_key,
             organization,
+            fallback_suffix="primary",
         )
         _record_new_contact(owner_key, "Primary", primary_name, organization)
 
@@ -391,9 +472,12 @@ def _record_new_contact(
             ]
         )
         if has_secondary_input:
-            secondary_name = _make_name(
+            secondary_name = _safe_make_name(
                 getattr(row, "SecondFirstName", None),
                 getattr(row, "SecondLastName", None),
+                owner_key,
+                organization,
+                fallback_suffix="secondary",
             )
             _record_new_contact(owner_key, "Secondary", secondary_name, organization)
 
@@ -408,6 +492,7 @@ def _record_new_contact(
         "WellID",
         Thing,
         "nma_pk_welldata",
+        agreed_filter=_welldata_filter,
         destination_where=lambda m: m.thing_type == "water well",
     ),
     TransferComparisonSpec(
@@ -417,6 +502,7 @@ def _record_new_contact(
         "GlobalID",
         WellScreen,
         "nma_pk_wellscreens",
+        agreed_filter=_wellscreens_filter,
         option_field="transfer_screens",
     ),
     TransferComparisonSpec(
@@ -447,6 +533,7 @@ def _record_new_contact(
         "GlobalID",
         Sensor,
         "nma_pk_equipment",
+        agreed_filter=_equipment_filter,
         option_field="transfer_sensors",
     ),
     TransferComparisonSpec(
diff --git a/transfers/transfer_results_types.py b/transfers/transfer_results_types.py
index dc58238a..1163a2c7 100644
--- a/transfers/transfer_results_types.py
+++ b/transfers/transfer_results_types.py
@@ -22,6 +22,8 @@ class TransferResult:
     matched_key_count: int = 0
     missing_in_destination_count: int = 0
     extra_in_destination_count: int = 0
+    transferred_agreed_row_count: int = 0
+    missing_agreed_row_count: int = 0
     missing_in_destination_sample: list[str] = field(default_factory=list)
     extra_in_destination_sample: list[str] = field(default_factory=list)
 
diff --git a/transfers/transferer.py b/transfers/transferer.py
index afef86e3..e05fd90d 100644
--- a/transfers/transferer.py
+++ b/transfers/transferer.py
@@ -329,16 +329,6 @@ def _filter_to_valid_sample_infos(self, df: pd.DataFrame) -> pd.DataFrame:
         parsed_sample_pt_ids = df["SamplePtID"].map(self._uuid_val)
         mask = parsed_sample_pt_ids.isin(valid_sample_pt_ids)
         filtered_df = df[mask].copy()
-        inverted_df = df[~mask].copy()
-        if not inverted_df.empty:
-            for _, row in inverted_df.iterrows():
-                sample_pt_id = row.get("SamplePtID")
-                self._capture_error(
-                    sample_pt_id,
-                    f"No matching ChemistrySampleInfo for SamplePtID: {sample_pt_id}",
-                    "SamplePtID",
-                )
-
         after_count = len(filtered_df)
 
         if before_count > after_count:
diff --git a/transfers/util.py b/transfers/util.py
index d358937c..5fd1a471 100644
--- a/transfers/util.py
+++ b/transfers/util.py
@@ -126,6 +126,7 @@ def estimate_measuring_point_height(
                 # try to estimate mpheight from measurements
                 for m in df.itertuples():
                     mphi = m.DepthToWater - m.DepthToWaterBGS
+                    mphi = _round_sig_figs(mphi, 2)
                     start_date = m.DateMeasured
                     if mphi not in mphs:
                         if notna(mphi):
@@ -155,6 +156,28 @@ def estimate_measuring_point_height(
         return mphs, mph_descs, start_dates, end_dates
 
 
+def _round_sig_figs(value: float, sig_figs: int) -> float:
+    """Round ``value`` to ``sig_figs`` significant figures.
+
+    None, NA, non-numeric and non-finite inputs are returned unchanged.
+    """
+    try:
+        if value is None or pd.isna(value):
+            return value
+    except TypeError:
+        pass  # a TypeError from the NA check falls through to float() below
+    try:
+        as_float = float(value)
+    except (TypeError, ValueError):
+        return value
+    if not math.isfinite(as_float):
+        return value
+    if as_float == 0:
+        return 0.0
+    exponent = int(math.floor(math.log10(abs(as_float))))
+    return round(as_float, sig_figs - exponent - 1)
+
+
 def _get_defined_recording_interval(pointid: str) -> tuple[int, str] | None:
     if pointid in DEFINED_RECORDING_INTERVALS:
         return DEFINED_RECORDING_INTERVALS[pointid]
diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py
index 3b664e4c..261faf53 100644
--- a/transfers/waterlevels_transfer.py
+++ b/transfers/waterlevels_transfer.py
@@ -19,6 +19,10 @@
 from typing import Any
 
 import pandas as pd
+from sqlalchemy import insert
+from sqlalchemy.exc import DatabaseError, SQLAlchemyError
+from sqlalchemy.orm import Session
+
 from db import (
     Thing,
     ThingContactAssociation,
@@ -31,9 +35,6 @@
     Parameter,
 )
 from db.engine import session_ctx
-from sqlalchemy import insert
-from sqlalchemy.exc import DatabaseError, SQLAlchemyError
-from sqlalchemy.orm import Session
 from transfers.transferer import Transferer
 from transfers.util import (
     filter_to_valid_point_ids,
@@ -149,7 +150,7 @@ def _transfer_hook(self, session: Session) -> None:
             "rows_created": 0,
             "rows_skipped_dt": 0,
             "rows_skipped_reason": 0,
-            "rows_skipped_contacts": 0,
+            "rows_missing_participants": 0,
             "rows_well_destroyed": 0,
             "field_events_created": 0,
             "field_activities_created": 0,
@@ -175,9 +176,6 @@ def _transfer_hook(self, session: Session) -> None:
             thing_id = self._thing_id_by_pointid.get(pointid)
             if thing_id is None:
                 stats["groups_skipped_missing_thing"] += 1
-                logger.warning(
-                    "Skipping PointID=%s because Thing was not found", pointid
-                )
                 self._capture_error(pointid, "Thing not found", "PointID")
                 continue
 
@@ -219,12 +217,7 @@ def _transfer_hook(self, session: Session) -> None:
                 )
 
                 if not field_event_participants:
-                    stats["rows_skipped_contacts"] += 1
-                    logger.warning(
-                        "Skipping %s because no field event participants were found",
-                        self._row_context(row),
-                    )
-                    continue
+                    stats["rows_missing_participants"] += 1
 
                 is_destroyed = (
                     glv
@@ -406,29 +399,14 @@ def _transfer_hook(self, session: Session) -> None:
                 stats["groups_processed"] += 1
             except DatabaseError as e:
                 stats["groups_failed_commit"] += 1
-                logger.exception(
-                    "Failed committing WaterLevels group for PointID=%s: %s",
-                    pointid,
-                    e,
-                )
                 session.rollback()
                 self._capture_database_error(pointid, e)
             except SQLAlchemyError as e:
                 stats["groups_failed_commit"] += 1
-                logger.exception(
-                    "SQLAlchemy failure committing WaterLevels group for PointID=%s: %s",
-                    pointid,
-                    e,
-                )
                 session.rollback()
-                self._capture_error(pointid, str(e), "UnknownField")
+                self._capture_error(pointid, str(e), "SQLAlchemyError")
             except Exception as e:
                 stats["groups_failed_commit"] += 1
-                logger.exception(
-                    "Unexpected failure committing WaterLevels group for PointID=%s: %s",
-                    pointid,
-                    e,
-                )
                 session.rollback()
                 self._capture_error(pointid, str(e), "UnknownField")
 
@@ -673,9 +651,9 @@ def _get_field_event_participants(self, session, row) -> list[Contact]:
                     self._last_contacts_reused_count += 1
 
         if len(field_event_participants) == 0:
-            logger.critical(
-                f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}, "
-                f"therefore no field event, field activity, sample, and observation can be made. Skipping."
+            logger.warning(
+                f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}; "
+                f"continuing with nullable field_event_participant_id."
             )
 
         return field_event_participants
@@ -690,7 +668,7 @@ def _row_context(self, row: Any) -> str:
     def _log_transfer_summary(self, stats: dict[str, int]) -> None:
         logger.info(
             "WaterLevels summary: groups total=%s processed=%s skipped_missing_thing=%s failed_commit=%s "
-            "rows total=%s created=%s skipped_dt=%s skipped_reason=%s skipped_contacts=%s well_destroyed=%s "
+            "rows total=%s created=%s skipped_dt=%s skipped_reason=%s missing_participants=%s well_destroyed=%s "
             "field_events=%s activities=%s samples=%s observations=%s contacts_created=%s contacts_reused=%s",
             stats["groups_total"],
             stats["groups_processed"],
@@ -700,7 +678,7 @@ def _log_transfer_summary(self, stats: dict[str, int]) -> None:
             stats["rows_created"],
             stats["rows_skipped_dt"],
             stats["rows_skipped_reason"],
-            stats["rows_skipped_contacts"],
+            stats["rows_missing_participants"],
             stats["rows_well_destroyed"],
             stats["field_events_created"],
             stats["field_activities_created"],
diff --git a/transfers/well_transfer.py b/transfers/well_transfer.py
index a6fa6408..5d459c23 100644
--- a/transfers/well_transfer.py
+++ b/transfers/well_transfer.py
@@ -188,9 +188,12 @@ def transfer_parallel(self, num_workers: int = None) -> None:
         all_errors = []
         errors_lock = threading.Lock()
         aquifers_lock = threading.Lock()
+        progress_lock = threading.Lock()
+        transferred_count = 0
 
         def process_batch(batch_idx: int, batch_df: pd.DataFrame) -> dict:
             """Process a batch of wells in a separate thread with its own session."""
+            nonlocal transferred_count
             batch_errors = []
             batch_start = time.time()
 
@@ -206,7 +209,7 @@ def process_batch(batch_idx: int, batch_df: pd.DataFrame) -> dict:
                     for i, row in enumerate(batch_df.itertuples()):
                         try:
                             # Process single well with all dependent objects
-                            self._step_parallel_complete(
+                            transferred = self._step_parallel_complete(
                                 session,
                                 row,
                                 local_aquifers,
@@ -214,6 +217,15 @@ def process_batch(batch_idx: int, batch_df: pd.DataFrame) -> dict:
                                 batch_errors,
                                 aquifers_lock,
                             )
+                            if transferred:
+                                with progress_lock:
+                                    transferred_count += 1
+                                    logger.info(
+                                        "[%s/%s] Transferred PointID=%s",
+                                        transferred_count,
+                                        n,
+                                        row.PointID,
+                                    )
                         except Exception as e:
                             self._log_exception(
                                 getattr(row, "PointID", "Unknown"),
@@ -321,12 +333,19 @@ def _extract_well_purposes(self, row) -> list[str]:
 
         if isna(cu):
             return []
+
+        cu = cu.strip()
+        if not cu:
+            return []
         else:
             purposes = []
             for cui in cu:
                 if cui == "A":
                     # skip "Open, unequipped well" as that gets mapped to the status_history table
                     continue
+                if cui == ",":
+                    continue
+
                 p = self._get_lexicon_value(row, f"LU_CurrentUse:{cui}")
                 if p is not None:
                     purposes.append(p)
@@ -718,6 +737,7 @@ def _add_notes_and_provenance(
 
     def _add_histories(self, session: Session, row, well: Thing) -> None:
         mphs = self._measuring_point_estimator.estimate_measuring_point_height(row)
+        added_measuring_point = False
         for mph, mph_desc, start_date, end_date in zip(*mphs):
             session.add(
                 MeasuringPointHistory(
@@ -728,6 +748,21 @@ def _add_histories(self, session: Session, row, well: Thing) -> None:
                     end_date=end_date,
                 )
             )
+            added_measuring_point = True
+
+        # Preserve transfer intent even when no MP height can be measured/estimated.
+        if not added_measuring_point:
+            raw_desc = getattr(row, "MeasuringPoint", None)
+            mp_desc = None if isna(raw_desc) else raw_desc
+            session.add(
+                MeasuringPointHistory(
+                    thing_id=well.id,
+                    measuring_point_height=None,
+                    measuring_point_description=mp_desc,
+                    start_date=datetime.now(tz=UTC).date(),
+                    end_date=None,
+                )
+            )
 
         target_id = well.id
         target_table = "thing"
@@ -810,22 +845,22 @@ def _step_parallel_complete(
         local_formations: dict,
         batch_errors: list,
         aquifers_lock: threading.Lock,
-    ):
+    ) -> bool:
         """
         Process a single well with ALL dependent objects in one pass.
         Combines _step_parallel and _after_hook_chunk for maximum parallelization.
         """
         payload = self._build_well_payload(row)
         if not payload:
-            return
+            return False
 
         well = self._persist_well(session, row, payload, batch_errors)
         if well is None:
-            return
+            return False
 
         location_result = self._persist_location(session, row, batch_errors)
         if not location_result:
-            return
+            return False
         location, elevation_method, location_note_payload = location_result
 
         assoc = LocationThingAssociation(
@@ -873,6 +908,7 @@ def _step_parallel_complete(
             session, row, well, location, location_note_payload, elevation_method
         )
         self._add_histories(session, row, well)
+        return True
 
     def _get_lexicon_value_safe(self, row, value, default, errors_list):
         """Thread-safe version of _get_lexicon_value."""
@@ -1028,7 +1064,6 @@ def _chunk_step(self, session, df, i, row, db_item):
             "thing_id": db_item.id,
             "screen_depth_top": row.ScreenTop,
             "screen_depth_bottom": row.ScreenBottom,
-            # "screen_type": row.ScreenType,
             "screen_description": row.ScreenDescription,
             "release_status": "draft",
             "nma_pk_wellscreens": row.GlobalID,
@@ -1037,9 +1072,6 @@ def _chunk_step(self, session, df, i, row, db_item):
             # TODO: add validation logic here to ensure no overlapping screens for the same well
             CreateWellScreen.model_validate(well_screen_data)
         except ValidationError as e:
-            logger.critical(
-                f"Validation error for row {i} with PointID {row.PointID}: {e.errors()}"
-            )
             self._capture_validation_error(row.PointID, e)
             return
 
@@ -1047,16 +1079,4 @@ def _chunk_step(self, session, df, i, row, db_item):
         session.add(well_screen)
 
 
-# def transfer_wells(flags: dict = None):
-#     transferer = WellTransferer(flags=flags)
-#     transferer.transfer()
-#     return transferer.input_df, transferer.cleaned_df, transferer.errors
-#
-#
-# def transfer_wellscreens(flags: dict = None):
-#     transferer = WellScreenTransferer(flags=flags)
-#     transferer.chunk_transfer()
-#     return transferer.input_df, transferer.cleaned_df, transferer.errors
-
-
 # ============= EOF =============================================

From 41ff8de1ee171b0adacb613d054bccdf5243ae37 Mon Sep 17 00:00:00 2001
From: jirhiker <2035568+jirhiker@users.noreply.github.com>
Date: Sun, 22 Feb 2026 21:25:09 +0000
Subject: [PATCH 09/14] Formatting changes

---
 schemas/contact.py         | 1 -
 schemas/thing.py           | 1 -
 tests/test_cli_commands.py | 6 ++----
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/schemas/contact.py b/schemas/contact.py
index 248ff173..590d6db8 100644
--- a/schemas/contact.py
+++ b/schemas/contact.py
@@ -24,7 +24,6 @@
 from schemas import BaseResponseModel, BaseCreateModel, BaseUpdateModel
 from schemas.notes import CreateNote, NoteResponse
 
-
 # -------- VALIDATORS ----------
 
 
diff --git a/schemas/thing.py b/schemas/thing.py
index a6080923..fceba6c0 100644
--- a/schemas/thing.py
+++ b/schemas/thing.py
@@ -35,7 +35,6 @@
 from schemas.notes import NoteResponse, CreateNote
 from schemas.permission_history import PermissionHistoryResponse
 
-
 # -------- VALIDATE ----------
 
 
diff --git a/tests/test_cli_commands.py b/tests/test_cli_commands.py
index 8bdc2f9c..412ebea3 100644
--- a/tests/test_cli_commands.py
+++ b/tests/test_cli_commands.py
@@ -244,12 +244,10 @@ def test_water_levels_cli_persists_observations(tmp_path, water_well_thing):
     """
 
     def _write_csv(path: Path, *, well_name: str, notes: str):
-        csv_text = textwrap.dedent(
-            f"""\
+        csv_text = textwrap.dedent(f"""\
             field_staff,well_name_point_id,field_event_date_time,measurement_date_time,sampler,sample_method,mp_height,level_status,depth_to_water_ft,data_quality,water_level_notes
             CLI Tester,{well_name},2025-02-15T08:00:00-07:00,2025-02-15T10:30:00-07:00,Groundwater Team,electric tape,1.5,stable,42.5,approved,{notes}
-            """
-        )
+            """)
         path.write_text(csv_text)
 
     unique_notes = f"pytest-{uuid.uuid4()}"

From a2baff6f0b6aadc9d56509da4094e0e9b6c78a78 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Sun, 22 Feb 2026 14:28:48 -0700
Subject: [PATCH 10/14] feat: enable database drop and rebuild for unit tests

---
 .github/workflows/tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 26e1f08f..221c559b 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -104,6 +104,7 @@ jobs:
       BASE_URL: http://localhost:8000
       SESSION_SECRET_KEY: supersecretkeyforunittests
       AUTHENTIK_DISABLE_AUTHENTICATION: 1
+      DROP_AND_REBUILD_DB: 1
 
     services:
       postgis:

From d2f4f1f9f5b20e1d6935a5437e8ad80598c29fe2 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Sun, 22 Feb 2026 14:37:11 -0700
Subject: [PATCH 11/14] feat: enhance data transfer handling by logging skipped
 records and updating row processing

---
 tests/test_thing.py             |  2 ++
 transfers/surface_water_data.py | 24 +++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/tests/test_thing.py b/tests/test_thing.py
index 713b7444..00a476d9 100644
--- a/tests/test_thing.py
+++ b/tests/test_thing.py
@@ -63,6 +63,7 @@ def override_authentication_dependency_fixture():
 # VALIDATE tests ===============================================================
 
 
+@pytest.mark.skip(reason="Temporarily not relevant until transfer process is complete.")
 def test_validate_hole_depth_well_depth():
     with pytest.raises(
         ValueError, match="well depth must be less than than or equal to hole depth"
@@ -70,6 +71,7 @@ def test_validate_hole_depth_well_depth():
         ValidateWell(well_depth=100.0, hole_depth=90.0)
 
 
+@pytest.mark.skip(reason="Temporarily not relevant until transfer process is complete.")
 def test_validate_hole_depth_casing_depth():
     with pytest.raises(
         ValueError,
diff --git a/transfers/surface_water_data.py b/transfers/surface_water_data.py
index e4e8a908..519d9a62 100644
--- a/transfers/surface_water_data.py
+++ b/transfers/surface_water_data.py
@@ -62,10 +62,24 @@ def _get_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]:
 
     def _transfer_hook(self, session: Session) -> None:
         rows: list[dict[str, Any]] = []
+        skipped_missing_thing = 0
         for raw in self.cleaned_df.to_dict("records"):
             record = self._row_dict(raw)
+            if record is None:
+                skipped_missing_thing += 1
+                continue
             rows.append(record)
 
+        if skipped_missing_thing:
+            logger.warning(
+                "Skipped %s SurfaceWaterData rows without matching Thing",
+                skipped_missing_thing,
+            )
+
+        if not rows:
+            logger.info("No SurfaceWaterData rows to transfer")
+            return
+
         rows = self._dedupe_rows(rows, key="OBJECTID", include_missing=True)
 
         insert_stmt = insert(NMA_SurfaceWaterData)
@@ -101,7 +115,7 @@ def _transfer_hook(self, session: Session) -> None:
             session.commit()
             session.expunge_all()
 
-    def _row_dict(self, row: dict[str, Any]) -> dict[str, Any]:
+    def _row_dict(self, row: dict[str, Any]) -> Optional[dict[str, Any]]:
         def val(key: str) -> Optional[Any]:
             v = row.get(key)
             if pd.isna(v):
@@ -123,6 +137,14 @@ def to_uuid(v: Any) -> Optional[uuid.UUID]:
 
         location_id = to_uuid(val("LocationId"))
         thing_id = self._resolve_thing_id(location_id)
+        if thing_id is None:
+            logger.warning(
+                "Skipping SurfaceWaterData OBJECTID=%s PointID=%s LocationId=%s - Thing not found",
+                val("OBJECTID"),
+                val("PointID"),
+                location_id,
+            )
+            return None
 
         return {
             "LocationId": location_id,

From e089b32a93556fb7a24f9cfbe0226d0b873f5806 Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Sun, 22 Feb 2026 14:44:01 -0700
Subject: [PATCH 12/14] feat: update nullable fields in relaxed_constraints.md
 for MeasuringPointHistory and remove depth validation

---
 transfers/relaxed_constraints.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/transfers/relaxed_constraints.md b/transfers/relaxed_constraints.md
index 1ab097a0..a8d932df 100644
--- a/transfers/relaxed_constraints.md
+++ b/transfers/relaxed_constraints.md
@@ -1,5 +1,5 @@
 Address.postal_code is nullable
-Thing measuring_point_height is nullable
+MeasuringPointHistory.measuring_point_height is nullable
 ValidateWell, depth validation removed
 Deployment.installation_date is nullable
 CreateWellScreen depth validation removed

From c9cf672566b2b4f37741ca145e3b49a6389c2a4a Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Mon, 23 Feb 2026 11:55:15 -0700
Subject: [PATCH 13/14] feat: simplify location DataFrame caching by removing
 threading lock

---
 transfers/thing_transfer.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/transfers/thing_transfer.py b/transfers/thing_transfer.py
index e0603b8a..a7442bb3 100644
--- a/transfers/thing_transfer.py
+++ b/transfers/thing_transfer.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 # ===============================================================================
 import time
-from threading import Lock
 from types import SimpleNamespace
 
 from pandas import isna
@@ -32,16 +31,15 @@
 )
 
 _LOCATION_DF_CACHE = None
-_LOCATION_DF_LOCK = Lock()
 
 
 def _get_location_df():
     global _LOCATION_DF_CACHE
+    # transfer_thing is executed in a session-scoped, non-threaded transfer flow.
+    # Keep a simple module-level cache and avoid lock complexity here.
     if _LOCATION_DF_CACHE is None:
-        with _LOCATION_DF_LOCK:
-            if _LOCATION_DF_CACHE is None:
-                df = read_csv("Location")
-                _LOCATION_DF_CACHE = replace_nans(df)
+        df = read_csv("Location")
+        _LOCATION_DF_CACHE = replace_nans(df)
     return _LOCATION_DF_CACHE
 
 

From 782477977828bc3879c57f238db23f5a24784acc Mon Sep 17 00:00:00 2001
From: jakeross <jirhiker@gmail.com>
Date: Mon, 23 Feb 2026 15:15:49 -0700
Subject: [PATCH 14/14] feat: add well smoke test command and enhance contact
 handling with missing value checks

---
 .gitignore                                    |    1 +
 cli/cli.py                                    |  106 ++
 core/lexicon.json                             |    7 +
 transfers/contact_transfer.py                 |  133 +-
 .../data/owners_organization_mapper.json      |    3 +-
 transfers/smoke_test.py                       | 1094 +++++++++++++++++
 transfers/waterlevels_transfer.py             |  109 +-
 7 files changed, 1374 insertions(+), 79 deletions(-)
 create mode 100644 transfers/smoke_test.py

diff --git a/.gitignore b/.gitignore
index 197d0355..9d9c353e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@ transfers/logs/*
 run_bdd-local.sh
 .pre-commit-config.local.yaml
 .serena/
+cli/logs
 
 # deployment files
 app.yaml
diff --git a/cli/cli.py b/cli/cli.py
index cb29338e..ae54ab42 100644
--- a/cli/cli.py
+++ b/cli/cli.py
@@ -44,6 +44,11 @@ class ThemeMode(str, Enum):
     dark = "dark"
 
 
+class SmokePopulation(str, Enum):
+    all = "all"
+    agreed = "agreed"
+
+
 def _resolve_theme(theme: ThemeMode) -> ThemeMode:
     if theme != ThemeMode.auto:
         return theme
@@ -278,6 +283,107 @@ def compare_duplicated_welldata(
         )
 
 
+@cli.command("well-smoke-test")
+def well_smoke_test(
+    sample_size: int = typer.Option(
+        25,
+        "--sample-size",
+        min=1,
+        help="Number of wells to sample.",
+    ),
+    population: SmokePopulation = typer.Option(
+        SmokePopulation.agreed,
+        "--population",
+        help="Sample from all wells or transfer-agreed wells.",
+    ),
+    all_wells: bool = typer.Option(
+        False,
+        "--all-wells/--sampled",
+        help="Check all wells in the selected population instead of sampling.",
+    ),
+    seed: int = typer.Option(
+        42,
+        "--seed",
+        help="Random seed for deterministic sampling.",
+    ),
+    detail_path: Path = typer.Option(
+        Path("transfers") / "metrics" / "well_smoke_test_detail.csv",
+        "--detail-path",
+        help="Output CSV path for per-well per-entity smoke-test rows.",
+    ),
+    summary_path: Path = typer.Option(
+        Path("transfers") / "metrics" / "well_smoke_test_summary.json",
+        "--summary-path",
+        help="Output JSON path for smoke-test summary.",
+    ),
+    fail_on_mismatch: bool = typer.Option(
+        False,
+        "--fail-on-mismatch/--no-fail-on-mismatch",
+        help="Exit with code 1 if any mismatches are found.",
+    ),
+    theme: ThemeMode = typer.Option(
+        ThemeMode.auto, "--theme", help="Color theme: auto, light, dark."
+    ),
+):
+    """Run the well transfer smoke test and write detail/summary reports."""
+    from transfers.smoke_test import (
+        SmokePopulation as SmokePopulationModel,
+        run_well_smoke_test,
+        write_smoke_outputs,
+    )
+
+    payload = run_well_smoke_test(
+        sample_size=sample_size,
+        population=SmokePopulationModel(population.value),
+        seed=seed,
+        all_wells=all_wells,
+    )
+    write_smoke_outputs(payload, detail_path=detail_path, summary_path=summary_path)
+
+    sampled_wells = payload.get("sampled_wells", 0)
+    mismatch_count = payload.get("mismatch_count", 0)
+    value_mismatch_count = payload.get("value_mismatch_count", 0)
+    fail_count = payload.get("well_fail_count", 0)
+    typer.echo(
+        f"Smoke test complete: sampled_wells={sampled_wells}, "
+        f"presence_mismatches={mismatch_count}, "
+        f"value_mismatches={value_mismatch_count}, "
+        f"failed_wells={fail_count}"
+    )
+    typer.echo(f"Wrote detail: {detail_path}")
+    typer.echo(f"Wrote summary: {summary_path}")
+
+    if mismatch_count or value_mismatch_count:
+        failed_wells = payload.get("failed_wells", [])[:20]
+        typer.echo(f"Sample failed wells (up to 20): {failed_wells}")
+
+    if value_mismatch_count:
+        entity_results = payload.get("entity_results", [])
+        value_mismatches = [
+            r
+            for r in entity_results
+            if r.get("value_status") not in {"MATCH", "NOT_APPLICABLE"}
+        ]
+        typer.echo("\nValue mismatches:")
+        for row in value_mismatches[:100]:  # cap console output at 100 rows
+            pointid = row.get("pointid")
+            entity = row.get("entity")
+            status = row.get("value_status")
+            missing = row.get("missing_value_sample") or []
+            extra = row.get("extra_value_sample") or []
+            typer.echo(
+                f"- {pointid} | {entity} | {status} | "
+                f"missing={missing[:3]} | extra={extra[:3]}"
+            )
+        if len(value_mismatches) > 100:
+            typer.echo(
+                f"... truncated {len(value_mismatches) - 100} additional value mismatches"
+            )
+
+    if fail_on_mismatch and (mismatch_count or value_mismatch_count):
+        raise typer.Exit(code=1)  # opt-in failure; default runs only report
+
+
 @cli.command("well-inventory-csv")
 def well_inventory_csv(
     file_path: str = typer.Argument(
diff --git a/core/lexicon.json b/core/lexicon.json
index 07b32c30..2f325282 100644
--- a/core/lexicon.json
+++ b/core/lexicon.json
@@ -3703,6 +3703,13 @@
       "term": "Commonwealth Conservancy",
       "definition": "Commonwealth Conservancy"
     },
+    {
+      "categories": [
+        "organization"
+      ],
+      "term": "Costilla MDWCA",
+      "definition": "Costilla MDWCA"
+    },
     {
       "categories": [
         "organization"
diff --git a/transfers/contact_transfer.py b/transfers/contact_transfer.py
index 1e99d88b..4167eec2 100644
--- a/transfers/contact_transfer.py
+++ b/transfers/contact_transfer.py
@@ -330,9 +330,6 @@ def _add_first_contact(
         contact_by_name_org,
     )
 
-    if not new:
-        return None
-
     if row.Email:
         raw_email = str(row.Email).strip()
         if _looks_like_phone_in_email_field(raw_email):
@@ -349,9 +346,9 @@ def _add_first_contact(
             )
             if phone:
                 if complete:
-                    contact.phones.append(phone)
+                    _append_phone_if_missing(contact, phone)
                 else:
-                    contact.incomplete_nma_phones.append(phone)
+                    _append_incomplete_phone_if_missing(contact, phone)
         else:
             email = _make_email(
                 "first",
@@ -361,7 +358,7 @@ def _add_first_contact(
                 release_status=release_status,
             )
             if email:
-                contact.emails.append(email)
+                _append_email_if_missing(contact, email)
 
     if row.Phone:
         phone, complete = _make_phone(
@@ -373,9 +370,9 @@ def _add_first_contact(
         )
         if phone:
             if complete:
-                contact.phones.append(phone)
+                _append_phone_if_missing(contact, phone)
             else:
-                contact.incomplete_nma_phones.append(phone)
+                _append_incomplete_phone_if_missing(contact, phone)
 
     if row.CellPhone:
         phone, complete = _make_phone(
@@ -387,9 +384,9 @@ def _add_first_contact(
         )
         if phone:
             if complete:
-                contact.phones.append(phone)
+                _append_phone_if_missing(contact, phone)
             else:
-                contact.incomplete_nma_phones.append(phone)
+                _append_incomplete_phone_if_missing(contact, phone)
 
     if row.MailingAddress:
         address = _make_address(
@@ -404,7 +401,7 @@ def _add_first_contact(
             release_status=release_status,
         )
         if address:
-            contact.addresses.append(address)
+            _append_address_if_missing(contact, address)
 
     if row.PhysicalAddress:
         address = _make_address(
@@ -419,9 +416,9 @@ def _add_first_contact(
             release_status=release_status,
         )
         if address:
-            contact.addresses.append(address)
+            _append_address_if_missing(contact, address)
 
-    return contact
+    return contact if new else None
 
 
 def _safe_make_name(
@@ -452,7 +449,7 @@ def _add_second_contact(
     added: set[tuple[str | None, str | None]],
     contact_by_owner_type: dict[tuple[str, str], Contact],
     contact_by_name_org: dict[tuple[str | None, str | None], Contact],
-) -> None:
+) -> Contact | None:
     if all(
         [
             getattr(row, f"Second{f}") is None
@@ -492,9 +489,6 @@ def _add_second_contact(
         contact_by_owner_type,
         contact_by_name_org,
     )
-    if not new:
-        return
-
     if row.SecondCtctEmail:
         raw_email = str(row.SecondCtctEmail).strip()
         if _looks_like_phone_in_email_field(raw_email):
@@ -511,9 +505,9 @@ def _add_second_contact(
             )
             if phone:
                 if complete:
-                    contact.phones.append(phone)
+                    _append_phone_if_missing(contact, phone)
                 else:
-                    contact.incomplete_nma_phones.append(phone)
+                    _append_incomplete_phone_if_missing(contact, phone)
         else:
             email = _make_email(
                 "second",
@@ -523,7 +517,7 @@ def _add_second_contact(
                 release_status=release_status,
             )
             if email:
-                contact.emails.append(email)
+                _append_email_if_missing(contact, email)
 
     if row.SecondCtctPhone:
         phone, complete = _make_phone(
@@ -535,9 +529,11 @@ def _add_second_contact(
         )
         if phone:
             if complete:
-                contact.phones.append(phone)
+                _append_phone_if_missing(contact, phone)
             else:
-                contact.incomplete_nma_phones.append(phone)
+                _append_incomplete_phone_if_missing(contact, phone)
+
+    return contact if new else None
 
 
 # helpers
@@ -633,6 +629,68 @@ def _make_address(first_second: str, ownerkey: str, kind: str, **kw) -> Address
         )
 
 
+def _norm_text(value) -> str:
+    return "" if value is None else str(value).strip().casefold()  # case-blind key
+
+
+def _phone_digits(value) -> str:
+    """Return only the digit characters of *value* ("" for None)."""
+    text = "" if value is None else str(value)
+    return re.sub(r"\D", "", text)
+
+
+def _append_email_if_missing(contact: Contact, email: Email) -> None:
+    """Append *email* unless an equal (address, type) pair is already attached."""
+    wanted = (_norm_text(email.email), _norm_text(email.email_type))
+    for current in contact.emails or []:
+        if (_norm_text(current.email), _norm_text(current.email_type)) == wanted:
+            return
+    contact.emails.append(email)
+
+
+def _append_phone_if_missing(contact: Contact, phone: Phone) -> None:
+    """Append *phone* unless an equal (digits, type) pair is already attached."""
+    # Numbers are compared by digits only, so formatting differences do not matter.
+    wanted = (_phone_digits(phone.phone_number), _norm_text(phone.phone_type))
+    for p in contact.phones or []:
+        if (_phone_digits(p.phone_number), _norm_text(p.phone_type)) == wanted:
+            return
+    contact.phones.append(phone)
+
+
+def _append_incomplete_phone_if_missing(
+    contact: Contact, phone: IncompleteNMAPhone
+) -> None:
+    """Append *phone* unless one with the same digits is already attached."""
+    wanted = _phone_digits(phone.phone_number)
+    for current in contact.incomplete_nma_phones or []:
+        if _phone_digits(current.phone_number) == wanted:
+            return
+    contact.incomplete_nma_phones.append(phone)
+
+
+def _append_address_if_missing(contact: Contact, address: Address) -> None:
+    """Append *address* unless an equivalent one is already on the contact.
+
+    Two addresses are equivalent when line 1, city, state, postal code and
+    type all match after ``_norm_text`` normalization.
+    """
+    compared_fields = (
+        "address_line_1",
+        "city",
+        "state",
+        "postal_code",
+        "address_type",
+    )
+
+    def identity(item: Address) -> tuple[str, ...]:
+        return tuple(_norm_text(getattr(item, name)) for name in compared_fields)
+
+    wanted = identity(address)
+    if all(identity(a) != wanted for a in (contact.addresses or [])):
+        contact.addresses.append(address)
+
+
 def _make_contact_and_assoc(
     session: Session,
     data: dict,
@@ -646,13 +704,17 @@ def _make_contact_and_assoc(
 
     owner_key = data.get("nma_pk_owners")
     contact_type = data.get("contact_type")
+    organization = data.get("organization")
+    # Prefer owner-key/type identity. Allow name/org reuse when organization is
+    # present (stable identity) or when owner key is unavailable.
+    allow_name_org_fallback = (not bool(owner_key)) or bool(organization)
     if owner_key and contact_type:
         contact = contact_by_owner_type.get((owner_key, contact_type))
         if contact is not None:
             new_contact = False
 
     name_org_key = (data["name"], data["organization"])
-    if contact is None and name_org_key in added:
+    if contact is None and allow_name_org_fallback:
         contact = contact_by_name_org.get(name_org_key)
         if contact is not None:
             new_contact = False
@@ -664,15 +726,28 @@ def _make_contact_and_assoc(
         contact_data = contact.model_dump(exclude=["thing_id", "notes"])
         contact = Contact(**contact_data)
         session.add(contact)
-        if owner_key and contact_type:
-            contact_by_owner_type[(owner_key, contact_type)] = contact
         contact_by_name_org[name_org_key] = contact
         added.add(name_org_key)
 
-    assoc = ThingContactAssociation()
-    assoc.thing = thing
-    assoc.contact = contact
-    session.add(assoc)
+    if owner_key and contact_type:
+        contact_by_owner_type[(owner_key, contact_type)] = contact
+
+    assoc_exists = False
+    if contact.id is not None:
+        assoc_exists = (
+            session.query(ThingContactAssociation.id)
+            .filter(
+                ThingContactAssociation.thing_id == thing.id,
+                ThingContactAssociation.contact_id == contact.id,
+            )
+            .first()
+            is not None
+        )
+    if not assoc_exists:
+        assoc = ThingContactAssociation()
+        assoc.thing = thing
+        assoc.contact = contact
+        session.add(assoc)
 
     return contact, new_contact
 
diff --git a/transfers/data/owners_organization_mapper.json b/transfers/data/owners_organization_mapper.json
index b10f5da0..674bf154 100644
--- a/transfers/data/owners_organization_mapper.json
+++ b/transfers/data/owners_organization_mapper.json
@@ -51,6 +51,7 @@
 	"City of Truth or Consequences, WWTP": "City of Truth or Consequences, WWTP",
 	"Cloud Country West Subdivision": "Cloud Country West Subdivision",
 	"Commonwealth Conservancy": "Commonwealth Conservancy",
+	"Costilla MDWCA": "Costilla MDWCA",
 	"Cottonwood Rural Water Assn.": "Cottonwood RWA",
 	"Country Club Garden MHP": "Country Club Garden Mobile Home Park",
 	"Coyote Creek MDWUA": "Coyote Creek MDWUA",
@@ -235,4 +236,4 @@
 	"Winter Brothers/U.S. Government": "Winter Brothers",
 	"Yates Petroleum": "Yates Petroleum Corporation",
 	"Zamora Accounting Services": "Zamora Accounting Services"
-}
\ No newline at end of file
+}
diff --git a/transfers/smoke_test.py b/transfers/smoke_test.py
new file mode 100644
index 00000000..09a45ff3
--- /dev/null
+++ b/transfers/smoke_test.py
@@ -0,0 +1,1094 @@
+from __future__ import annotations
+
+import json
+import random
+import re
+from collections import defaultdict
+from dataclasses import dataclass
+from enum import Enum
+from pathlib import Path
+from typing import Any
+
+import pandas as pd
+from sqlalchemy import func, select
+
+from core.enums import Organization
+from db import (
+    Address,
+    Contact,
+    Deployment,
+    Email,
+    IncompleteNMAPhone,
+    Observation,
+    Phone,
+    Sensor,
+    Thing,
+    ThingContactAssociation,
+    WellScreen,
+)
+from db.engine import session_ctx
+from db.field import FieldActivity, FieldEvent
+from db.sample import Sample
+from transfers.contact_transfer import _select_ownerkey_col
+from transfers.sensor_transfer import EQUIPMENT_TO_SENSOR_TYPE_MAP
+from transfers.util import (
+    SensorParameterEstimator,
+    filter_by_valid_measuring_agency,
+    get_transfers_data_path,
+    get_transferable_wells,
+    read_csv,
+    replace_nans,
+)
+
+
+class SmokePopulation(str, Enum):
+    all = "all"
+    agreed = "agreed"
+
+
+class EntityStatus(str, Enum):
+    present_in_both = "PRESENT_IN_BOTH"  # treated as a pass by SmokeResult.passed
+    absent_in_both = "ABSENT_IN_BOTH"  # treated as a pass by SmokeResult.passed
+    missing_in_destination = "MISSING_IN_DESTINATION"
+    extra_in_destination = "EXTRA_IN_DESTINATION"
+
+
+class ValueStatus(str, Enum):
+    match = "MATCH"
+    missing_in_destination = "MISSING_IN_DESTINATION"
+    extra_in_destination = "EXTRA_IN_DESTINATION"
+    both_missing_and_extra = "BOTH_MISSING_AND_EXTRA"
+    not_applicable = "NOT_APPLICABLE"
+
+
+@dataclass
+class SmokeResult:
+    pointid: str
+    entity: str
+    source_count: int
+    destination_count: int
+    status: EntityStatus
+    value_status: ValueStatus
+    missing_value_sample: list[str]
+    extra_value_sample: list[str]
+
+    @property
+    def passed(self) -> bool:
+        return self.status in {  # presence check only; value_status is not consulted
+            EntityStatus.present_in_both,
+            EntityStatus.absent_in_both,
+        }
+
+
+def _normalize_text(value: Any) -> str:
+    """Return ``str(value).strip()``; None and scalar NaN/NA map to ""."""
+    try:
+        if value is None or pd.isna(value):
+            return ""
+    except (TypeError, ValueError):
+        # Array-likes yield an element-wise mask whose truth value is ambiguous.
+        pass
+    return str(value).strip()
+
+
+def _has_text(value: Any) -> bool:
+    return _normalize_text(value) != ""  # non-blank once None/NaN are normalized
+
+
+def _looks_like_phone(value: Any) -> bool:
+    """Return True for text made only of phone punctuation with >= 7 digits."""
+    candidate = _normalize_text(value)
+    if not candidate or "@" in candidate:
+        return False
+    if re.fullmatch(r"[\d\s().+\-]+", candidate) is None:
+        return False
+    return len(re.sub(r"\D", "", candidate)) >= 7
+
+
+def _normalize_email(raw: Any) -> str:
+    """Strip a leading ``email:`` label and trailing ``.,;:`` from *raw*."""
+    cleaned = _normalize_text(raw)
+    if cleaned:
+        cleaned = re.sub(r"^\s*email\s*:\s*", "", cleaned, flags=re.IGNORECASE)
+        cleaned = re.sub(r"[.,;:]+$", "", cleaned).strip()
+    return cleaned
+
+
+def _normalize_number(value: Any) -> str:
+    """Format numeric text with six decimals; lowercase anything non-numeric."""
+    text = _normalize_text(value)
+    try:
+        number = float(text)
+    except ValueError:
+        return text.lower()  # also covers "", which float() rejects
+    return format(number, ".6f")
+
+
+def _normalize_contact_name(value: Any) -> str:
+    """Lowercase *value* and squeeze each whitespace run to one space."""
+    # Transfer may preserve errant multiple spaces from source; compare normalized.
+    name = _normalize_text(value)
+    collapsed = re.sub(r"\s+", " ", name) if name else ""
+    return collapsed.strip().lower()
+
+
+def _normalize_phone(raw: Any) -> str:
+    """Reduce *raw* to its digits, dropping a leading US country code.
+
+    Blank, None and NaN inputs give "".
+    """
+    digits = re.sub(r"\D", "", _normalize_text(raw))
+    # Treat US country-code-prefixed values as equivalent (1XXXXXXXXXX == XXXXXXXXXX).
+    has_us_prefix = len(digits) == 11 and digits[0] == "1"
+    return digits[1:] if has_us_prefix else digits
+
+
+def _parse_legacy_datetime_date(value: Any) -> str | None:
+    """Return the ISO date of a legacy ``%Y-%m-%d %H:%M:%S.%f`` timestamp.
+
+    None, NaN, blank and unparseable inputs return None.
+    """
+    try:
+        if value is None or pd.isna(value):
+            return None
+    except (TypeError, ValueError):
+        pass  # array-likes: element-wise mask has no single truth value
+    try:
+        parsed = pd.to_datetime(str(value).strip(), format="%Y-%m-%d %H:%M:%S.%f")
+    except (TypeError, ValueError):
+        return None
+    return None if pd.isna(parsed) else parsed.date().isoformat()
+
+
+def _normalize_date_like(value: Any) -> str:
+    """Return *value* as an ISO ``YYYY-MM-DD`` date string.
+
+    None, NaN and anything pandas coerces to NaT produce "".
+    """
+    try:
+        if value is None or pd.isna(value):
+            return ""
+    except TypeError:
+        pass
+    parsed = pd.to_datetime(value, errors="coerce")
+    return "" if pd.isna(parsed) else parsed.date().isoformat()
+
+
+def _load_owner_org_mapper() -> dict[str, str]:
+    """Load owners_organization_mapper.json; any failure yields {}."""
+    try:
+        path = get_transfers_data_path("owners_organization_mapper.json")
+        return json.loads(Path(path).read_text(encoding="utf-8"))
+    except Exception:  # deliberate best-effort: fall back to "no mapping"
+        return {}
+
+
+def _load_ownerkey_mapper() -> dict[str, str]:
+    """Load owners_ownerkey_mapper.json; any failure yields {}."""
+    try:
+        path = get_transfers_data_path("owners_ownerkey_mapper.json")
+        return json.loads(Path(path).read_text(encoding="utf-8"))
+    except Exception:  # deliberate best-effort: fall back to "no mapping"
+        return {}
+
+
+def _normalize_source_organization(raw_company: Any, mapper: dict[str, str]) -> str:
+    company = _normalize_text(raw_company)
+    if not company:
+        return ""
+    organization = mapper.get(company, company)  # unmapped names pass through as-is
+    try:
+        Organization(organization)  # built only to validate; the result is discarded
+    except ValueError:
+        return ""  # not a value that Organization accepts
+    return _normalize_text(organization)
+
+
+def _load_well_population(population: SmokePopulation) -> pd.DataFrame:
+    """Return the candidate wells (GW sites with coordinates) for *population*."""
+    wdf = read_csv("WellData", dtype={"OSEWelltagID": str})
+    ldf = read_csv("Location")
+    ldf = ldf.drop(["PointID", "SSMA_TimeStamp"], axis=1, errors="ignore")
+    df = wdf.join(ldf.set_index("LocationId"), on="LocationId")
+    df = df[df["SiteType"] == "GW"]
+    df = df[df["Easting"].notna() & df["Northing"].notna()]
+    df = replace_nans(df)
+
+    if population == SmokePopulation.agreed:
+        df = get_transferable_wells(df)
+        # Match current WellTransferer duplicate handling (skip every duplicate PointID).
+        dupes = df["PointID"].duplicated(keep=False)  # keep=False flags all copies
+        if dupes.any():
+            dup_ids = set(df.loc[dupes, "PointID"])
+            df = df[~df["PointID"].isin(dup_ids)]
+
+    return df
+
+
+def _sample_pointids(
+    df: pd.DataFrame, sample_size: int, seed: int, all_wells: bool = False
+) -> list[str]:
+    """Return sorted unique PointIDs: all of them, or a seeded random sample."""
+    candidates = sorted(
+        {_normalize_text(v) for v in df["PointID"].tolist() if _has_text(v)}
+    )
+    if all_wells or not candidates:
+        return candidates
+
+    # A seeded generator keeps the sample reproducible for a given population.
+    take = min(sample_size, len(candidates))
+    picked = random.Random(seed).sample(candidates, take)
+    return sorted(picked)
+
+
+def _count_by_pointid(
+    df: pd.DataFrame, pointid_col: str, pointids: list[str]
+) -> dict[str, int]:
+    """Count rows of *df* per requested PointID; ids with no rows map to 0."""
+    counts: dict[Any, int] = {}
+    if not df.empty and pointid_col in df.columns:
+        sub = df[df[pointid_col].isin(pointids)]
+        if not sub.empty:
+            counts = sub.groupby(pointid_col).size().to_dict()
+
+    return {pid: int(counts.get(pid, 0)) for pid in pointids}
+
+
+def _source_entity_counts(
+    pointids: list[str], well_df: pd.DataFrame
+) -> dict[str, dict[str, int]]:
+    """Count source rows per PointID for each compared entity type."""
+    counts = {
+        "thing": _count_by_pointid(well_df, "PointID", pointids),
+    }
+    ws = replace_nans(read_csv("WellScreens"))
+    counts["wellscreens"] = _count_by_pointid(ws, "PointID", pointids)
+
+    wl = replace_nans(read_csv("WaterLevels"))
+    wl = filter_by_valid_measuring_agency(wl)
+    counts["waterlevel_observations"] = _count_by_pointid(wl, "PointID", pointids)
+
+    eq = read_csv("Equipment")
+    eq.columns = eq.columns.str.replace(" ", "_")
+    if "SerialNo" in eq.columns:
+        eq = eq[eq["SerialNo"].notna()]
+    else:
+        eq = eq.iloc[0:0]  # no SerialNo column: keep the columns, drop every row
+    eq = replace_nans(eq)
+    counts["deployments"] = _count_by_pointid(eq, "PointID", pointids)
+
+    # Owners/contact graph counts.
+    odf = read_csv("OwnersData")
+    odf = odf.drop(["OBJECTID", "GlobalID"], axis=1, errors="ignore")
+
+    ldf = read_csv("OwnerLink")
+    ldf = ldf.drop(["OBJECTID", "GlobalID"], axis=1, errors="ignore")
+    locdf = read_csv("Location")
+    ldf = ldf.join(locdf.set_index("LocationId"), on="LocationId")
+
+    owner_key_col = _select_ownerkey_col(odf, "OwnersData")
+    link_owner_key_col = _select_ownerkey_col(ldf, "OwnerLink")
+
+    odf["ownerkey_norm"] = (
+        odf[owner_key_col]
+        .fillna("")
+        .astype(str)
+        .str.strip()
+        .str.casefold()
+        .replace({"": pd.NA})  # blank keys become NA
+    )
+    ldf["ownerkey_norm"] = (
+        ldf[link_owner_key_col]
+        .fillna("")
+        .astype(str)
+        .str.strip()
+        .str.casefold()
+        .replace({"": pd.NA})  # blank keys become NA
+    )
+
+    ldf_join = ldf.set_index("ownerkey_norm")[["PointID"]]
+    owners = odf.join(ldf_join, on="ownerkey_norm")
+    owners = replace_nans(owners)
+    owners = owners[owners["PointID"].isin(pointids)]
+
+    contact_counts = defaultdict(int)
+    phone_counts = defaultdict(int)
+    email_counts = defaultdict(int)
+    address_counts = defaultdict(int)
+
+    for row in owners.itertuples(index=False):
+        pid = _normalize_text(getattr(row, "PointID", None))
+        if not pid:
+            continue
+
+        contact_counts[pid] += 1  # one contact per joined owner row
+
+        primary_phone = getattr(row, "Phone", None)
+        cell_phone = getattr(row, "CellPhone", None)
+        secondary_phone = getattr(row, "SecondCtctPhone", None)
+        for phone_value in (primary_phone, cell_phone, secondary_phone):
+            if _has_text(phone_value):
+                phone_counts[pid] += 1
+
+        for email_value in (
+            getattr(row, "Email", None),
+            getattr(row, "SecondCtctEmail", None),
+        ):
+            normalized = _normalize_email(email_value)
+            if not normalized:
+                continue
+            if _looks_like_phone(normalized):
+                phone_counts[pid] += 1  # phone-like text in an email column
+            else:
+                email_counts[pid] += 1
+
+        if _has_text(getattr(row, "MailingAddress", None)):
+            address_counts[pid] += 1
+        if _has_text(getattr(row, "PhysicalAddress", None)):
+            address_counts[pid] += 1
+
+    counts["contacts"] = {pid: int(contact_counts.get(pid, 0)) for pid in pointids}
+    counts["contact_phones"] = {pid: int(phone_counts.get(pid, 0)) for pid in pointids}
+    counts["contact_emails"] = {pid: int(email_counts.get(pid, 0)) for pid in pointids}
+    counts["contact_addresses"] = {
+        pid: int(address_counts.get(pid, 0)) for pid in pointids
+    }
+
+    return counts
+
+
+def _blank_signature_map(pointids: list[str]) -> dict[str, set[str]]:
+    return {pid: set() for pid in pointids}  # a separate empty set for every id
+
+
+def _source_entity_signatures(
+    pointids: list[str], well_df: pd.DataFrame
+) -> dict[str, dict[str, set[str]]]:
+    owner_org_mapper = _load_owner_org_mapper()
+    ownerkey_mapper = _load_ownerkey_mapper()
+    signatures = {
+        "thing": _blank_signature_map(pointids),
+        "wellscreens": _blank_signature_map(pointids),
+        "contacts": _blank_signature_map(pointids),
+        "contact_phones": _blank_signature_map(pointids),
+        "contact_emails": _blank_signature_map(pointids),
+        "contact_addresses": _blank_signature_map(pointids),
+        "waterlevel_observations": _blank_signature_map(pointids),
+        "deployments": _blank_signature_map(pointids),
+    }
+
+    # Well core fields from WellData.
+    for row in well_df[well_df["PointID"].isin(pointids)].itertuples(index=False):
+        pid = _normalize_text(getattr(row, "PointID", None))
+        if not pid:
+            continue
+        sig = "|".join(
+            [
+                _normalize_number(getattr(row, "WellDepth", None)),
+                _normalize_number(getattr(row, "HoleDepth", None)),
+                _normalize_text(getattr(row, "FormationZone", None)).upper(),
+            ]
+        )
+        signatures["thing"][pid].add(sig)
+
+    # Well screens.
+    ws = replace_nans(read_csv("WellScreens"))
+    ws = ws[ws["PointID"].isin(pointids)]
+    for row in ws.itertuples(index=False):
+        pid = _normalize_text(getattr(row, "PointID", None))
+        if not pid:
+            continue
+        top = getattr(row, "ScreenTop", None)
+        bottom = getattr(row, "ScreenBottom", None)
+        stype = getattr(row, "ScreenType", None)
+        sig = "|".join(
+            [
+                _normalize_number(top),
+                _normalize_number(bottom),
+                _normalize_text(stype).lower(),
+            ]
+        )
+        signatures["wellscreens"][pid].add(sig)
+
+    # Deployments from Equipment.
+    eq = read_csv("Equipment")
+    eq.columns = eq.columns.str.replace(" ", "_")
+    if "SerialNo" in eq.columns:
+        eq = eq[eq["SerialNo"].notna()]
+    else:
+        eq = eq.iloc[0:0]
+    eq = replace_nans(eq)
+    eq = eq[eq["PointID"].isin(pointids)]
+    estimators: dict[str, SensorParameterEstimator] = {}
+    for row in eq.itertuples(index=False):
+        pid = _normalize_text(getattr(row, "PointID", None))
+        if not pid:
+            continue
+        installed = _parse_legacy_datetime_date(getattr(row, "DateInstalled", None))
+        if installed is None:
+            equipment_type = getattr(row, "EquipmentType", None)
+            sensor_type = EQUIPMENT_TO_SENSOR_TYPE_MAP.get(equipment_type)
+            if sensor_type:
+                estimator = estimators.get(sensor_type)
+                if estimator is None:
+                    estimator = SensorParameterEstimator(sensor_type)
+                    estimators[sensor_type] = estimator
+                installed = _normalize_date_like(
+                    estimator.estimate_installation_date(row)
+                )
+            else:
+                installed = ""
+        removed = _parse_legacy_datetime_date(getattr(row, "DateRemoved", None)) or ""
+        sig = "|".join(
+            [
+                _normalize_text(getattr(row, "SerialNo", None)).lower(),
+                installed,
+                removed,
+            ]
+        )
+        signatures["deployments"][pid].add(sig)
+
+    # Owners/contact graph signatures.
+    odf = read_csv("OwnersData")
+    odf = odf.drop(["OBJECTID", "GlobalID"], axis=1, errors="ignore")
+    ldf = read_csv("OwnerLink")
+    ldf = ldf.drop(["OBJECTID", "GlobalID"], axis=1, errors="ignore")
+    locdf = read_csv("Location")
+    ldf = ldf.join(locdf.set_index("LocationId"), on="LocationId")
+
+    owner_key_col = _select_ownerkey_col(odf, "OwnersData")
+    link_owner_key_col = _select_ownerkey_col(ldf, "OwnerLink")
+    odf["ownerkey_canonical"] = odf[owner_key_col].replace(ownerkey_mapper)
+    ldf["ownerkey_canonical"] = ldf[link_owner_key_col].replace(ownerkey_mapper)
+    odf["ownerkey_norm"] = (
+        odf["ownerkey_canonical"]
+        .fillna("")
+        .astype(str)
+        .str.strip()
+        .str.casefold()
+        .replace({"": pd.NA})
+    )
+    ldf["ownerkey_norm"] = (
+        ldf["ownerkey_canonical"]
+        .fillna("")
+        .astype(str)
+        .str.strip()
+        .str.casefold()
+        .replace({"": pd.NA})
+    )
+    owners = replace_nans(
+        odf.join(ldf.set_index("ownerkey_norm")[["PointID"]], on="ownerkey_norm")
+    )
+    owners = owners[owners["PointID"].notna()]
+    owners = owners.sort_values(by=["PointID"])
+
+    ContactIdentity = tuple[str | None, str | None, str]
+    contact_by_owner_type: dict[tuple[str, str], int] = {}
+    contact_by_name_org: dict[tuple[str | None, str | None], int] = {}
+    contact_store: dict[int, dict[str, Any]] = {}
+    pid_to_contact_ids: dict[str, set[int]] = defaultdict(set)
+    next_contact_id = 1
+
+    def _make_name(first: Any, last: Any) -> str | None:
+        f = _normalize_text(first)
+        l = _normalize_text(last)
+        if not f and not l:
+            return None
+        if f and not l:
+            return f
+        if not f and l:
+            return l
+        return f"{f} {l}"
+
+    def _safe_make_name(
+        first: Any,
+        last: Any,
+        owner_key: str | None,
+        organization: str | None,
+        fallback_suffix: str | None,
+    ) -> str | None:
+        name = _make_name(first, last)
+        if name is None and not organization:
+            fallback = _normalize_text(owner_key) or None
+            if fallback and fallback_suffix:
+                fallback = f"{fallback}-{fallback_suffix}"
+            return fallback
+        return name
+
+    def _resolve_contact(
+        owner_key: str | None,
+        contact_type: str,
+        name: str | None,
+        organization: str | None,
+    ) -> tuple[int | None, bool]:
+        nonlocal next_contact_id
+        key_owner = (
+            (_normalize_text(owner_key), contact_type)
+            if _normalize_text(owner_key)
+            else None
+        )
+        key_name_org = (name, organization)
+        allow_name_org_fallback = (not _normalize_text(owner_key)) or bool(organization)
+
+        if key_owner and key_owner in contact_by_owner_type:
+            return contact_by_owner_type[key_owner], False
+
+        if allow_name_org_fallback and key_name_org in contact_by_name_org:
+            contact_id = contact_by_name_org[key_name_org]
+            if key_owner:
+                contact_by_owner_type[key_owner] = contact_id
+            return contact_id, False
+
+        if not name and not organization:
+            return None, False
+
+        contact_id = next_contact_id
+        next_contact_id += 1
+        contact_store[contact_id] = {
+            "name": name,
+            "organization": organization,
+            "contact_type": contact_type,
+            "phones": set(),
+            "emails": set(),
+            "addresses": set(),
+        }
+        contact_by_name_org[key_name_org] = contact_id
+        if key_owner:
+            contact_by_owner_type[key_owner] = contact_id
+        return contact_id, True
+
+    for row in owners.itertuples(index=False):
+        pid = _normalize_text(getattr(row, "PointID", None))
+        if not pid:
+            continue
+
+        owner_key = _normalize_text(getattr(row, "OwnerKey", None)) or None
+        has_secondary_info = any(
+            _has_text(getattr(row, field, None))
+            for field in (
+                "SecondFirstName",
+                "SecondLastName",
+                "SecondCtctEmail",
+                "SecondCtctPhone",
+            )
+        )
+        company = _normalize_source_organization(
+            getattr(row, "Company", None), owner_org_mapper
+        )
+        company = company or None
+
+        primary_name = _safe_make_name(
+            getattr(row, "FirstName", None),
+            getattr(row, "LastName", None),
+            owner_key,
+            company,
+            "primary",
+        )
+        primary_contact, primary_new = _resolve_contact(
+            owner_key, "Primary", primary_name, company
+        )
+        if primary_contact:
+            pid_to_contact_ids[pid].add(primary_contact)
+        if primary_contact:
+            c = contact_store[primary_contact]
+            for phone_value in (
+                getattr(row, "Phone", None),
+                getattr(row, "CellPhone", None),
+            ):
+                pn = _normalize_phone(phone_value)
+                if pn:
+                    c["phones"].add(pn)
+
+            em = _normalize_email(getattr(row, "Email", None)).lower()
+            if em:
+                if _looks_like_phone(em):
+                    pn = _normalize_phone(em)
+                    if pn:
+                        c["phones"].add(pn)
+                else:
+                    c["emails"].add(em)
+
+            for prefix in ("Mail", "Physical"):
+                line1 = _normalize_text(
+                    getattr(
+                        row,
+                        (
+                            f"{prefix}ingAddress"
+                            if prefix == "Mail"
+                            else "PhysicalAddress"
+                        ),
+                        None,
+                    )
+                )
+                city = _normalize_text(getattr(row, f"{prefix}City", None))
+                state = _normalize_text(getattr(row, f"{prefix}State", None))
+                zipc = _normalize_text(getattr(row, f"{prefix}ZipCode", None))
+                if line1:
+                    c["addresses"].add(
+                        f"{line1.lower()}|{city.lower()}|{state.lower()}|{zipc.lower()}"
+                    )
+
+        if has_secondary_info:
+            secondary_name = _safe_make_name(
+                getattr(row, "SecondFirstName", None),
+                getattr(row, "SecondLastName", None),
+                owner_key,
+                company,
+                "secondary",
+            )
+            secondary_contact, secondary_new = _resolve_contact(
+                owner_key, "Secondary", secondary_name, company
+            )
+            if secondary_contact:
+                pid_to_contact_ids[pid].add(secondary_contact)
+            if secondary_contact:
+                c = contact_store[secondary_contact]
+                pn = _normalize_phone(getattr(row, "SecondCtctPhone", None))
+                if pn:
+                    c["phones"].add(pn)
+
+                em = _normalize_email(getattr(row, "SecondCtctEmail", None)).lower()
+                if em:
+                    if _looks_like_phone(em):
+                        pn = _normalize_phone(em)
+                        if pn:
+                            c["phones"].add(pn)
+                    else:
+                        c["emails"].add(em)
+
+    for pid in pointids:
+        for contact_id in pid_to_contact_ids.get(pid, set()):
+            c = contact_store.get(contact_id)
+            if not c:
+                continue
+            signatures["contacts"][pid].add(
+                f"{_normalize_text(c.get('contact_type')).lower()}|{_normalize_contact_name(c.get('name'))}|{_normalize_text(c.get('organization')).lower()}"
+            )
+            for pn in c.get("phones", set()):
+                signatures["contact_phones"][pid].add(pn)
+            for em in c.get("emails", set()):
+                signatures["contact_emails"][pid].add(em)
+            for addr in c.get("addresses", set()):
+                signatures["contact_addresses"][pid].add(addr)
+
+    return signatures
+
+
+def _rows_to_count_dict(
+    rows: list[tuple[str, int]], pointids: list[str]
+) -> dict[str, int]:
+    """Map each requested PointID to its row count, defaulting to 0."""
+    counts = dict.fromkeys(pointids, 0)
+    # Rows for PointIDs outside the request are ignored.
+    counts.update((key, int(total)) for key, total in rows if key in counts)
+    return counts
+
+
+def _destination_entity_counts(pointids: list[str]) -> dict[str, dict[str, int]]:
+    """Count destination rows per PointID for each smoke-tested entity.
+
+    Thing rows are restricted to ``thing_type == "water well"``; every other
+    query matches on ``Thing.name`` alone. ``contact_phones`` is the sum of
+    ``Phone`` and ``IncompleteNMAPhone`` rows reached through the Thing's
+    contact associations, and ``waterlevel_observations`` counts
+    ``Observation`` rows whose ``Sample`` has a non-null
+    ``nma_pk_waterlevels``.
+
+    Args:
+        pointids: ``Thing.name`` values to look up.
+
+    Returns:
+        ``{entity: {pointid: count}}``. Each requested PointID maps to 0 when
+        no rows matched. An empty ``pointids`` yields an empty mapping per
+        entity without opening a session.
+    """
+    entity_names = (
+        "thing",
+        "wellscreens",
+        "contacts",
+        "contact_phones",
+        "contact_emails",
+        "contact_addresses",
+        "waterlevel_observations",
+        "deployments",
+    )
+    if not pointids:
+        return {entity: {} for entity in entity_names}
+
+    # Join hops shared by every contact-derived query.
+    to_association = (
+        ThingContactAssociation,
+        ThingContactAssociation.thing_id == Thing.id,
+    )
+    to_contact = (Contact, Contact.id == ThingContactAssociation.contact_id)
+
+    def count_by_thing(counted, *hops):
+        """Build a ``Thing.name``-grouped count query over the given joins."""
+        stmt = select(Thing.name, func.count(counted))
+        for target, onclause in hops:
+            stmt = stmt.join(target, onclause)
+        return stmt.where(Thing.name.in_(pointids)).group_by(Thing.name)
+
+    def count_contact_child(model):
+        """Count ``model`` rows reached through a Thing's linked contacts."""
+        return count_by_thing(
+            model.id,
+            to_association,
+            to_contact,
+            (model, model.contact_id == Contact.id),
+        )
+
+    # Dict insertion order doubles as query execution order.
+    statements = {
+        # Only this query is limited to water wells.
+        "thing": count_by_thing(Thing.id).where(Thing.thing_type == "water well"),
+        "wellscreens": count_by_thing(
+            WellScreen.id,
+            (WellScreen, WellScreen.thing_id == Thing.id),
+        ),
+        "contacts": count_by_thing(ThingContactAssociation.id, to_association),
+        "contact_phones": count_contact_child(Phone),
+        "incomplete_phones": count_contact_child(IncompleteNMAPhone),
+        "contact_emails": count_contact_child(Email),
+        "contact_addresses": count_contact_child(Address),
+        "deployments": count_by_thing(
+            Deployment.id,
+            (Deployment, Deployment.thing_id == Thing.id),
+        ),
+        # Observations hang off Thing via FieldEvent -> FieldActivity -> Sample.
+        "waterlevel_observations": count_by_thing(
+            Observation.id,
+            (FieldEvent, FieldEvent.thing_id == Thing.id),
+            (FieldActivity, FieldActivity.field_event_id == FieldEvent.id),
+            (Sample, Sample.field_activity_id == FieldActivity.id),
+            (Observation, Observation.sample_id == Sample.id),
+        ).where(Sample.nma_pk_waterlevels.is_not(None)),
+    }
+
+    with session_ctx() as session:
+        # .all() materialises each result before the session closes.
+        fetched = {
+            name: session.execute(stmt).all() for name, stmt in statements.items()
+        }
+
+    results = {
+        entity: _rows_to_count_dict(fetched[entity], pointids)
+        for entity in entity_names
+    }
+
+    # IncompleteNMAPhone rows are folded into the phone totals.
+    incomplete_phone_counts = _rows_to_count_dict(
+        fetched["incomplete_phones"], pointids
+    )
+    phone_counts = results["contact_phones"]
+    # A PointID listed twice in pointids is incremented twice.
+    for pid in pointids:
+        phone_counts[pid] = int(phone_counts.get(pid, 0)) + int(
+            incomplete_phone_counts.get(pid, 0)
+        )
+
+    return results
+
+
+def _destination_entity_signatures(
+    pointids: list[str],
+) -> dict[str, dict[str, set[str]]]:
+    """Collect normalized value signatures per PointID from the destination.
+
+    Signature layouts (multi-column values are ``|``-joined):
+        thing: ``well_depth|hole_depth|FORMATION_ZONE``
+        wellscreens: ``top|bottom|screen_type``
+        contacts: ``contact_type|name|organization``
+        contact_phones / contact_emails: one normalized value each
+        contact_addresses: ``line1|city|state|postal_code``
+        deployments: ``serial_no|installation_date|removal_date``
+
+    ``waterlevel_observations`` only receives a blank map here.
+
+    Args:
+        pointids: ``Thing.name`` values to look up.
+
+    Returns:
+        ``{entity: {pointid: {signature, ...}}}``. With an empty ``pointids``
+        the blank maps are returned without opening a session.
+    """
+    signatures = {
+        entity: _blank_signature_map(pointids)
+        for entity in (
+            "thing",
+            "wellscreens",
+            "contacts",
+            "contact_phones",
+            "contact_emails",
+            "contact_addresses",
+            "waterlevel_observations",
+            "deployments",
+        )
+    }
+    if not pointids:
+        return signatures
+
+    def lowered(value):
+        """Normalize ``value`` to text and lower-case it."""
+        return _normalize_text(value).lower()
+
+    def via_contacts(*columns, child=None):
+        """Select ``Thing.name`` plus ``columns`` through the contact links."""
+        stmt = (
+            select(Thing.name, *columns)
+            .join(ThingContactAssociation, ThingContactAssociation.thing_id == Thing.id)
+            .join(Contact, Contact.id == ThingContactAssociation.contact_id)
+        )
+        if child is not None:
+            stmt = stmt.join(child, child.contact_id == Contact.id)
+        return stmt.where(Thing.name.in_(pointids))
+
+    with session_ctx() as session:
+        well_stmt = (
+            select(
+                Thing.name, Thing.well_depth, Thing.hole_depth, Thing.nma_formation_zone
+            )
+            .where(Thing.name.in_(pointids))
+            .where(Thing.thing_type == "water well")
+        )
+        for pid, well_depth, hole_depth, zone in session.execute(well_stmt).all():
+            parts = (
+                _normalize_number(well_depth),
+                _normalize_number(hole_depth),
+                _normalize_text(zone).upper(),
+            )
+            signatures["thing"][pid].add("|".join(parts))
+
+        screen_stmt = (
+            select(
+                Thing.name,
+                WellScreen.screen_depth_top,
+                WellScreen.screen_depth_bottom,
+                WellScreen.screen_type,
+            )
+            .join(WellScreen, WellScreen.thing_id == Thing.id)
+            .where(Thing.name.in_(pointids))
+        )
+        for pid, top, bottom, screen_type in session.execute(screen_stmt).all():
+            parts = (
+                _normalize_number(top),
+                _normalize_number(bottom),
+                lowered(screen_type),
+            )
+            signatures["wellscreens"][pid].add("|".join(parts))
+
+        contact_stmt = via_contacts(
+            Contact.contact_type, Contact.name, Contact.organization
+        )
+        for pid, ctype, name, org in session.execute(contact_stmt).all():
+            signatures["contacts"][pid].add(
+                f"{lowered(ctype)}|{_normalize_contact_name(name)}|{lowered(org)}"
+            )
+
+        # Complete and incomplete phone numbers share one signature bucket.
+        for phone_model in (Phone, IncompleteNMAPhone):
+            phone_stmt = via_contacts(phone_model.phone_number, child=phone_model)
+            for pid, raw_phone in session.execute(phone_stmt).all():
+                phone = _normalize_phone(raw_phone)
+                if phone:
+                    signatures["contact_phones"][pid].add(phone)
+
+        email_stmt = via_contacts(Email.email, child=Email)
+        for pid, raw_email in session.execute(email_stmt).all():
+            email = _normalize_email(raw_email).lower()
+            if email:
+                signatures["contact_emails"][pid].add(email)
+
+        address_stmt = via_contacts(
+            Address.address_line_1,
+            Address.city,
+            Address.state,
+            Address.postal_code,
+            child=Address,
+        )
+        for pid, *address_parts in session.execute(address_stmt).all():
+            # Addresses without a first line contribute no signature.
+            if _has_text(address_parts[0]):
+                signatures["contact_addresses"][pid].add(
+                    "|".join(lowered(part) for part in address_parts)
+                )
+
+        deployment_stmt = (
+            select(
+                Thing.name,
+                Sensor.serial_no,
+                Deployment.installation_date,
+                Deployment.removal_date,
+            )
+            .join(Deployment, Deployment.thing_id == Thing.id)
+            .join(Sensor, Sensor.id == Deployment.sensor_id)
+            .where(Thing.name.in_(pointids))
+        )
+        for pid, serial, installed, removed in session.execute(deployment_stmt).all():
+            parts = (
+                lowered(serial),
+                # Dates are truncated to the first 10 characters of their text.
+                _normalize_text(installed)[:10],
+                _normalize_text(removed)[:10],
+            )
+            signatures["deployments"][pid].add("|".join(parts))
+
+    return signatures
+
+
+def _status(source_count: int, destination_count: int) -> EntityStatus:
+    """Classify an entity's presence from its source/destination counts."""
+    in_source, in_destination = source_count > 0, destination_count > 0
+    if in_source:
+        if in_destination:
+            return EntityStatus.present_in_both
+        return EntityStatus.missing_in_destination
+    if in_destination:
+        return EntityStatus.extra_in_destination
+    return EntityStatus.absent_in_both
+
+
+def _value_status(
+    source_values: set[str], destination_values: set[str], compare_enabled: bool
+) -> tuple[ValueStatus, list[str], list[str]]:
+    """Diff two signature sets; return status plus up to 5 samples per side."""
+    if not compare_enabled:
+        return ValueStatus.not_applicable, [], []
+    only_source = sorted(source_values - destination_values)[:5]
+    only_destination = sorted(destination_values - source_values)[:5]
+    if only_source and only_destination:
+        return ValueStatus.both_missing_and_extra, only_source, only_destination
+    if only_source:
+        return ValueStatus.missing_in_destination, only_source, []
+    if only_destination:
+        return ValueStatus.extra_in_destination, [], only_destination
+    return ValueStatus.match, [], []
+
+
+def run_well_smoke_test(
+    sample_size: int,
+    population: SmokePopulation,
+    seed: int,
+    all_wells: bool = False,
+) -> dict[str, Any]:
+    """Compare sampled source wells against the destination database.
+
+    The sampling arguments are forwarded to ``_sample_pointids``.
+
+    Returns:
+        Summary counters, ``failed_wells`` and one ``entity_results`` row per
+        (PointID, entity); the same keys are present even for an empty sample.
+    """
+    well_df = _load_well_population(population)
+    pointids = _sample_pointids(
+        well_df, sample_size=sample_size, seed=seed, all_wells=all_wells
+    )
+
+    if not pointids:
+        # Same keys as the populated payload, so consumers need no special case.
+        return {
+            "population": population.value,
+            "seed": seed,
+            "sample_size": sample_size,
+            "available_wells": 0,
+            "sampled_wells": 0,
+            "mismatch_count": 0,
+            "value_mismatch_count": 0,
+            "well_fail_count": 0,
+            "failed_wells": [],
+            "entity_results": [],
+        }
+
+    source = _source_entity_counts(pointids, well_df)
+    dest = _destination_entity_counts(pointids)
+    source_values = _source_entity_signatures(pointids, well_df)
+    dest_values = _destination_entity_signatures(pointids)
+
+    entities = [
+        "thing",
+        "wellscreens",
+        "contacts",
+        "contact_phones",
+        "contact_emails",
+        "contact_addresses",
+        "waterlevel_observations",
+        "deployments",
+    ]
+    # Water-level observations are compared by count only.
+    value_compare_entities = set(entities) - {"waterlevel_observations"}
+
+    results: list[SmokeResult] = []
+    for pid in pointids:
+        for entity in entities:
+            compare_values = entity in value_compare_entities
+            src_values_set = source_values.get(entity, {}).get(pid, set())
+            dst_values_set = dest_values.get(entity, {}).get(pid, set())
+            if compare_values:
+                # Use the normalized value sets for presence status to avoid
+                # false count mismatches from contact reuse.
+                src_count = len(src_values_set)
+                dst_count = len(dst_values_set)
+            else:
+                src_count = int(source.get(entity, {}).get(pid, 0))
+                dst_count = int(dest.get(entity, {}).get(pid, 0))
+            vstatus, missing_vals, extra_vals = _value_status(
+                src_values_set, dst_values_set, compare_enabled=compare_values
+            )
+            results.append(
+                SmokeResult(
+                    pointid=pid,
+                    entity=entity,
+                    source_count=src_count,
+                    destination_count=dst_count,
+                    status=_status(src_count, dst_count),
+                    value_status=vstatus,
+                    missing_value_sample=missing_vals,
+                    extra_value_sample=extra_vals,
+                )
+            )
+
+    clean = {ValueStatus.match, ValueStatus.not_applicable}
+    value_mismatches = [r for r in results if r.value_status not in clean]
+    mismatches = [r for r in results if not r.passed]
+    failed_wells = sorted(
+        {r.pointid for r in mismatches} | {r.pointid for r in value_mismatches}
+    )
+
+    return {
+        "population": population.value,
+        "seed": seed,
+        "sample_size": sample_size,
+        "available_wells": int(well_df["PointID"].dropna().nunique()),
+        "sampled_wells": len(pointids),
+        "mismatch_count": len(mismatches),
+        "value_mismatch_count": len(value_mismatches),
+        "well_fail_count": len(failed_wells),
+        "failed_wells": failed_wells,
+        "entity_results": [
+            {
+                "pointid": r.pointid,
+                "entity": r.entity,
+                "source_count": r.source_count,
+                "destination_count": r.destination_count,
+                "status": r.status.value,
+                "value_status": r.value_status.value,
+                "missing_value_sample": r.missing_value_sample,
+                "extra_value_sample": r.extra_value_sample,
+                "passed": r.passed,
+            }
+            for r in results
+        ],
+    }
+
+
+def write_smoke_outputs(
+    payload: dict[str, Any], detail_path: Path, summary_path: Path
+) -> None:
+    """Write per-entity rows to a CSV and the remaining payload to JSON."""
+    for target in (detail_path, summary_path):
+        target.parent.mkdir(parents=True, exist_ok=True)
+    detail_frame = pd.DataFrame(payload.get("entity_results", []))
+    detail_frame.to_csv(detail_path, index=False)
+    # Everything except the bulky per-entity rows goes into the summary file.
+    summary = {key: val for key, val in payload.items() if key != "entity_results"}
+    summary_path.write_text(json.dumps(summary, indent=2), encoding="utf-8")
diff --git a/transfers/waterlevels_transfer.py b/transfers/waterlevels_transfer.py
index 261faf53..9c45cf26 100644
--- a/transfers/waterlevels_transfer.py
+++ b/transfers/waterlevels_transfer.py
@@ -94,7 +94,7 @@ def __init__(self, *args, **kw):
         with open(path, "r") as f:
             self._measured_by_mapper = json.load(f)
 
-        self._created_contacts = {}
+        self._created_contact_id_by_key: dict[tuple[str, str], int] = {}
         self._thing_id_by_pointid: dict[str, int] = {}
         self._owner_contact_id_by_pointid: dict[str, int] = {}
         self._build_caches()
@@ -206,7 +206,7 @@ def _transfer_hook(self, session: Session) -> None:
 
                 release_status = "public" if row.PublicRelease else "private"
 
-                field_event_participants = self._get_field_event_participants(
+                field_event_participant_ids = self._get_field_event_participant_ids(
                     session, row
                 )
                 stats["contacts_created"] += getattr(
@@ -216,7 +216,7 @@ def _transfer_hook(self, session: Session) -> None:
                     self, "_last_contacts_reused_count", 0
                 )
 
-                if not field_event_participants:
+                if not field_event_participant_ids:
                     stats["rows_missing_participants"] += 1
 
                 is_destroyed = (
@@ -236,7 +236,7 @@ def _transfer_hook(self, session: Session) -> None:
                         "dt_utc": dt_utc,
                         "glv": glv,
                         "release_status": release_status,
-                        "participants": field_event_participants,
+                        "participant_ids": field_event_participant_ids,
                         "is_destroyed": is_destroyed,
                     }
                 )
@@ -273,11 +273,13 @@ def _transfer_hook(self, session: Session) -> None:
                 participant_rows: list[dict[str, Any]] = []
                 lead_row_pos_by_prepared_idx: dict[int, int] = {}
                 for prepared_idx, prep in enumerate(prepared_rows):
-                    for participant_idx, participant in enumerate(prep["participants"]):
+                    for participant_idx, participant_id in enumerate(
+                        prep["participant_ids"]
+                    ):
                         participant_rows.append(
                             {
                                 "field_event_id": field_event_ids[prepared_idx],
-                                "contact_id": participant.id,
+                                "contact_id": participant_id,
                                 "participant_role": (
                                     "Lead" if participant_idx == 0 else "Participant"
                                 ),
@@ -578,10 +580,10 @@ def _get_groundwater_level_reason(self, row) -> str:
             raise ValueError(f"Unknown groundwater level reason: {glv}")
         return glv
 
-    def _get_field_event_participants(self, session, row) -> list[Contact]:
+    def _get_field_event_participant_ids(self, session, row) -> list[int]:
         self._last_contacts_created_count = 0
         self._last_contacts_reused_count = 0
-        field_event_participants = []
+        field_event_participant_ids: list[int] = []
         measured_by = None if pd.isna(row.MeasuredBy) else row.MeasuredBy
 
         if measured_by not in ["Owner", "Owner report", "Well owner"]:
@@ -590,35 +592,58 @@ def _get_field_event_participants(self, session, row) -> list[Contact]:
                 contact_info = get_contacts_info(
                     row, measured_by, self._measured_by_mapper
                 )
+                contacts_to_create: list[dict[str, Any]] = []
+                missing_keys: list[tuple[str, str]] = []
                 for name, organization, role in contact_info:
-                    if (name, organization) in self._created_contacts:
-                        contact = self._created_contacts[(name, organization)]
+                    key = (name, organization)
+                    contact_id = self._created_contact_id_by_key.get(key)
+                    if contact_id is not None:
+                        field_event_participant_ids.append(contact_id)
                         self._last_contacts_reused_count += 1
                     else:
-                        try:
-                            # create new contact if not already created
-                            contact = Contact(
-                                name=name,
-                                role=role,
-                                contact_type="Field Event Participant",
-                                organization=organization,
-                                nma_pk_waterlevels=row.GlobalID,
-                            )
-                            session.add(contact)
-
-                            logger.info(
-                                f"{SPACE_2}Created contact: | Name {contact.name} | Role {contact.role} | Organization {contact.organization} | nma_pk_waterlevels {contact.nma_pk_waterlevels}"
+                        contacts_to_create.append(
+                            {
+                                "name": name,
+                                "role": role,
+                                "contact_type": "Field Event Participant",
+                                "organization": organization,
+                                "nma_pk_waterlevels": row.GlobalID,
+                            }
+                        )
+                        missing_keys.append(key)
+
+                if contacts_to_create:
+                    try:
+                        # RETURNING rows are only guaranteed to line up with
+                        # the parameter order when this flag is set.
+                        stmt = insert(Contact).returning(
+                            Contact.id, sort_by_parameter_order=True
+                        )
+                        created_contact_ids = (
+                            session.execute(stmt, contacts_to_create).scalars().all()
+                        )
-                            )
-
-                            self._created_contacts[(name, organization)] = contact
+                    except Exception as e:
+                        logger.critical(
+                            "Contact insert failed for PointID=%s, GlobalID=%s: %s",
+                            row.PointID,
+                            row.GlobalID,
+                            str(e),
+                        )
+                    else:
+                        for key, created_contact_id, payload in zip(
+                            missing_keys, created_contact_ids, contacts_to_create
+                        ):
+                            self._created_contact_id_by_key[key] = created_contact_id
+                            field_event_participant_ids.append(created_contact_id)
                             self._last_contacts_created_count += 1
-                        except Exception as e:
-                            logger.critical(
-                                f"Contact cannot be created: Name {name} | Role {role} | Organization {organization} because of the following: {str(e)}"
+                            logger.info(
+                                "%sCreated contact: | Name %s | Role %s | Organization %s | nma_pk_waterlevels %s",
+                                SPACE_2,
+                                payload["name"],
+                                payload["role"],
+                                payload["organization"],
+                                payload["nma_pk_waterlevels"],
                             )
-                            continue
-
-                    field_event_participants.append(contact)
         else:
             owner_contact_id = self._owner_contact_id_by_pointid.get(row.PointID)
             if owner_contact_id is None:
@@ -633,30 +658,16 @@ def _get_field_event_participants(self, session, row) -> list[Contact]:
                     "MeasuredBy",
                 )
             else:
-                contact = session.get(Contact, owner_contact_id)
-                if contact is None:
-                    logger.warning(
-                        "Owner contact id=%s not found for PointID=%s; cannot use owner fallback for %s",
-                        owner_contact_id,
-                        row.PointID,
-                        self._row_context(row),
-                    )
-                    self._capture_error(
-                        row.PointID,
-                        f"owner contact id {owner_contact_id} not found",
-                        "MeasuredBy",
-                    )
-                else:
-                    field_event_participants.append(contact)
-                    self._last_contacts_reused_count += 1
+                field_event_participant_ids.append(owner_contact_id)
+                self._last_contacts_reused_count += 1
 
-        if len(field_event_participants) == 0:
+        if len(field_event_participant_ids) == 0:
             logger.warning(
                 f"No contacts can be associated with the WaterLevels record with GlobalID {row.GlobalID}; "
                 f"continuing with nullable field_event_participant_id."
             )
 
-        return field_event_participants
+        return field_event_participant_ids
 
     def _row_context(self, row: Any) -> str:
         return (