Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 6 additions & 26 deletions transfers/radionuclides.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,25 +55,23 @@ class RadionuclidesTransferer(Transferer):
def __init__(self, *args, batch_size: int = 1000, **kwargs):
super().__init__(*args, **kwargs)
self.batch_size = batch_size
# Cache: legacy UUID -> (Integer id, thing_id)
self._sample_info_cache: dict[UUID, tuple[int, int]] = {}
# Cache: legacy UUID -> Integer chemistry_sample_info_id
self._sample_info_cache: dict[UUID, int] = {}
self._build_sample_info_cache()

def _build_sample_info_cache(self) -> None:
"""Build cache of nma_sample_pt_id -> (id, thing_id) for FK lookups."""
"""Build cache of nma_sample_pt_id -> chemistry_sample_info_id for FK lookups."""
with session_ctx() as session:
sample_infos = (
session.query(
NMA_Chemistry_SampleInfo.nma_sample_pt_id,
NMA_Chemistry_SampleInfo.id,
NMA_Chemistry_SampleInfo.thing_id,
)
.filter(NMA_Chemistry_SampleInfo.nma_sample_pt_id.isnot(None))
.all()
)
self._sample_info_cache = {
nma_sample_pt_id: (csi_id, thing_id)
for nma_sample_pt_id, csi_id, thing_id in sample_infos
nma_sample_pt_id: csi_id for nma_sample_pt_id, csi_id in sample_infos
}
logger.info(
f"Built ChemistrySampleInfo cache with {len(self._sample_info_cache)} entries"
Expand Down Expand Up @@ -105,7 +103,6 @@ def _filter_to_valid_sample_infos(self, df: pd.DataFrame) -> pd.DataFrame:
def _transfer_hook(self, session: Session) -> None:
row_dicts = []
skipped_global_id = 0
skipped_thing_id = 0
for row in self.cleaned_df.to_dict("records"):
row_dict = self._row_dict(row)
if row_dict is None:
Expand All @@ -117,13 +114,6 @@ def _transfer_hook(self, session: Session) -> None:
row_dict.get("nma_SamplePtID"),
)
continue
if row_dict.get("thing_id") is None:
skipped_thing_id += 1
logger.warning(
"Skipping Radionuclides nma_SamplePtID=%s - Thing not found",
row_dict.get("nma_SamplePtID"),
)
continue
if row_dict.get("chemistry_sample_info_id") is None:
logger.warning(
"Skipping Radionuclides nma_SamplePtID=%s - chemistry_sample_info_id not found",
Expand All @@ -137,12 +127,6 @@ def _transfer_hook(self, session: Session) -> None:
"Skipped %s Radionuclides records without valid nma_GlobalID",
skipped_global_id,
)
if skipped_thing_id > 0:
logger.warning(
"Skipped %s Radionuclides records without valid Thing",
skipped_thing_id,
)

rows = self._dedupe_rows(row_dicts, key="nma_GlobalID")
insert_stmt = insert(NMA_Radionuclides)
excluded = insert_stmt.excluded
Expand All @@ -156,7 +140,6 @@ def _transfer_hook(self, session: Session) -> None:
stmt = insert_stmt.values(chunk).on_conflict_do_update(
index_elements=["nma_GlobalID"],
set_={
"thing_id": excluded.thing_id,
"chemistry_sample_info_id": excluded.chemistry_sample_info_id,
Copy link

Copilot AI Feb 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The upsert no longer sets thing_id, and the row payload no longer supplies it. If NMA_Radionuclides.thing_id is a non-nullable column without a default, inserts will fail with a NOT NULL violation; if it is nullable but expected to stay in sync with the associated sample/thing, conflicting rows that are updated will keep their old (possibly stale) thing_id. Either (a) keep populating/updating thing_id, or (b) remove/relax the thing_id requirement at the schema/model level and ensure downstream code does not rely on it.

Suggested change
"chemistry_sample_info_id": excluded.chemistry_sample_info_id,
"chemistry_sample_info_id": excluded.chemistry_sample_info_id,
"thing_id": excluded.thing_id,

Copilot uses AI. Check for mistakes.
"nma_SamplePtID": excluded.nma_SamplePtID,
"nma_SamplePointID": excluded.nma_SamplePointID,
Expand Down Expand Up @@ -220,18 +203,15 @@ def int_val(key: str) -> Optional[int]:
)
return None

# Look up Integer FK and thing_id from cache
cache_entry = self._sample_info_cache.get(legacy_sample_pt_id)
chemistry_sample_info_id = cache_entry[0] if cache_entry else None
thing_id = cache_entry[1] if cache_entry else None
# Look up Integer FK from cache
chemistry_sample_info_id = self._sample_info_cache.get(legacy_sample_pt_id)

nma_global_id = self._uuid_val(val("GlobalID"))

return {
# Legacy UUID PK -> nma_global_id (unique audit column)
"nma_GlobalID": nma_global_id,
# FKs
Copy link

Copilot AI Feb 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With thing_id removed from the row payload, it would help to document (in a brief comment here or in the class docstring) why thing_id is no longer transferred (e.g., derived via join, deprecated column, or guaranteed by chemistry_sample_info_id). This makes the intentional schema/behavior change clearer for future maintainers.

Suggested change
# FKs
# FKs
# NOTE: The legacy `thing_id` column is intentionally not transferred here.
# In the integer-PK schema, `chemistry_sample_info_id` (backed by the
# NMA_Chemistry_SampleInfo row) and the legacy IDs below provide the
# necessary linkage, so a separate `thing_id` value is no longer required.

Copilot uses AI. Check for mistakes.
"thing_id": thing_id,
"chemistry_sample_info_id": chemistry_sample_info_id,
# Legacy ID columns (renamed with nma_ prefix)
"nma_SamplePtID": legacy_sample_pt_id,
Expand Down
Loading