-
Notifications
You must be signed in to change notification settings - Fork 4
transfer-fix #477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
transfer-fix #477
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -55,25 +55,23 @@ class RadionuclidesTransferer(Transferer): | |||||||||||||
| def __init__(self, *args, batch_size: int = 1000, **kwargs): | ||||||||||||||
| super().__init__(*args, **kwargs) | ||||||||||||||
| self.batch_size = batch_size | ||||||||||||||
| # Cache: legacy UUID -> (Integer id, thing_id) | ||||||||||||||
| self._sample_info_cache: dict[UUID, tuple[int, int]] = {} | ||||||||||||||
| # Cache: legacy UUID -> Integer chemistry_sample_info_id | ||||||||||||||
| self._sample_info_cache: dict[UUID, int] = {} | ||||||||||||||
| self._build_sample_info_cache() | ||||||||||||||
|
|
||||||||||||||
| def _build_sample_info_cache(self) -> None: | ||||||||||||||
| """Build cache of nma_sample_pt_id -> (id, thing_id) for FK lookups.""" | ||||||||||||||
| """Build cache of nma_sample_pt_id -> chemistry_sample_info_id for FK lookups.""" | ||||||||||||||
| with session_ctx() as session: | ||||||||||||||
| sample_infos = ( | ||||||||||||||
| session.query( | ||||||||||||||
| NMA_Chemistry_SampleInfo.nma_sample_pt_id, | ||||||||||||||
| NMA_Chemistry_SampleInfo.id, | ||||||||||||||
| NMA_Chemistry_SampleInfo.thing_id, | ||||||||||||||
| ) | ||||||||||||||
| .filter(NMA_Chemistry_SampleInfo.nma_sample_pt_id.isnot(None)) | ||||||||||||||
| .all() | ||||||||||||||
| ) | ||||||||||||||
| self._sample_info_cache = { | ||||||||||||||
| nma_sample_pt_id: (csi_id, thing_id) | ||||||||||||||
| for nma_sample_pt_id, csi_id, thing_id in sample_infos | ||||||||||||||
| nma_sample_pt_id: csi_id for nma_sample_pt_id, csi_id in sample_infos | ||||||||||||||
| } | ||||||||||||||
| logger.info( | ||||||||||||||
| f"Built ChemistrySampleInfo cache with {len(self._sample_info_cache)} entries" | ||||||||||||||
|
|
@@ -105,7 +103,6 @@ def _filter_to_valid_sample_infos(self, df: pd.DataFrame) -> pd.DataFrame: | |||||||||||||
| def _transfer_hook(self, session: Session) -> None: | ||||||||||||||
| row_dicts = [] | ||||||||||||||
| skipped_global_id = 0 | ||||||||||||||
| skipped_thing_id = 0 | ||||||||||||||
| for row in self.cleaned_df.to_dict("records"): | ||||||||||||||
| row_dict = self._row_dict(row) | ||||||||||||||
| if row_dict is None: | ||||||||||||||
|
|
@@ -117,13 +114,6 @@ def _transfer_hook(self, session: Session) -> None: | |||||||||||||
| row_dict.get("nma_SamplePtID"), | ||||||||||||||
| ) | ||||||||||||||
| continue | ||||||||||||||
| if row_dict.get("thing_id") is None: | ||||||||||||||
| skipped_thing_id += 1 | ||||||||||||||
| logger.warning( | ||||||||||||||
| "Skipping Radionuclides nma_SamplePtID=%s - Thing not found", | ||||||||||||||
| row_dict.get("nma_SamplePtID"), | ||||||||||||||
| ) | ||||||||||||||
| continue | ||||||||||||||
| if row_dict.get("chemistry_sample_info_id") is None: | ||||||||||||||
| logger.warning( | ||||||||||||||
| "Skipping Radionuclides nma_SamplePtID=%s - chemistry_sample_info_id not found", | ||||||||||||||
|
|
@@ -137,12 +127,6 @@ def _transfer_hook(self, session: Session) -> None: | |||||||||||||
| "Skipped %s Radionuclides records without valid nma_GlobalID", | ||||||||||||||
| skipped_global_id, | ||||||||||||||
| ) | ||||||||||||||
| if skipped_thing_id > 0: | ||||||||||||||
| logger.warning( | ||||||||||||||
| "Skipped %s Radionuclides records without valid Thing", | ||||||||||||||
| skipped_thing_id, | ||||||||||||||
| ) | ||||||||||||||
|
|
||||||||||||||
| rows = self._dedupe_rows(row_dicts, key="nma_GlobalID") | ||||||||||||||
| insert_stmt = insert(NMA_Radionuclides) | ||||||||||||||
| excluded = insert_stmt.excluded | ||||||||||||||
|
|
@@ -156,7 +140,6 @@ def _transfer_hook(self, session: Session) -> None: | |||||||||||||
| stmt = insert_stmt.values(chunk).on_conflict_do_update( | ||||||||||||||
| index_elements=["nma_GlobalID"], | ||||||||||||||
| set_={ | ||||||||||||||
| "thing_id": excluded.thing_id, | ||||||||||||||
| "chemistry_sample_info_id": excluded.chemistry_sample_info_id, | ||||||||||||||
| "nma_SamplePtID": excluded.nma_SamplePtID, | ||||||||||||||
| "nma_SamplePointID": excluded.nma_SamplePointID, | ||||||||||||||
|
|
@@ -220,18 +203,15 @@ def int_val(key: str) -> Optional[int]: | |||||||||||||
| ) | ||||||||||||||
| return None | ||||||||||||||
|
|
||||||||||||||
| # Look up Integer FK and thing_id from cache | ||||||||||||||
| cache_entry = self._sample_info_cache.get(legacy_sample_pt_id) | ||||||||||||||
| chemistry_sample_info_id = cache_entry[0] if cache_entry else None | ||||||||||||||
| thing_id = cache_entry[1] if cache_entry else None | ||||||||||||||
| # Look up Integer FK from cache | ||||||||||||||
| chemistry_sample_info_id = self._sample_info_cache.get(legacy_sample_pt_id) | ||||||||||||||
|
|
||||||||||||||
| nma_global_id = self._uuid_val(val("GlobalID")) | ||||||||||||||
|
|
||||||||||||||
| return { | ||||||||||||||
| # Legacy UUID PK -> nma_global_id (unique audit column) | ||||||||||||||
| "nma_GlobalID": nma_global_id, | ||||||||||||||
| # FKs | ||||||||||||||
|
||||||||||||||
| # FKs | |
| # FKs | |
| # NOTE: The legacy `thing_id` column is intentionally not transferred here. | |
| # In the integer-PK schema, `chemistry_sample_info_id` (backed by the | |
| # NMA_Chemistry_SampleInfo row) and the legacy IDs below provide the | |
| # necessary linkage, so a separate `thing_id` value is no longer required. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The upsert no longer sets `thing_id`. If `NMA_Radionuclides.thing_id` is a non-nullable column or is expected to stay in sync with the associated sample/thing, inserts may fail (NULL) and updates may leave stale values. Either (a) keep populating/updating `thing_id`, or (b) remove/relax the `thing_id` requirement at the schema/model level and ensure downstream code does not rely on it.