@@ -84,7 +84,24 @@ def get_records(self, *args, **kw):
8484
8585 q = q .filter (" and " .join (fs ))
8686 q = q .expand ("Thing/Locations" )
87- return [di .thing .locations .entities [0 ] for di in q .list ()]
87+
88+ # NM ENV has multiple datastreams per parameter per location (e.g. id 8 and arsenic)
89+ # because of this, duplicative site information is retrieved (we operated under the assumption of one datastream per location per parameter)
90+ # so we need to filter out duplicates, otherwise there will be multiple site records and duplicative parameter records
91+ all_sites = [di .thing .locations .entities [0 ] for di in q .list ()]
92+
93+ # can't do list(set(all_sites)) because the Location entities are not hashable
94+ site_dictionary = {}
95+ for site in all_sites :
96+ site_id = site .id
97+ if site_id not in site_dictionary .keys ():
98+ site_dictionary [site_id ] = site
99+
100+ distinct_sites = list (site_dictionary .values ())
101+ # print(
102+ # f"Found {len(all_sites)} datastreams for {analyte} and {len(distinct_sites)} distinct sites."
103+ # )
104+ return distinct_sites
88105
89106
90107class DWBAnalyteSource (STAnalyteSource ):
@@ -120,16 +137,20 @@ def get_records(self, site, *args, **kw):
120137 f"Thing/Locations/id eq { site .id } and ObservedProperty/id eq { analyte } "
121138 )
122139
123- ds = q .list ().entities [0 ]
140+ # NMED DWB has multiple datastreams per parameter per location (e.g. id 8 and arsenic)
141+ # print(
142+ # f"Found {len(q.list().entities)} datastreams for {site.id} and {analyte}."
143+ # )
124144 rs = []
125- for obs in ds .get_observations ().query ().list ():
126- rs .append (
127- {
128- "location" : site ,
129- "datastream" : ds ,
130- "observation" : obs ,
131- }
132- )
145+ for datastream in q .list ().entities :
146+ for obs in datastream .get_observations ().query ().list ():
147+ rs .append (
148+ {
149+ "location" : site ,
150+ "datastream" : datastream ,
151+ "observation" : obs ,
152+ }
153+ )
133154
134155 return rs
135156
0 commit comments