From 0e3422f95cd082d7447cb4695b437f450e6b2302 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 18 Mar 2025 08:58:37 -0600 Subject: [PATCH 001/143] Update for AMPAPI v 0.33.0 - pagination and removal of well from waterlevel records --- backend/connectors/nmbgmr/source.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index d75adae..8700a98 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -15,8 +15,6 @@ # =============================================================================== import os -import httpx - from backend.connectors import NM_STATE_BOUNDING_POLYGON from backend.connectors.nmbgmr.transformer import ( NMBGMRSiteTransformer, @@ -47,12 +45,12 @@ def _make_url(endpoint): if os.getenv("DEBUG") == "1": return f"http://localhost:8000/latest/{endpoint}" - return f"https://waterdata.nmt.edu/latest/{endpoint}" + return f"https://waterdata.nmt.edu//latest/{endpoint}" class NMBGMRSiteSource(BaseSiteSource): transformer_klass = NMBGMRSiteTransformer - chunk_size = 10 + chunk_size = 100 bounding_polygon = NM_STATE_BOUNDING_POLYGON def __repr__(self): @@ -168,7 +166,7 @@ def __repr__(self): def _clean_records(self, records): # remove records with no depth to water value - return [r for r in records if r["DepthToWaterBGS"] is not None] + return [r for r in records if r["DepthToWaterBGS"] is not None and r["DateMeasured"] is not None] def _extract_parameter_record(self, record, *args, **kw): record[PARAMETER_NAME] = DTW @@ -195,7 +193,7 @@ def _extract_source_parameter_results(self, records): return [r["DepthToWaterBGS"] for r in records] def _extract_site_records(self, records, site_record): - return [ri for ri in records if ri["Well"]["PointID"] == site_record.id] + return [ri for ri in records if ri["PointID"] == site_record.id] def _extract_source_parameter_names(self, records): return 
["DepthToWaterBGS" for r in records] @@ -212,7 +210,19 @@ def get_records(self, site_record): # just use manual waterlevels temporarily url = _make_url("waterlevels/manual") - return self._execute_json_request(url, params) + paginated_records = self._execute_json_request(url, params, tag="") + items = paginated_records["items"] + page = paginated_records["page"] + pages = paginated_records["pages"] + + while page < pages: + page += 1 + params["page"] = page + new_records = self._execute_json_request(url, params, tag="") + items.extend(new_records["items"]) + pages = new_records["pages"] + + return items # ============= EOF ============================================= From 5e7eab99da74c1306c28eef3406724c13fb7126e Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 24 Mar 2025 11:14:47 -0600 Subject: [PATCH 002/143] Change summary table column name from "location" to "name" This corresponds with the site table and is more explicity --- backend/record.py | 2 +- backend/transformer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/record.py b/backend/record.py index 8772edd..3834687 100644 --- a/backend/record.py +++ b/backend/record.py @@ -110,7 +110,7 @@ class SummaryRecord(BaseRecord): keys: tuple = ( "source", "id", - "location", + "name", "usgs_site_id", "alternate_site_id", "latitude", diff --git a/backend/transformer.py b/backend/transformer.py index 232bd16..2ce727c 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -668,7 +668,7 @@ def _transform(self, record, site_record): parameter, units = self._get_parameter_name_and_units() rec.update( { - "location": site_record.name, + "name": site_record.name, "usgs_site_id": site_record.usgs_site_id, "alternate_site_id": site_record.alternate_site_id, "latitude": site_record.latitude, From 6d77546b4bb226019a46c16f0841f1ca73baf94b Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 27 Mar 2025 09:23:14 -0600 Subject: [PATCH 003/143] Update changelog --- CHANGELOG.md | 
1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eff71cd..6614b51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ### Changed - NM OSE Roswell data is now pulled from ST2 and not CKAN +- renamed the column `location` to `name` in the summary table to match the format of the `sites` table when timeseries data are exported ### Fixed From 29a07fa16387ef1999261f5fd600bd6ddc7c9b92 Mon Sep 17 00:00:00 2001 From: jross Date: Thu, 27 Mar 2025 10:02:16 -0600 Subject: [PATCH 004/143] started sites only --- backend/bounding_polygons.py | 1 + backend/config.py | 15 ++- backend/connectors/nmbgmr/source.py | 41 ++++---- backend/connectors/nmbgmr/transformer.py | 6 +- backend/connectors/nmenv/source.py | 39 +++++--- backend/connectors/wqp/source.py | 18 ++-- backend/unifier.py | 116 ++++++++++++++++------- frontend/cli.py | 4 +- 8 files changed, 157 insertions(+), 83 deletions(-) diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index d9cd100..32cbbe3 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -174,6 +174,7 @@ def get_state_polygon(state): # private helpers ============================ def _make_shape(obj, as_wkt): poly = shape(obj["geometry"]) + poly = poly.simplify(0.1) if as_wkt: return poly.wkt return poly diff --git a/backend/config.py b/backend/config.py index ec9be97..89c4f59 100644 --- a/backend/config.py +++ b/backend/config.py @@ -116,6 +116,8 @@ class Config(Loggable): county: str = "" wkt: str = "" + sites_only = False + # sources use_source_bernco: bool = True use_source_bor: bool = True @@ -186,6 +188,15 @@ def __init__(self, model=None, payload=None): for s in SOURCE_KEYS: setattr(self, f"use_source_{s}", s in payload.get("sources", [])) + def finalize(self): + self.update_output_name() + self.make_output_path() + + def all_site_sources(self): + sources = 
self.water_level_sources() + sources.extend(self.analyte_sources()) + return sources + def analyte_sources(self): sources = [] @@ -384,7 +395,7 @@ def _validate_county(self): return True - def _update_output_name(self): + def update_output_name(self): """ Generate a unique output name based on existing directories in the output directory. @@ -419,7 +430,7 @@ def _update_output_name(self): self.output_name = output_name - def _make_output_path(self): + def make_output_path(self): if not os.path.exists(self.output_path): os.mkdir(self.output_path) diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index d75adae..a91e29e 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -73,30 +73,33 @@ def get_records(self): if config.site_limit: params["limit"] = config.site_limit - if config.parameter.lower() != "waterlevels": - params["parameter"] = get_analyte_search_param( - config.parameter, NMBGMR_ANALYTE_MAPPING - ) - else: - params["parameter"] = "Manual groundwater levels" + if not config.sites_only: + + if config.parameter.lower() != "waterlevels": + params["parameter"] = get_analyte_search_param( + config.parameter, NMBGMR_ANALYTE_MAPPING + ) + else: + params["parameter"] = "Manual groundwater levels" # tags="features" because the response object is a GeoJSON sites = self._execute_json_request( _make_url("locations"), params, tag="features", timeout=30 ) - for site in sites: - print(f"Obtaining well data for {site['properties']['point_id']}") - well_data = self._execute_json_request( - _make_url("wells"), - params={"pointid": site["properties"]["point_id"]}, - tag="", - ) - site["properties"]["formation"] = well_data["formation"] - site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] - site["properties"]["well_depth_units"] = FEET - # site["properties"]["formation"] = None - # site["properties"]["well_depth"] = None - # site["properties"]["well_depth_units"] = FEET + if not 
config.sites_only: + for site in sites: + print(f"Obtaining well data for {site['properties']['point_id']}") + well_data = self._execute_json_request( + _make_url("wells"), + params={"pointid": site["properties"]["point_id"]}, + tag="", + ) + site["properties"]["formation"] = well_data["formation"] + site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] + site["properties"]["well_depth_units"] = FEET + # site["properties"]["formation"] = None + # site["properties"]["well_depth"] = None + # site["properties"]["well_depth_units"] = FEET return sites diff --git a/backend/connectors/nmbgmr/transformer.py b/backend/connectors/nmbgmr/transformer.py index dd1163e..420c7f6 100644 --- a/backend/connectors/nmbgmr/transformer.py +++ b/backend/connectors/nmbgmr/transformer.py @@ -38,9 +38,9 @@ def _transform(self, record): "vertical_datum": props["altitude_datum"], "usgs_site_id": props["site_id"], "alternate_site_id": props["alternate_site_id"], - "formation": props["formation"], - "well_depth": props["well_depth"], - "well_depth_units": props["well_depth_units"], + "formation": props.get("formation", ""), + "well_depth": props.get("well_depth", ""), + "well_depth_units": props.get("well_depth_units", ""), } return rec diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index 335fd73..b28413f 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -47,29 +47,42 @@ def health(self): return self.get_records(top=10, analyte="TDS") def get_records(self, *args, **kw): + analyte = None if "analyte" in kw: analyte = kw["analyte"] elif self.config: analyte = self.config.parameter - analyte = get_analyte_search_param(analyte, DWB_ANALYTE_MAPPING) - if analyte is None: - return [] - service = self.get_service() - ds = service.datastreams() - q = ds.query() - fs = [f"ObservedProperty/id eq {analyte}"] - if self.config: + if self.config.sites_only: + ds = service.things() + q = ds.query() + fs = [] if 
self.config.has_bounds(): fs.append( - f"st_within(Thing/Location/location, geography'{self.config.bounding_wkt()}')" + f"st_within(Locations/location, geography'{self.config.bounding_wkt()}')" ) - - q = q.filter(" and ".join(fs)) - q = q.expand("Thing/Locations") - return [ds.thing.locations.entities[0] for ds in q.list()] + q = q.expand("Locations") + q = q.filter(" and ".join(fs)) + return [thing.locations.entities[0] for thing in q.list()] + else: + analyte = get_analyte_search_param(analyte, DWB_ANALYTE_MAPPING) + if analyte is None: + return [] + + ds = service.datastreams() + q = ds.query() + fs = [f"ObservedProperty/id eq {analyte}"] + if self.config: + if self.config.has_bounds(): + fs.append( + f"st_within(Thing/Location/location, geography'{self.config.bounding_wkt()}')" + ) + + q = q.filter(" and ".join(fs)) + q = q.expand("Thing/Locations") + return [di.thing.locations.entities[0] for di in q.list()] class DWBAnalyteSource(STAnalyteSource): diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py index 4987fee..996b3aa 100644 --- a/backend/connectors/wqp/source.py +++ b/backend/connectors/wqp/source.py @@ -87,15 +87,15 @@ def get_records(self): } if config.has_bounds(): params["bBox"] = ",".join([str(b) for b in config.bbox_bounding_points()]) - - if config.parameter.lower() != "waterlevels": - params["characteristicName"] = get_analyte_search_param( - config.parameter, WQP_ANALYTE_MAPPING - ) - else: - # every record with pCode 30210 (depth in m) has a corresponding - # record with pCode 72019 (depth in ft) but not vice versa - params["pCode"] = "30210" + if not config.sites_only: + if config.parameter.lower() != "waterlevels": + params["characteristicName"] = get_analyte_search_param( + config.parameter, WQP_ANALYTE_MAPPING + ) + else: + # every record with pCode 30210 (depth in m) has a corresponding + # record with pCode 72019 (depth in ft) but not vice versa + params["pCode"] = "30210" params.update(get_date_range(config)) 
diff --git a/backend/unifier.py b/backend/unifier.py index 9523da9..36dd3b5 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -73,6 +73,16 @@ def unify_waterlevels(config): return True +def unify_sites_only(config): + print("Unifying sites only\n") + + # config.report() -- report is done in cli.py, no need to do it twice + config.validate() + + if not config.dry: + _unify_parameter(config, config.all_site_sources()) + + return True def _perister_factory(config): """ @@ -136,39 +146,44 @@ def _site_wrapper(site_source, parameter_source, persister, config): start_ind = 1 end_ind = 0 first_flag = True - for sites in site_source.chunks(sites): - if site_limit and sites_with_records_count == site_limit: - break - - if type(sites) == list: - if first_flag: - end_ind += len(sites) - first_flag = False + + if config.sites_only: + persister.sites.extend(sites) + else: + for sites in site_source.chunks(sites): + if site_limit and sites_with_records_count == site_limit: + break + + if type(sites) == list: + n = len(sites) + if first_flag: + first_flag = False + else: + start_ind = end_ind + 1 + + end_ind += n + + if use_summarize: + summary_records = parameter_source.read( + sites, use_summarize, start_ind, end_ind + ) + if summary_records: + persister.records.extend(summary_records) else: - start_ind = end_ind + 1 - end_ind += len(sites) - - if use_summarize: - summary_records = parameter_source.read( - sites, use_summarize, start_ind, end_ind - ) - if summary_records: - persister.records.extend(summary_records) - else: - results = parameter_source.read( - sites, use_summarize, start_ind, end_ind - ) - # no records are returned if there is no site record for parameter - # or if the record isn't clean (doesn't have the correct fields) - # don't count these sites to apply to site_limit - if results is None or len(results) == 0: - continue - - for site, records in results: - persister.timeseries.append((site, records)) - persister.sites.append(site) - - 
sites_with_records_count += 1 + results = parameter_source.read( + sites, use_summarize, start_ind, end_ind + ) + # no records are returned if there is no site record for parameter + # or if the record isn't clean (doesn't have the correct fields) + # don't count these sites to apply to site_limit + if results is None or len(results) == 0: + continue + + for site, records in results: + persister.timeseries.append((site, records)) + persister.sites.append(site) + + sites_with_records_count += 1 except BaseException: import traceback @@ -191,6 +206,8 @@ def _unify_parameter( elif config.output_timeseries_unified: persister.dump_timeseries_unified(config.output_path) persister.dump_sites(config.output_path) + elif config.sites_only: + persister.dump_sites(config.output_path) else: # config.output_timeseries_separated persister.dump_timeseries_separated(config.output_path) persister.dump_sites(config.output_path) @@ -297,13 +314,41 @@ def waterlevel_unification_test(): cfg.use_source_nwis = False cfg.use_source_nmbgmr = False cfg.use_source_iscsevenrivers = False - # cfg.use_source_pvacd = False - cfg.use_source_oseroswell = False + cfg.use_source_pvacd = False + # cfg.use_source_oseroswell = False cfg.use_source_bernco = False + cfg.use_source_iscsevenrivers = False + cfg.use_source_nmose_isc_seven_rivers = False + cfg.use_source_ebid = False # cfg.site_limit = 10 unify_waterlevels(cfg) +def site_unification_test(): + cfg = Config() + cfg.county = "chaves" + + + cfg.output_summary = False + cfg.output_name = "sitesonly" + cfg.sites_only = True + # cfg.output_summary = True + # cfg.output_single_timeseries = True + + cfg.use_source_nwis = False + cfg.use_source_nmbgmr = False + cfg.use_source_iscsevenrivers = False + cfg.use_source_pvacd = False + # cfg.use_source_oseroswell = False + cfg.use_source_bernco = False + cfg.use_source_iscsevenrivers = False + cfg.use_source_nmose_isc_seven_rivers = False + cfg.use_source_ebid = False + + cfg.finalize() + + 
unify_sites_only(cfg) def get_datastream(siteid): import httpx @@ -329,7 +374,8 @@ def get_datastreams(): # shandler = logging.StreamHandler() # get_sources(Config()) setup_logging() - waterlevel_unification_test() + site_unification_test() + # waterlevel_unification_test() # analyte_unification_test() # print(health_check("nwis")) # generate_site_bounds() diff --git a/frontend/cli.py b/frontend/cli.py index e03ac0b..8dd330c 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -228,10 +228,10 @@ def weave( config.parameter = parameter # make sure config.output_name is properly set - config._update_output_name() + config.update_output_name() # make output_path now so that die.log can be written to it live - config._make_output_path() + config.make_output_path() # setup logging here so that the path can be set to config.output_path setup_logging(path=config.output_path) From 5f6406c90037c067d22d388639265c5c9e997f24 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 27 Mar 2025 16:58:45 -0600 Subject: [PATCH 005/143] Use terminal function for most recent record to enable earliest record retrieval --- backend/connectors/bor/source.py | 4 ++-- backend/connectors/ckan/source.py | 4 ++-- backend/connectors/isc_seven_rivers/source.py | 6 +++--- backend/connectors/nmbgmr/source.py | 6 +++--- backend/connectors/nmenv/source.py | 6 +++--- backend/connectors/st2/source.py | 4 ++-- backend/connectors/st_connector.py | 6 +++--- backend/connectors/usgs/source.py | 4 ++-- backend/connectors/wqp/source.py | 4 ++-- backend/source.py | 10 ++++++++-- 10 files changed, 30 insertions(+), 24 deletions(-) diff --git a/backend/connectors/bor/source.py b/backend/connectors/bor/source.py index eac6fb3..3fd6a73 100644 --- a/backend/connectors/bor/source.py +++ b/backend/connectors/bor/source.py @@ -33,7 +33,7 @@ BaseSource, BaseSiteSource, BaseAnalyteSource, - get_most_recent, + get_terminal_record, get_analyte_search_param, ) @@ -95,7 +95,7 @@ def _extract_source_parameter_names(self, 
records): def _extract_most_recent(self, rs): - record = get_most_recent(rs, "attributes.dateTime") + record = get_terminal_record(rs, "attributes.dateTime", side="last") return { "value": record["attributes"]["result"], "datetime": parse_dt(record["attributes"]["dateTime"]), diff --git a/backend/connectors/ckan/source.py b/backend/connectors/ckan/source.py index 32bfed0..9301a70 100644 --- a/backend/connectors/ckan/source.py +++ b/backend/connectors/ckan/source.py @@ -46,7 +46,7 @@ BaseSource, BaseSiteSource, BaseWaterLevelSource, - get_most_recent, + get_terminal_record, ) @@ -139,7 +139,7 @@ def _extract_source_parameter_results(self, records): return [float(r["DTWGS"]) for r in records] def _extract_most_recent(self, records): - record = get_most_recent(records, tag="Date") + record = get_terminal_record(records, tag="Date", side="last") return { "value": record["DTWGS"], "datetime": record["Date"], diff --git a/backend/connectors/isc_seven_rivers/source.py b/backend/connectors/isc_seven_rivers/source.py index b791bb1..d022aa8 100644 --- a/backend/connectors/isc_seven_rivers/source.py +++ b/backend/connectors/isc_seven_rivers/source.py @@ -39,7 +39,7 @@ BaseSiteSource, BaseWaterLevelSource, BaseAnalyteSource, - get_most_recent, + get_terminal_record, get_analyte_search_param, ) @@ -121,7 +121,7 @@ def _extract_parameter_record(self, record): return record def _extract_most_recent(self, records): - record = get_most_recent(records, "dateTime") + record = get_terminal_record(records, "dateTime", side="last") return { "value": record["result"], @@ -211,7 +211,7 @@ def _extract_source_parameter_units(self, records): return [self._source_parameter_units for r in records] def _extract_most_recent(self, records): - record = get_most_recent(records, "dateTime") + record = get_terminal_record(records, "dateTime", side="last") t = get_datetime(record) return { "value": record["depthToWaterFeet"], diff --git a/backend/connectors/nmbgmr/source.py 
b/backend/connectors/nmbgmr/source.py index d75adae..5509cea 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -38,7 +38,7 @@ BaseWaterLevelSource, BaseSiteSource, BaseAnalyteSource, - get_most_recent, + get_terminal_record, get_analyte_search_param, make_site_list, ) @@ -132,7 +132,7 @@ def _extract_source_parameter_units(self, records): return [r["Units"] for r in records] def _extract_most_recent(self, records): - record = get_most_recent(records, "info.CollectionDate") + record = get_terminal_record(records, "info.CollectionDate", side="last") return { "value": record["SampleValue"], "datetime": record["info"]["CollectionDate"], @@ -180,7 +180,7 @@ def _extract_parameter_record(self, record, *args, **kw): return record def _extract_most_recent(self, records): - record = get_most_recent(records, "DateMeasured") + record = get_terminal_record(records, "DateMeasured", side="last") return { "value": record["DepthToWaterBGS"], "datetime": (record["DateMeasured"], record["TimeMeasured"]), diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index 335fd73..b18bd85 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -28,7 +28,7 @@ SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, ) -from backend.source import get_analyte_search_param, get_most_recent +from backend.source import get_analyte_search_param, get_terminal_record URL = "https://nmenv.newmexicowaterdata.org/FROST-Server/v1.1/" @@ -152,8 +152,8 @@ def _extract_source_parameter_names(self, records: list) -> list: def _extract_most_recent(self, records): # this is only used in summary output - record = get_most_recent( - records, tag=lambda x: x["observation"].phenomenon_time + record = get_terminal_record( + records, tag=lambda x: x["observation"].phenomenon_time, side="last" ) return { diff --git a/backend/connectors/st2/source.py b/backend/connectors/st2/source.py index 69739f4..04b7540 100644 --- 
a/backend/connectors/st2/source.py +++ b/backend/connectors/st2/source.py @@ -114,8 +114,8 @@ class ST2WaterLevelSource(STWaterLevelSource): url = URL def _extract_most_recent(self, records): - record = get_most_recent( - records, tag=lambda x: x["observation"].phenomenon_time + record = get_terminal_record( + records, tag=lambda x: x["observation"].phenomenon_time, side="last" ) return { diff --git a/backend/connectors/st_connector.py b/backend/connectors/st_connector.py index d6b78ea..5a6c743 100644 --- a/backend/connectors/st_connector.py +++ b/backend/connectors/st_connector.py @@ -23,7 +23,7 @@ BaseSiteSource, BaseWaterLevelSource, BaseAnalyteSource, - get_most_recent, + get_terminal_record, ) from backend.transformer import SiteTransformer @@ -57,8 +57,8 @@ def _get_things( return things.list() def _extract_most_recent(self, records): - record = get_most_recent( - records, tag=lambda x: x["observation"].phenomenon_time + record = get_terminal_record( + records, tag=lambda x: x["observation"].phenomenon_time, side="last" ) return { diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index cd0e1ad..ad23065 100644 --- a/backend/connectors/usgs/source.py +++ b/backend/connectors/usgs/source.py @@ -37,7 +37,7 @@ BaseWaterLevelSource, BaseSiteSource, make_site_list, - get_most_recent, + get_terminal_record, ) @@ -194,7 +194,7 @@ def _extract_source_parameter_units(self, records): return [r["source_parameter_units"] for r in records] def _extract_most_recent(self, records): - record = get_most_recent(records, "datetime_measured") + record = get_terminal_record(records, "datetime_measured", side="last") return { "value": float(record["value"]), # "datetime": (record["date_measured"], record["time_measured"]), diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py index 4987fee..193620b 100644 --- a/backend/connectors/wqp/source.py +++ b/backend/connectors/wqp/source.py @@ -38,7 +38,7 @@ BaseWaterLevelSource, 
BaseParameterSource, make_site_list, - get_most_recent, + get_terminal_record, get_analyte_search_param, ) @@ -140,7 +140,7 @@ def _extract_source_parameter_names(self, records): return [ri["CharacteristicName"] for ri in records] def _extract_most_recent(self, records): - ri = get_most_recent(records, "ActivityStartDate") + ri = get_terminal_record(records, "ActivityStartDate", side="last") return { "value": ri["ResultMeasureValue"], "datetime": ri["ActivityStartDate"], diff --git a/backend/source.py b/backend/source.py index 457006b..2b591b6 100644 --- a/backend/source.py +++ b/backend/source.py @@ -65,7 +65,7 @@ def make_site_list(site_record: list | dict) -> list | str: return sites -def get_most_recent(records: list, tag: Union[str, callable]) -> dict: +def get_terminal_record(records: list, tag: Union[str, callable], side: str) -> dict: """ Returns the most recent record based on the tag @@ -77,6 +77,9 @@ def get_most_recent(records: list, tag: Union[str, callable]) -> dict: tag: str or callable the tag to use to sort the records + side: str + determines if the first or last record is retrieved + Returns ------- dict @@ -97,7 +100,10 @@ def func(x): def func(x): return x[tag] - return sorted(records, key=func)[-1] + if side == "first": + return sorted(records, key=func)[0] + elif side == "last": + return sorted(records, key=func)[-1] def get_analyte_search_param(parameter: str, mapping: dict) -> str: From c2169db023b5567deafc61a10960fcd9616dbc9c Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 27 Mar 2025 16:59:12 -0600 Subject: [PATCH 006/143] Forgot to save a file before previous commit --- backend/connectors/st2/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/connectors/st2/source.py b/backend/connectors/st2/source.py index 04b7540..e6efd13 100644 --- a/backend/connectors/st2/source.py +++ b/backend/connectors/st2/source.py @@ -50,7 +50,7 @@ SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, ) -from backend.source 
import BaseSiteSource, BaseWaterLevelSource, get_most_recent +from backend.source import BaseSiteSource, BaseWaterLevelSource, get_terminal_record URL = "https://st2.newmexicowaterdata.org/FROST-Server/v1.1" From 3510008c8f67b9e2e083b708edaa75fcde69f1b5 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 11:15:59 -0600 Subject: [PATCH 007/143] sites only --- backend/connectors/nmenv/source.py | 3 +- backend/unifier.py | 40 +++++++------- frontend/cli.py | 83 +++++++++++------------------- frontend/cronjob_worker.sh | 3 ++ 4 files changed, 53 insertions(+), 76 deletions(-) create mode 100644 frontend/cronjob_worker.sh diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index b28413f..3646073 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -64,7 +64,8 @@ def get_records(self, *args, **kw): f"st_within(Locations/location, geography'{self.config.bounding_wkt()}')" ) q = q.expand("Locations") - q = q.filter(" and ".join(fs)) + if fs: + q = q.filter(" and ".join(fs)) return [thing.locations.entities[0] for thing in q.list()] else: analyte = get_analyte_search_param(analyte, DWB_ANALYTE_MAPPING) diff --git a/backend/unifier.py b/backend/unifier.py index 36dd3b5..0646ace 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -40,22 +40,11 @@ def health_check(source: BaseSiteSource) -> bool: return bool(source.health()) -def unify_sites(config): - print("Unifying sites\n") - - # def func(config, persister): - # for source in config.site_sources(): - # s = source() - # persister.load(s.read(config)) - - # _unify_wrapper(config, func) - - def unify_analytes(config): print("Unifying analytes\n") # config.report() -- report is done in cli.py, no need to do it twice config.validate() - + config.finalize() if not config.dry: _unify_parameter(config, config.analyte_sources()) @@ -67,17 +56,18 @@ def unify_waterlevels(config): # config.report() -- report is done in cli.py, no need to do it 
twice config.validate() - + config.finalize() if not config.dry: _unify_parameter(config, config.water_level_sources()) return True -def unify_sites_only(config): +def unify_sites(config): print("Unifying sites only\n") # config.report() -- report is done in cli.py, no need to do it twice config.validate() + config.finalize() if not config.dry: _unify_parameter(config, config.all_site_sources()) @@ -335,19 +325,25 @@ def site_unification_test(): # cfg.output_summary = True # cfg.output_single_timeseries = True - cfg.use_source_nwis = False - cfg.use_source_nmbgmr = False - cfg.use_source_iscsevenrivers = False - cfg.use_source_pvacd = False - # cfg.use_source_oseroswell = False cfg.use_source_bernco = False - cfg.use_source_iscsevenrivers = False - cfg.use_source_nmose_isc_seven_rivers = False + cfg.use_source_bor = False + cfg.use_source_cabq = False cfg.use_source_ebid = False + cfg.use_source_nmbgmr_amp = False + cfg.use_source_nmed_dwb = False + cfg.use_source_nmose_isc_seven_rivers = False + cfg.use_source_nmose_roswell = False + cfg.use_source_nwis = False + cfg.use_source_pvacd = False + cfg.use_source_wqp = False + + cfg.use_source_nmed_dwb = True + + cfg.finalize() - unify_sites_only(cfg) + unify_sites(cfg) def get_datastream(siteid): diff --git a/frontend/cli.py b/frontend/cli.py index 8dd330c..760f303 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -227,11 +227,11 @@ def weave( config = setup_config(f"{parameter}", bbox, county, site_limit, dry) config.parameter = parameter - # make sure config.output_name is properly set - config.update_output_name() - - # make output_path now so that die.log can be written to it live - config.make_output_path() + # # make sure config.output_name is properly set + # config.update_output_name() + # + # # make output_path now so that die.log can be written to it live + # config.make_output_path() # setup logging here so that the path can be set to config.output_path setup_logging(path=config.output_path) @@ -249,53 
+249,33 @@ def weave( summary = False timeseries_unified = False timeseries_separated = True + else: + click.echo(f"Invalid output type: {output}") + return config.output_summary = summary config.output_timeseries_unified = timeseries_unified config.output_timeseries_separated = timeseries_separated + false_agencies = [] + config_agencies = [] # sources if parameter == "waterlevels": - config.use_source_bernco = no_bernco - config.use_source_cabq = no_cabq - config.use_source_ebid = no_ebid - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_nmose_roswell = no_nmose_roswell - config.use_source_nwis = no_nwis - config.use_source_pvacd = no_pvacd - config.use_source_wqp = no_wqp - - config.use_source_bor = False - config.use_source_nmed_dwb = False + config_agencies =["bernco", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", + "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd"] + + false_agencies = ['bor', 'nmed_dwb'] elif parameter == "carbonate": - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_wqp = no_wqp - - config.use_source_bor = False - config.use_source_bernco = False - config.use_source_cabq = False - config.use_source_ebid = False - config.use_source_nmed_dwb = False - config.use_source_nmose_isc_seven_rivers = False - config.use_source_nmose_roswell = False - config.use_source_nwis = False - config.use_source_pvacd = False + config_agencies = ['nmbgmr_amp', 'wqp'] + false_agencies = ['bor', 'bernco', 'cabq', 'ebid', 'nmed_dwb', + 'nmose_isc_seven_rivers', 'nmose_roswell', 'nwis', 'pvacd'] elif parameter in ["arsenic", "uranium"]: - config.use_source_bor = no_bor - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmed_dwb = no_nmed_dwb - config.use_source_wqp = no_wqp - - config.use_source_bernco = False - config.use_source_cabq = False - config.use_source_ebid = False - config.use_source_nmose_isc_seven_rivers = False - 
config.use_source_nmose_roswell = False - config.use_source_nwis = False - config.use_source_pvacd = False + config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb', 'wqp'] + false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_isc_seven_rivers', + 'nmose_roswell', 'nwis', 'pvacd'] + elif parameter in [ "bicarbonate", @@ -311,19 +291,16 @@ def weave( "sulfate", "tds", ]: - config.use_source_bor = no_bor - config.use_source_nmbgmr_amp = no_nmbgmr_amp - config.use_source_nmed_dwb = no_nmed_dwb - config.use_source_nmose_isc_seven_rivers = no_nmose_isc_seven_rivers - config.use_source_wqp = no_wqp - - config.use_source_bernco = False - config.use_source_cabq = False - config.use_source_ebid = False - config.use_source_nmose_roswell = False - config.use_source_nwis = False - config.use_source_pvacd = False + config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb','nmose_isc_seven_rivers', 'wqp'] + false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_roswell', 'nwis', 'pvacd'] + + if false_agencies: + for agency in false_agencies: + setattr(config, f"use_source_{agency}", False) + if config_agencies: + for agency in config_agencies: + setattr(config, f"use_source_{agency}", getattr(locals(),f'no_{agency}')) # dates config.start_date = start_date config.end_date = end_date diff --git a/frontend/cronjob_worker.sh b/frontend/cronjob_worker.sh new file mode 100644 index 0000000..4a3925f --- /dev/null +++ b/frontend/cronjob_worker.sh @@ -0,0 +1,3 @@ + + +die weave \ No newline at end of file From e5ec261485d17f13d64bfdd52b9b46ce6f2f09a5 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 11:49:26 -0600 Subject: [PATCH 008/143] sites only cli --- frontend/cli.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 760f303..534b225 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -321,11 +321,34 @@ def weave( @cli.command() @add_options(SPATIAL_OPTIONS) -def wells(bbox, county): 
+@add_options(ALL_SOURCE_OPTIONS) +@add_options(DEBUG_OPTIONS) +def wells(bbox, county, + no_bernco, + no_bor, + no_cabq, + no_ebid, + no_nmbgmr_amp, + no_nmed_dwb, + no_nmose_isc_seven_rivers, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, + site_limit, + dry,): """ Get locations """ - config = setup_config("sites", bbox, county) + + + config = setup_config("sites", bbox, county, site_limit, dry) + config_agencies = ["bernco", "bor", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", + "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", + "wqp"] + for agency in config_agencies: + setattr(config, f"use_source_{agency}", getattr(locals(),f'no_{agency}')) + unify_sites(config) From 5feace88541f9ecbb77621dd3479bb152bfb8eab Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 11:54:29 -0600 Subject: [PATCH 009/143] sites only cli --- frontend/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 534b225..eb09581 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -298,9 +298,10 @@ def weave( for agency in false_agencies: setattr(config, f"use_source_{agency}", False) + lcs = locals() if config_agencies: for agency in config_agencies: - setattr(config, f"use_source_{agency}", getattr(locals(),f'no_{agency}')) + setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) # dates config.start_date = start_date config.end_date = end_date @@ -341,13 +342,13 @@ def wells(bbox, county, Get locations """ - config = setup_config("sites", bbox, county, site_limit, dry) config_agencies = ["bernco", "bor", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", "wqp"] + lcs = locals() for agency in config_agencies: - setattr(config, f"use_source_{agency}", getattr(locals(),f'no_{agency}')) + setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) unify_sites(config) From b77984678dd869905512ccbe33ba1d3cd156306f Mon Sep 17 
00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 11:56:59 -0600 Subject: [PATCH 010/143] sites only cli --- frontend/cli.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/frontend/cli.py b/frontend/cli.py index eb09581..37b51ae 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -350,6 +350,11 @@ def wells(bbox, county, for agency in config_agencies: setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + config.report() + # prompt user to continue + if not click.confirm("Do you want to continue?", default=True): + return + unify_sites(config) From f9b6a87dededa1beb0cf2377cd579ddbeac1e41d Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 12:08:08 -0600 Subject: [PATCH 011/143] sites only cli --- frontend/cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/cli.py b/frontend/cli.py index 37b51ae..89b16f1 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -350,6 +350,7 @@ def wells(bbox, county, for agency in config_agencies: setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + config.sites_only = True config.report() # prompt user to continue if not click.confirm("Do you want to continue?", default=True): From 21c505e9c0127afbfce47f47586a5f61b0c9ff81 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 31 Mar 2025 14:40:44 -0600 Subject: [PATCH 012/143] Add earliest records to summary table | rename most_recent latest This update adds the earliest record to the summary table. It also renames "most_recent" to "latest" in the summary table. 
The latter change was made to juxtapose earliest/latest, and it more aptly applies to datetime filters (for when they are implemented) --- CHANGELOG.md | 5 +- README.md | 14 ++- backend/connectors/bor/source.py | 7 +- backend/connectors/ckan/source.py | 4 +- backend/connectors/isc_seven_rivers/source.py | 19 ++-- backend/connectors/nmbgmr/source.py | 34 +++---- backend/connectors/nmenv/source.py | 6 +- backend/connectors/st2/source.py | 12 --- backend/connectors/st_connector.py | 10 +- backend/connectors/usgs/source.py | 6 +- backend/connectors/wqp/source.py | 14 +-- backend/constants.py | 3 + backend/record.py | 12 ++- backend/source.py | 93 +++++++++++++++---- backend/transformer.py | 67 ++++++++++--- 15 files changed, 205 insertions(+), 101 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6614b51..6734cb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ### Added - water level for WQP +- `earliest_date`, `earliest_time`, `earliest_value`, and `earliest_units` to the summary table ### Changed - NM OSE Roswell data is now pulled from ST2 and not CKAN - renamed the column `location` to `name` in the summary table to match the format of the `sites` table when timeseries data are exported - -### Fixed +- renamed the columns `most_recent_date`, `most_recent_time`, `most_recent_value`, and `most_recent_units` to `latest_date`, `latest_time`, `latest_value`, and `latest_units` respectively for succinctness and juxtaposition with the newly added `earliest` columns. + - This naming schema also enables the development of datetime filters as the descriptor will apply to the latest datetime within the provided time frame filter, whereas most recent indicates no filters.
## 0.7.0 diff --git a/README.md b/README.md index 9fa7ab7..1e83f8d 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ A log of the inputs and processes, called `die.log`, is also saved to the output | :----------- | :---------- | :-------- | :------------- | | source | the organization/source for the site | string | Y | | id | the id of the site. The id is used as the key to join the site and timeseries tables | string | Y | -| location | the colloquial name for the site | string | Y | +| name | the colloquial name for the site | string | Y | | usgs_site_id | USGS site id | string | N | | alternate_site_id | alternate site id | string | N | | latitude | latitude in decimal degrees | float | Y | @@ -114,10 +114,14 @@ A log of the inputs and processes, called `die.log`, is also saved to the output | min | the minimum observation | float | Y | | max | the maximum observation | float | Y | | mean | the mean value of the observations | float | Y | -| most_recent_date| date of most recent record in YYYY-MM-DD | string | Y | -| most_recent_time | time of most recent record in HH:MM:SS or HH:MM:SS.mmm | string | N | -| most_recent_value | value of the most recent record | float | Y | -| most_recent_units | units of the most recent record | string | Y | +| earliest_date| date of the earliest record in YYYY-MM-DD | string | Y | +| earliest_time | time of the earliest record in HH:MM:SS or HH:MM:SS.mmm | string | N | +| earliest_value | value of the earliest record | float | Y | +| earliest_units | units of the earliest record | string | Y | +| latest_date| date of the latest record in YYYY-MM-DD | string | Y | +| latest_time | time of the latest record in HH:MM:SS or HH:MM:SS.mmm | string | N | +| latest_value | value of the latest record | float | Y | +| latest_units | units of the latest record | string | Y | *CABQ elevation is calculated as [elevation at top of casing] - [stickup height]; if stickup height < 0 the measuring point is assumed to be
beneath the ground surface diff --git a/backend/connectors/bor/source.py b/backend/connectors/bor/source.py index 3fd6a73..29b5d38 100644 --- a/backend/connectors/bor/source.py +++ b/backend/connectors/bor/source.py @@ -27,6 +27,8 @@ SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, DT_MEASURED, + EARLIEST, + LATEST ) from backend.source import ( @@ -93,9 +95,8 @@ def _extract_parameter_dates(self, records): def _extract_source_parameter_names(self, records): return [self._source_parameter_name for ri in records] - def _extract_most_recent(self, rs): - - record = get_terminal_record(rs, "attributes.dateTime", side="last") + def _extract_terminal_record(self, records, bookend): + record = get_terminal_record(records, "attributes.dateTime", bookend=bookend) return { "value": record["attributes"]["result"], "datetime": parse_dt(record["attributes"]["dateTime"]), diff --git a/backend/connectors/ckan/source.py b/backend/connectors/ckan/source.py index 9301a70..736d668 100644 --- a/backend/connectors/ckan/source.py +++ b/backend/connectors/ckan/source.py @@ -138,8 +138,8 @@ def _parse_response(self, site_record, resp): def _extract_source_parameter_results(self, records): return [float(r["DTWGS"]) for r in records] - def _extract_most_recent(self, records): - record = get_terminal_record(records, tag="Date", side="last") + def _extract_terminal_record(self, records, bookend): + record = get_terminal_record(records, tag="Date", bookend=bookend) return { "value": record["DTWGS"], "datetime": record["Date"], diff --git a/backend/connectors/isc_seven_rivers/source.py b/backend/connectors/isc_seven_rivers/source.py index d022aa8..a7fb384 100644 --- a/backend/connectors/isc_seven_rivers/source.py +++ b/backend/connectors/isc_seven_rivers/source.py @@ -28,6 +28,8 @@ PARAMETER_UNITS, SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, + EARLIEST, + LATEST ) from backend.connectors.isc_seven_rivers.transformer import ( ISCSevenRiversSiteTransformer, @@ -120,8 +122,8 @@ def 
_extract_parameter_record(self, record): return record - def _extract_most_recent(self, records): - record = get_terminal_record(records, "dateTime", side="last") + def _extract_terminal_record(self, records, bookend): + record = get_terminal_record(records, "dateTime", bookend=bookend) return { "value": record["result"], @@ -185,7 +187,7 @@ def get_records(self, site_record): ) def _clean_records(self, records): - return [r for r in records if r["depthToWaterFeet"] is not None] + return [r for r in records if r["depthToWaterFeet"] is not None and not r["invalid"] and not r["dry"]] def _extract_parameter_record(self, record): record[PARAMETER_NAME] = DTW @@ -197,9 +199,7 @@ def _extract_parameter_record(self, record): return record def _extract_source_parameter_results(self, records): - return [ - r["depthToWaterFeet"] for r in records if not r["invalid"] and not r["dry"] - ] + return [r["depthToWaterFeet"] for r in records] def _extract_parameter_dates(self, records: list) -> list: return [get_datetime(r) for r in records] @@ -210,15 +210,14 @@ def _extract_source_parameter_names(self, records): def _extract_source_parameter_units(self, records): return [self._source_parameter_units for r in records] - def _extract_most_recent(self, records): - record = get_terminal_record(records, "dateTime", side="last") + def _extract_terminal_record(self, records, bookend): + record = get_terminal_record(records, "dateTime", bookend=bookend) t = get_datetime(record) return { "value": record["depthToWaterFeet"], "datetime": t, "source_parameter_units": self._source_parameter_units, - "source_parameter_name": DTW, + "source_parameter_name": self._source_parameter_name, } - # ============= EOF ============================================= diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index 5509cea..5940926 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -33,6 +33,8 @@ PARAMETER_VALUE, 
SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, + EARLIEST, + LATEST, ) from backend.source import ( BaseWaterLevelSource, @@ -85,18 +87,18 @@ def get_records(self): _make_url("locations"), params, tag="features", timeout=30 ) for site in sites: - print(f"Obtaining well data for {site['properties']['point_id']}") - well_data = self._execute_json_request( - _make_url("wells"), - params={"pointid": site["properties"]["point_id"]}, - tag="", - ) - site["properties"]["formation"] = well_data["formation"] - site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] - site["properties"]["well_depth_units"] = FEET - # site["properties"]["formation"] = None - # site["properties"]["well_depth"] = None + # print(f"Obtaining well data for {site['properties']['point_id']}") + # well_data = self._execute_json_request( + # _make_url("wells"), + # params={"pointid": site["properties"]["point_id"]}, + # tag="", + # ) + # site["properties"]["formation"] = well_data["formation"] + # site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] # site["properties"]["well_depth_units"] = FEET + site["properties"]["formation"] = None + site["properties"]["well_depth"] = None + site["properties"]["well_depth_units"] = FEET return sites @@ -130,9 +132,9 @@ def _extract_site_records(self, records, site_record): def _extract_source_parameter_units(self, records): return [r["Units"] for r in records] - - def _extract_most_recent(self, records): - record = get_terminal_record(records, "info.CollectionDate", side="last") + + def _extract_terminal_record(self, records, bookend): + record = get_terminal_record(records, "info.CollectionDate", bookend=bookend) return { "value": record["SampleValue"], "datetime": record["info"]["CollectionDate"], @@ -179,8 +181,8 @@ def _extract_parameter_record(self, record, *args, **kw): record[SOURCE_PARAMETER_UNITS] = record["DepthToWaterBGSUnits"] return record - def _extract_most_recent(self, records): - record = get_terminal_record(records, 
"DateMeasured", side="last") + def _extract_terminal_record(self, records, bookend): + record = get_terminal_record(records, "DateMeasured", bookend=bookend) return { "value": record["DepthToWaterBGS"], "datetime": (record["DateMeasured"], record["TimeMeasured"]), diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index b18bd85..fb3d9b0 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -27,6 +27,8 @@ DT_MEASURED, SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, + EARLIEST, + LATEST, ) from backend.source import get_analyte_search_param, get_terminal_record @@ -150,10 +152,10 @@ def _extract_parameter_dates(self, records: list) -> list: def _extract_source_parameter_names(self, records: list) -> list: return [r["datastream"].observed_property.name for r in records] - def _extract_most_recent(self, records): + def _extract_terminal_record(self, records, bookend): # this is only used in summary output record = get_terminal_record( - records, tag=lambda x: x["observation"].phenomenon_time, side="last" + records, tag=lambda x: x["observation"].phenomenon_time, bookend=bookend ) return { diff --git a/backend/connectors/st2/source.py b/backend/connectors/st2/source.py index e6efd13..181513b 100644 --- a/backend/connectors/st2/source.py +++ b/backend/connectors/st2/source.py @@ -113,18 +113,6 @@ def __repr__(self): class ST2WaterLevelSource(STWaterLevelSource): url = URL - def _extract_most_recent(self, records): - record = get_terminal_record( - records, tag=lambda x: x["observation"].phenomenon_time, side="last" - ) - - return { - "value": record["observation"].result, - "datetime": record["observation"].phenomenon_time, - "source_parameter_units": record["datastream"].unit_of_measurement.symbol, - "source_parameter_name": record["datastream"].name, - } - def _extract_parameter_record(self, record): record[PARAMETER_NAME] = DTW record[PARAMETER_VALUE] = record["observation"].result diff --git 
a/backend/connectors/st_connector.py b/backend/connectors/st_connector.py index 5a6c743..bee66f3 100644 --- a/backend/connectors/st_connector.py +++ b/backend/connectors/st_connector.py @@ -19,6 +19,7 @@ from shapely import MultiPolygon, Polygon, unary_union from backend.bounding_polygons import get_state_polygon +from backend.constants import EARLIEST, LATEST from backend.source import ( BaseSiteSource, BaseWaterLevelSource, @@ -55,16 +56,17 @@ def _get_things( things.filter(" and ".join(fs)) return things.list() - - def _extract_most_recent(self, records): + + def _extract_terminal_record(self, records, bookend): record = get_terminal_record( - records, tag=lambda x: x["observation"].phenomenon_time, side="last" + records, tag=lambda x: x["observation"].phenomenon_time, bookend=bookend ) return { "value": self._parse_result(record["observation"].result), "datetime": record["observation"].phenomenon_time, - "units": record["datastream"].unit_of_measurement.symbol, + "source_parameter_units": record["datastream"].unit_of_measurement.symbol, + "source_parameter_name": record["datastream"].name, } def _parse_result(self, result): diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index ad23065..e60029b 100644 --- a/backend/connectors/usgs/source.py +++ b/backend/connectors/usgs/source.py @@ -27,6 +27,8 @@ PARAMETER_UNITS, SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, + EARLIEST, + LATEST, ) from backend.connectors.usgs.transformer import ( NWISSiteTransformer, @@ -193,8 +195,8 @@ def _extract_source_parameter_names(self, records: list) -> list: def _extract_source_parameter_units(self, records): return [r["source_parameter_units"] for r in records] - def _extract_most_recent(self, records): - record = get_terminal_record(records, "datetime_measured", side="last") + def _extract_terminal_record(self, records, bookend): + record = get_terminal_record(records, "datetime_measured", bookend=bookend) return { "value": 
float(record["value"]), # "datetime": (record["date_measured"], record["time_measured"]), diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py index 193620b..12f21a2 100644 --- a/backend/connectors/wqp/source.py +++ b/backend/connectors/wqp/source.py @@ -26,6 +26,8 @@ SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, DT_MEASURED, + EARLIEST, + LATEST, ) from backend.connectors.wqp.transformer import ( WQPSiteTransformer, @@ -139,13 +141,13 @@ def _extract_parameter_dates(self, records): def _extract_source_parameter_names(self, records): return [ri["CharacteristicName"] for ri in records] - def _extract_most_recent(self, records): - ri = get_terminal_record(records, "ActivityStartDate", side="last") + def _extract_terminal_record(self, records, bookend): + record = get_terminal_record(records, "ActivityStartDate", bookend=bookend) return { - "value": ri["ResultMeasureValue"], - "datetime": ri["ActivityStartDate"], - "source_parameter_units": ri["ResultMeasure/MeasureUnitCode"], - "source_parameter_name": ri["CharacteristicName"], + "value": record["ResultMeasureValue"], + "datetime": record["ActivityStartDate"], + "source_parameter_units": record["ResultMeasure/MeasureUnitCode"], + "source_parameter_name": record["CharacteristicName"], } def get_records(self, site_record): diff --git a/backend/constants.py b/backend/constants.py index 2f56f9c..b7635ab 100644 --- a/backend/constants.py +++ b/backend/constants.py @@ -13,6 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# =============================================================================== +EARLIEST = "earliest" +LATEST = "latest" + WATERLEVELS = "waterlevels" ARSENIC = "arsenic" diff --git a/backend/record.py b/backend/record.py index 3834687..5cfe5e9 100644 --- a/backend/record.py +++ b/backend/record.py @@ -126,10 +126,14 @@ class SummaryRecord(BaseRecord): "min", "max", "mean", - "most_recent_date", - "most_recent_time", - "most_recent_value", - "most_recent_units", + "earliest_date", + "earliest_time", + "earliest_value", + "earliest_units", + "latest_date", + "latest_time", + "latest_value", + "latest_units", ) defaults: dict = {} diff --git a/backend/source.py b/backend/source.py index 2b591b6..f66defb 100644 --- a/backend/source.py +++ b/backend/source.py @@ -32,6 +32,8 @@ PARAMETER_NAME, PARAMETER_UNITS, PARAMETER_VALUE, + EARLIEST, + LATEST, ) from backend.logging import Loggable from backend.persister import BasePersister, CSVPersister @@ -65,7 +67,7 @@ def make_site_list(site_record: list | dict) -> list | str: return sites -def get_terminal_record(records: list, tag: Union[str, callable], side: str) -> dict: +def get_terminal_record(records: list, tag: Union[str, callable], bookend: str) -> dict: """ Returns the most recent record based on the tag @@ -77,8 +79,8 @@ def get_terminal_record(records: list, tag: Union[str, callable], side: str) -> tag: str or callable the tag to use to sort the records - side: str - determines if the first or last record is retrieved + bookend: str + determines if the earliest or latest record is retrieved Returns ------- @@ -100,9 +102,9 @@ def func(x): def func(x): return x[tag] - if side == "first": + if bookend == EARLIEST: return sorted(records, key=func)[0] - elif side == "last": + elif bookend == LATEST: return sorted(records, key=func)[-1] @@ -553,6 +555,14 @@ class BaseParameterSource(BaseSource): Methods With Universal Implementations (Already Implemented)
============================================================================ + _extract_earliest_record + Returns the earliest record for a particular site. Requires _extract_terminal_record + to be implemented for each source + + _extract_latest_record + Returns the most recent record for a particular site. Requires _extract_terminal_record + to be implemented for each source + read Reads the parameter records and returns the transformed records, where the transform standardizes the records so the format is the same for all sources @@ -579,8 +589,9 @@ class BaseParameterSource(BaseSource): _extract_site_records Returns all records for a single site as a list of records - _extract_most_recent - Returns the most recent record + _extract_terminal_record + Returns the terminal record for a particular site. This is only used for + summary, not time series, outputs. _clean_records (optional) Returns cleaned records if this function is defined for each source. @@ -606,6 +617,39 @@ class BaseParameterSource(BaseSource): # Methods Already Implemented # ========================================================================== + def _extract_earliest_record(self, records: list) -> dict: + """ + Returns the earliest record for a particular site + + Parameters + ---------- + records : list + a list of records + + Returns + ------- + dict + the earliest record + """ + return self._extract_terminal_record(records, bookend=EARLIEST) + + + def _extract_latest_record(self, records: list) -> dict: + """ + Returns the most recent record for a particular site + + Parameters + ---------- + records : list + a list of records + + Returns + ------- + dict + the most recent record + """ + return self._extract_terminal_record(records, bookend=LATEST) + def read( self, site_record: SiteRecord, use_summarize: bool, start_ind: int, end_ind: int ) -> List[ @@ -657,7 +701,6 @@ def read( if not site_records: self.warn(f"{site.id}: No records found") continue - # get cleaned records if 
_clean_records is defined by the source. This usually removes Nones/Null cleaned = self._clean_records(site_records) if not cleaned: @@ -711,20 +754,29 @@ def read( if kept_items is not None and len(kept_items): n = len(kept_items) - most_recent_result = self._extract_most_recent(cleaned) - if not most_recent_result: + earliest_result = self._extract_earliest_record(cleaned) + latest_result = self._extract_latest_record(cleaned) + if not latest_result: continue rec = { "nrecords": n, "min": min(kept_items), "max": max(kept_items), "mean": sum(kept_items) / n, - "most_recent_datetime": most_recent_result["datetime"], - "most_recent_value": most_recent_result["value"], - "most_recent_source_units": most_recent_result[ + "earliest_datetime": earliest_result["datetime"], + "earliest_value": earliest_result["value"], + "earliest_source_units": earliest_result[ + "source_parameter_units" + ], + "earliest_source_name": earliest_result[ + "source_parameter_name" + ], + "latest_datetime": latest_result["datetime"], + "latest_value": latest_result["value"], + "latest_source_units": latest_result[ "source_parameter_units" ], - "most_recent_source_name": most_recent_result[ + "latest_source_name": latest_result[ "source_parameter_name" ], } @@ -850,23 +902,26 @@ def _clean_records(self, records: list) -> list: source. Otherwise returns the records as is. 
""" return records - - def _extract_most_recent(self, records: list) -> dict: + + def _extract_terminal_record(self, records, bookend): """ - Returns the most recent record for a particular site + Returns the terminal record for a particular site Parameters ---------- records : list a list of records + bookend : str + determines if the first or last record is retrieved + Returns ------- dict - the most recent record + the most recent record for every site """ raise NotImplementedError( - f"{self.__class__.__name__} Must implement _extract_most_recent" + f"{self.__class__.__name__} Must implement _extract_terminal_record" ) def _extract_source_parameter_units(self, records: list) -> list: diff --git a/backend/transformer.py b/backend/transformer.py index 2ce727c..3a78556 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -30,6 +30,8 @@ MICROGRAMS_PER_LITER, DT_MEASURED, DTW, + EARLIEST, + LATEST ) from backend.geo_utils import datum_transform, ALLOWED_DATUMS from backend.logging import Loggable @@ -411,7 +413,7 @@ def do_transform( record["date_measured"] = d record["time_measured"] = t else: - mrd = record.get("most_recent_datetime") + mrd = record.get("latest_datetime") if mrd: d, t = standardize_datetime(mrd, record["id"]) record["date_measured"] = d @@ -663,7 +665,8 @@ def _transform(self, record, site_record): rec = {} if self.config.output_summary: - self._transform_most_recents(record, site_record.id) + self._transform_earliest_record(record, site_record.id) + self._transform_latest_record(record, site_record.id) parameter, units = self._get_parameter_name_and_units() rec.update( @@ -695,25 +698,61 @@ def _transform(self, record, site_record): rec.update(source_id) return rec - def _transform_most_recents(self, record, site_id): - # convert most_recents - dt, tt = standardize_datetime(record["most_recent_datetime"], site_id) - record["most_recent_date"] = dt - record["most_recent_time"] = tt - parameter_name, unit = 
self._get_parameter_name_and_units() + def _transform_terminal_record(self, record, site_id, bookend): + """ + Convert either the earliest or latest record to the standard format. + + Parameters + -------- + record: dict + The record to convert + + site_id: str + The site ID for the record - converted_most_recent_value, conversion_factor, warning_msg = convert_units( - record["most_recent_value"], - record["most_recent_source_units"], + bookend: str + The bookend of the record to convert. Either "earliest" or "latest" + """ + if bookend == EARLIEST: + datetime_key = "earliest_datetime" + date_key = "earliest_date" + time_key = "earliest_time" + value_key = "earliest_value" + unit_key = "earliest_units" + source_units_key = "earliest_source_units" + source_name_key = "earliest_source_name" + elif bookend == LATEST: + datetime_key = "latest_datetime" + date_key = "latest_date" + time_key = "latest_time" + value_key = "latest_value" + unit_key = "latest_units" + source_units_key = "latest_source_units" + source_name_key = "latest_source_name" + + dt, tt = standardize_datetime(record[datetime_key], site_id) + parameter_name, unit = self._get_parameter_name_and_units() + converted_value, conversion_factor, warning_msg = convert_units( + record[value_key], + record[source_units_key], unit, - record["most_recent_source_name"], + record[source_name_key], parameter_name, dt, ) # all failed conversions are skipped and handled in source.read(), so no need to duplicate here - record["most_recent_value"] = converted_most_recent_value - record["most_recent_units"] = unit + record[date_key] = dt + record[time_key] = tt + record[value_key] = converted_value + record[unit_key] = unit + + + def _transform_earliest_record(self, record, site_id): + self._transform_terminal_record(record, site_id, EARLIEST) + + def _transform_latest_record(self, record, site_id): + self._transform_terminal_record(record, site_id, LATEST) class WaterLevelTransformer(ParameterTransformer): From 
f86885cb8c9ef652ae5d51f2eb88452b15ad9bdd Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 31 Mar 2025 14:42:31 -0600 Subject: [PATCH 013/143] bump version to 0.8.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1b99d57..ab8fd6d 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.7.1", + version="0.8.0", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 05cae071846c7ee6a4d3da343b39e1307c3ae9ac Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 31 Mar 2025 14:47:57 -0600 Subject: [PATCH 014/143] Remove old log/warn --- backend/source.py | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/backend/source.py b/backend/source.py index f66defb..fddd157 100644 --- a/backend/source.py +++ b/backend/source.py @@ -205,43 +205,6 @@ def discover(self, *args, **kw): # Methods Already Implemented # ========================================================================== - # def warn(self, msg): - # """ - # Prints warning messages to the console in red - # - # Parameters - # ---------- - # msg : str - # the message to print - # - # Returns - # ------- - # None - # """ - # s = self.log(msg, fg="red") - # self.config.warnings.append(s) - - # def log(self, msg, fg="yellow"): - # """ - # Prints the message to the console in yellow - # - # Parameters - # ---------- - # msg : str - # the message to print - # - # fg : str - # the color of the message, defaults to yellow - # - # Returns - # ------- - # None - # """ - # s = f"{self.__class__.__name__:25s} -- {msg}" - # click.secho(s, fg=fg) - # self.config.logs.append(s) - # return s - def _execute_text_request(self, url: str, params=None, **kw) -> str: """ Executes a get request to the provided url and returns the text response. 
From c7b1e3d27daf8e3dfb33359838d5732b2f3b0a18 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 14:55:03 -0600 Subject: [PATCH 015/143] sites only cli --- frontend/cli.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 89b16f1..7201bd4 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -136,6 +136,12 @@ def cli(): default=False, help="Dry run. Do not execute unifier. Used by unit tests", ), + click.option( + "--yes", + is_flag=True, + default=False, + help="Do not ask for confirmation before running", + ), ] DT_OPTIONS = [ @@ -337,7 +343,8 @@ def wells(bbox, county, no_pvacd, no_wqp, site_limit, - dry,): + dry, + yes): """ Get locations """ @@ -352,9 +359,10 @@ def wells(bbox, county, config.sites_only = True config.report() - # prompt user to continue - if not click.confirm("Do you want to continue?", default=True): - return + if not yes: + # prompt user to continue + if not click.confirm("Do you want to continue?", default=True): + return unify_sites(config) From a9d671f77ebb839dff876b71f18229f2e44ac2fa Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:01:02 -0600 Subject: [PATCH 016/143] sites only cli --- backend/config.py | 1 + backend/unifier.py | 5 ++--- frontend/cli.py | 16 +++++++++++----- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/backend/config.py b/backend/config.py index 89c4f59..0396a73 100644 --- a/backend/config.py +++ b/backend/config.py @@ -189,6 +189,7 @@ def __init__(self, model=None, payload=None): setattr(self, f"use_source_{s}", s in payload.get("sources", [])) def finalize(self): + self._update_output_units() self.update_output_name() self.make_output_path() diff --git a/backend/unifier.py b/backend/unifier.py index 0646ace..3e46008 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -44,7 +44,7 @@ def unify_analytes(config): print("Unifying analytes\n") # config.report() -- report is done in cli.py, no need to do 
it twice config.validate() - config.finalize() + if not config.dry: _unify_parameter(config, config.analyte_sources()) @@ -56,7 +56,7 @@ def unify_waterlevels(config): # config.report() -- report is done in cli.py, no need to do it twice config.validate() - config.finalize() + if not config.dry: _unify_parameter(config, config.water_level_sources()) @@ -67,7 +67,6 @@ def unify_sites(config): # config.report() -- report is done in cli.py, no need to do it twice config.validate() - config.finalize() if not config.dry: _unify_parameter(config, config.all_site_sources()) diff --git a/frontend/cli.py b/frontend/cli.py index 7201bd4..8e1ebe1 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -239,9 +239,6 @@ def weave( # # make output_path now so that die.log can be written to it live # config.make_output_path() - # setup logging here so that the path can be set to config.output_path - setup_logging(path=config.output_path) - # output type if output == "summary": summary = True @@ -312,14 +309,16 @@ def weave( config.start_date = start_date config.end_date = end_date + config.finalize() + # setup logging here so that the path can be set to config.output_path + setup_logging(path=config.output_path) + if not dry: config.report() # prompt user to continue if not click.confirm("Do you want to continue?", default=True): return - config._update_output_units() - if parameter.lower() == "waterlevels": unify_waterlevels(config) else: @@ -328,9 +327,11 @@ def weave( @cli.command() @add_options(SPATIAL_OPTIONS) +@add_options(OUTPUT_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def wells(bbox, county, + output, no_bernco, no_bor, no_cabq, @@ -358,6 +359,11 @@ def wells(bbox, county, setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) config.sites_only = True + + config.finalize() + # setup logging here so that the path can be set to config.output_path + setup_logging(path=config.output_path) + config.report() if not yes: # prompt user 
to continue From 49e4f6845691e22a5e80265cbfd5819bc9ab44e0 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:04:46 -0600 Subject: [PATCH 017/143] sites only cli --- frontend/cli.py | 61 +++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 8e1ebe1..580d153 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -23,6 +23,7 @@ from backend.logging import setup_logging + # setup_logging() @@ -180,7 +181,12 @@ def cli(): type=click.Choice(["summary", "timeseries_unified", "timeseries_separated"]), required=True, help="Output summary file, single unified timeseries file, or separated timeseries files", - ) + ), + click.option( + "--output-dir", + default=".", + help="Output root directory. Default is current directory", + ), ] @@ -205,25 +211,26 @@ def _add_options(func): @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def weave( - weave, - output, - start_date, - end_date, - bbox, - county, - no_bernco, - no_bor, - no_cabq, - no_ebid, - no_nmbgmr_amp, - no_nmed_dwb, - no_nmose_isc_seven_rivers, - no_nmose_roswell, - no_nwis, - no_pvacd, - no_wqp, - site_limit, - dry, + weave, + output, + output_dir, + start_date, + end_date, + bbox, + county, + no_bernco, + no_bor, + no_cabq, + no_ebid, + no_nmbgmr_amp, + no_nmed_dwb, + no_nmose_isc_seven_rivers, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, + site_limit, + dry, ): """ Get parameter timeseries or summary data @@ -264,20 +271,20 @@ def weave( config_agencies = [] # sources if parameter == "waterlevels": - config_agencies =["bernco", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", - "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd"] + config_agencies = ["bernco", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", + "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd"] false_agencies = ['bor', 'nmed_dwb'] elif parameter == "carbonate": config_agencies = ['nmbgmr_amp', 'wqp'] false_agencies = 
['bor', 'bernco', 'cabq', 'ebid', 'nmed_dwb', - 'nmose_isc_seven_rivers', 'nmose_roswell', 'nwis', 'pvacd'] + 'nmose_isc_seven_rivers', 'nmose_roswell', 'nwis', 'pvacd'] elif parameter in ["arsenic", "uranium"]: config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb', 'wqp'] false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_isc_seven_rivers', - 'nmose_roswell', 'nwis', 'pvacd'] + 'nmose_roswell', 'nwis', 'pvacd'] elif parameter in [ @@ -294,7 +301,7 @@ def weave( "sulfate", "tds", ]: - config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb','nmose_isc_seven_rivers', 'wqp'] + config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb', 'nmose_isc_seven_rivers', 'wqp'] false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_roswell', 'nwis', 'pvacd'] if false_agencies: @@ -332,6 +339,7 @@ def weave( @add_options(DEBUG_OPTIONS) def wells(bbox, county, output, + output_dir, no_bernco, no_bor, no_cabq, @@ -359,7 +367,7 @@ def wells(bbox, county, setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) config.sites_only = True - + config.output_dir = output_dir config.finalize() # setup logging here so that the path can be set to config.output_path setup_logging(path=config.output_path) @@ -414,5 +422,4 @@ def setup_config(tag, bbox, county, site_limit, dry): return config - # ============= EOF ============================================= From 60eadafee1b8b39c54060b60c6e6dda9896500b4 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:06:52 -0600 Subject: [PATCH 018/143] sites only cli --- frontend/cli.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 580d153..3f04aab 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -182,11 +182,14 @@ def cli(): required=True, help="Output summary file, single unified timeseries file, or separated timeseries files", ), - click.option( + +] +PERSISTER_OPTIONS = [ + click.option(click.option( "--output-dir", default=".", help="Output root directory. 
Default is current directory", - ), + )) ] @@ -206,6 +209,7 @@ def _add_options(func): required=True, ) @add_options(OUTPUT_OPTIONS) +@add_options(PERSISTER_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @@ -335,6 +339,7 @@ def weave( @cli.command() @add_options(SPATIAL_OPTIONS) @add_options(OUTPUT_OPTIONS) +@add_options(PERSISTER_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def wells(bbox, county, From 5ddf81441d0b56187e3d9d90db65a3743342fd89 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:08:01 -0600 Subject: [PATCH 019/143] sites only cli --- frontend/cli.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 3f04aab..73c1a61 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -338,12 +338,10 @@ def weave( @cli.command() @add_options(SPATIAL_OPTIONS) -@add_options(OUTPUT_OPTIONS) @add_options(PERSISTER_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def wells(bbox, county, - output, output_dir, no_bernco, no_bor, From 9ad78f85add651621e3cc169b59a3d80c7974797 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:08:40 -0600 Subject: [PATCH 020/143] sites only cli --- frontend/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 73c1a61..8bb9811 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -185,11 +185,11 @@ def cli(): ] PERSISTER_OPTIONS = [ - click.option(click.option( + click.option( "--output-dir", default=".", help="Output root directory. 
Default is current directory", - )) + ) ] From e1754477d711770b113f55f56a3b1cfd40fd20d7 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:10:24 -0600 Subject: [PATCH 021/143] sites only cli --- backend/config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/config.py b/backend/config.py index 0396a73..03595fb 100644 --- a/backend/config.py +++ b/backend/config.py @@ -192,6 +192,7 @@ def finalize(self): self._update_output_units() self.update_output_name() self.make_output_path() + self.make_output_directory() def all_site_sources(self): sources = self.water_level_sources() @@ -395,6 +396,12 @@ def _validate_county(self): return bool(get_county_polygon(self.county)) return True + def make_output_directory(self): + """ + Create the output directory if it doesn't exist. + """ + if not os.path.exists(self.output_dir): + os.mkdir(self.output_dir) def update_output_name(self): """ From deae8d327003bd1f5d23467e7fff673ebad69ac7 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:11:01 -0600 Subject: [PATCH 022/143] sites only cli --- backend/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index 03595fb..b9dea0c 100644 --- a/backend/config.py +++ b/backend/config.py @@ -190,9 +190,9 @@ def __init__(self, model=None, payload=None): def finalize(self): self._update_output_units() + self.make_output_directory() self.update_output_name() self.make_output_path() - self.make_output_directory() def all_site_sources(self): sources = self.water_level_sources() From 12e6d076655bfc685eff8b466300d7fb377f9a5f Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 15:22:40 -0600 Subject: [PATCH 023/143] sites only cli --- backend/config.py | 59 ++++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/backend/config.py b/backend/config.py index b9dea0c..4639987 100644 --- a/backend/config.py +++ b/backend/config.py @@ 
-61,46 +61,31 @@ from .connectors.usgs.source import NWISSiteSource, NWISWaterLevelSource from .connectors.wqp.source import WQPSiteSource, WQPAnalyteSource, WQPWaterLevelSource -SOURCE_KEYS = ( - "bernco", - "bor", - "cabq", - "ebid", - "nmbgmr_amp", - "nmed_dwb", - "nmose_isc_seven_rivers", - "nmose_roswell", - "nwis", - "pvacd", - "wqp", -) +SOURCE_DICT = { + "bernco": BernCoSiteSource, + "bor": BORSiteSource, + "cabq": CABQSiteSource, + "ebid": EBIDSiteSource, + "nmbgmr_amp": NMBGMRSiteSource, + "nmed_dwb": DWBSiteSource, + "nmose_isc_seven_rivers": ISCSevenRiversSiteSource, + "nmose_roswell": NMOSERoswellSiteSource, + "nwis": NWISSiteSource, + "pvacd": PVACDSiteSource, + "wqp": WQPSiteSource, +} + +SOURCE_KEYS = list(SOURCE_DICT.keys()) def get_source(source): - if source == "bernco": - return BernCoSiteSource() - elif source == "bor": - return BORSiteSource() - elif source == "cabq": - return CABQSiteSource() - elif source == "ebid": - return EBIDSiteSource() - elif source == "nmbgmr_amp": - return NMBGMRSiteSource() - elif source == "nmed_dwb": - return DWBSiteSource() - elif source == "nmose_isc_seven_rivers": - return ISCSevenRiversSiteSource() - elif source == "nmose_roswell": - return NMOSERoswellSiteSource() - elif source == "nwis": - return NWISSiteSource() - elif source == "pvacd": - return PVACDSiteSource() - elif source == "wqp": - return WQPSiteSource() - - return None + try: + klass = SOURCE_DICT[source] + except KeyError: + raise ValueError(f"Unknown source {source}") + + if klass: + return klass() class Config(Loggable): From c29e12bd61a4c3c587594f6d2511afd856e39689 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 31 Mar 2025 15:31:29 -0600 Subject: [PATCH 024/143] change logging.py to logger.py logging.py is a common name for a module in Python, and renaming it to logger.py can help avoid errors with the built-in logging module. 
--- backend/bounding_polygons.py | 4 +--- backend/config.py | 2 +- backend/{logging.py => logger.py} | 0 backend/source.py | 2 +- backend/transformer.py | 2 +- frontend/cli.py | 2 +- 6 files changed, 5 insertions(+), 7 deletions(-) rename backend/{logging.py => logger.py} (100%) diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index d9cd100..e0426c9 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -232,7 +232,5 @@ def _get_cached_object(name, msg, url): if __name__ == "__main__": - # w = get_huc_polygon('0101000201') - # print(w) - print(get_state_hucs_boundaries(state="CO", level=4)) + print(get_state_polygon("NM")) # ============= EOF ============================================= diff --git a/backend/config.py b/backend/config.py index ec9be97..3ccd570 100644 --- a/backend/config.py +++ b/backend/config.py @@ -20,7 +20,7 @@ import shapely.wkt -from backend.logging import Loggable +from backend.logger import Loggable from .bounding_polygons import get_county_polygon from .connectors.nmbgmr.source import ( diff --git a/backend/logging.py b/backend/logger.py similarity index 100% rename from backend/logging.py rename to backend/logger.py diff --git a/backend/source.py b/backend/source.py index fddd157..1faa697 100644 --- a/backend/source.py +++ b/backend/source.py @@ -35,7 +35,7 @@ EARLIEST, LATEST, ) -from backend.logging import Loggable +from backend.logger import Loggable from backend.persister import BasePersister, CSVPersister from backend.record import ( AnalyteRecord, diff --git a/backend/transformer.py b/backend/transformer.py index 3a78556..cb774d0 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -34,7 +34,7 @@ LATEST ) from backend.geo_utils import datum_transform, ALLOWED_DATUMS -from backend.logging import Loggable +from backend.logger import Loggable from backend.record import ( WaterLevelSummaryRecord, WaterLevelRecord, diff --git a/frontend/cli.py b/frontend/cli.py index 
e03ac0b..2cbab6f 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -21,7 +21,7 @@ from backend.constants import PARAMETER_OPTIONS from backend.unifier import unify_sites, unify_waterlevels, unify_analytes -from backend.logging import setup_logging +from backend.logger import setup_logging # setup_logging() From 79b651291a70e1e0a13f2e68407d38cfb51e6205 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 31 Mar 2025 16:39:12 -0600 Subject: [PATCH 025/143] Remove sites outside of NM --- backend/bounding_polygons.py | 8 +++++--- backend/transformer.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index e0426c9..1bf713a 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -15,6 +15,7 @@ # =============================================================================== import json import os +from pprint import pprint import click import httpx @@ -167,9 +168,7 @@ def get_state_polygon(state): f"{state} state", f"https://reference.geoconnex.us/collections/states/items/{statefp}?&f=json", ) - - return shape(obj["geometry"]) - + return shape(obj["features"][0]["geometry"]) # private helpers ============================ def _make_shape(obj, as_wkt): @@ -231,6 +230,9 @@ def _get_cached_object(name, msg, url): return obj +NM_BOUNDARY = get_state_polygon("NM") + + if __name__ == "__main__": print(get_state_polygon("NM")) # ============= EOF ============================================= diff --git a/backend/transformer.py b/backend/transformer.py index cb774d0..c76ebf0 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -20,6 +20,7 @@ import shapely from shapely import Point +from backend.bounding_polygons import NM_BOUNDARY from backend.constants import ( MILLIGRAMS_PER_LITER, PARTS_PER_MILLION, @@ -458,10 +459,19 @@ def do_transform( input_horizontal_datum, output_horizontal_datum, ) + + if not self.in_nm(lng, lat): 
+ self.warn( + f"Skipping site {record.id}. Coordinates {x}, {y} with datum {input_horizontal_datum} are not in New Mexico" + ) + return None + record.update(latitude=lat) record.update(longitude=lng) record.update(horizontal_datum=datum) + + elevation, elevation_unit = transform_length_units( record.elevation, record.elevation_units, @@ -522,6 +532,29 @@ def do_transform( return record + def in_nm(self, lng: float | int | str, lat: float | int | str) -> bool: + """ + Returns True if the point is in New Mexico, otherwise returns False + + Parameters + -------- + lng: float | int | str + The longitude of the point + + lat: float | int | str + The latitude of the point + + Returns + -------- + bool + True if the point is in New Mexico, otherwise False + """ + point = Point(lng, lat) + if NM_BOUNDARY.contains(point): + return True + else: + return False + def contained( self, lng: float | int | str, From f6e6f757191d8354dc302e532d68e9942d7e4f39 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 31 Mar 2025 16:39:44 -0600 Subject: [PATCH 026/143] Add nmbgmr well data back after testing --- backend/connectors/nmbgmr/source.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index 5940926..ec227a3 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -87,18 +87,18 @@ def get_records(self): _make_url("locations"), params, tag="features", timeout=30 ) for site in sites: - # print(f"Obtaining well data for {site['properties']['point_id']}") - # well_data = self._execute_json_request( - # _make_url("wells"), - # params={"pointid": site["properties"]["point_id"]}, - # tag="", - # ) - # site["properties"]["formation"] = well_data["formation"] - # site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] - # site["properties"]["well_depth_units"] = FEET - site["properties"]["formation"] = None - 
site["properties"]["well_depth"] = None + print(f"Obtaining well data for {site['properties']['point_id']}") + well_data = self._execute_json_request( + _make_url("wells"), + params={"pointid": site["properties"]["point_id"]}, + tag="", + ) + site["properties"]["formation"] = well_data["formation"] + site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] site["properties"]["well_depth_units"] = FEET + # site["properties"]["formation"] = None + # site["properties"]["well_depth"] = None + # site["properties"]["well_depth_units"] = FEET return sites From 1bb59b5ec0538936390bf88509dd19afac2b276e Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 31 Mar 2025 17:03:34 -0600 Subject: [PATCH 027/143] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6734cb9..27220f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - renamed the column `location` to `name` in the summary table to match the format of the `sites` table when timeseries data are exported - renamed the columns `most_recent_date`, `most_recent_time`, `most_recent_value`, and `most_recent_units` to `latest_date`, `latest_time`, `latest_value`, and `latest_units` respectively for succinctness and juxtaposition with the newly added `earliest` columns. - This naming schema also enables the development of datetime filters as the descriptor will apply to the latest datetime within the provided time frame filter, whereas most recent indicates np filters. 
+- removed sites that are not in New Mexico ## 0.7.0 From 358aebbc81f4d039c9e3937c5a6617756b7a4ca9 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 31 Mar 2025 17:09:06 -0600 Subject: [PATCH 028/143] added wqp to config_agency when parameter = 'waterlevels' --- frontend/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/cli.py b/frontend/cli.py index 8bb9811..cf038ac 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -276,7 +276,7 @@ def weave( # sources if parameter == "waterlevels": config_agencies = ["bernco", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", - "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd"] + "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", "wqp"] false_agencies = ['bor', 'nmed_dwb'] From 3b6434815fe84f2789c734084fa04ea890f4b3e8 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 1 Apr 2025 08:19:59 -0600 Subject: [PATCH 029/143] Sort sources alphabetically --- backend/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index 6b12e8b..7ae6cdc 100644 --- a/backend/config.py +++ b/backend/config.py @@ -76,7 +76,7 @@ "wqp": WQPSiteSource, } -SOURCE_KEYS = list(SOURCE_DICT.keys()) +SOURCE_KEYS = sorted(list(SOURCE_DICT.keys())) def get_source(source): try: From cf5a3bbe815c3a125c882b29413fe1948c48ad2d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 1 Apr 2025 09:30:32 -0600 Subject: [PATCH 030/143] Report sources for a given parameter --- backend/config.py | 86 +++++++++++++++++++++++++++++++++++++++++++++- backend/source.py | 3 -- backend/unifier.py | 3 +- frontend/cli.py | 61 +++++++------------------------- 4 files changed, 99 insertions(+), 54 deletions(-) diff --git a/backend/config.py b/backend/config.py index 7ae6cdc..3f6a45d 100644 --- a/backend/config.py +++ b/backend/config.py @@ -39,7 +39,27 @@ OSERoswellWaterLevelSource, ) from .connectors.nmenv.source import DWBSiteSource, DWBAnalyteSource -from .constants import 
MILLIGRAMS_PER_LITER, WGS84, FEET +from .constants import ( + MILLIGRAMS_PER_LITER, + WGS84, + FEET, + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, + ) from .connectors.isc_seven_rivers.source import ( ISCSevenRiversSiteSource, ISCSevenRiversWaterLevelSource, @@ -173,6 +193,70 @@ def __init__(self, model=None, payload=None): for s in SOURCE_KEYS: setattr(self, f"use_source_{s}", s in payload.get("sources", [])) + def get_config_and_false_agencies(self): + if self.parameter == WATERLEVELS: + config_agencies = [ + "bernco", + "cabq", + "ebid", + "nmbgmr_amp", + "nmed_dwb", + "nmose_isc_seven_rivers", + "nmose_roswell", + "nwis", + "pvacd", + "wqp" + ] + false_agencies = ["bor", "nmed_dwb"] + elif self.parameter == CARBONATE: + config_agencies = ["nmbgmr_amp", "wqp"] + false_agencies = [ + "bor", + "bernco", + "cabq", + "ebid", + "nmed_dwb", + "nmose_isc_seven_rivers", + "nmose_roswell", + "nwis", + "pvacd", + ] + elif self.parameter in [ARSENIC, URANIUM]: + config_agencies = ["bor", "nmbgmr_amp", "nmed_dwb", "wqp"] + false_agencies = [ + "bernco", + "cabq", + "ebid", + "nmose_isc_seven_rivers", + "nmose_roswell", + "nwis", + "pvacd", + ] + elif self.parameter in [ + BICARBONATE, + CALCIUM, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + ]: + config_agencies = ["bor", "nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "wqp"] + false_agencies = [ + "bernco", + "cabq", + "ebid", + "nmose_roswell", + "nwis", + "pvacd", + ] + return config_agencies, false_agencies + def finalize(self): self._update_output_units() self.make_output_directory() diff --git a/backend/source.py b/backend/source.py index 1faa697..a00f8eb 100644 --- a/backend/source.py +++ b/backend/source.py @@ -700,9 +700,6 @@ def read( else: msg = f"{warning_msg} for {site.id}" self.warn(msg) - skipped_items.append( - 
(site.id, source_result, source_unit) - ) except TypeError: skipped_items.append((site.id, source_result, source_unit)) except ValueError: diff --git a/backend/unifier.py b/backend/unifier.py index 3e46008..1f72032 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -16,6 +16,7 @@ import shapely from backend.config import Config, get_source +from backend.constants import WATERLEVELS from backend.logging import setup_logging from backend.persister import CSVPersister, GeoJSONPersister, CloudStoragePersister from backend.source import BaseSiteSource @@ -250,7 +251,7 @@ def get_sources(config=None): config = Config() sources = [] - if config.parameter.lower() == "waterlevels": + if config.parameter == WATERLEVELS: allsources = config.water_level_sources() else: allsources = config.analyte_sources() diff --git a/frontend/cli.py b/frontend/cli.py index 0fce31b..29f8dbf 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -235,6 +235,7 @@ def weave( no_wqp, site_limit, dry, + yes, ): """ Get parameter timeseries or summary data @@ -244,12 +245,6 @@ def weave( config = setup_config(f"{parameter}", bbox, county, site_limit, dry) config.parameter = parameter - # # make sure config.output_name is properly set - # config.update_output_name() - # - # # make output_path now so that die.log can be written to it live - # config.make_output_path() - # output type if output == "summary": summary = True @@ -271,51 +266,14 @@ def weave( config.output_timeseries_unified = timeseries_unified config.output_timeseries_separated = timeseries_separated - false_agencies = [] - config_agencies = [] - # sources - if parameter == "waterlevels": - config_agencies = ["bernco", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", - "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", "wqp"] - - false_agencies = ['bor', 'nmed_dwb'] - - elif parameter == "carbonate": - config_agencies = ['nmbgmr_amp', 'wqp'] - false_agencies = ['bor', 'bernco', 'cabq', 'ebid', 'nmed_dwb', - 
'nmose_isc_seven_rivers', 'nmose_roswell', 'nwis', 'pvacd'] - - elif parameter in ["arsenic", "uranium"]: - config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb', 'wqp'] - false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_isc_seven_rivers', - 'nmose_roswell', 'nwis', 'pvacd'] - - - elif parameter in [ - "bicarbonate", - "calcium", - "chloride", - "fluoride", - "magnesium", - "nitrate", - "ph", - "potassium", - "silica", - "sodium", - "sulfate", - "tds", - ]: - config_agencies = ['bor', 'nmbgmr_amp', 'nmed_dwb', 'nmose_isc_seven_rivers', 'wqp'] - false_agencies = ['bernco', 'cabq', 'ebid', 'nmose_roswell', 'nwis', 'pvacd'] - - if false_agencies: - for agency in false_agencies: - setattr(config, f"use_source_{agency}", False) + config_agencies, false_agencies = config.get_config_and_false_agencies() + + for agency in false_agencies: + setattr(config, f"use_source_{agency}", False) lcs = locals() - if config_agencies: - for agency in config_agencies: - setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + for agency in config_agencies: + setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) # dates config.start_date = start_date config.end_date = end_date @@ -405,6 +363,11 @@ def sources(sources, bbox, county): parameter = sources config.parameter = parameter + config_agencies, false_agencies = config.get_config_and_false_agencies() + + for agency in false_agencies: + setattr(config, f"use_source_{agency}", False) + sources = get_sources(config) for s in sources: click.echo(s) From d68e273d45413510385afe91353d06bf32804b02 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 1 Apr 2025 09:55:53 -0600 Subject: [PATCH 031/143] Remove sites further than 25km from NM border --- CHANGELOG.md | 2 ++ backend/bounding_polygons.py | 15 ++++++++++++--- backend/geo_utils.py | 14 ++++++++++++++ backend/transformer.py | 6 +++--- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27220f4..10a501e 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), ### Added - water level for WQP - `earliest_date`, `earliest_time`, `earliest_value`, and `earliest_units` to the summary table +- `die wells` to get all wells for which the DIE reports observations +- `die source {parameter}` to list sources that report a particular parameter ### Changed - NM OSE Roswell data is now pulled from ST2 and not CKAN diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index d5dbc57..ddecb61 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -22,6 +22,8 @@ from shapely import Polygon, box from shapely.geometry import shape +from backend.geo_utils import transform_srid, SRID_WGS84, SRID_UTM_ZONE_13N + # polygon retrivial functions # multiple polygons @@ -160,7 +162,7 @@ def get_county_polygon(name, as_wkt=True): _warning(f"Invalid state. {state}") -def get_state_polygon(state): +def get_state_polygon(state, buffer): statefp = _statelookup(state) if statefp: obj = _get_cached_object( @@ -168,7 +170,14 @@ def get_state_polygon(state): f"{state} state", f"https://reference.geoconnex.us/collections/states/items/{statefp}?&f=json", ) - return shape(obj["features"][0]["geometry"]) + geom_gcs = shape(obj["features"][0]["geometry"]) + + if buffer: + geom_utm = transform_srid(geom_gcs, SRID_WGS84, SRID_UTM_ZONE_13N) + geom_utm = geom_utm.buffer(buffer) + geom_gcs = transform_srid(geom_utm, SRID_UTM_ZONE_13N, SRID_WGS84) + + return geom_gcs # private helpers ============================ def _make_shape(obj, as_wkt): @@ -231,7 +240,7 @@ def _get_cached_object(name, msg, url): return obj -NM_BOUNDARY = get_state_polygon("NM") +NM_BOUNDARY_BUFFERED = get_state_polygon("NM", 25000) if __name__ == "__main__": diff --git a/backend/geo_utils.py b/backend/geo_utils.py index 930f76d..8e8b2d8 100644 --- a/backend/geo_utils.py +++ b/backend/geo_utils.py @@ -14,12 +14,26 
@@ # limitations under the License. # =============================================================================== import pyproj +from shapely.ops import transform PROJECTIONS = {} TRANSFORMS = {} ALLOWED_DATUMS = ["NAD27", "NAD83", "WGS84"] +# srids for NM +SRID_WGS84 = 4326 +SRID_UTM_ZONE_13N = 26913 + +def transform_srid(geometry, source_srid, target_srid): + """ + geometry must be a shapely geometry object, like Point, Polygon, or MultiPolygon + """ + source_crs = pyproj.CRS(f"EPSG:{source_srid}") + target_crs = pyproj.CRS(f"EPSG:{target_srid}") + transformer = pyproj.Transformer.from_crs(source_crs, target_crs, always_xy=True) + return transform(transformer.transform, geometry) + def datum_transform(x, y, in_datum, out_datum): """ diff --git a/backend/transformer.py b/backend/transformer.py index c76ebf0..a1d8064 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -20,7 +20,7 @@ import shapely from shapely import Point -from backend.bounding_polygons import NM_BOUNDARY +from backend.bounding_polygons import NM_BOUNDARY_BUFFERED from backend.constants import ( MILLIGRAMS_PER_LITER, PARTS_PER_MILLION, @@ -462,7 +462,7 @@ def do_transform( if not self.in_nm(lng, lat): self.warn( - f"Skipping site {record.id}. Coordinates {x}, {y} with datum {input_horizontal_datum} are not in New Mexico" + f"Skipping site {record.id}. 
Coordinates {x}, {y} with datum {input_horizontal_datum} are not within 25km of New Mexico" ) return None @@ -550,7 +550,7 @@ def in_nm(self, lng: float | int | str, lat: float | int | str) -> bool: True if the point is in New Mexico, otherwise False """ point = Point(lng, lat) - if NM_BOUNDARY.contains(point): + if NM_BOUNDARY_BUFFERED.contains(point): return True else: return False From 23976e7b123a14055c4e8742a4b863113d82e491 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 1 Apr 2025 10:00:12 -0600 Subject: [PATCH 032/143] Removed NMED DWB from waterlevels sources --- backend/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index 3f6a45d..8fe55ee 100644 --- a/backend/config.py +++ b/backend/config.py @@ -200,7 +200,6 @@ def get_config_and_false_agencies(self): "cabq", "ebid", "nmbgmr_amp", - "nmed_dwb", "nmose_isc_seven_rivers", "nmose_roswell", "nwis", From a04dea3d9ba16fb238720c955eeb96e023043477 Mon Sep 17 00:00:00 2001 From: jross Date: Tue, 1 Apr 2025 10:08:47 -0600 Subject: [PATCH 033/143] fixed all_site_sources --- backend/config.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/config.py b/backend/config.py index 4639987..26afe88 100644 --- a/backend/config.py +++ b/backend/config.py @@ -180,8 +180,11 @@ def finalize(self): self.make_output_path() def all_site_sources(self): - sources = self.water_level_sources() - sources.extend(self.analyte_sources()) + sources =[] + for s in SOURCE_KEYS: + if getattr(self, f"use_source_{s}"): + sources.append((get_source(s), None)) + return sources def analyte_sources(self): From 11e6a77d2930420bef6c69fb5cc724b20e1d8c63 Mon Sep 17 00:00:00 2001 From: jross Date: Tue, 1 Apr 2025 10:11:23 -0600 Subject: [PATCH 034/143] fixed all_site_sources --- backend/config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index 26afe88..b7fb2e8 100644 --- a/backend/config.py +++ 
b/backend/config.py @@ -183,7 +183,9 @@ def all_site_sources(self): sources =[] for s in SOURCE_KEYS: if getattr(self, f"use_source_{s}"): - sources.append((get_source(s), None)) + source = get_source(s) + source.set_config(self) + sources.append((source, None)) return sources From 2cbf406c311db93ca1253e1286a4cca0e319eeed Mon Sep 17 00:00:00 2001 From: jross Date: Tue, 1 Apr 2025 11:51:42 -0600 Subject: [PATCH 035/143] added pods --- backend/config.py | 6 +++ backend/connectors/nmose/source.py | 57 +++++++++++++++++++++++++ backend/connectors/nmose/transformer.py | 35 +++++++++++++++ backend/transformer.py | 1 + backend/unifier.py | 3 +- 5 files changed, 101 insertions(+), 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index b7fb2e8..45885a1 100644 --- a/backend/config.py +++ b/backend/config.py @@ -39,6 +39,7 @@ OSERoswellWaterLevelSource, ) from .connectors.nmenv.source import DWBSiteSource, DWBAnalyteSource +from .connectors.nmose.source import NMOSEPODSiteSource from .constants import MILLIGRAMS_PER_LITER, WGS84, FEET from .connectors.isc_seven_rivers.source import ( ISCSevenRiversSiteSource, @@ -74,6 +75,7 @@ "nwis": NWISSiteSource, "pvacd": PVACDSiteSource, "wqp": WQPSiteSource, + "nmose_pod": NMOSEPODSiteSource, } SOURCE_KEYS = list(SOURCE_DICT.keys()) @@ -115,6 +117,7 @@ class Config(Loggable): use_source_nwis: bool = True use_source_pvacd: bool = True use_source_wqp: bool = True + use_source_nmose_pod: bool = False # parameter parameter: str = "" @@ -187,6 +190,9 @@ def all_site_sources(self): source.set_config(self) sources.append((source, None)) + # pods = NMOSEPODSiteSource() + # pods.set_config(self) + # sources.append((pods, None)) return sources def analyte_sources(self): diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 5cb7a3e..9b848b0 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -1,2 +1,59 @@ import os + +from shapely import wkt +from 
shapely.geometry.polygon import Polygon + +from backend.connectors import NM_STATE_BOUNDING_POLYGON +from backend.connectors.nmose.transformer import NMOSEPODSiteTransformer from backend.source import BaseSiteSource + + +def wkt_to_arcgis_json(polygon_wkt): + obj = wkt.loads(polygon_wkt) + coords = [[coord[0], coord[1]] for coord in obj.exterior.coords] + return { + 'rings': [coords], + 'spatialReference': { + 'wkid': 4326 + } + } + +class NMOSEPODSiteSource(BaseSiteSource): + """ + NMOSEPODSiteSource is a class that inherits from BaseSiteSource. + It is used to fetch site data from the NMOSEPOD API. + """ + + transformer_klass = NMOSEPODSiteTransformer + chunk_size = 1000 + bounding_polygon = NM_STATE_BOUNDING_POLYGON + + def get_records(self, *args, **kw) -> dict: + config = self.config + params = {} + # if config.has_bounds(): + # bbox = config.bbox_bounding_points() + # params["bBox"] = ",".join([str(b) for b in bbox]) + # else: + # params["stateCd"] = "NM" + # + # if config.start_date: + # params["startDt"] = config.start_dt.date().isoformat() + # if config.end_date: + # params["endDt"] = config.end_dt.date().isoformat() + + params['where'] = "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" + params["outFields"] = "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county,pod_name,pod_nbr,pod_suffix,pod_file" + params["outSR"] = 4326 + params["f"] = "json" + if config.has_bounds(): + wkt = config.bounding_wkt() + else: + wkt = NM_STATE_BOUNDING_POLYGON + + params["geometry"] = wkt_to_arcgis_json(wkt) + params["geometryType"] = "esriGeometryPolygon" + url = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" + obj = self._execute_json_request(url, params, tag='features') + + return obj \ No newline at end of file diff 
--git a/backend/connectors/nmose/transformer.py b/backend/connectors/nmose/transformer.py index e69de29..2ed99e6 100644 --- a/backend/connectors/nmose/transformer.py +++ b/backend/connectors/nmose/transformer.py @@ -0,0 +1,35 @@ +from backend.transformer import BaseTransformer, SiteTransformer + + +class NMOSEPODSiteTransformer(SiteTransformer): + def _transform(self, record) -> dict: + """ + Transform the record into a dictionary format. + + Args: + record (dict): The record to transform. + + Returns: + dict: The transformed record. + """ + + properties = record['attributes'] + geometry = record['geometry'] + + # print(properties.keys()) + # print(geometry.keys()) + rec = { + "source": "NMOSEPOD", + "id": properties["pod_file"], + # "name": record["station_nm"], + "latitude": geometry["y"], + "longitude": geometry["x"], + # "elevation": elevation, + # "elevation_units": "ft", + # "horizontal_datum": datum, + # "vertical_datum": record["alt_datum_cd"], + # "aquifer": record["nat_aqfr_cd"], + # "well_depth": record["well_depth_va"], + # "well_depth_units": "ft", + } + return rec \ No newline at end of file diff --git a/backend/transformer.py b/backend/transformer.py index 232bd16..3356320 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -394,6 +394,7 @@ def do_transform( if not record: return + print(record) # ensure that a site or summary record is contained within the boundaing polygon if "longitude" in record and "latitude" in record: if not self.contained(record["longitude"], record["latitude"]): diff --git a/backend/unifier.py b/backend/unifier.py index 3e46008..41fddac 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -335,8 +335,9 @@ def site_unification_test(): cfg.use_source_nwis = False cfg.use_source_pvacd = False cfg.use_source_wqp = False + cfg.use_source_nmose_pod = True - cfg.use_source_nmed_dwb = True + cfg.use_source_nmed_dwb = False From d9bdd29c38c9e8210d62b8063920664c5f2dbcce Mon Sep 17 00:00:00 2001 From: jross Date: 
Tue, 1 Apr 2025 11:53:01 -0600 Subject: [PATCH 036/143] added pods --- backend/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index 45885a1..306a82c 100644 --- a/backend/config.py +++ b/backend/config.py @@ -117,7 +117,7 @@ class Config(Loggable): use_source_nwis: bool = True use_source_pvacd: bool = True use_source_wqp: bool = True - use_source_nmose_pod: bool = False + use_source_nmose_pod: bool = True # parameter parameter: str = "" From c237a60b7c6f3bc8c0c714f27c78c951f64e4243 Mon Sep 17 00:00:00 2001 From: jross Date: Tue, 1 Apr 2025 11:53:42 -0600 Subject: [PATCH 037/143] added pods --- backend/transformer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/transformer.py b/backend/transformer.py index 3356320..232bd16 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -394,7 +394,6 @@ def do_transform( if not record: return - print(record) # ensure that a site or summary record is contained within the boundaing polygon if "longitude" in record and "latitude" in record: if not self.contained(record["longitude"], record["latitude"]): From 27491c271b7b4b45a6423545ff33392662c62abc Mon Sep 17 00:00:00 2001 From: jross Date: Tue, 1 Apr 2025 11:55:29 -0600 Subject: [PATCH 038/143] added pods --- backend/connectors/nmose/source.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 9b848b0..5060577 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -8,8 +8,9 @@ from backend.source import BaseSiteSource -def wkt_to_arcgis_json(polygon_wkt): - obj = wkt.loads(polygon_wkt) +def wkt_to_arcgis_json(obj): + if isinstance(obj, str): + obj = wkt.loads(obj) coords = [[coord[0], coord[1]] for coord in obj.exterior.coords] return { 'rings': [coords], From ef08db75c9151ac9db96376bad6e2a38d718645b Mon Sep 17 00:00:00 2001 From: jross Date: Tue, 1 Apr 
2025 12:18:59 -0600 Subject: [PATCH 039/143] added pods --- backend/bounding_polygons.py | 11 +++++++++++ backend/connectors/nmose/source.py | 28 ++++++++++++++++++++-------- backend/source.py | 1 + backend/unifier.py | 2 +- 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index 32cbbe3..a591ffa 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -119,6 +119,17 @@ def get_huc_polygon(huc, as_wkt=True): return _make_shape(obj, as_wkt) +def get_county_names(state="NM"): + state, statefp = _get_statefp(state) + obj = _get_cached_object( + f"{state}.counties", + f"{state} counties", + f"https://reference.geoconnex.us/collections/counties/items?statefp={statefp}&f=json", + ) + + return [f['properties']['name'] for f in obj['features']] + + def get_county_polygon(name, as_wkt=True): if ":" in name: state, county = name.split(":") diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 5060577..28ac9e2 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -3,6 +3,7 @@ from shapely import wkt from shapely.geometry.polygon import Polygon +from backend.bounding_polygons import get_county_polygon, get_county_names from backend.connectors import NM_STATE_BOUNDING_POLYGON from backend.connectors.nmose.transformer import NMOSEPODSiteTransformer from backend.source import BaseSiteSource @@ -26,7 +27,7 @@ class NMOSEPODSiteSource(BaseSiteSource): """ transformer_klass = NMOSEPODSiteTransformer - chunk_size = 1000 + chunk_size = 5000 bounding_polygon = NM_STATE_BOUNDING_POLYGON def get_records(self, *args, **kw) -> dict: @@ -43,18 +44,29 @@ def get_records(self, *args, **kw) -> dict: # if config.end_date: # params["endDt"] = config.end_dt.date().isoformat() + url = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" + params['where'] = "pod_status = 'ACT' AND 
pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" params["outFields"] = "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county,pod_name,pod_nbr,pod_suffix,pod_file" params["outSR"] = 4326 params["f"] = "json" + params["resultRecordCount"] = self.chunk_size + params['resultOffset'] = 0 + if config.has_bounds(): wkt = config.bounding_wkt() - else: - wkt = NM_STATE_BOUNDING_POLYGON + params["geometry"] = wkt_to_arcgis_json(wkt) + params["geometryType"] = "esriGeometryPolygon" - params["geometry"] = wkt_to_arcgis_json(wkt) - params["geometryType"] = "esriGeometryPolygon" - url = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" - obj = self._execute_json_request(url, params, tag='features') + records = [] + i=1 + while 1: + rs = self._execute_json_request(url, params, tag='features') + records.extend(rs) + params['resultOffset'] += self.chunk_size + print((i, len(rs))) + if len(rs) < self.chunk_size: + break + i+=1 - return obj \ No newline at end of file + return records \ No newline at end of file diff --git a/backend/source.py b/backend/source.py index 457006b..0ba7e91 100644 --- a/backend/source.py +++ b/backend/source.py @@ -300,6 +300,7 @@ def _execute_json_request( self.warn(f"service responded but with no data. 
\n{resp.text}") return [] else: + print('ffasdfsafasdfasdf', resp.url) self.warn(f"service responded with status {resp.status_code}") self.warn(f"service responded with text {resp.text}") return [] diff --git a/backend/unifier.py b/backend/unifier.py index 41fddac..f31fa99 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -315,7 +315,7 @@ def waterlevel_unification_test(): def site_unification_test(): cfg = Config() - cfg.county = "chaves" + # cfg.county = "chaves" cfg.output_summary = False From 104ae3c3fba105546cebdee16d5b47033b296ba5 Mon Sep 17 00:00:00 2001 From: jross Date: Tue, 1 Apr 2025 15:43:59 -0600 Subject: [PATCH 040/143] added pods --- backend/connectors/nmose/source.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 28ac9e2..c0a1eb9 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -64,7 +64,6 @@ def get_records(self, *args, **kw) -> dict: rs = self._execute_json_request(url, params, tag='features') records.extend(rs) params['resultOffset'] += self.chunk_size - print((i, len(rs))) if len(rs) < self.chunk_size: break i+=1 From 88fb0b3a8ff4ee83abb7085f70b95a35713f8fb0 Mon Sep 17 00:00:00 2001 From: jross Date: Tue, 1 Apr 2025 15:48:04 -0600 Subject: [PATCH 041/143] added pods --- backend/bounding_polygons.py | 11 ----------- backend/connectors/nmose/source.py | 3 --- backend/source.py | 2 +- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index a591ffa..32cbbe3 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -119,17 +119,6 @@ def get_huc_polygon(huc, as_wkt=True): return _make_shape(obj, as_wkt) -def get_county_names(state="NM"): - state, statefp = _get_statefp(state) - obj = _get_cached_object( - f"{state}.counties", - f"{state} counties", - 
f"https://reference.geoconnex.us/collections/counties/items?statefp={statefp}&f=json", - ) - - return [f['properties']['name'] for f in obj['features']] - - def get_county_polygon(name, as_wkt=True): if ":" in name: state, county = name.split(":") diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index c0a1eb9..53fe355 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -1,9 +1,6 @@ import os from shapely import wkt -from shapely.geometry.polygon import Polygon - -from backend.bounding_polygons import get_county_polygon, get_county_names from backend.connectors import NM_STATE_BOUNDING_POLYGON from backend.connectors.nmose.transformer import NMOSEPODSiteTransformer from backend.source import BaseSiteSource diff --git a/backend/source.py b/backend/source.py index 0ba7e91..44e705f 100644 --- a/backend/source.py +++ b/backend/source.py @@ -300,9 +300,9 @@ def _execute_json_request( self.warn(f"service responded but with no data. 
\n{resp.text}") return [] else: - print('ffasdfsafasdfasdf', resp.url) self.warn(f"service responded with status {resp.status_code}") self.warn(f"service responded with text {resp.text}") + self.warn(f"service at url: {resp.url}") return [] # ========================================================================== From 723b3f91e1a815b643225da74d37a36dd6a5c941 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 1 Apr 2025 22:05:36 +0000 Subject: [PATCH 042/143] Formatting changes --- backend/bounding_polygons.py | 1 + backend/config.py | 16 ++- backend/connectors/bor/source.py | 2 +- backend/connectors/isc_seven_rivers/source.py | 9 +- backend/connectors/nmbgmr/source.py | 2 +- backend/connectors/st_connector.py | 2 +- backend/geo_utils.py | 1 + backend/source.py | 3 +- backend/transformer.py | 5 +- backend/unifier.py | 6 +- frontend/cli.py | 99 +++++++++++-------- 11 files changed, 85 insertions(+), 61 deletions(-) diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index ddecb61..9ac50aa 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -179,6 +179,7 @@ def get_state_polygon(state, buffer): return geom_gcs + # private helpers ============================ def _make_shape(obj, as_wkt): poly = shape(obj["geometry"]) diff --git a/backend/config.py b/backend/config.py index 8fe55ee..2851bf7 100644 --- a/backend/config.py +++ b/backend/config.py @@ -59,7 +59,7 @@ SULFATE, TDS, URANIUM, - ) +) from .connectors.isc_seven_rivers.source import ( ISCSevenRiversSiteSource, ISCSevenRiversWaterLevelSource, @@ -98,6 +98,7 @@ SOURCE_KEYS = sorted(list(SOURCE_DICT.keys())) + def get_source(source): try: klass = SOURCE_DICT[source] @@ -204,8 +205,8 @@ def get_config_and_false_agencies(self): "nmose_roswell", "nwis", "pvacd", - "wqp" - ] + "wqp", + ] false_agencies = ["bor", "nmed_dwb"] elif self.parameter == CARBONATE: config_agencies = ["nmbgmr_amp", "wqp"] @@ -245,7 +246,13 @@ def get_config_and_false_agencies(self): 
SULFATE, TDS, ]: - config_agencies = ["bor", "nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "wqp"] + config_agencies = [ + "bor", + "nmbgmr_amp", + "nmed_dwb", + "nmose_isc_seven_rivers", + "wqp", + ] false_agencies = [ "bernco", "cabq", @@ -464,6 +471,7 @@ def _validate_county(self): return bool(get_county_polygon(self.county)) return True + def make_output_directory(self): """ Create the output directory if it doesn't exist. diff --git a/backend/connectors/bor/source.py b/backend/connectors/bor/source.py index 29b5d38..5ad03e1 100644 --- a/backend/connectors/bor/source.py +++ b/backend/connectors/bor/source.py @@ -28,7 +28,7 @@ SOURCE_PARAMETER_UNITS, DT_MEASURED, EARLIEST, - LATEST + LATEST, ) from backend.source import ( diff --git a/backend/connectors/isc_seven_rivers/source.py b/backend/connectors/isc_seven_rivers/source.py index a7fb384..5679fad 100644 --- a/backend/connectors/isc_seven_rivers/source.py +++ b/backend/connectors/isc_seven_rivers/source.py @@ -29,7 +29,7 @@ SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, EARLIEST, - LATEST + LATEST, ) from backend.connectors.isc_seven_rivers.transformer import ( ISCSevenRiversSiteTransformer, @@ -187,7 +187,11 @@ def get_records(self, site_record): ) def _clean_records(self, records): - return [r for r in records if r["depthToWaterFeet"] is not None and not r["invalid"] and not r["dry"]] + return [ + r + for r in records + if r["depthToWaterFeet"] is not None and not r["invalid"] and not r["dry"] + ] def _extract_parameter_record(self, record): record[PARAMETER_NAME] = DTW @@ -220,4 +224,5 @@ def _extract_terminal_record(self, records, bookend): "source_parameter_name": self._source_parameter_name, } + # ============= EOF ============================================= diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index a7851ef..39c1170 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -135,7 +135,7 @@ def 
_extract_site_records(self, records, site_record): def _extract_source_parameter_units(self, records): return [r["Units"] for r in records] - + def _extract_terminal_record(self, records, bookend): record = get_terminal_record(records, "info.CollectionDate", bookend=bookend) return { diff --git a/backend/connectors/st_connector.py b/backend/connectors/st_connector.py index bee66f3..d596fe1 100644 --- a/backend/connectors/st_connector.py +++ b/backend/connectors/st_connector.py @@ -56,7 +56,7 @@ def _get_things( things.filter(" and ".join(fs)) return things.list() - + def _extract_terminal_record(self, records, bookend): record = get_terminal_record( records, tag=lambda x: x["observation"].phenomenon_time, bookend=bookend diff --git a/backend/geo_utils.py b/backend/geo_utils.py index 8e8b2d8..4484ee9 100644 --- a/backend/geo_utils.py +++ b/backend/geo_utils.py @@ -25,6 +25,7 @@ SRID_WGS84 = 4326 SRID_UTM_ZONE_13N = 26913 + def transform_srid(geometry, source_srid, target_srid): """ geometry must be a shapely geometry object, like Point, Polygon, or MultiPolygon diff --git a/backend/source.py b/backend/source.py index a00f8eb..be4be44 100644 --- a/backend/source.py +++ b/backend/source.py @@ -595,7 +595,6 @@ def _extract_earliest_record(self, records: list) -> dict: the earliest record """ return self._extract_terminal_record(records, bookend=EARLIEST) - def _extract_latest_record(self, records: list) -> dict: """ @@ -862,7 +861,7 @@ def _clean_records(self, records: list) -> list: source. Otherwise returns the records as is. 
""" return records - + def _extract_terminal_record(self, records, bookend): """ Returns the terminal record for a particular site diff --git a/backend/transformer.py b/backend/transformer.py index a1d8064..229fc0c 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -32,7 +32,7 @@ DT_MEASURED, DTW, EARLIEST, - LATEST + LATEST, ) from backend.geo_utils import datum_transform, ALLOWED_DATUMS from backend.logger import Loggable @@ -470,8 +470,6 @@ def do_transform( record.update(longitude=lng) record.update(horizontal_datum=datum) - - elevation, elevation_unit = transform_length_units( record.elevation, record.elevation_units, @@ -780,7 +778,6 @@ def _transform_terminal_record(self, record, site_id, bookend): record[value_key] = converted_value record[unit_key] = unit - def _transform_earliest_record(self, record, site_id): self._transform_terminal_record(record, site_id, EARLIEST) diff --git a/backend/unifier.py b/backend/unifier.py index 1f72032..b2ddd7a 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -63,6 +63,7 @@ def unify_waterlevels(config): return True + def unify_sites(config): print("Unifying sites only\n") @@ -74,6 +75,7 @@ def unify_sites(config): return True + def _perister_factory(config): """ Determines the type of persister to use based on the configuration. 
The @@ -314,11 +316,11 @@ def waterlevel_unification_test(): unify_waterlevels(cfg) + def site_unification_test(): cfg = Config() cfg.county = "chaves" - cfg.output_summary = False cfg.output_name = "sitesonly" cfg.sites_only = True @@ -339,8 +341,6 @@ def site_unification_test(): cfg.use_source_nmed_dwb = True - - cfg.finalize() unify_sites(cfg) diff --git a/frontend/cli.py b/frontend/cli.py index 29f8dbf..490d9ef 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -182,7 +182,6 @@ def cli(): required=True, help="Output summary file, single unified timeseries file, or separated timeseries files", ), - ] PERSISTER_OPTIONS = [ click.option( @@ -215,27 +214,27 @@ def _add_options(func): @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def weave( - weave, - output, - output_dir, - start_date, - end_date, - bbox, - county, - no_bernco, - no_bor, - no_cabq, - no_ebid, - no_nmbgmr_amp, - no_nmed_dwb, - no_nmose_isc_seven_rivers, - no_nmose_roswell, - no_nwis, - no_pvacd, - no_wqp, - site_limit, - dry, - yes, + weave, + output, + output_dir, + start_date, + end_date, + bbox, + county, + no_bernco, + no_bor, + no_cabq, + no_ebid, + no_nmbgmr_amp, + no_nmed_dwb, + no_nmose_isc_seven_rivers, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, + site_limit, + dry, + yes, ): """ Get parameter timeseries or summary data @@ -273,7 +272,7 @@ def weave( lcs = locals() for agency in config_agencies: - setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + setattr(config, f"use_source_{agency}", lcs.get(f"no_{agency}", False)) # dates config.start_date = start_date config.end_date = end_date @@ -299,33 +298,46 @@ def weave( @add_options(PERSISTER_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) -def wells(bbox, county, - output_dir, - no_bernco, - no_bor, - no_cabq, - no_ebid, - no_nmbgmr_amp, - no_nmed_dwb, - no_nmose_isc_seven_rivers, - no_nmose_roswell, - no_nwis, - no_pvacd, - no_wqp, - site_limit, - dry, - yes): +def wells( 
+ bbox, + county, + output_dir, + no_bernco, + no_bor, + no_cabq, + no_ebid, + no_nmbgmr_amp, + no_nmed_dwb, + no_nmose_isc_seven_rivers, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, + site_limit, + dry, + yes, +): """ Get locations """ config = setup_config("sites", bbox, county, site_limit, dry) - config_agencies = ["bernco", "bor", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", - "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", - "wqp"] + config_agencies = [ + "bernco", + "bor", + "cabq", + "ebid", + "nmbgmr_amp", + "nmed_dwb", + "nmose_isc_seven_rivers", + "nmose_roswell", + "nwis", + "pvacd", + "wqp", + ] lcs = locals() for agency in config_agencies: - setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + setattr(config, f"use_source_{agency}", lcs.get(f"no_{agency}", False)) config.sites_only = True config.output_dir = output_dir @@ -388,4 +400,5 @@ def setup_config(tag, bbox, county, site_limit, dry): return config + # ============= EOF ============================================= From 0f1253597848761fab5691d7806468c52a19987a Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Tue, 1 Apr 2025 22:10:22 +0000 Subject: [PATCH 043/143] Formatting changes --- backend/config.py | 2 +- backend/connectors/nmose/source.py | 28 ++++++++++++------------- backend/connectors/nmose/transformer.py | 6 +++--- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/backend/config.py b/backend/config.py index ee46600..7c8dc46 100644 --- a/backend/config.py +++ b/backend/config.py @@ -273,7 +273,7 @@ def finalize(self): self.make_output_path() def all_site_sources(self): - sources =[] + sources = [] for s in SOURCE_KEYS: if getattr(self, f"use_source_{s}"): source = get_source(s) diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 53fe355..92e6fb3 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -10,12 +10,8 @@ def wkt_to_arcgis_json(obj): if 
isinstance(obj, str): obj = wkt.loads(obj) coords = [[coord[0], coord[1]] for coord in obj.exterior.coords] - return { - 'rings': [coords], - 'spatialReference': { - 'wkid': 4326 - } - } + return {"rings": [coords], "spatialReference": {"wkid": 4326}} + class NMOSEPODSiteSource(BaseSiteSource): """ @@ -43,12 +39,16 @@ def get_records(self, *args, **kw) -> dict: url = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" - params['where'] = "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" - params["outFields"] = "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county,pod_name,pod_nbr,pod_suffix,pod_file" + params["where"] = ( + "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" + ) + params["outFields"] = ( + "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county,pod_name,pod_nbr,pod_suffix,pod_file" + ) params["outSR"] = 4326 params["f"] = "json" params["resultRecordCount"] = self.chunk_size - params['resultOffset'] = 0 + params["resultOffset"] = 0 if config.has_bounds(): wkt = config.bounding_wkt() @@ -56,13 +56,13 @@ def get_records(self, *args, **kw) -> dict: params["geometryType"] = "esriGeometryPolygon" records = [] - i=1 + i = 1 while 1: - rs = self._execute_json_request(url, params, tag='features') + rs = self._execute_json_request(url, params, tag="features") records.extend(rs) - params['resultOffset'] += self.chunk_size + params["resultOffset"] += self.chunk_size if len(rs) < self.chunk_size: break - i+=1 + i += 1 - return records \ No newline at end of file + return records diff --git 
a/backend/connectors/nmose/transformer.py b/backend/connectors/nmose/transformer.py index 2ed99e6..8ebbc5f 100644 --- a/backend/connectors/nmose/transformer.py +++ b/backend/connectors/nmose/transformer.py @@ -13,8 +13,8 @@ def _transform(self, record) -> dict: dict: The transformed record. """ - properties = record['attributes'] - geometry = record['geometry'] + properties = record["attributes"] + geometry = record["geometry"] # print(properties.keys()) # print(geometry.keys()) @@ -32,4 +32,4 @@ def _transform(self, record) -> dict: # "well_depth": record["well_depth_va"], # "well_depth_units": "ft", } - return rec \ No newline at end of file + return rec From 5a478d57044a192e3c8f2a3e2bcbf9fa48675f09 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 1 Apr 2025 16:38:49 -0600 Subject: [PATCH 044/143] Further document NMOSE POD & Update README/CHANGELOG --- CHANGELOG.md | 1 + README.md | 4 ++++ backend/config.py | 10 +++++++--- frontend/cli.py | 9 +++++++++ setup.py | 2 +- 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10a501e..74b032f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - `earliest_date`, `earliest_time`, `earliest_value`, and `earliest_units` to the summary table - `die wells` to get all wells for which the DIE reports observations - `die source {parameter}` to list sources that report a particular parameter +- NM OSE PODs, though its information is only currently available for the invocation of `die wells` ### Changed - NM OSE Roswell data is now pulled from ST2 and not CKAN diff --git a/README.md b/README.md index 1e83f8d..4e43866 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,8 @@ Data comes from the following sources. 
We are continuously adding new sources as - Available data: `water levels`, `water quality` - [New Mexico Environment Department Drinking Water Bureau (NMED DWB)](https://nmenv.newmexicowaterdata.org/FROST-Server/v1.1/) - Available data: `water quality` +- [New Mexico Office of the State Engineer Points of Diversions (NMOSEPODs)](https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/ArcGIS/rest/services/OSE_PODs/FeatureServer/0) + - Available data: `None` - [New Mexico Office of the State Engineer ISC Seven Rivers (NMOSE ISC Seven Rivers)](https://nmisc-wf.gladata.com/api/getMonitoringPoints.ashx) - Available data: `water levels`, `water quality` - [New Mexico Office of the State Engineer Roswell District Office (NMOSE Roswell)](https://catalog.newmexicowaterdata.org/dataset/pecos_region_manual_groundwater_levels) @@ -63,6 +65,7 @@ where `{parameter}` is the name of the parameter whose data is to be retrieved, | **nmbgmr-amp** | X | X | X | X | X | X | X | X | X | X | X | X | X | X | X | X | | **nmed-dwb** | - | X | X | X | - | X | X | X | X | X | X | X | X | X | X | X | | **nmose-isc-seven-rivers** | X | - | X | X | - | X | X | X | X | X | X | X | X | X | X | - | +| **nmose-pod** | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | | **nmose-roswell** | X | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | | **nwis** | X | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | | **pvacd** | X | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | @@ -171,6 +174,7 @@ The Data Integration Engine enables the user to obtain groundwater level and gro - `--no-nmbgmr-amp` to exclude New Mexico Bureau of Geology and Mineral Resources (NMBGMR) Aquifer Mapping Program (AMP) data - `--no-nmed-dwb` to exclude New Mexico Environment Department (NMED) Drinking Water Bureau (DWB) data - `--no-nmose-isc-seven-rivers` to exclude New Mexico Office of State Engineer (NMOSE) Interstate Stream Commission (ISC) Seven Rivers data +- `--no-nmose-pod` to exclude 
New Mexico Office of State Engineer (NMOSE) Point of Diversion (POD) data (though none except for well information is currently available) - `--no-nmose-roswell` to exclude New Mexico Office of State Engineer (NMOSE) Roswell data - `--no-nwis` to exclude USGS NWIS data - `--no-pvacd` to exclude Pecos Valley Artesian Convservancy District (PVACD) data diff --git a/backend/config.py b/backend/config.py index 7c8dc46..9f0f2d7 100644 --- a/backend/config.py +++ b/backend/config.py @@ -91,11 +91,11 @@ "nmbgmr_amp": NMBGMRSiteSource, "nmed_dwb": DWBSiteSource, "nmose_isc_seven_rivers": ISCSevenRiversSiteSource, + "nmose_pod": NMOSEPODSiteSource, "nmose_roswell": NMOSERoswellSiteSource, "nwis": NWISSiteSource, "pvacd": PVACDSiteSource, "wqp": WQPSiteSource, - "nmose_pod": NMOSEPODSiteSource, } SOURCE_KEYS = sorted(list(SOURCE_DICT.keys())) @@ -134,11 +134,11 @@ class Config(Loggable): use_source_nmbgmr_amp: bool = True use_source_nmed_dwb: bool = True use_source_nmose_isc_seven_rivers: bool = True + use_source_nmose_pod: bool = True use_source_nmose_roswell: bool = True use_source_nwis: bool = True use_source_pvacd: bool = True use_source_wqp: bool = True - use_source_nmose_pod: bool = True # parameter parameter: str = "" @@ -205,12 +205,13 @@ def get_config_and_false_agencies(self): "ebid", "nmbgmr_amp", "nmose_isc_seven_rivers", + "nmose_pod", "nmose_roswell", "nwis", "pvacd", "wqp", ] - false_agencies = ["bor", "nmed_dwb"] + false_agencies = ["bor", "nmose_pod", "nmed_dwb"] elif self.parameter == CARBONATE: config_agencies = ["nmbgmr_amp", "wqp"] false_agencies = [ @@ -220,6 +221,7 @@ def get_config_and_false_agencies(self): "ebid", "nmed_dwb", "nmose_isc_seven_rivers", + "nmose_pod", "nmose_roswell", "nwis", "pvacd", @@ -232,6 +234,7 @@ def get_config_and_false_agencies(self): "ebid", "nmose_isc_seven_rivers", "nmose_roswell", + "nmose_pod", "nwis", "pvacd", ] @@ -261,6 +264,7 @@ def get_config_and_false_agencies(self): "cabq", "ebid", "nmose_roswell", + "nmose_pod", 
"nwis", "pvacd", ] diff --git a/frontend/cli.py b/frontend/cli.py index 490d9ef..425fdfa 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -82,6 +82,13 @@ def cli(): show_default=True, help="Exclude NMOSE ISC Seven Rivers data. Default is to include", ), + click.option( + "--no-nmose-pod", + is_flag=True, + default=True, + show_default=True, + help="Exclude NMOSE POD data. Default is to include", + ), click.option( "--no-nmose-roswell", is_flag=True, @@ -228,6 +235,7 @@ def weave( no_nmbgmr_amp, no_nmed_dwb, no_nmose_isc_seven_rivers, + no_nmose_pod, no_nmose_roswell, no_nwis, no_pvacd, @@ -309,6 +317,7 @@ def wells( no_nmbgmr_amp, no_nmed_dwb, no_nmose_isc_seven_rivers, + no_nmose_pod, no_nmose_roswell, no_nwis, no_pvacd, diff --git a/setup.py b/setup.py index ab8fd6d..9f43e06 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.8.0", + version="0.8.1", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 6436be4cd91bdb8db4389cea4c27c1779619942a Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 2 Apr 2025 14:40:34 -0600 Subject: [PATCH 045/143] Fix logger for persister and unifier --- backend/persister.py | 2 +- backend/unifier.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 38e8493..a89572c 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -21,7 +21,7 @@ import pandas as pd import geopandas as gpd -from backend.logging import Loggable +from backend.logger import Loggable try: from google.cloud import storage diff --git a/backend/unifier.py b/backend/unifier.py index 524f695..32042f2 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -17,7 +17,7 @@ from backend.config import Config, get_source from backend.constants import WATERLEVELS -from backend.logging import setup_logging +from backend.logger import setup_logging from backend.persister import CSVPersister, 
GeoJSONPersister, CloudStoragePersister from backend.source import BaseSiteSource From 3e71a593a6e0c5f06f2ba6b751614fd4bf700bda Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 2 Apr 2025 16:32:07 -0600 Subject: [PATCH 046/143] Remove USGS records where value is "-999999" --- backend/connectors/usgs/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index e60029b..b6eff31 100644 --- a/backend/connectors/usgs/source.py +++ b/backend/connectors/usgs/source.py @@ -181,7 +181,7 @@ def _extract_site_records(self, records, site_record): return [ri for ri in records if ri["site_code"] == site_record.id] def _clean_records(self, records): - return [r for r in records if r["value"] is not None and r["value"].strip()] + return [r for r in records if r["value"] is not None and r["value"].strip() and r["value"] != "-999999"] def _extract_source_parameter_results(self, records): return [float(r["value"]) for r in records] From 5a6681470c2fef875825f33ef1ab8e84557f2c98 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 2 Apr 2025 17:38:11 -0600 Subject: [PATCH 047/143] Update change log --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74b032f..80bf398 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - This naming schema also enables the development of datetime filters as the descriptor will apply to the latest datetime within the provided time frame filter, whereas most recent indicates np filters. 
- removed sites that are not in New Mexico +### Fixed +- removed records from USGS where the value is "-999999" + ## 0.7.0 From 40f3b9b50b2df90383afe132e5ec443a48d15a1c Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 2 Apr 2025 17:38:20 -0600 Subject: [PATCH 048/143] Work on updating and maintaining tests --- tests/__init__.py | 126 ++++++++++++++++++++++++++++++ tests/source_tests/__init__.py | 0 tests/source_tests/test_nmbgmr.py | 40 ++++++++++ 3 files changed, 166 insertions(+) create mode 100644 tests/source_tests/__init__.py create mode 100644 tests/source_tests/test_nmbgmr.py diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..1eb65b5 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,126 @@ +from pathlib import Path +import pytest + +from backend.config import Config, SOURCE_KEYS, get_source +from backend.constants import WATERLEVELS +from backend.unifier import unify_analytes, unify_waterlevels + +class BaseTestClass: + + parameter = None + units = None + agency = None + + dirs_and_files_to_delete = [] + + # restrict results to 10 for testing + site_limit = 39 + + @pytest.fixture(autouse=True) + def setup(self): + # Setup code + self.config = Config() + + for agency in SOURCE_KEYS: + setattr(self.config, f"use_source_{agency}", False) + + setattr(self.config, "site_limit", self.site_limit) + setattr(self.config, "parameter", self.parameter) + setattr(self.config, "units", self.units) + setattr(self.config, f"use_source_{self.agency}", True) + + self.config.finalize() + + # run test + yield + + # Teardown code + self.config = None + self.unifier = None + for p in self.dirs_and_files_to_delete: + if p.is_file(): + p.unlink() + elif p.is_dir(): + for f in p.iterdir(): + f.unlink() + p.rmdir() + self.dirs_and_files_to_delete = [] + + def _unify(self): + self.unifier(self.config) + + def _test_health(self): + # do a health check for the agency + source = self.config.all_site_sources()[0][0] + assert source.health() + + def 
_test_summary(self): + # Arrange + self.config.output_summary = True + self.config.report() + + # Act + if self.parameter == WATERLEVELS: + unify_waterlevels(self.config) + else: + unify_analytes(self.config) + + # Assert + # Check the summary file + summary_file = Path(self.config.output_path) / "summary.csv" + assert summary_file.exists() + + # Check the column headers + with open(summary_file, "r") as f: + headers = f.readline().strip().split(",") + expected_headers = [ + "source", + "id", + "name", + "usgs_site_id", + "alternate_site_id", + "latitude", + "longitude", + "horizontal_datum", + "elevation", + "elevation_units", + "well_depth", + "well_depth_units", + "parameter_name", + "parameter_units", + "nrecords", + "min", + "max", + "mean", + "earliest_date", + "earliest_time", + "earliest_value", + "earliest_units", + "latest_date", + "latest_time", + "latest_value", + "latest_units", + ] + assert headers == expected_headers + self.dirs_and_files_to_delete.append(summary_file) + + def _test_timeseries_unified(self): + pass + + def _test_timeseries_separated(self): + pass + + def _test_date_range(self): + pass + + def _test_wkt(self): + pass + + def _test_county(self): + pass + + def _test_huc(self): + pass + + def _text_bbox(self): + pass \ No newline at end of file diff --git a/tests/source_tests/__init__.py b/tests/source_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/source_tests/test_nmbgmr.py b/tests/source_tests/test_nmbgmr.py new file mode 100644 index 0000000..17540e7 --- /dev/null +++ b/tests/source_tests/test_nmbgmr.py @@ -0,0 +1,40 @@ +from backend.constants import WATERLEVELS +from tests import BaseTestClass + +import pytest + +class TestNMBGMRWaterlevels(BaseTestClass): + + parameter = WATERLEVELS + units = "ft" + agency = "nmbgmr_amp" + + def test_health(self): + self._test_health() + + def test_summary(self): + self._test_summary() + + @pytest.mark.skip(reason="Not implemented yet") + def 
test_timeseries_unified(self): + self._test_timeseries_unified() + + @pytest.mark.skip(reason="Not implemented yet") + def test_timeseries_separated(self): + self._test_timeseries_separated() + + @pytest.mark.skip(reason="Not implemented yet") + def test_date_range(self): + self._test_date_range() + + @pytest.mark.skip(reason="Not implemented yet") + def test_wkt(self): + self._test_wkt() + + @pytest.mark.skip(reason="Not implemented yet") + def test_county(self): + self._test_county() + + @pytest.mark.skip(reason="Not implemented yet") + def test_huc(self): + self._test_huc() \ No newline at end of file From 2ad8ecfbc898af528a4f5523a8864baf3bd700e2 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 2 Apr 2025 17:38:35 -0600 Subject: [PATCH 049/143] Work on fixing chunk sizes --- backend/connectors/nmbgmr/source.py | 25 +++++++++++-------------- backend/unifier.py | 10 +++++++++- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index 39c1170..2ec5d10 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -72,9 +72,6 @@ def get_records(self): if config.has_bounds(): params["wkt"] = config.bounding_wkt() - if config.site_limit: - params["limit"] = config.site_limit - if not config.sites_only: if config.parameter.lower() != "waterlevels": @@ -90,18 +87,18 @@ def get_records(self): ) if not config.sites_only: for site in sites: - print(f"Obtaining well data for {site['properties']['point_id']}") - well_data = self._execute_json_request( - _make_url("wells"), - params={"pointid": site["properties"]["point_id"]}, - tag="", - ) - site["properties"]["formation"] = well_data["formation"] - site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] - site["properties"]["well_depth_units"] = FEET - # site["properties"]["formation"] = None - # site["properties"]["well_depth"] = None + # print(f"Obtaining well data for 
{site['properties']['point_id']}") + # well_data = self._execute_json_request( + # _make_url("wells"), + # params={"pointid": site["properties"]["point_id"]}, + # tag="", + # ) + # site["properties"]["formation"] = well_data["formation"] + # site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] # site["properties"]["well_depth_units"] = FEET + site["properties"]["formation"] = None + site["properties"]["well_depth"] = None + site["properties"]["well_depth_units"] = FEET return sites diff --git a/backend/unifier.py b/backend/unifier.py index 32042f2..86fd157 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -156,11 +156,18 @@ def _site_wrapper(site_source, parameter_source, persister, config): end_ind += n if use_summarize: + print("summarize") + print(sites_with_records_count, site_limit) summary_records = parameter_source.read( sites, use_summarize, start_ind, end_ind ) if summary_records: + print("here", len(summary_records)) persister.records.extend(summary_records) + sites_with_records_count += len(summary_records) + else: + print("there") + continue else: results = parameter_source.read( sites, use_summarize, start_ind, end_ind @@ -175,7 +182,8 @@ def _site_wrapper(site_source, parameter_source, persister, config): persister.timeseries.append((site, records)) persister.sites.append(site) - sites_with_records_count += 1 + print("incrementing sites_with_records_count") + sites_with_records_count += 1 except BaseException: import traceback From 86d63fa39c6bec6547e67019f20101ce57fd763c Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 2 Apr 2025 23:40:12 +0000 Subject: [PATCH 050/143] Formatting changes --- backend/connectors/usgs/source.py | 6 +++++- tests/__init__.py | 9 +++++---- tests/source_tests/test_nmbgmr.py | 3 ++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index b6eff31..25e4e87 100644 --- a/backend/connectors/usgs/source.py +++ 
b/backend/connectors/usgs/source.py @@ -181,7 +181,11 @@ def _extract_site_records(self, records, site_record): return [ri for ri in records if ri["site_code"] == site_record.id] def _clean_records(self, records): - return [r for r in records if r["value"] is not None and r["value"].strip() and r["value"] != "-999999"] + return [ + r + for r in records + if r["value"] is not None and r["value"].strip() and r["value"] != "-999999" + ] def _extract_source_parameter_results(self, records): return [float(r["value"]) for r in records] diff --git a/tests/__init__.py b/tests/__init__.py index 1eb65b5..d40fd4a 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -5,6 +5,7 @@ from backend.constants import WATERLEVELS from backend.unifier import unify_analytes, unify_waterlevels + class BaseTestClass: parameter = None @@ -23,7 +24,7 @@ def setup(self): for agency in SOURCE_KEYS: setattr(self.config, f"use_source_{agency}", False) - + setattr(self.config, "site_limit", self.site_limit) setattr(self.config, "parameter", self.parameter) setattr(self.config, "units", self.units) @@ -58,13 +59,13 @@ def _test_summary(self): # Arrange self.config.output_summary = True self.config.report() - + # Act if self.parameter == WATERLEVELS: unify_waterlevels(self.config) else: unify_analytes(self.config) - + # Assert # Check the summary file summary_file = Path(self.config.output_path) / "summary.csv" @@ -123,4 +124,4 @@ def _test_huc(self): pass def _text_bbox(self): - pass \ No newline at end of file + pass diff --git a/tests/source_tests/test_nmbgmr.py b/tests/source_tests/test_nmbgmr.py index 17540e7..051c7da 100644 --- a/tests/source_tests/test_nmbgmr.py +++ b/tests/source_tests/test_nmbgmr.py @@ -3,6 +3,7 @@ import pytest + class TestNMBGMRWaterlevels(BaseTestClass): parameter = WATERLEVELS @@ -37,4 +38,4 @@ def test_county(self): @pytest.mark.skip(reason="Not implemented yet") def test_huc(self): - self._test_huc() \ No newline at end of file + self._test_huc() From 
11c8a1a151505aafc94ac3182486101d59d16006 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 3 Apr 2025 08:28:57 -0600 Subject: [PATCH 051/143] Remove duplicative variable for clarity --- backend/unifier.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/unifier.py b/backend/unifier.py index 86fd157..c2c9bfb 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -129,9 +129,7 @@ def _site_wrapper(site_source, parameter_source, persister, config): use_summarize = config.output_summary site_limit = config.site_limit - sites = site_source.read() - - if not sites: + if not site_source.read(): return sites_with_records_count = 0 @@ -140,7 +138,7 @@ def _site_wrapper(site_source, parameter_source, persister, config): first_flag = True if config.sites_only: - persister.sites.extend(sites) + persister.sites.extend(site_source.read()) else: for sites in site_source.chunks(sites): if site_limit and sites_with_records_count == site_limit: From 4f57d46d35058a8becb60fbbffcfda4533d39a97 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 3 Apr 2025 16:49:28 -0600 Subject: [PATCH 052/143] Intermediate step for this debug --- backend/source.py | 27 ++++++++++++++++++++++++++- backend/unifier.py | 36 +++++++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/backend/source.py b/backend/source.py index b631185..746fb28 100644 --- a/backend/source.py +++ b/backend/source.py @@ -256,7 +256,8 @@ def _execute_json_request( dict the json response """ - # print(url) + print(url) + print(params) resp = httpx.get(url, params=params, **kw) if tag is None: tag = "data" @@ -508,6 +509,30 @@ def chunks(self, records: list, chunk_size: int = None) -> list: ] else: return records + + def get_chunk_size(self): + """ + Returns the chunk size for the source. This is used to determine how many records + to process at once. 
+ + Returns + ------- + int + the chunk size for the source + """ + return self.chunk_size + + def set_chunk_size(self, chunk_size: int): + """ + Sets the chunk size for the source. This is used to determine how many records + to process at once. + + Parameters + ---------- + chunk_size : int + the chunk size for the source + """ + self.chunk_size = chunk_size class BaseParameterSource(BaseSource): diff --git a/backend/unifier.py b/backend/unifier.py index c2c9bfb..ecb377d 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -129,23 +129,35 @@ def _site_wrapper(site_source, parameter_source, persister, config): use_summarize = config.output_summary site_limit = config.site_limit - if not site_source.read(): + sites = site_source.read() + + if not sites: return sites_with_records_count = 0 - start_ind = 1 + start_ind = 0 end_ind = 0 first_flag = True + """ + If site_source.chunk_size is greater than site_limit, set it to site_limit + so that we don't get too many sites at once. This will need to be repeated + within the for loop in conjunction with sites_with_records_count so that + the site_limit is not surpassed + """ + if site_limit > 0 and site_source.get_chunk_size() > site_limit: + site_source.set_chunk_size(site_limit) + if config.sites_only: - persister.sites.extend(site_source.read()) + persister.sites.extend(sites) else: - for sites in site_source.chunks(sites): + for site_records in site_source.chunks(sites): if site_limit and sites_with_records_count == site_limit: break + # elif - if type(sites) == list: - n = len(sites) + if type(site_records) == list: + n = len(site_records) if first_flag: first_flag = False else: @@ -155,9 +167,10 @@ def _site_wrapper(site_source, parameter_source, persister, config): if use_summarize: print("summarize") - print(sites_with_records_count, site_limit) + print("sites_with_records_count:", sites_with_records_count, "site_limit:", site_limit, "chunk_size:", site_source.get_chunk_size()) + print("start_ind:", 
start_ind, "end_ind:", end_ind) summary_records = parameter_source.read( - sites, use_summarize, start_ind, end_ind + site_records, use_summarize, start_ind, end_ind ) if summary_records: print("here", len(summary_records)) @@ -168,7 +181,7 @@ def _site_wrapper(site_source, parameter_source, persister, config): continue else: results = parameter_source.read( - sites, use_summarize, start_ind, end_ind + site_records, use_summarize, start_ind, end_ind ) # no records are returned if there is no site record for parameter # or if the record isn't clean (doesn't have the correct fields) @@ -183,6 +196,11 @@ def _site_wrapper(site_source, parameter_source, persister, config): print("incrementing sites_with_records_count") sites_with_records_count += 1 + if site_limit > 0 and site_limit < sites_with_records_count + site_source.get_chunk_size(): + new_chunk_size = site_limit - sites_with_records_count + site_source.set_chunk_size(new_chunk_size) + print("new_chunk_size:", new_chunk_size) + except BaseException: import traceback From 2e19085b83d598ac2d2aa1303e87130a88c50c39 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 3 Apr 2025 17:16:38 -0600 Subject: [PATCH 053/143] Ensure that no more sites than site_limit are returned --- backend/source.py | 26 -------------------------- backend/unifier.py | 34 +++++++++++++++++----------------- 2 files changed, 17 insertions(+), 43 deletions(-) diff --git a/backend/source.py b/backend/source.py index 746fb28..acf47d9 100644 --- a/backend/source.py +++ b/backend/source.py @@ -256,8 +256,6 @@ def _execute_json_request( dict the json response """ - print(url) - print(params) resp = httpx.get(url, params=params, **kw) if tag is None: tag = "data" @@ -509,30 +507,6 @@ def chunks(self, records: list, chunk_size: int = None) -> list: ] else: return records - - def get_chunk_size(self): - """ - Returns the chunk size for the source. This is used to determine how many records - to process at once. 
- - Returns - ------- - int - the chunk size for the source - """ - return self.chunk_size - - def set_chunk_size(self, chunk_size: int): - """ - Sets the chunk size for the source. This is used to determine how many records - to process at once. - - Parameters - ---------- - chunk_size : int - the chunk size for the source - """ - self.chunk_size = chunk_size class BaseParameterSource(BaseSource): diff --git a/backend/unifier.py b/backend/unifier.py index ecb377d..ec27a1d 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -145,16 +145,27 @@ def _site_wrapper(site_source, parameter_source, persister, config): within the for loop in conjunction with sites_with_records_count so that the site_limit is not surpassed """ - if site_limit > 0 and site_source.get_chunk_size() > site_limit: - site_source.set_chunk_size(site_limit) + if site_limit > 0 and site_source. chunk_size > site_limit: + site_source.chunk_size = site_limit if config.sites_only: persister.sites.extend(sites) else: for site_records in site_source.chunks(sites): + print("sites_with_records_count:", sites_with_records_count, "|", "site_limit:", site_limit, "|", "chunk_size:", site_source.chunk_size) if site_limit and sites_with_records_count == site_limit: break - # elif + elif site_limit and sites_with_records_count > site_limit: + # remove any extra sites that were gathered + num_sites_to_remove = sites_with_records_count - site_limit + print("removing", num_sites_to_remove) + + if use_summarize: + persister.records = persister.records[:-num_sites_to_remove] + else: + persister.timeseries = persister.timeseries[:-num_sites_to_remove] + persister.sites = persister.sites[:-num_sites_to_remove] + break if type(site_records) == list: n = len(site_records) @@ -165,19 +176,14 @@ def _site_wrapper(site_source, parameter_source, persister, config): end_ind += n - if use_summarize: - print("summarize") - print("sites_with_records_count:", sites_with_records_count, "site_limit:", site_limit, 
"chunk_size:", site_source.get_chunk_size()) - print("start_ind:", start_ind, "end_ind:", end_ind) + if use_summarize: summary_records = parameter_source.read( site_records, use_summarize, start_ind, end_ind ) if summary_records: - print("here", len(summary_records)) persister.records.extend(summary_records) sites_with_records_count += len(summary_records) else: - print("there") continue else: results = parameter_source.read( @@ -188,19 +194,13 @@ def _site_wrapper(site_source, parameter_source, persister, config): # don't count these sites to apply to site_limit if results is None or len(results) == 0: continue + else: + sites_with_records_count += len(results) for site, records in results: persister.timeseries.append((site, records)) persister.sites.append(site) - print("incrementing sites_with_records_count") - sites_with_records_count += 1 - - if site_limit > 0 and site_limit < sites_with_records_count + site_source.get_chunk_size(): - new_chunk_size = site_limit - sites_with_records_count - site_source.set_chunk_size(new_chunk_size) - print("new_chunk_size:", new_chunk_size) - except BaseException: import traceback From 15803436a41bd6f8fea835330cb2a5074511cf5a Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 3 Apr 2025 17:34:02 -0600 Subject: [PATCH 054/143] Make tests more efficient and clearer --- backend/unifier.py | 11 +---------- tests/__init__.py | 23 +++++++++-------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/backend/unifier.py b/backend/unifier.py index ec27a1d..74b57e6 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -139,15 +139,6 @@ def _site_wrapper(site_source, parameter_source, persister, config): end_ind = 0 first_flag = True - """ - If site_source.chunk_size is greater than site_limit, set it to site_limit - so that we don't get too many sites at once. 
This will need to be repeated - within the for loop in conjunction with sites_with_records_count so that - the site_limit is not surpassed - """ - if site_limit > 0 and site_source. chunk_size > site_limit: - site_source.chunk_size = site_limit - if config.sites_only: persister.sites.extend(sites) else: @@ -158,7 +149,7 @@ def _site_wrapper(site_source, parameter_source, persister, config): elif site_limit and sites_with_records_count > site_limit: # remove any extra sites that were gathered num_sites_to_remove = sites_with_records_count - site_limit - print("removing", num_sites_to_remove) + print(f"removing {num_sites_to_remove} to avoid exceeding the site limit") if use_summarize: persister.records = persister.records[:-num_sites_to_remove] diff --git a/tests/__init__.py b/tests/__init__.py index d40fd4a..b0c0bdd 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -12,10 +12,8 @@ class BaseTestClass: units = None agency = None - dirs_and_files_to_delete = [] - - # restrict results to 10 for testing - site_limit = 39 + # set set_limit for tests + site_limit = 8 @pytest.fixture(autouse=True) def setup(self): @@ -35,17 +33,15 @@ def setup(self): # run test yield - # Teardown code + # Teardown code + path_to_clean = Path(self.config.output_path) + print(f"Cleaning and removing {path_to_clean}") + for f in Path(path_to_clean).iterdir(): + f.unlink() + path_to_clean.rmdir() + self.dirs_to_delete = [] self.config = None self.unifier = None - for p in self.dirs_and_files_to_delete: - if p.is_file(): - p.unlink() - elif p.is_dir(): - for f in p.iterdir(): - f.unlink() - p.rmdir() - self.dirs_and_files_to_delete = [] def _unify(self): self.unifier(self.config) @@ -103,7 +99,6 @@ def _test_summary(self): "latest_units", ] assert headers == expected_headers - self.dirs_and_files_to_delete.append(summary_file) def _test_timeseries_unified(self): pass From d4c0155ba421f246893140f70498d1299048b664 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 3 Apr 2025 17:40:51 
-0600 Subject: [PATCH 055/143] site_limit exceedance calculation simplification --- backend/unifier.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/backend/unifier.py b/backend/unifier.py index 74b57e6..7fb388e 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -144,19 +144,18 @@ def _site_wrapper(site_source, parameter_source, persister, config): else: for site_records in site_source.chunks(sites): print("sites_with_records_count:", sites_with_records_count, "|", "site_limit:", site_limit, "|", "chunk_size:", site_source.chunk_size) - if site_limit and sites_with_records_count == site_limit: - break - elif site_limit and sites_with_records_count > site_limit: - # remove any extra sites that were gathered - num_sites_to_remove = sites_with_records_count - site_limit - print(f"removing {num_sites_to_remove} to avoid exceeding the site limit") - - if use_summarize: - persister.records = persister.records[:-num_sites_to_remove] - else: - persister.timeseries = persister.timeseries[:-num_sites_to_remove] - persister.sites = persister.sites[:-num_sites_to_remove] - break + if site_limit: + if sites_with_records_count >= site_limit: + # remove any extra sites that were gathered. 
removes 0 if site_limit is not exceeded + num_sites_to_remove = sites_with_records_count - site_limit + print(f"removing {num_sites_to_remove} to avoid exceeding the site limit") + + if use_summarize: + persister.records = persister.records[:-num_sites_to_remove] + else: + persister.timeseries = persister.timeseries[:-num_sites_to_remove] + persister.sites = persister.sites[:-num_sites_to_remove] + break if type(site_records) == list: n = len(site_records) From 656596b48236ae84cd15a22f70b26764fd547e3c Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Thu, 3 Apr 2025 23:42:37 +0000 Subject: [PATCH 056/143] Formatting changes --- backend/unifier.py | 23 ++++++++++++++++++----- tests/__init__.py | 2 +- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/backend/unifier.py b/backend/unifier.py index 7fb388e..82cd27f 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -143,17 +143,30 @@ def _site_wrapper(site_source, parameter_source, persister, config): persister.sites.extend(sites) else: for site_records in site_source.chunks(sites): - print("sites_with_records_count:", sites_with_records_count, "|", "site_limit:", site_limit, "|", "chunk_size:", site_source.chunk_size) + print( + "sites_with_records_count:", + sites_with_records_count, + "|", + "site_limit:", + site_limit, + "|", + "chunk_size:", + site_source.chunk_size, + ) if site_limit: if sites_with_records_count >= site_limit: # remove any extra sites that were gathered. 
removes 0 if site_limit is not exceeded num_sites_to_remove = sites_with_records_count - site_limit - print(f"removing {num_sites_to_remove} to avoid exceeding the site limit") - + print( + f"removing {num_sites_to_remove} to avoid exceeding the site limit" + ) + if use_summarize: persister.records = persister.records[:-num_sites_to_remove] else: - persister.timeseries = persister.timeseries[:-num_sites_to_remove] + persister.timeseries = persister.timeseries[ + :-num_sites_to_remove + ] persister.sites = persister.sites[:-num_sites_to_remove] break @@ -166,7 +179,7 @@ def _site_wrapper(site_source, parameter_source, persister, config): end_ind += n - if use_summarize: + if use_summarize: summary_records = parameter_source.read( site_records, use_summarize, start_ind, end_ind ) diff --git a/tests/__init__.py b/tests/__init__.py index b0c0bdd..466ba46 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -33,7 +33,7 @@ def setup(self): # run test yield - # Teardown code + # Teardown code path_to_clean = Path(self.config.output_path) print(f"Cleaning and removing {path_to_clean}") for f in Path(path_to_clean).iterdir(): From cf350b4e3e4181dd5c3e484ddf4a886e8d6dee35 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 08:54:48 -0600 Subject: [PATCH 057/143] Clearer site limit communication --- backend/unifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/unifier.py b/backend/unifier.py index 7fb388e..859f86b 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -148,7 +148,7 @@ def _site_wrapper(site_source, parameter_source, persister, config): if sites_with_records_count >= site_limit: # remove any extra sites that were gathered. 
removes 0 if site_limit is not exceeded num_sites_to_remove = sites_with_records_count - site_limit - print(f"removing {num_sites_to_remove} to avoid exceeding the site limit") + print(f"removing {num_sites_to_remove} records to avoid exceeding the site limit") if use_summarize: persister.records = persister.records[:-num_sites_to_remove] From 7b12e8ea5a0b31e2315abfe72df8bcfdd9b16063 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 11:32:50 -0600 Subject: [PATCH 058/143] Archive old/outdated tests but keep for records --- pytest.ini | 3 + tests/archived/__init__.py | 0 tests/archived/test_cli.py | 408 ++++++++++++++++++++++++++++ tests/archived/test_unifier.py | 473 +++++++++++++++++++++++++++++++++ 4 files changed, 884 insertions(+) create mode 100644 pytest.ini create mode 100644 tests/archived/__init__.py create mode 100644 tests/archived/test_cli.py create mode 100644 tests/archived/test_unifier.py diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..8ea4712 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +; skip archived tests but keep for reference +norecursedirs = tests/archived \ No newline at end of file diff --git a/tests/archived/__init__.py b/tests/archived/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/archived/test_cli.py b/tests/archived/test_cli.py new file mode 100644 index 0000000..3d65365 --- /dev/null +++ b/tests/archived/test_cli.py @@ -0,0 +1,408 @@ +# =============================================================================== +# Copyright 2024 Jake Ross +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== +import os + +from click.testing import CliRunner +from frontend.cli import analytes, waterlevels + + +def _tester(function, args, fail=False): + runner = CliRunner() + print(f"invoked with {args}") + result = runner.invoke(function, args) + print(f"result.exit_code={result.exit_code}") + print(f"result.output=\n{result.output}") + + if fail: + assert result.exit_code != 0 + else: + assert result.exit_code == 0 + + +def _make_args(source): + args = [] + if source: + nosources = [ + f + for f in ( + "--no-amp", + "--no-nwis", + "--no-pvacd", + "--no-bor", + "--no-dwb", + "--no-wqp", + "--no-isc-seven-rivers", + "--no-ckan", + ) + if f != f"--no-{source}" + ] + args += nosources + + args += ["--site-limit", 10, "--dry"] + + return args + + +def _make_tds_args(source): + return ["TDS"] + _make_args(source) + + +def _make_wl_args(source=None): + return _make_args(source) + + +def test_waterlevels_nwis(): + args = _make_wl_args("nwis") + _tester(waterlevels, args) + + +def test_waterlevels_pvacd(): + args = _make_wl_args("pvacd") + _tester(waterlevels, args) + + +def test_waterlevels_nmbgmr(): + args = _make_wl_args("nmbgmr") + _tester(waterlevels, args) + + +def test_waterlevels_isc_seven_rivers(): + args = _make_wl_args("iscsevenrivers") + _tester(waterlevels, args) + + +def test_waterlevels_invalid_source(): + args = _make_wl_args() + args.append("--no-foo") + _tester(waterlevels, args, fail=True) + + +def test_waterlevels_invalid_bbox(): + args = _make_wl_args() + 
args.append("--bbox") + _tester(waterlevels, args, fail=True) + + +def test_waterlevels_invalid_bbox_format(): + args = _make_wl_args() + args.extend(["--bbox", "1 2 3"]) + _tester(waterlevels, args, fail=True) + + +def test_waterlevels_valid_bbox_format(): + args = _make_wl_args() + args.extend(["--bbox", "1 2,3 4"]) + _tester(waterlevels, args) + + +def test_waterlevels_invalid_county(): + args = _make_wl_args() + args.append("--county") + _tester(waterlevels, args, fail=True) + + +def test_waterlevels_invalid_county_name(): + args = _make_wl_args() + args.extend(["--county", "foo"]) + _tester(waterlevels, args, fail=True) + + +# Analyte Tests ======================================================= +def test_analytes_wqp(): + args = _make_tds_args("wqp") + _tester(analytes, args) + + +def test_analytes_bor(): + args = _make_tds_args("bor") + _tester(analytes, args) + + +def test_analytes_amp(): + args = _make_tds_args("amp") + _tester(analytes, args) + + +def test_analytes_dwb(): + args = _make_tds_args("dwb") + _tester(analytes, args) + + +def test_analytes_isc_seven_rivers(): + args = _make_tds_args("isc-seven-rivers") + _tester(analytes, args) + + +def test_analytes_invalid_analyte(): + args = _make_args("wqp") + args[0] = "Foo" + _tester(analytes, args, fail=True) + + +def test_analytes_invalid_source(): + args = _make_tds_args("wqp") + args.append("--no-foo") + _tester(analytes, args, fail=True) + + +def test_analytes_invalid_bbox(): + args = _make_tds_args("wqp") + args.append("--bbox") + _tester(analytes, args, fail=True) + + +def test_analytes_invalid_bbox_format(): + args = _make_tds_args("wqp") + args.extend(["--bbox", "1 2 3"]) + _tester(analytes, args, fail=True) + + +def test_analytes_valid_bbox_format(): + args = _make_tds_args("wqp") + args.extend(["--bbox", "1 2,3 4"]) + _tester(analytes, args) + + +def test_analytes_invalid_county(): + args = _make_tds_args("wqp") + args.append("--county") + _tester(analytes, args, fail=True) + + +def 
test_analytes_invalid_county_name(): + args = _make_tds_args("wqp") + args.extend(["--county", "foo"]) + _tester(analytes, args, fail=True) + + +def test_waterlevels_date_range_YMD(): + args = _make_wl_args() + args.extend(["--start-date", "2020-01-01", "--end-date", "2020-05-01"]) + _tester(waterlevels, args) + + +def test_waterlevels_date_range_YM(): + args = _make_wl_args() + args.extend(["--start-date", "2020-01", "--end-date", "2020-05"]) + _tester(waterlevels, args) + + +def test_waterlevels_date_range_Y(): + args = _make_wl_args() + args.extend(["--start-date", "2020", "--end-date", "2021"]) + _tester(waterlevels, args) + + +def test_waterlevels_invalid_start(): + args = _make_wl_args() + args.extend(["--start-date", "x-01-01", "--end-date", "2019-05-01"]) + _tester(waterlevels, args, fail=True) + + +def test_waterlevels_invalid_end(): + args = _make_wl_args() + args.extend(["--start-date", "2020-01-01", "--end-date", "x-05-01"]) + _tester(waterlevels, args, fail=True) + + +# +# def _tester(source, func, county, bbox, args=None): +# runner = CliRunner() +# +# nosources = [ +# f +# for f in ( +# "--no-amp", +# "--no-nwis", +# "--no-st2", +# "--no-bor", +# "--no-dwb", +# "--no-wqp", +# "--no-isc-seven-rivers", +# "--no-ckan", +# ) +# if f != f"--no-{source}" +# ] +# +# dargs = nosources + ["--site-limit", 10] +# +# if args: +# args += dargs +# else: +# args = dargs +# +# if county: +# args.extend(("--county", county)) +# elif bbox: +# args.extend(("--bbox", bbox)) +# +# print(" ".join([str(f) for f in args])) +# result = runner.invoke(func, args) +# +# return result + + +# def _summary_tester(source, func, county=None, bbox=None, args=None): +# if not (county or bbox): +# county = "eddy" +# +# runner = CliRunner() +# # with runner.isolated_filesystem(): +# # result = _tester(source, func, county, bbox, args) +# # assert result.exit_code == 0 +# # assert os.path.isfile("output.csv") +# +# +# def _timeseries_tester( +# source, +# func, +# combined_flag=True, +# 
timeseries_flag=True, +# county=None, +# bbox=None, +# args=None, +# ): +# if args is None: +# args = [] +# # runner = CliRunner() +# # with runner.isolated_filesystem(): +# # result = _tester(source, func, county, bbox, args=args + ["--timeseries"]) +# # assert result.exit_code == 0 +# # print("combined", os.path.isfile("output.combined.csv"), combined_flag) +# # assert os.path.isfile("output.combined.csv") == combined_flag +# # print("timeseries", os.path.isdir("output_timeseries"), timeseries_flag) +# # assert os.path.isdir("output_timeseries") == timeseries_flag +# +# +# # ====== Analyte Tests ======================================================= +# def _analyte_summary_tester(key): +# _summary_tester(key, analytes, args=["TDS"]) +# +# +# def _analyte_county_tester(source, **kw): +# _timeseries_tester(source, analytes, args=["TDS"], county="eddy", **kw) +# +# +# def test_unify_analytes_amp(): +# _analyte_county_tester("amp", timeseries_flag=False) +# +# +# def test_unify_analytes_wqp(): +# _analyte_county_tester("wqp") +# +# +# def test_unify_analytes_bor(): +# _analyte_county_tester("bor", combined_flag=False) +# +# +# def test_unify_analytes_isc_seven_rivers(): +# _analyte_county_tester("isc-seven-rivers") +# +# +# def test_unify_analytes_dwb(): +# _analyte_county_tester("dwb", timeseries_flag=False) +# +# +# def test_unify_analytes_wqp_summary(): +# _analyte_summary_tester("wqp") +# +# +# def test_unify_analytes_bor_summary(): +# _analyte_summary_tester("bor") +# +# +# def test_unify_analytes_amp_summary(): +# _analyte_summary_tester("amp") +# +# +# def test_unify_analytes_dwb_summary(): +# _analyte_summary_tester("dwb") +# +# +# def test_unify_analytes_isc_seven_rivers_summary(): +# _analyte_summary_tester("isc-seven-rivers") + + +# ====== End Analyte Tests ======================================================= + + +# ====== Water Level Tests ======================================================= +# def _waterlevel_county_tester(source, **kw): +# 
_timeseries_tester(source, waterlevels, county="eddy", **kw) +# +# +# def _waterlevel_bbox_tester(source, **kw): +# _timeseries_tester(source, waterlevels, bbox="-104.5 32.5,-104 33", **kw) + +# +# def test_unify_waterlevels_nwis(): +# _waterlevel_county_tester("nwis", timeseries_flag=False) +# +# +# def test_unify_waterlevels_amp(): +# _waterlevel_county_tester("amp", timeseries_flag=False) +# +# +# def test_unify_waterlevels_st2(): +# _waterlevel_county_tester("st2", combined_flag=False) +# +# +# def test_unify_waterlevels_isc_seven_rivers(): +# _waterlevel_county_tester("isc-seven-rivers") +# +# +# def test_unify_waterlevels_ckan(): +# _waterlevel_county_tester("ckan") +# +# +# def test_unify_waterlevels_nwis_summary(): +# _summary_tester("nwis", waterlevels) +# +# +# def test_unify_waterlevels_amp_summary(): +# _summary_tester("amp", waterlevels) +# +# +# def test_unify_waterlevels_st2_summary(): +# _summary_tester("st2", waterlevels) +# +# +# def test_unify_waterlevels_isc_seven_rivers_summary(): +# _summary_tester("isc-seven-rivers", waterlevels) +# +# +# def test_unify_waterlevels_nwis_bbox(): +# _waterlevel_bbox_tester("nwis", timeseries_flag=False) +# +# +# def test_unify_waterlevels_amp_bbox(): +# _waterlevel_bbox_tester("amp") +# +# +# def test_unify_waterlevels_st2_bbox(): +# _waterlevel_bbox_tester("st2", combined_flag=False) +# +# +# def test_unify_waterlevels_isc_seven_rivers_bbox(): +# _waterlevel_bbox_tester("isc-seven-rivers", combined_flag=False) +# +# +# def test_unify_waterlevels_ckan_bbox(): +# _waterlevel_bbox_tester("ckan") + + +# ====== End Water Level Tests ======================================================= +# ============= EOF ============================================= diff --git a/tests/archived/test_unifier.py b/tests/archived/test_unifier.py new file mode 100644 index 0000000..3947ef6 --- /dev/null +++ b/tests/archived/test_unifier.py @@ -0,0 +1,473 @@ +# 
=============================================================================== +# Copyright 2024 Jake Ross +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== +import datetime +import os + +import pytest +import shapely.wkt + +from backend.config import Config +from backend.connectors.ckan import HONDO_RESOURCE_ID +from backend.unifier import unify_analytes, unify_waterlevels + + +def config_factory(): + cfg = Config() + cfg.county = "eddy" + cfg.bbox = "-104.5 32.5,-104 33" + cfg.start_date = "2020-01-01" + cfg.end_date = "2024-5-01" + cfg.output_summary = False + + cfg.use_source_nmbgmr = False + cfg.use_source_wqp = False + cfg.use_source_iscsevenrivers = False + cfg.use_source_nwis = False + cfg.use_source_oseroswell = False + cfg.use_source_pvacd = False + cfg.use_source_bor = False + cfg.use_source_dwb = False + cfg.use_source_bernco = False + + cfg.site_limit = 10 + return cfg + + +@pytest.fixture +def waterlevel_summary_cfg(): + cfg = config_factory() + cfg.output_summary = True + return cfg + + +@pytest.fixture +def waterlevel_timeseries_cfg(): + cfg = config_factory() + cfg.output_summary = False + return cfg + + +@pytest.fixture +def analyte_summary_cfg(): + cfg = config_factory() + cfg.output_summary = True + cfg.analyte = "TDS" + return cfg + + +# def test_unify_analytes(cfg): +# unify_analytes(cfg) + + +def _setup(tmp_path, cfg, source, tag): + d = tmp_path / tag + d.mkdir() + 
cfg.output_dir = str(d) + for stag in ( + "nmbgmr", + "nwis", + "pvacd", + "bor", + "dwb", + "wqp", + "iscsevenrivers", + "oseroswell", + "bernco", + ): + if stag == source: + setattr(cfg, f"use_source_{stag}", True) + return d + + +def _setup_waterlevels(tmp_path, cfg, source): + d = _setup(tmp_path, cfg, source, "waterlevels") + unify_waterlevels(cfg) + return d + + +def _setup_analytes(tmp_path, cfg, source): + d = _setup(tmp_path, cfg, source, "analyte") + unify_analytes(cfg) + return d + + +def _test_analytes_summary(tmp_path, cfg, source): + d = _setup_analytes(tmp_path, cfg, source) + assert (d / "output.csv").is_file() + + +def _test_waterlevels_summary(tmp_path, cfg, source): + d = _setup_waterlevels(tmp_path, cfg, source) + assert (d / "output.csv").is_file() + + +def _test_waterlevels_timeseries( + tmp_path, cfg, source, combined_flag=True, timeseries_flag=False +): + d = _setup_waterlevels(tmp_path, cfg, source) + combined = d / "output.combined.csv" + timeseries = d / "output_timeseries" + print(combined_flag) + + print("combined", combined.is_file(), combined_flag) + assert combined.is_file() == combined_flag + print("timeseries", timeseries.is_dir(), timeseries_flag) + assert timeseries.is_dir() == timeseries_flag + + return combined, timeseries + + +def _test_waterelevels_timeseries_date_range( + tmp_path, cfg, source, timeseries_flag=True, combined_flag=False +): + combined, timeseries = _test_waterlevels_timeseries( + tmp_path, + cfg, + source, + timeseries_flag=timeseries_flag, + combined_flag=combined_flag, + ) + + for p in timeseries.iterdir(): + if os.path.basename(p) == "sites.csv": + continue + + with open(p, "r") as rfile: + lines = rfile.readlines() + for l in lines[1:]: + vs = l.split(",") + dd = vs[3] + dd = datetime.datetime.strptime(dd, "%Y-%m-%d") + assert dd.year >= 2020 and dd.year <= 2024 + + +def test_nwis_site_health_check(): + from backend.connectors.usgs.source import NWISSiteSource + + n = NWISSiteSource() + assert n.health() 
+ + +def test_nmbgmr_site_health_check(): + from backend.connectors.nmbgmr.source import NMBGMRSiteSource + + n = NMBGMRSiteSource() + assert n.health() + + +def test_wqp_site_health_check(): + from backend.connectors.wqp.source import WQPSiteSource + + n = WQPSiteSource() + assert n.health() + + +def test_bor_site_health_check(): + from backend.connectors.bor.source import BORSiteSource + + n = BORSiteSource() + assert n.health() + + +def test_dwb_site_health_check(): + from backend.connectors.nmenv.source import DWBSiteSource + + n = DWBSiteSource() + assert n.health() + + +def test_isc_seven_rivers_site_health_check(): + from backend.connectors.isc_seven_rivers.source import ISCSevenRiversSiteSource + + n = ISCSevenRiversSiteSource() + assert n.health() + + +def test_ckan_site_health_check(): + from backend.connectors.ckan.source import OSERoswellSiteSource + + n = OSERoswellSiteSource(HONDO_RESOURCE_ID) + assert n.health() + + +def test_pvacd_site_health_check(): + from backend.connectors.st2.source import PVACDSiteSource + + n = PVACDSiteSource() + assert n.health() + + +def test_bernco_site_health_check(): + from backend.connectors.st2.source import BernCoSiteSource + + n = BernCoSiteSource() + assert n.health() + + +# def test_ose_roswell_site_health_check(): +# from backend.connectors.ose_roswell.source import OSESiteSource +# n = OSESiteSource() +# assert n.health() + + +# Source tests ======================================================================================================== +def test_source_bounds_nmbgmr(): + from backend.unifier import get_source_bounds + from backend.connectors import NM_STATE_BOUNDING_POLYGON + + sourcekey = "nmbgmr" + bounds = get_source_bounds(sourcekey) + assert bounds + assert bounds.is_valid + assert bounds.geom_type == "Polygon" + assert bounds == NM_STATE_BOUNDING_POLYGON + + +def test_source_bounds_is_seven_rivers(): + from backend.unifier import get_source_bounds + from backend.connectors import 
ISC_SEVEN_RIVERS_BOUNDING_POLYGON + + sourcekey = "iscsevenrivers" + bounds = get_source_bounds(sourcekey) + assert bounds + assert bounds.is_valid + assert bounds.geom_type == "Polygon" + assert bounds == ISC_SEVEN_RIVERS_BOUNDING_POLYGON + + +def test_source_bounds_oser(): + from backend.unifier import get_source_bounds + from backend.connectors import ( + OSE_ROSWELL_HONDO_BOUNDING_POLYGON, + OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, + OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, + ) + + sourcekey = "oseroswell" + bounds = get_source_bounds(sourcekey) + assert bounds + assert bounds.is_valid + assert bounds.geom_type == "GeometryCollection" + assert bounds == shapely.GeometryCollection( + [ + OSE_ROSWELL_HONDO_BOUNDING_POLYGON, + OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, + OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, + ] + ) + + +def test_sources_socorro(tmp_path): + cfg = Config() + cfg.county = "socorro" + + from backend.unifier import get_sources + + sources = get_sources(cfg) + assert sources + assert len(sources) == 2 + assert sorted([s.__class__.__name__ for s in sources]) == sorted( + ["NMBGMRSiteSource", "NWISSiteSource"] + ) + + +def test_sources_eddy_dtw(tmp_path): + cfg = Config() + cfg.county = "eddy" + + from backend.unifier import get_sources + + sources = get_sources(cfg) + assert sources + assert len(sources) == 5 + assert sorted([s.__class__.__name__ for s in sources]) == sorted( + [ + "ISCSevenRiversSiteSource", + "NMBGMRSiteSource", + "OSERoswellSiteSource", + "PVACDSiteSource", + "NWISSiteSource", + ] + ) + + +def test_sources_eddy_tds(tmp_path): + cfg = Config() + cfg.county = "eddy" + cfg.analyte = "TDS" + + from backend.unifier import get_sources + + sources = get_sources(cfg) + assert sources + assert len(sources) == 5 + assert sorted([s.__class__.__name__ for s in sources]) == sorted( + [ + "BORSiteSource", + "DWBSiteSource", + "ISCSevenRiversSiteSource", + "NMBGMRSiteSource", + "WQPSiteSource", + ] + ) + + +# Waterlevel Summary tests 
=========================================================================================== +def test_unify_waterlevels_bernco_summary(tmp_path, waterlevel_summary_cfg): + waterlevel_summary_cfg.county = "bernalillo" + waterlevel_summary_cfg.bbox = None + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "bernco") + + +def test_unify_waterlevels_nwis_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nwis") + + +def test_unify_waterlevels_amp_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nmbgmr") + + +def test_unify_waterlevels_pvacd_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "pvacd") + + +def test_unify_waterlevels_isc_seven_rivers_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "iscsevenrivers") + + +def test_unify_waterlevels_ose_roswell_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "oseroswell") + + +# Waterlevel timeseries tests ========================================================================================= +def test_unify_waterlevels_nwis_timeseries(tmp_path, waterlevel_timeseries_cfg): + # there are one or more locations within the bounding box that have only + # one record, so there is a combined file + _test_waterlevels_timeseries( + tmp_path, + waterlevel_timeseries_cfg, + "nwis", + combined_flag=True, + timeseries_flag=True, + ) + + +def test_unify_waterlevels_amp_timeseries(tmp_path, waterlevel_timeseries_cfg): + _test_waterlevels_timeseries(tmp_path, waterlevel_timeseries_cfg, "nmbgmr") + + +def test_unify_waterlevels_pvacd_timeseries(tmp_path, waterlevel_timeseries_cfg): + # all locations within the bounding box have more than one record + # so there is no combined file + _test_waterlevels_timeseries( + tmp_path, + waterlevel_timeseries_cfg, + 
"pvacd", + combined_flag=False, + timeseries_flag=True, + ) + + +def test_unify_waterlevels_isc_seven_rivers_timeseries( + tmp_path, waterlevel_timeseries_cfg +): + # all locations within the bounding box have more than one record + # so there is no combined file + _test_waterlevels_timeseries( + tmp_path, + waterlevel_timeseries_cfg, + "iscsevenrivers", + combined_flag=False, + timeseries_flag=True, + ) + + +def test_unify_waterlevels_ose_roswell_timeseries(tmp_path, waterlevel_timeseries_cfg): + _test_waterlevels_timeseries( + tmp_path, waterlevel_timeseries_cfg, "oseroswell", timeseries_flag=True + ) + + +# Waterlevel summary date range tests ================================================================================= +def test_waterlevels_nwis_summary_date_range(tmp_path, waterlevel_summary_cfg): + d = _setup_waterlevels(tmp_path, waterlevel_summary_cfg, "nwis") + assert (d / "output.csv").is_file() + + +# Waterlevel timeseries date range ==================================================================================== +def test_waterlevels_nwis_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): + # there are one or more locations within the bounding box and date range + # that have only one record, so there is a combined file + _test_waterelevels_timeseries_date_range( + tmp_path, + waterlevel_timeseries_cfg, + "nwis", + timeseries_flag=True, + combined_flag=True, + ) + + +def test_waterlevels_isc_seven_rivers_timeseries_date_range( + tmp_path, waterlevel_timeseries_cfg +): + # all locations within the bounding box and date rangehave more than one + # record so there is no combined file + _test_waterelevels_timeseries_date_range( + tmp_path, + waterlevel_timeseries_cfg, + "iscsevenrivers", + timeseries_flag=True, + combined_flag=False, + ) + + +def test_waterlevels_pvacd_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): + # all locations within the bounding box and date rangehave more than one + # record so there is no combined file 
+ _test_waterelevels_timeseries_date_range( + tmp_path, + waterlevel_timeseries_cfg, + "pvacd", + timeseries_flag=True, + combined_flag=False, + ) + + +# Analyte summary tests =============================================================================================== +def test_unify_analytes_wqp_summary(tmp_path, analyte_summary_cfg): + _test_analytes_summary(tmp_path, analyte_summary_cfg, "wqp") + + +def test_unify_analytes_amp_summary(tmp_path, analyte_summary_cfg): + _test_analytes_summary(tmp_path, analyte_summary_cfg, "nmbgmr") + + +def test_unify_analytes_bor_summary(tmp_path, analyte_summary_cfg): + # BOR locations are found within Otero County + analyte_summary_cfg.county = "otero" + analyte_summary_cfg.bbox = None + _test_analytes_summary(tmp_path, analyte_summary_cfg, "bor") + + +def test_unify_analytes_isc_seven_rivers_summary(tmp_path, analyte_summary_cfg): + _test_analytes_summary(tmp_path, analyte_summary_cfg, "iscsevenrivers") + + +def test_unify_analytes_dwb_summary(tmp_path, analyte_summary_cfg): + _test_analytes_summary(tmp_path, analyte_summary_cfg, "dwb") + + +# ============= EOF ============================================= From 8045586b332ff06e79494cdabedb02cb15babce4 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 11:33:43 -0600 Subject: [PATCH 059/143] Archive - but keep - old/outdated tests for reference --- tests/test_cli.py | 408 ------------------------------------ tests/test_unifier.py | 473 ------------------------------------------ 2 files changed, 881 deletions(-) delete mode 100644 tests/test_cli.py delete mode 100644 tests/test_unifier.py diff --git a/tests/test_cli.py b/tests/test_cli.py deleted file mode 100644 index 3d65365..0000000 --- a/tests/test_cli.py +++ /dev/null @@ -1,408 +0,0 @@ -# =============================================================================== -# Copyright 2024 Jake Ross -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in 
compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# =============================================================================== -import os - -from click.testing import CliRunner -from frontend.cli import analytes, waterlevels - - -def _tester(function, args, fail=False): - runner = CliRunner() - print(f"invoked with {args}") - result = runner.invoke(function, args) - print(f"result.exit_code={result.exit_code}") - print(f"result.output=\n{result.output}") - - if fail: - assert result.exit_code != 0 - else: - assert result.exit_code == 0 - - -def _make_args(source): - args = [] - if source: - nosources = [ - f - for f in ( - "--no-amp", - "--no-nwis", - "--no-pvacd", - "--no-bor", - "--no-dwb", - "--no-wqp", - "--no-isc-seven-rivers", - "--no-ckan", - ) - if f != f"--no-{source}" - ] - args += nosources - - args += ["--site-limit", 10, "--dry"] - - return args - - -def _make_tds_args(source): - return ["TDS"] + _make_args(source) - - -def _make_wl_args(source=None): - return _make_args(source) - - -def test_waterlevels_nwis(): - args = _make_wl_args("nwis") - _tester(waterlevels, args) - - -def test_waterlevels_pvacd(): - args = _make_wl_args("pvacd") - _tester(waterlevels, args) - - -def test_waterlevels_nmbgmr(): - args = _make_wl_args("nmbgmr") - _tester(waterlevels, args) - - -def test_waterlevels_isc_seven_rivers(): - args = _make_wl_args("iscsevenrivers") - _tester(waterlevels, args) - - -def test_waterlevels_invalid_source(): - args = _make_wl_args() - args.append("--no-foo") - _tester(waterlevels, args, fail=True) - - -def test_waterlevels_invalid_bbox(): - args = 
_make_wl_args() - args.append("--bbox") - _tester(waterlevels, args, fail=True) - - -def test_waterlevels_invalid_bbox_format(): - args = _make_wl_args() - args.extend(["--bbox", "1 2 3"]) - _tester(waterlevels, args, fail=True) - - -def test_waterlevels_valid_bbox_format(): - args = _make_wl_args() - args.extend(["--bbox", "1 2,3 4"]) - _tester(waterlevels, args) - - -def test_waterlevels_invalid_county(): - args = _make_wl_args() - args.append("--county") - _tester(waterlevels, args, fail=True) - - -def test_waterlevels_invalid_county_name(): - args = _make_wl_args() - args.extend(["--county", "foo"]) - _tester(waterlevels, args, fail=True) - - -# Analyte Tests ======================================================= -def test_analytes_wqp(): - args = _make_tds_args("wqp") - _tester(analytes, args) - - -def test_analytes_bor(): - args = _make_tds_args("bor") - _tester(analytes, args) - - -def test_analytes_amp(): - args = _make_tds_args("amp") - _tester(analytes, args) - - -def test_analytes_dwb(): - args = _make_tds_args("dwb") - _tester(analytes, args) - - -def test_analytes_isc_seven_rivers(): - args = _make_tds_args("isc-seven-rivers") - _tester(analytes, args) - - -def test_analytes_invalid_analyte(): - args = _make_args("wqp") - args[0] = "Foo" - _tester(analytes, args, fail=True) - - -def test_analytes_invalid_source(): - args = _make_tds_args("wqp") - args.append("--no-foo") - _tester(analytes, args, fail=True) - - -def test_analytes_invalid_bbox(): - args = _make_tds_args("wqp") - args.append("--bbox") - _tester(analytes, args, fail=True) - - -def test_analytes_invalid_bbox_format(): - args = _make_tds_args("wqp") - args.extend(["--bbox", "1 2 3"]) - _tester(analytes, args, fail=True) - - -def test_analytes_valid_bbox_format(): - args = _make_tds_args("wqp") - args.extend(["--bbox", "1 2,3 4"]) - _tester(analytes, args) - - -def test_analytes_invalid_county(): - args = _make_tds_args("wqp") - args.append("--county") - _tester(analytes, args, fail=True) - 
- -def test_analytes_invalid_county_name(): - args = _make_tds_args("wqp") - args.extend(["--county", "foo"]) - _tester(analytes, args, fail=True) - - -def test_waterlevels_date_range_YMD(): - args = _make_wl_args() - args.extend(["--start-date", "2020-01-01", "--end-date", "2020-05-01"]) - _tester(waterlevels, args) - - -def test_waterlevels_date_range_YM(): - args = _make_wl_args() - args.extend(["--start-date", "2020-01", "--end-date", "2020-05"]) - _tester(waterlevels, args) - - -def test_waterlevels_date_range_Y(): - args = _make_wl_args() - args.extend(["--start-date", "2020", "--end-date", "2021"]) - _tester(waterlevels, args) - - -def test_waterlevels_invalid_start(): - args = _make_wl_args() - args.extend(["--start-date", "x-01-01", "--end-date", "2019-05-01"]) - _tester(waterlevels, args, fail=True) - - -def test_waterlevels_invalid_end(): - args = _make_wl_args() - args.extend(["--start-date", "2020-01-01", "--end-date", "x-05-01"]) - _tester(waterlevels, args, fail=True) - - -# -# def _tester(source, func, county, bbox, args=None): -# runner = CliRunner() -# -# nosources = [ -# f -# for f in ( -# "--no-amp", -# "--no-nwis", -# "--no-st2", -# "--no-bor", -# "--no-dwb", -# "--no-wqp", -# "--no-isc-seven-rivers", -# "--no-ckan", -# ) -# if f != f"--no-{source}" -# ] -# -# dargs = nosources + ["--site-limit", 10] -# -# if args: -# args += dargs -# else: -# args = dargs -# -# if county: -# args.extend(("--county", county)) -# elif bbox: -# args.extend(("--bbox", bbox)) -# -# print(" ".join([str(f) for f in args])) -# result = runner.invoke(func, args) -# -# return result - - -# def _summary_tester(source, func, county=None, bbox=None, args=None): -# if not (county or bbox): -# county = "eddy" -# -# runner = CliRunner() -# # with runner.isolated_filesystem(): -# # result = _tester(source, func, county, bbox, args) -# # assert result.exit_code == 0 -# # assert os.path.isfile("output.csv") -# -# -# def _timeseries_tester( -# source, -# func, -# 
combined_flag=True, -# timeseries_flag=True, -# county=None, -# bbox=None, -# args=None, -# ): -# if args is None: -# args = [] -# # runner = CliRunner() -# # with runner.isolated_filesystem(): -# # result = _tester(source, func, county, bbox, args=args + ["--timeseries"]) -# # assert result.exit_code == 0 -# # print("combined", os.path.isfile("output.combined.csv"), combined_flag) -# # assert os.path.isfile("output.combined.csv") == combined_flag -# # print("timeseries", os.path.isdir("output_timeseries"), timeseries_flag) -# # assert os.path.isdir("output_timeseries") == timeseries_flag -# -# -# # ====== Analyte Tests ======================================================= -# def _analyte_summary_tester(key): -# _summary_tester(key, analytes, args=["TDS"]) -# -# -# def _analyte_county_tester(source, **kw): -# _timeseries_tester(source, analytes, args=["TDS"], county="eddy", **kw) -# -# -# def test_unify_analytes_amp(): -# _analyte_county_tester("amp", timeseries_flag=False) -# -# -# def test_unify_analytes_wqp(): -# _analyte_county_tester("wqp") -# -# -# def test_unify_analytes_bor(): -# _analyte_county_tester("bor", combined_flag=False) -# -# -# def test_unify_analytes_isc_seven_rivers(): -# _analyte_county_tester("isc-seven-rivers") -# -# -# def test_unify_analytes_dwb(): -# _analyte_county_tester("dwb", timeseries_flag=False) -# -# -# def test_unify_analytes_wqp_summary(): -# _analyte_summary_tester("wqp") -# -# -# def test_unify_analytes_bor_summary(): -# _analyte_summary_tester("bor") -# -# -# def test_unify_analytes_amp_summary(): -# _analyte_summary_tester("amp") -# -# -# def test_unify_analytes_dwb_summary(): -# _analyte_summary_tester("dwb") -# -# -# def test_unify_analytes_isc_seven_rivers_summary(): -# _analyte_summary_tester("isc-seven-rivers") - - -# ====== End Analyte Tests ======================================================= - - -# ====== Water Level Tests ======================================================= -# def 
_waterlevel_county_tester(source, **kw): -# _timeseries_tester(source, waterlevels, county="eddy", **kw) -# -# -# def _waterlevel_bbox_tester(source, **kw): -# _timeseries_tester(source, waterlevels, bbox="-104.5 32.5,-104 33", **kw) - -# -# def test_unify_waterlevels_nwis(): -# _waterlevel_county_tester("nwis", timeseries_flag=False) -# -# -# def test_unify_waterlevels_amp(): -# _waterlevel_county_tester("amp", timeseries_flag=False) -# -# -# def test_unify_waterlevels_st2(): -# _waterlevel_county_tester("st2", combined_flag=False) -# -# -# def test_unify_waterlevels_isc_seven_rivers(): -# _waterlevel_county_tester("isc-seven-rivers") -# -# -# def test_unify_waterlevels_ckan(): -# _waterlevel_county_tester("ckan") -# -# -# def test_unify_waterlevels_nwis_summary(): -# _summary_tester("nwis", waterlevels) -# -# -# def test_unify_waterlevels_amp_summary(): -# _summary_tester("amp", waterlevels) -# -# -# def test_unify_waterlevels_st2_summary(): -# _summary_tester("st2", waterlevels) -# -# -# def test_unify_waterlevels_isc_seven_rivers_summary(): -# _summary_tester("isc-seven-rivers", waterlevels) -# -# -# def test_unify_waterlevels_nwis_bbox(): -# _waterlevel_bbox_tester("nwis", timeseries_flag=False) -# -# -# def test_unify_waterlevels_amp_bbox(): -# _waterlevel_bbox_tester("amp") -# -# -# def test_unify_waterlevels_st2_bbox(): -# _waterlevel_bbox_tester("st2", combined_flag=False) -# -# -# def test_unify_waterlevels_isc_seven_rivers_bbox(): -# _waterlevel_bbox_tester("isc-seven-rivers", combined_flag=False) -# -# -# def test_unify_waterlevels_ckan_bbox(): -# _waterlevel_bbox_tester("ckan") - - -# ====== End Water Level Tests ======================================================= -# ============= EOF ============================================= diff --git a/tests/test_unifier.py b/tests/test_unifier.py deleted file mode 100644 index 3947ef6..0000000 --- a/tests/test_unifier.py +++ /dev/null @@ -1,473 +0,0 @@ -# 
=============================================================================== -# Copyright 2024 Jake Ross -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# =============================================================================== -import datetime -import os - -import pytest -import shapely.wkt - -from backend.config import Config -from backend.connectors.ckan import HONDO_RESOURCE_ID -from backend.unifier import unify_analytes, unify_waterlevels - - -def config_factory(): - cfg = Config() - cfg.county = "eddy" - cfg.bbox = "-104.5 32.5,-104 33" - cfg.start_date = "2020-01-01" - cfg.end_date = "2024-5-01" - cfg.output_summary = False - - cfg.use_source_nmbgmr = False - cfg.use_source_wqp = False - cfg.use_source_iscsevenrivers = False - cfg.use_source_nwis = False - cfg.use_source_oseroswell = False - cfg.use_source_pvacd = False - cfg.use_source_bor = False - cfg.use_source_dwb = False - cfg.use_source_bernco = False - - cfg.site_limit = 10 - return cfg - - -@pytest.fixture -def waterlevel_summary_cfg(): - cfg = config_factory() - cfg.output_summary = True - return cfg - - -@pytest.fixture -def waterlevel_timeseries_cfg(): - cfg = config_factory() - cfg.output_summary = False - return cfg - - -@pytest.fixture -def analyte_summary_cfg(): - cfg = config_factory() - cfg.output_summary = True - cfg.analyte = "TDS" - return cfg - - -# def test_unify_analytes(cfg): -# unify_analytes(cfg) - - -def _setup(tmp_path, cfg, source, tag): - d = tmp_path / tag - d.mkdir() - 
cfg.output_dir = str(d) - for stag in ( - "nmbgmr", - "nwis", - "pvacd", - "bor", - "dwb", - "wqp", - "iscsevenrivers", - "oseroswell", - "bernco", - ): - if stag == source: - setattr(cfg, f"use_source_{stag}", True) - return d - - -def _setup_waterlevels(tmp_path, cfg, source): - d = _setup(tmp_path, cfg, source, "waterlevels") - unify_waterlevels(cfg) - return d - - -def _setup_analytes(tmp_path, cfg, source): - d = _setup(tmp_path, cfg, source, "analyte") - unify_analytes(cfg) - return d - - -def _test_analytes_summary(tmp_path, cfg, source): - d = _setup_analytes(tmp_path, cfg, source) - assert (d / "output.csv").is_file() - - -def _test_waterlevels_summary(tmp_path, cfg, source): - d = _setup_waterlevels(tmp_path, cfg, source) - assert (d / "output.csv").is_file() - - -def _test_waterlevels_timeseries( - tmp_path, cfg, source, combined_flag=True, timeseries_flag=False -): - d = _setup_waterlevels(tmp_path, cfg, source) - combined = d / "output.combined.csv" - timeseries = d / "output_timeseries" - print(combined_flag) - - print("combined", combined.is_file(), combined_flag) - assert combined.is_file() == combined_flag - print("timeseries", timeseries.is_dir(), timeseries_flag) - assert timeseries.is_dir() == timeseries_flag - - return combined, timeseries - - -def _test_waterelevels_timeseries_date_range( - tmp_path, cfg, source, timeseries_flag=True, combined_flag=False -): - combined, timeseries = _test_waterlevels_timeseries( - tmp_path, - cfg, - source, - timeseries_flag=timeseries_flag, - combined_flag=combined_flag, - ) - - for p in timeseries.iterdir(): - if os.path.basename(p) == "sites.csv": - continue - - with open(p, "r") as rfile: - lines = rfile.readlines() - for l in lines[1:]: - vs = l.split(",") - dd = vs[3] - dd = datetime.datetime.strptime(dd, "%Y-%m-%d") - assert dd.year >= 2020 and dd.year <= 2024 - - -def test_nwis_site_health_check(): - from backend.connectors.usgs.source import NWISSiteSource - - n = NWISSiteSource() - assert n.health() 
- - -def test_nmbgmr_site_health_check(): - from backend.connectors.nmbgmr.source import NMBGMRSiteSource - - n = NMBGMRSiteSource() - assert n.health() - - -def test_wqp_site_health_check(): - from backend.connectors.wqp.source import WQPSiteSource - - n = WQPSiteSource() - assert n.health() - - -def test_bor_site_health_check(): - from backend.connectors.bor.source import BORSiteSource - - n = BORSiteSource() - assert n.health() - - -def test_dwb_site_health_check(): - from backend.connectors.nmenv.source import DWBSiteSource - - n = DWBSiteSource() - assert n.health() - - -def test_isc_seven_rivers_site_health_check(): - from backend.connectors.isc_seven_rivers.source import ISCSevenRiversSiteSource - - n = ISCSevenRiversSiteSource() - assert n.health() - - -def test_ckan_site_health_check(): - from backend.connectors.ckan.source import OSERoswellSiteSource - - n = OSERoswellSiteSource(HONDO_RESOURCE_ID) - assert n.health() - - -def test_pvacd_site_health_check(): - from backend.connectors.st2.source import PVACDSiteSource - - n = PVACDSiteSource() - assert n.health() - - -def test_bernco_site_health_check(): - from backend.connectors.st2.source import BernCoSiteSource - - n = BernCoSiteSource() - assert n.health() - - -# def test_ose_roswell_site_health_check(): -# from backend.connectors.ose_roswell.source import OSESiteSource -# n = OSESiteSource() -# assert n.health() - - -# Source tests ======================================================================================================== -def test_source_bounds_nmbgmr(): - from backend.unifier import get_source_bounds - from backend.connectors import NM_STATE_BOUNDING_POLYGON - - sourcekey = "nmbgmr" - bounds = get_source_bounds(sourcekey) - assert bounds - assert bounds.is_valid - assert bounds.geom_type == "Polygon" - assert bounds == NM_STATE_BOUNDING_POLYGON - - -def test_source_bounds_is_seven_rivers(): - from backend.unifier import get_source_bounds - from backend.connectors import 
ISC_SEVEN_RIVERS_BOUNDING_POLYGON - - sourcekey = "iscsevenrivers" - bounds = get_source_bounds(sourcekey) - assert bounds - assert bounds.is_valid - assert bounds.geom_type == "Polygon" - assert bounds == ISC_SEVEN_RIVERS_BOUNDING_POLYGON - - -def test_source_bounds_oser(): - from backend.unifier import get_source_bounds - from backend.connectors import ( - OSE_ROSWELL_HONDO_BOUNDING_POLYGON, - OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, - OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, - ) - - sourcekey = "oseroswell" - bounds = get_source_bounds(sourcekey) - assert bounds - assert bounds.is_valid - assert bounds.geom_type == "GeometryCollection" - assert bounds == shapely.GeometryCollection( - [ - OSE_ROSWELL_HONDO_BOUNDING_POLYGON, - OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, - OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, - ] - ) - - -def test_sources_socorro(tmp_path): - cfg = Config() - cfg.county = "socorro" - - from backend.unifier import get_sources - - sources = get_sources(cfg) - assert sources - assert len(sources) == 2 - assert sorted([s.__class__.__name__ for s in sources]) == sorted( - ["NMBGMRSiteSource", "NWISSiteSource"] - ) - - -def test_sources_eddy_dtw(tmp_path): - cfg = Config() - cfg.county = "eddy" - - from backend.unifier import get_sources - - sources = get_sources(cfg) - assert sources - assert len(sources) == 5 - assert sorted([s.__class__.__name__ for s in sources]) == sorted( - [ - "ISCSevenRiversSiteSource", - "NMBGMRSiteSource", - "OSERoswellSiteSource", - "PVACDSiteSource", - "NWISSiteSource", - ] - ) - - -def test_sources_eddy_tds(tmp_path): - cfg = Config() - cfg.county = "eddy" - cfg.analyte = "TDS" - - from backend.unifier import get_sources - - sources = get_sources(cfg) - assert sources - assert len(sources) == 5 - assert sorted([s.__class__.__name__ for s in sources]) == sorted( - [ - "BORSiteSource", - "DWBSiteSource", - "ISCSevenRiversSiteSource", - "NMBGMRSiteSource", - "WQPSiteSource", - ] - ) - - -# Waterlevel Summary tests 
=========================================================================================== -def test_unify_waterlevels_bernco_summary(tmp_path, waterlevel_summary_cfg): - waterlevel_summary_cfg.county = "bernalillo" - waterlevel_summary_cfg.bbox = None - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "bernco") - - -def test_unify_waterlevels_nwis_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nwis") - - -def test_unify_waterlevels_amp_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nmbgmr") - - -def test_unify_waterlevels_pvacd_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "pvacd") - - -def test_unify_waterlevels_isc_seven_rivers_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "iscsevenrivers") - - -def test_unify_waterlevels_ose_roswell_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "oseroswell") - - -# Waterlevel timeseries tests ========================================================================================= -def test_unify_waterlevels_nwis_timeseries(tmp_path, waterlevel_timeseries_cfg): - # there are one or more locations within the bounding box that have only - # one record, so there is a combined file - _test_waterlevels_timeseries( - tmp_path, - waterlevel_timeseries_cfg, - "nwis", - combined_flag=True, - timeseries_flag=True, - ) - - -def test_unify_waterlevels_amp_timeseries(tmp_path, waterlevel_timeseries_cfg): - _test_waterlevels_timeseries(tmp_path, waterlevel_timeseries_cfg, "nmbgmr") - - -def test_unify_waterlevels_pvacd_timeseries(tmp_path, waterlevel_timeseries_cfg): - # all locations within the bounding box have more than one record - # so there is no combined file - _test_waterlevels_timeseries( - tmp_path, - waterlevel_timeseries_cfg, - 
"pvacd", - combined_flag=False, - timeseries_flag=True, - ) - - -def test_unify_waterlevels_isc_seven_rivers_timeseries( - tmp_path, waterlevel_timeseries_cfg -): - # all locations within the bounding box have more than one record - # so there is no combined file - _test_waterlevels_timeseries( - tmp_path, - waterlevel_timeseries_cfg, - "iscsevenrivers", - combined_flag=False, - timeseries_flag=True, - ) - - -def test_unify_waterlevels_ose_roswell_timeseries(tmp_path, waterlevel_timeseries_cfg): - _test_waterlevels_timeseries( - tmp_path, waterlevel_timeseries_cfg, "oseroswell", timeseries_flag=True - ) - - -# Waterlevel summary date range tests ================================================================================= -def test_waterlevels_nwis_summary_date_range(tmp_path, waterlevel_summary_cfg): - d = _setup_waterlevels(tmp_path, waterlevel_summary_cfg, "nwis") - assert (d / "output.csv").is_file() - - -# Waterlevel timeseries date range ==================================================================================== -def test_waterlevels_nwis_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): - # there are one or more locations within the bounding box and date range - # that have only one record, so there is a combined file - _test_waterelevels_timeseries_date_range( - tmp_path, - waterlevel_timeseries_cfg, - "nwis", - timeseries_flag=True, - combined_flag=True, - ) - - -def test_waterlevels_isc_seven_rivers_timeseries_date_range( - tmp_path, waterlevel_timeseries_cfg -): - # all locations within the bounding box and date rangehave more than one - # record so there is no combined file - _test_waterelevels_timeseries_date_range( - tmp_path, - waterlevel_timeseries_cfg, - "iscsevenrivers", - timeseries_flag=True, - combined_flag=False, - ) - - -def test_waterlevels_pvacd_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): - # all locations within the bounding box and date rangehave more than one - # record so there is no combined file 
- _test_waterelevels_timeseries_date_range( - tmp_path, - waterlevel_timeseries_cfg, - "pvacd", - timeseries_flag=True, - combined_flag=False, - ) - - -# Analyte summary tests =============================================================================================== -def test_unify_analytes_wqp_summary(tmp_path, analyte_summary_cfg): - _test_analytes_summary(tmp_path, analyte_summary_cfg, "wqp") - - -def test_unify_analytes_amp_summary(tmp_path, analyte_summary_cfg): - _test_analytes_summary(tmp_path, analyte_summary_cfg, "nmbgmr") - - -def test_unify_analytes_bor_summary(tmp_path, analyte_summary_cfg): - # BOR locations are found within Otero County - analyte_summary_cfg.county = "otero" - analyte_summary_cfg.bbox = None - _test_analytes_summary(tmp_path, analyte_summary_cfg, "bor") - - -def test_unify_analytes_isc_seven_rivers_summary(tmp_path, analyte_summary_cfg): - _test_analytes_summary(tmp_path, analyte_summary_cfg, "iscsevenrivers") - - -def test_unify_analytes_dwb_summary(tmp_path, analyte_summary_cfg): - _test_analytes_summary(tmp_path, analyte_summary_cfg, "dwb") - - -# ============= EOF ============================================= From 4f894a01007f4d0f01fe500129a457e72c86a336 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 11:34:07 -0600 Subject: [PATCH 060/143] Setup BaseTestClass for reusability for all sources and tests --- tests/__init__.py | 184 +++++++++++++++++++++++++++++++--------------- 1 file changed, 123 insertions(+), 61 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index b0c0bdd..b1868e7 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,10 +1,27 @@ +from logging import shutdown as logger_shutdown from pathlib import Path import pytest -from backend.config import Config, SOURCE_KEYS, get_source +from backend.config import Config, SOURCE_KEYS from backend.constants import WATERLEVELS +from backend.logger import setup_logging +from backend.record import SummaryRecord, SiteRecord, 
ParameterRecord from backend.unifier import unify_analytes, unify_waterlevels +SUMMARY_RECORD_HEADERS = list(SummaryRecord.keys) +SITE_RECORD_HEADERS = list(SiteRecord.keys) +PARAMETER_RECORD_HEADERS = list(ParameterRecord.keys) + + +def recursively_clean_directory(path): + """Recursively delete all files and directories in the given path.""" + for item in path.iterdir(): + if item.is_dir(): + recursively_clean_directory(item) + else: + item.unlink() + path.rmdir() + class BaseTestClass: @@ -13,56 +30,88 @@ class BaseTestClass: agency = None # set set_limit for tests - site_limit = 8 + site_limit = 6 @pytest.fixture(autouse=True) def setup(self): - # Setup code + # SETUP CODE ---------------------------------------------------------- + # 1: setup test/config attributes self.config = Config() - for agency in SOURCE_KEYS: setattr(self.config, f"use_source_{agency}", False) - setattr(self.config, "site_limit", self.site_limit) setattr(self.config, "parameter", self.parameter) setattr(self.config, "units", self.units) setattr(self.config, f"use_source_{self.agency}", True) - self.config.finalize() - # run test + # 2: initiate logger + setup_logging(path=self.config.output_path) + + # RUN TESTS ------------------------------------------------------------ yield - # Teardown code + # UNIVERSAL ASSERTIONS ------------------------------------------------- + # 1: log file exists + log_path = Path(self.config.output_path) / "die.log" + assert log_path.exists() + + + # TEARDOWN CODE -------------------------------------------------------- + # 1: close logger to delete log file + logger_shutdown() + + # 2: delete newly created dirs and files path_to_clean = Path(self.config.output_path) print(f"Cleaning and removing {path_to_clean}") - for f in Path(path_to_clean).iterdir(): - f.unlink() - path_to_clean.rmdir() + recursively_clean_directory(path_to_clean) + + # reset test attributes self.dirs_to_delete = [] self.config = None self.unifier = None - def _unify(self): - 
self.unifier(self.config) + def _run_unifier(self): + if self.parameter == WATERLEVELS: + unify_waterlevels(self.config) + else: + unify_analytes(self.config) + + def _check_sites_file(self): + sites_file = Path(self.config.output_path) / "sites.csv" + assert sites_file.exists() + + with open(sites_file, "r") as f: + headers = f.readline().strip().split(",") + assert headers == SITE_RECORD_HEADERS + + # +1 for the header + with open(sites_file, "r") as f: + lines = f.readlines() + assert len(lines) == self.site_limit + 1 - def _test_health(self): + def _check_timeseries_file(self, timeseries_dir, timeseries_file_name): + timeseries_file = Path(timeseries_dir) / timeseries_file_name + assert timeseries_file.exists() + + with open(timeseries_file, "r") as f: + headers = f.readline().strip().split(",") + assert headers == PARAMETER_RECORD_HEADERS + + def test_health(self): # do a health check for the agency source = self.config.all_site_sources()[0][0] assert source.health() - def _test_summary(self): - # Arrange + def test_summary(self): + # Arrange -------------------------------------------------------------- self.config.output_summary = True self.config.report() - # Act - if self.parameter == WATERLEVELS: - unify_waterlevels(self.config) - else: - unify_analytes(self.config) + # Act ------------------------------------------------------------------ + self._run_unifier() - # Assert + # Assert --------------------------------------------------------------- # Check the summary file summary_file = Path(self.config.output_path) / "summary.csv" assert summary_file.exists() @@ -70,53 +119,66 @@ def _test_summary(self): # Check the column headers with open(summary_file, "r") as f: headers = f.readline().strip().split(",") - expected_headers = [ - "source", - "id", - "name", - "usgs_site_id", - "alternate_site_id", - "latitude", - "longitude", - "horizontal_datum", - "elevation", - "elevation_units", - "well_depth", - "well_depth_units", - "parameter_name", - 
"parameter_units", - "nrecords", - "min", - "max", - "mean", - "earliest_date", - "earliest_time", - "earliest_value", - "earliest_units", - "latest_date", - "latest_time", - "latest_value", - "latest_units", - ] - assert headers == expected_headers - - def _test_timeseries_unified(self): - pass + assert headers == SUMMARY_RECORD_HEADERS - def _test_timeseries_separated(self): - pass + # +1 for the header + with open(summary_file, "r") as f: + lines = f.readlines() + assert len(lines) == self.site_limit + 1 + + + def test_timeseries_unified(self): + # Arrange -------------------------------------------------------------- + self.config.output_timeseries_unified = True + self.config.report() + + # Act ------------------------------------------------------------------ + self._run_unifier() + + # Assert --------------------------------------------------------------- + # Check the sites file + self._check_sites_file() + + # Check the timeseries file + timeseries_dir = Path(self.config.output_path) + timeseries_file_name = "timeseries_unified.csv" + self._check_timeseries_file(timeseries_dir, timeseries_file_name) + + def test_timeseries_separated(self): + # Arrange -------------------------------------------------------------- + self.config.output_timeseries_separated = True + self.config.report() + + # Act ------------------------------------------------------------------ + self._run_unifier() + + # Assert --------------------------------------------------------------- + # Check the sites file + self._check_sites_file() + + # Check the timeseries files + timeseries_dir = Path(self.config.output_path) / "timeseries" + assert len([f for f in timeseries_dir.iterdir()]) == self.site_limit + + for timeseries_file in timeseries_dir.iterdir(): + self._check_timeseries_file(timeseries_dir, timeseries_file.name) - def _test_date_range(self): + @pytest.mark.skip(reason="Not implemented yet") + def test_date_range(self): pass - def _test_wkt(self): + @pytest.mark.skip(reason="Not 
implemented yet") + def test_wkt(self): pass - def _test_county(self): + @pytest.mark.skip(reason="Not implemented yet") + def test_county(self): pass - def _test_huc(self): + @pytest.mark.skip(reason="Not implemented yet") + def test_huc(self): pass - def _text_bbox(self): + @pytest.mark.skip(reason="Not implemented yet") + def text_bbox(self): pass From 49b5e9cf11462b88bd5ab8dbb041ef40561497c0 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 11:34:26 -0600 Subject: [PATCH 061/143] test NMBGMR --- tests/source_tests/test_nmbgmr.py | 34 +++++-------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/tests/source_tests/test_nmbgmr.py b/tests/source_tests/test_nmbgmr.py index 051c7da..b16c54f 100644 --- a/tests/source_tests/test_nmbgmr.py +++ b/tests/source_tests/test_nmbgmr.py @@ -1,4 +1,4 @@ -from backend.constants import WATERLEVELS +from backend.constants import WATERLEVELS, CALCIUM from tests import BaseTestClass import pytest @@ -10,32 +10,8 @@ class TestNMBGMRWaterlevels(BaseTestClass): units = "ft" agency = "nmbgmr_amp" - def test_health(self): - self._test_health() +class TestNMBGMRAnalyte(BaseTestClass): - def test_summary(self): - self._test_summary() - - @pytest.mark.skip(reason="Not implemented yet") - def test_timeseries_unified(self): - self._test_timeseries_unified() - - @pytest.mark.skip(reason="Not implemented yet") - def test_timeseries_separated(self): - self._test_timeseries_separated() - - @pytest.mark.skip(reason="Not implemented yet") - def test_date_range(self): - self._test_date_range() - - @pytest.mark.skip(reason="Not implemented yet") - def test_wkt(self): - self._test_wkt() - - @pytest.mark.skip(reason="Not implemented yet") - def test_county(self): - self._test_county() - - @pytest.mark.skip(reason="Not implemented yet") - def test_huc(self): - self._test_huc() + parameter = CALCIUM + units = "mg/l" + agency = "nmbgmr_amp" \ No newline at end of file From 
a94c102988674d12d7be020cd6f30fc8829c82da Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 12:03:19 -0600 Subject: [PATCH 062/143] Fix NMED DWB health check --- backend/connectors/nmenv/source.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index ae62c6b..2ba9694 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -27,8 +27,7 @@ DT_MEASURED, SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, - EARLIEST, - LATEST, + TDS ) from backend.source import get_analyte_search_param, get_terminal_record @@ -46,7 +45,7 @@ def __repr__(self): return "DWBSiteSource" def health(self): - return self.get_records(top=10, analyte="TDS") + return self.get_records(top=10, analyte=tds) def get_records(self, *args, **kw): From 0451649d3ee739621c035fb79ce6d8a9949817b3 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 12:03:48 -0600 Subject: [PATCH 063/143] Fix my fix --- backend/connectors/nmenv/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index 2ba9694..4d99d7a 100644 --- a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -45,7 +45,7 @@ def __repr__(self): return "DWBSiteSource" def health(self): - return self.get_records(top=10, analyte=tds) + return self.get_records(top=10, analyte=TDS) def get_records(self, *args, **kw): From 17812616e8b2158e12788e860b87bf9bd8d639f0 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 12:03:57 -0600 Subject: [PATCH 064/143] Initial setup for source tests --- tests/source_tests/test_bernco.py | 9 +++++++++ tests/source_tests/test_bor.py | 8 ++++++++ tests/source_tests/test_cabq.py | 9 +++++++++ tests/source_tests/test_ebid.py | 9 +++++++++ tests/source_tests/test_nmbgmr.py | 8 +++----- tests/source_tests/test_nmed_dwb.py | 9 +++++++++ 
tests/source_tests/test_nmose_isc_seven_rivers.py | 15 +++++++++++++++ tests/source_tests/test_nmose_roswell.py | 9 +++++++++ tests/source_tests/test_nwis.py | 9 +++++++++ tests/source_tests/test_pvacd.py | 9 +++++++++ tests/source_tests/test_wqp.py | 15 +++++++++++++++ 11 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 tests/source_tests/test_bernco.py create mode 100644 tests/source_tests/test_bor.py create mode 100644 tests/source_tests/test_cabq.py create mode 100644 tests/source_tests/test_ebid.py create mode 100644 tests/source_tests/test_nmed_dwb.py create mode 100644 tests/source_tests/test_nmose_isc_seven_rivers.py create mode 100644 tests/source_tests/test_nmose_roswell.py create mode 100644 tests/source_tests/test_nwis.py create mode 100644 tests/source_tests/test_pvacd.py create mode 100644 tests/source_tests/test_wqp.py diff --git a/tests/source_tests/test_bernco.py b/tests/source_tests/test_bernco.py new file mode 100644 index 0000000..f9306a4 --- /dev/null +++ b/tests/source_tests/test_bernco.py @@ -0,0 +1,9 @@ +from backend.constants import WATERLEVELS, FEET +from tests import BaseTestClass + + +class TestBernCoWaterlevels(BaseTestClass): + + parameter = WATERLEVELS + units = FEET + agency = "bernco" \ No newline at end of file diff --git a/tests/source_tests/test_bor.py b/tests/source_tests/test_bor.py new file mode 100644 index 0000000..089e831 --- /dev/null +++ b/tests/source_tests/test_bor.py @@ -0,0 +1,8 @@ +from backend.constants import CALCIUM, MILLIGRAMS_PER_LITER +from tests import BaseTestClass + +class TestBoRAnalyte(BaseTestClass): + + parameter = CALCIUM + units = MILLIGRAMS_PER_LITER + agency = "bor" \ No newline at end of file diff --git a/tests/source_tests/test_cabq.py b/tests/source_tests/test_cabq.py new file mode 100644 index 0000000..ae16ad0 --- /dev/null +++ b/tests/source_tests/test_cabq.py @@ -0,0 +1,9 @@ +from backend.constants import WATERLEVELS, FEET +from tests import BaseTestClass + + +class 
TestCABQWaterlevels(BaseTestClass): + + parameter = WATERLEVELS + units = FEET + agency = "cabq" \ No newline at end of file diff --git a/tests/source_tests/test_ebid.py b/tests/source_tests/test_ebid.py new file mode 100644 index 0000000..6a8bdd5 --- /dev/null +++ b/tests/source_tests/test_ebid.py @@ -0,0 +1,9 @@ +from backend.constants import WATERLEVELS, FEET +from tests import BaseTestClass + + +class TestEBIDWaterlevels(BaseTestClass): + + parameter = WATERLEVELS + units = FEET + agency = "ebid" \ No newline at end of file diff --git a/tests/source_tests/test_nmbgmr.py b/tests/source_tests/test_nmbgmr.py index b16c54f..b8a2cfb 100644 --- a/tests/source_tests/test_nmbgmr.py +++ b/tests/source_tests/test_nmbgmr.py @@ -1,17 +1,15 @@ -from backend.constants import WATERLEVELS, CALCIUM +from backend.constants import WATERLEVELS, CALCIUM, MILLIGRAMS_PER_LITER, FEET from tests import BaseTestClass -import pytest - class TestNMBGMRWaterlevels(BaseTestClass): parameter = WATERLEVELS - units = "ft" + units = FEET agency = "nmbgmr_amp" class TestNMBGMRAnalyte(BaseTestClass): parameter = CALCIUM - units = "mg/l" + units = MILLIGRAMS_PER_LITER agency = "nmbgmr_amp" \ No newline at end of file diff --git a/tests/source_tests/test_nmed_dwb.py b/tests/source_tests/test_nmed_dwb.py new file mode 100644 index 0000000..f50bef0 --- /dev/null +++ b/tests/source_tests/test_nmed_dwb.py @@ -0,0 +1,9 @@ +from backend.constants import CALCIUM, MILLIGRAMS_PER_LITER +from tests import BaseTestClass + + +class TestNMEDDWBAnalyte(BaseTestClass): + + parameter = CALCIUM + units = MILLIGRAMS_PER_LITER + agency = "nmed_dwb" \ No newline at end of file diff --git a/tests/source_tests/test_nmose_isc_seven_rivers.py b/tests/source_tests/test_nmose_isc_seven_rivers.py new file mode 100644 index 0000000..738d901 --- /dev/null +++ b/tests/source_tests/test_nmose_isc_seven_rivers.py @@ -0,0 +1,15 @@ +from backend.constants import WATERLEVELS, CALCIUM, FEET, MILLIGRAMS_PER_LITER +from tests import 
BaseTestClass + + +class TestNMOSEISCSevenRiversWaterlevels(BaseTestClass): + + parameter = WATERLEVELS + units = FEET + agency = "nmose_isc_seven_rivers" + +class TestNMOSEISCSevenRiversAnalyte(BaseTestClass): + + parameter = CALCIUM + units = MILLIGRAMS_PER_LITER + agency = "nmose_isc_seven_rivers" \ No newline at end of file diff --git a/tests/source_tests/test_nmose_roswell.py b/tests/source_tests/test_nmose_roswell.py new file mode 100644 index 0000000..ee82b4f --- /dev/null +++ b/tests/source_tests/test_nmose_roswell.py @@ -0,0 +1,9 @@ +from backend.constants import WATERLEVELS, FEET +from tests import BaseTestClass + + +class TestNMOSERoswellWaterlevels(BaseTestClass): + + parameter = WATERLEVELS + units = FEET + agency = "nmose_roswell" \ No newline at end of file diff --git a/tests/source_tests/test_nwis.py b/tests/source_tests/test_nwis.py new file mode 100644 index 0000000..f8c0903 --- /dev/null +++ b/tests/source_tests/test_nwis.py @@ -0,0 +1,9 @@ +from backend.constants import WATERLEVELS, FEET +from tests import BaseTestClass + + +class TestNWISWaterlevels(BaseTestClass): + + parameter = WATERLEVELS + units = FEET + agency = "nwis" \ No newline at end of file diff --git a/tests/source_tests/test_pvacd.py b/tests/source_tests/test_pvacd.py new file mode 100644 index 0000000..d9fc1da --- /dev/null +++ b/tests/source_tests/test_pvacd.py @@ -0,0 +1,9 @@ +from backend.constants import WATERLEVELS, FEET +from tests import BaseTestClass + + +class TestPVACDWaterlevels(BaseTestClass): + + parameter = WATERLEVELS + units = FEET + agency = "pvacd" \ No newline at end of file diff --git a/tests/source_tests/test_wqp.py b/tests/source_tests/test_wqp.py new file mode 100644 index 0000000..bcf5695 --- /dev/null +++ b/tests/source_tests/test_wqp.py @@ -0,0 +1,15 @@ +from backend.constants import WATERLEVELS, CALCIUM, MILLIGRAMS_PER_LITER, FEET +from tests import BaseTestClass + + +class TestWQPWaterlevels(BaseTestClass): + + parameter = WATERLEVELS + units = FEET + 
agency = "wqp" + +class TestWQPAnalyte(BaseTestClass): + + parameter = CALCIUM + units = MILLIGRAMS_PER_LITER + agency = "wqp" \ No newline at end of file From ef9de1ac658675bbabe0a938a7d02452f695c0d9 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 15:15:21 -0600 Subject: [PATCH 065/143] Fix site_limit trimming --- backend/unifier.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/unifier.py b/backend/unifier.py index 82cd27f..e9e6468 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -161,13 +161,17 @@ def _site_wrapper(site_source, parameter_source, persister, config): f"removing {num_sites_to_remove} to avoid exceeding the site limit" ) + # if sites_with_records_count == sit_limit then num_sites_to_remove = 0 + # and calling list[:0] will retur an empty list, so subtract + # num_sites_to_remove from the length of the list + # to remove the last num_sites_to_remove sites if use_summarize: - persister.records = persister.records[:-num_sites_to_remove] + persister.records = persister.records[:len(persister.records)-num_sites_to_remove] else: persister.timeseries = persister.timeseries[ - :-num_sites_to_remove + :len(persister.timeseries)-num_sites_to_remove ] - persister.sites = persister.sites[:-num_sites_to_remove] + persister.sites = persister.sites[:len(persister.sites)-num_sites_to_remove] break if type(site_records) == list: From 4ad367e913ded4486c9d0911d1a23d526fdcd98d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 15:29:15 -0600 Subject: [PATCH 066/143] Trim for site_limit at end of for loop for when site_limit < chunk_size This needs to be done at the end of the for loop to catche situations where site_simit < chunk_size, otherwise the trimming won't occur --- backend/unifier.py | 63 +++++++++++++++++++++++----------------------- tests/__init__.py | 2 +- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/backend/unifier.py b/backend/unifier.py index 
e9e6468..33f3773 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -143,37 +143,6 @@ def _site_wrapper(site_source, parameter_source, persister, config): persister.sites.extend(sites) else: for site_records in site_source.chunks(sites): - print( - "sites_with_records_count:", - sites_with_records_count, - "|", - "site_limit:", - site_limit, - "|", - "chunk_size:", - site_source.chunk_size, - ) - if site_limit: - if sites_with_records_count >= site_limit: - # remove any extra sites that were gathered. removes 0 if site_limit is not exceeded - num_sites_to_remove = sites_with_records_count - site_limit - print( - f"removing {num_sites_to_remove} to avoid exceeding the site limit" - ) - - # if sites_with_records_count == sit_limit then num_sites_to_remove = 0 - # and calling list[:0] will retur an empty list, so subtract - # num_sites_to_remove from the length of the list - # to remove the last num_sites_to_remove sites - if use_summarize: - persister.records = persister.records[:len(persister.records)-num_sites_to_remove] - else: - persister.timeseries = persister.timeseries[ - :len(persister.timeseries)-num_sites_to_remove - ] - persister.sites = persister.sites[:len(persister.sites)-num_sites_to_remove] - break - if type(site_records) == list: n = len(site_records) if first_flag: @@ -208,6 +177,38 @@ def _site_wrapper(site_source, parameter_source, persister, config): persister.timeseries.append((site, records)) persister.sites.append(site) + if site_limit: + print( + "sites_with_records_count:", + sites_with_records_count, + "|", + "site_limit:", + site_limit, + "|", + "chunk_size:", + site_source.chunk_size, + ) + + if sites_with_records_count >= site_limit: + # remove any extra sites that were gathered. 
removes 0 if site_limit is not exceeded + num_sites_to_remove = sites_with_records_count - site_limit + print( + f"removing {num_sites_to_remove} to avoid exceeding the site limit" + ) + + # if sites_with_records_count == sit_limit then num_sites_to_remove = 0 + # and calling list[:0] will retur an empty list, so subtract + # num_sites_to_remove from the length of the list + # to remove the last num_sites_to_remove sites + if use_summarize: + persister.records = persister.records[:len(persister.records)-num_sites_to_remove] + else: + persister.timeseries = persister.timeseries[ + :len(persister.timeseries)-num_sites_to_remove + ] + persister.sites = persister.sites[:len(persister.sites)-num_sites_to_remove] + break + except BaseException: import traceback diff --git a/tests/__init__.py b/tests/__init__.py index b1868e7..1861aef 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -30,7 +30,7 @@ class BaseTestClass: agency = None # set set_limit for tests - site_limit = 6 + site_limit = 3 @pytest.fixture(autouse=True) def setup(self): From 29b58630ad479829a17e3d83bc8245dcbd911d73 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Fri, 4 Apr 2025 21:31:37 +0000 Subject: [PATCH 067/143] Formatting changes --- backend/connectors/nmenv/source.py | 2 +- backend/unifier.py | 28 +++++++++++-------- tests/__init__.py | 10 +++---- tests/source_tests/test_bernco.py | 2 +- tests/source_tests/test_bor.py | 3 +- tests/source_tests/test_cabq.py | 2 +- tests/source_tests/test_ebid.py | 2 +- tests/source_tests/test_nmbgmr.py | 3 +- tests/source_tests/test_nmed_dwb.py | 2 +- .../test_nmose_isc_seven_rivers.py | 3 +- tests/source_tests/test_nmose_roswell.py | 2 +- tests/source_tests/test_nwis.py | 2 +- tests/source_tests/test_pvacd.py | 2 +- tests/source_tests/test_wqp.py | 3 +- 14 files changed, 36 insertions(+), 30 deletions(-) diff --git a/backend/connectors/nmenv/source.py b/backend/connectors/nmenv/source.py index 4d99d7a..08b1d68 100644 --- 
a/backend/connectors/nmenv/source.py +++ b/backend/connectors/nmenv/source.py @@ -27,7 +27,7 @@ DT_MEASURED, SOURCE_PARAMETER_NAME, SOURCE_PARAMETER_UNITS, - TDS + TDS, ) from backend.source import get_analyte_search_param, get_terminal_record diff --git a/backend/unifier.py b/backend/unifier.py index 33f3773..c6a8311 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -179,15 +179,15 @@ def _site_wrapper(site_source, parameter_source, persister, config): if site_limit: print( - "sites_with_records_count:", - sites_with_records_count, - "|", - "site_limit:", - site_limit, - "|", - "chunk_size:", - site_source.chunk_size, - ) + "sites_with_records_count:", + sites_with_records_count, + "|", + "site_limit:", + site_limit, + "|", + "chunk_size:", + site_source.chunk_size, + ) if sites_with_records_count >= site_limit: # remove any extra sites that were gathered. removes 0 if site_limit is not exceeded @@ -201,12 +201,16 @@ def _site_wrapper(site_source, parameter_source, persister, config): # num_sites_to_remove from the length of the list # to remove the last num_sites_to_remove sites if use_summarize: - persister.records = persister.records[:len(persister.records)-num_sites_to_remove] + persister.records = persister.records[ + : len(persister.records) - num_sites_to_remove + ] else: persister.timeseries = persister.timeseries[ - :len(persister.timeseries)-num_sites_to_remove + : len(persister.timeseries) - num_sites_to_remove + ] + persister.sites = persister.sites[ + : len(persister.sites) - num_sites_to_remove ] - persister.sites = persister.sites[:len(persister.sites)-num_sites_to_remove] break except BaseException: diff --git a/tests/__init__.py b/tests/__init__.py index 1861aef..52c810e 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -55,13 +55,12 @@ def setup(self): # 1: log file exists log_path = Path(self.config.output_path) / "die.log" assert log_path.exists() - - # TEARDOWN CODE -------------------------------------------------------- - # 
1: close logger to delete log file + # TEARDOWN CODE -------------------------------------------------------- + # 1: close logger to delete log file logger_shutdown() - # 2: delete newly created dirs and files + # 2: delete newly created dirs and files path_to_clean = Path(self.config.output_path) print(f"Cleaning and removing {path_to_clean}") recursively_clean_directory(path_to_clean) @@ -125,7 +124,6 @@ def test_summary(self): with open(summary_file, "r") as f: lines = f.readlines() assert len(lines) == self.site_limit + 1 - def test_timeseries_unified(self): # Arrange -------------------------------------------------------------- @@ -138,7 +136,7 @@ def test_timeseries_unified(self): # Assert --------------------------------------------------------------- # Check the sites file self._check_sites_file() - + # Check the timeseries file timeseries_dir = Path(self.config.output_path) timeseries_file_name = "timeseries_unified.csv" diff --git a/tests/source_tests/test_bernco.py b/tests/source_tests/test_bernco.py index f9306a4..b100481 100644 --- a/tests/source_tests/test_bernco.py +++ b/tests/source_tests/test_bernco.py @@ -6,4 +6,4 @@ class TestBernCoWaterlevels(BaseTestClass): parameter = WATERLEVELS units = FEET - agency = "bernco" \ No newline at end of file + agency = "bernco" diff --git a/tests/source_tests/test_bor.py b/tests/source_tests/test_bor.py index 089e831..77bf325 100644 --- a/tests/source_tests/test_bor.py +++ b/tests/source_tests/test_bor.py @@ -1,8 +1,9 @@ from backend.constants import CALCIUM, MILLIGRAMS_PER_LITER from tests import BaseTestClass + class TestBoRAnalyte(BaseTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER - agency = "bor" \ No newline at end of file + agency = "bor" diff --git a/tests/source_tests/test_cabq.py b/tests/source_tests/test_cabq.py index ae16ad0..b430d3a 100644 --- a/tests/source_tests/test_cabq.py +++ b/tests/source_tests/test_cabq.py @@ -6,4 +6,4 @@ class TestCABQWaterlevels(BaseTestClass): parameter = 
WATERLEVELS units = FEET - agency = "cabq" \ No newline at end of file + agency = "cabq" diff --git a/tests/source_tests/test_ebid.py b/tests/source_tests/test_ebid.py index 6a8bdd5..fa69e00 100644 --- a/tests/source_tests/test_ebid.py +++ b/tests/source_tests/test_ebid.py @@ -6,4 +6,4 @@ class TestEBIDWaterlevels(BaseTestClass): parameter = WATERLEVELS units = FEET - agency = "ebid" \ No newline at end of file + agency = "ebid" diff --git a/tests/source_tests/test_nmbgmr.py b/tests/source_tests/test_nmbgmr.py index b8a2cfb..4643bf0 100644 --- a/tests/source_tests/test_nmbgmr.py +++ b/tests/source_tests/test_nmbgmr.py @@ -8,8 +8,9 @@ class TestNMBGMRWaterlevels(BaseTestClass): units = FEET agency = "nmbgmr_amp" + class TestNMBGMRAnalyte(BaseTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER - agency = "nmbgmr_amp" \ No newline at end of file + agency = "nmbgmr_amp" diff --git a/tests/source_tests/test_nmed_dwb.py b/tests/source_tests/test_nmed_dwb.py index f50bef0..fff8a6e 100644 --- a/tests/source_tests/test_nmed_dwb.py +++ b/tests/source_tests/test_nmed_dwb.py @@ -6,4 +6,4 @@ class TestNMEDDWBAnalyte(BaseTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER - agency = "nmed_dwb" \ No newline at end of file + agency = "nmed_dwb" diff --git a/tests/source_tests/test_nmose_isc_seven_rivers.py b/tests/source_tests/test_nmose_isc_seven_rivers.py index 738d901..a0a5d28 100644 --- a/tests/source_tests/test_nmose_isc_seven_rivers.py +++ b/tests/source_tests/test_nmose_isc_seven_rivers.py @@ -8,8 +8,9 @@ class TestNMOSEISCSevenRiversWaterlevels(BaseTestClass): units = FEET agency = "nmose_isc_seven_rivers" + class TestNMOSEISCSevenRiversAnalyte(BaseTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER - agency = "nmose_isc_seven_rivers" \ No newline at end of file + agency = "nmose_isc_seven_rivers" diff --git a/tests/source_tests/test_nmose_roswell.py b/tests/source_tests/test_nmose_roswell.py index ee82b4f..4c1bd6b 100644 --- 
a/tests/source_tests/test_nmose_roswell.py +++ b/tests/source_tests/test_nmose_roswell.py @@ -6,4 +6,4 @@ class TestNMOSERoswellWaterlevels(BaseTestClass): parameter = WATERLEVELS units = FEET - agency = "nmose_roswell" \ No newline at end of file + agency = "nmose_roswell" diff --git a/tests/source_tests/test_nwis.py b/tests/source_tests/test_nwis.py index f8c0903..493b801 100644 --- a/tests/source_tests/test_nwis.py +++ b/tests/source_tests/test_nwis.py @@ -6,4 +6,4 @@ class TestNWISWaterlevels(BaseTestClass): parameter = WATERLEVELS units = FEET - agency = "nwis" \ No newline at end of file + agency = "nwis" diff --git a/tests/source_tests/test_pvacd.py b/tests/source_tests/test_pvacd.py index d9fc1da..edf5d48 100644 --- a/tests/source_tests/test_pvacd.py +++ b/tests/source_tests/test_pvacd.py @@ -6,4 +6,4 @@ class TestPVACDWaterlevels(BaseTestClass): parameter = WATERLEVELS units = FEET - agency = "pvacd" \ No newline at end of file + agency = "pvacd" diff --git a/tests/source_tests/test_wqp.py b/tests/source_tests/test_wqp.py index bcf5695..49e61d9 100644 --- a/tests/source_tests/test_wqp.py +++ b/tests/source_tests/test_wqp.py @@ -8,8 +8,9 @@ class TestWQPWaterlevels(BaseTestClass): units = FEET agency = "wqp" + class TestWQPAnalyte(BaseTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER - agency = "wqp" \ No newline at end of file + agency = "wqp" From 1c9f86432a44eb3744f935a7429ebefbd245ca56 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 15:45:55 -0600 Subject: [PATCH 068/143] Work on mypy linting from CICD --- backend/bounding_polygons.py | 2 +- backend/transformer.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index 9ac50aa..8445077 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -162,7 +162,7 @@ def get_county_polygon(name, as_wkt=True): _warning(f"Invalid state. 
{state}") -def get_state_polygon(state, buffer): +def get_state_polygon(state: str, buffer: int | None =None): statefp = _statelookup(state) if statefp: obj = _get_cached_object( diff --git a/backend/transformer.py b/backend/transformer.py index 229fc0c..b993578 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -131,7 +131,7 @@ def convert_units( output_units: str, source_parameter_name: str, die_parameter_name: str, - dt: str = None, + dt: str | None = None, ) -> tuple[float, float, str]: """ Converts the following units for any parameter value: @@ -198,7 +198,7 @@ def convert_units( the source_parameter_name (e.g. nitrate as n). """ if die_parameter_name == "ph": - conversion_factor = 1 + conversion_factor = 1.0 elif output_units == mgl: if input_units in ["mg/l caco3", "mg/l caco3**"]: if die_parameter_name == "bicarbonate": @@ -210,7 +210,7 @@ def convert_units( elif input_units == "mg/l as n": conversion_factor = 4.427 elif input_units in ["mg/l asno3", "mg/l as no3"]: - conversion_factor = 1 + conversion_factor = 1.0 elif input_units == "ug/l as n": conversion_factor = 0.004427 elif input_units == "pci/l": @@ -220,22 +220,22 @@ def convert_units( elif input_units == tpaf: conversion_factor = 735.47 elif input_units == ppm: - conversion_factor = 1 + conversion_factor = 1.0 elif input_units == output_units: if source_parameter_name in ["nitrate as n", "nitrate (as n)"]: conversion_factor = 4.427 else: - conversion_factor = 1 + conversion_factor = 1.0 elif output_units == ft: if input_units in [m, "meters"]: conversion_factor = 3.28084 elif input_units in [ft, "feet"]: - conversion_factor = 1 + conversion_factor = 1.0 elif output_units == m: if input_units in [ft, "feet"]: conversion_factor = 0.3048 elif input_units in [m, "meters"]: - conversion_factor = 1 + conversion_factor = 1.0 if conversion_factor: return input_value * conversion_factor, conversion_factor, warning @@ -395,7 +395,7 @@ def do_transform( # _transform is already implemented 
in each ParameterTransformer record = self._transform(inrecord, *args, **kw) if not record: - return + return None # ensure that a site or summary record is contained within the boundaing polygon if "longitude" in record and "latitude" in record: @@ -403,7 +403,7 @@ def do_transform( self.warn( f"Skipping site {record['id']}. It is not within the defined geographic bounds" ) - return + return None self._post_transform(record, *args, **kw) From 2f4432caecc872d8e912410d66fe9f3b8e8d7967 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 4 Apr 2025 15:54:34 -0600 Subject: [PATCH 069/143] Work on mypy changes --- backend/source.py | 6 +- tests/archived/test_cli.py | 274 ++++++------ tests/archived/test_unifier.py | 758 ++++++++++++++++----------------- 3 files changed, 519 insertions(+), 519 deletions(-) diff --git a/backend/source.py b/backend/source.py index acf47d9..5ca8c9b 100644 --- a/backend/source.py +++ b/backend/source.py @@ -19,7 +19,7 @@ import httpx import shapely.wkt from shapely import MultiPoint -from typing import Union, List +from typing import Union, List, Callable from backend.constants import ( MILLIGRAMS_PER_LITER, @@ -47,7 +47,7 @@ from backend.transformer import BaseTransformer, convert_units -def make_site_list(site_record: list | dict) -> list | str: +def make_site_list(site_record: list[SiteRecord] | SiteRecord) -> list | str: """ Returns a list of site ids, as defined by site_record @@ -67,7 +67,7 @@ def make_site_list(site_record: list | dict) -> list | str: return sites -def get_terminal_record(records: list, tag: Union[str, callable], bookend: str) -> dict: +def get_terminal_record(records: list, tag: Union[str, Callable], bookend: str) -> dict: """ Returns the most recent record based on the tag diff --git a/tests/archived/test_cli.py b/tests/archived/test_cli.py index 3d65365..ba8240e 100644 --- a/tests/archived/test_cli.py +++ b/tests/archived/test_cli.py @@ -13,209 +13,209 @@ # See the License for the specific language governing 
permissions and # limitations under the License. # =============================================================================== -import os +# import os -from click.testing import CliRunner -from frontend.cli import analytes, waterlevels +# from click.testing import CliRunner +# from frontend.cli import analytes, waterlevels -def _tester(function, args, fail=False): - runner = CliRunner() - print(f"invoked with {args}") - result = runner.invoke(function, args) - print(f"result.exit_code={result.exit_code}") - print(f"result.output=\n{result.output}") +# def _tester(function, args, fail=False): +# runner = CliRunner() +# print(f"invoked with {args}") +# result = runner.invoke(function, args) +# print(f"result.exit_code={result.exit_code}") +# print(f"result.output=\n{result.output}") - if fail: - assert result.exit_code != 0 - else: - assert result.exit_code == 0 +# if fail: +# assert result.exit_code != 0 +# else: +# assert result.exit_code == 0 -def _make_args(source): - args = [] - if source: - nosources = [ - f - for f in ( - "--no-amp", - "--no-nwis", - "--no-pvacd", - "--no-bor", - "--no-dwb", - "--no-wqp", - "--no-isc-seven-rivers", - "--no-ckan", - ) - if f != f"--no-{source}" - ] - args += nosources +# def _make_args(source): +# args = [] +# if source: +# nosources = [ +# f +# for f in ( +# "--no-amp", +# "--no-nwis", +# "--no-pvacd", +# "--no-bor", +# "--no-dwb", +# "--no-wqp", +# "--no-isc-seven-rivers", +# "--no-ckan", +# ) +# if f != f"--no-{source}" +# ] +# args += nosources - args += ["--site-limit", 10, "--dry"] +# args += ["--site-limit", 10, "--dry"] - return args +# return args -def _make_tds_args(source): - return ["TDS"] + _make_args(source) +# def _make_tds_args(source): +# return ["TDS"] + _make_args(source) -def _make_wl_args(source=None): - return _make_args(source) +# def _make_wl_args(source=None): +# return _make_args(source) -def test_waterlevels_nwis(): - args = _make_wl_args("nwis") - _tester(waterlevels, args) +# def 
test_waterlevels_nwis(): +# args = _make_wl_args("nwis") +# _tester(waterlevels, args) -def test_waterlevels_pvacd(): - args = _make_wl_args("pvacd") - _tester(waterlevels, args) +# def test_waterlevels_pvacd(): +# args = _make_wl_args("pvacd") +# _tester(waterlevels, args) -def test_waterlevels_nmbgmr(): - args = _make_wl_args("nmbgmr") - _tester(waterlevels, args) +# def test_waterlevels_nmbgmr(): +# args = _make_wl_args("nmbgmr") +# _tester(waterlevels, args) -def test_waterlevels_isc_seven_rivers(): - args = _make_wl_args("iscsevenrivers") - _tester(waterlevels, args) +# def test_waterlevels_isc_seven_rivers(): +# args = _make_wl_args("iscsevenrivers") +# _tester(waterlevels, args) -def test_waterlevels_invalid_source(): - args = _make_wl_args() - args.append("--no-foo") - _tester(waterlevels, args, fail=True) +# def test_waterlevels_invalid_source(): +# args = _make_wl_args() +# args.append("--no-foo") +# _tester(waterlevels, args, fail=True) -def test_waterlevels_invalid_bbox(): - args = _make_wl_args() - args.append("--bbox") - _tester(waterlevels, args, fail=True) +# def test_waterlevels_invalid_bbox(): +# args = _make_wl_args() +# args.append("--bbox") +# _tester(waterlevels, args, fail=True) -def test_waterlevels_invalid_bbox_format(): - args = _make_wl_args() - args.extend(["--bbox", "1 2 3"]) - _tester(waterlevels, args, fail=True) +# def test_waterlevels_invalid_bbox_format(): +# args = _make_wl_args() +# args.extend(["--bbox", "1 2 3"]) +# _tester(waterlevels, args, fail=True) -def test_waterlevels_valid_bbox_format(): - args = _make_wl_args() - args.extend(["--bbox", "1 2,3 4"]) - _tester(waterlevels, args) +# def test_waterlevels_valid_bbox_format(): +# args = _make_wl_args() +# args.extend(["--bbox", "1 2,3 4"]) +# _tester(waterlevels, args) -def test_waterlevels_invalid_county(): - args = _make_wl_args() - args.append("--county") - _tester(waterlevels, args, fail=True) +# def test_waterlevels_invalid_county(): +# args = _make_wl_args() +# 
args.append("--county") +# _tester(waterlevels, args, fail=True) -def test_waterlevels_invalid_county_name(): - args = _make_wl_args() - args.extend(["--county", "foo"]) - _tester(waterlevels, args, fail=True) +# def test_waterlevels_invalid_county_name(): +# args = _make_wl_args() +# args.extend(["--county", "foo"]) +# _tester(waterlevels, args, fail=True) -# Analyte Tests ======================================================= -def test_analytes_wqp(): - args = _make_tds_args("wqp") - _tester(analytes, args) +# # Analyte Tests ======================================================= +# def test_analytes_wqp(): +# args = _make_tds_args("wqp") +# _tester(analytes, args) -def test_analytes_bor(): - args = _make_tds_args("bor") - _tester(analytes, args) +# def test_analytes_bor(): +# args = _make_tds_args("bor") +# _tester(analytes, args) -def test_analytes_amp(): - args = _make_tds_args("amp") - _tester(analytes, args) +# def test_analytes_amp(): +# args = _make_tds_args("amp") +# _tester(analytes, args) -def test_analytes_dwb(): - args = _make_tds_args("dwb") - _tester(analytes, args) +# def test_analytes_dwb(): +# args = _make_tds_args("dwb") +# _tester(analytes, args) -def test_analytes_isc_seven_rivers(): - args = _make_tds_args("isc-seven-rivers") - _tester(analytes, args) +# def test_analytes_isc_seven_rivers(): +# args = _make_tds_args("isc-seven-rivers") +# _tester(analytes, args) -def test_analytes_invalid_analyte(): - args = _make_args("wqp") - args[0] = "Foo" - _tester(analytes, args, fail=True) +# def test_analytes_invalid_analyte(): +# args = _make_args("wqp") +# args[0] = "Foo" +# _tester(analytes, args, fail=True) -def test_analytes_invalid_source(): - args = _make_tds_args("wqp") - args.append("--no-foo") - _tester(analytes, args, fail=True) +# def test_analytes_invalid_source(): +# args = _make_tds_args("wqp") +# args.append("--no-foo") +# _tester(analytes, args, fail=True) -def test_analytes_invalid_bbox(): - args = _make_tds_args("wqp") - 
args.append("--bbox") - _tester(analytes, args, fail=True) +# def test_analytes_invalid_bbox(): +# args = _make_tds_args("wqp") +# args.append("--bbox") +# _tester(analytes, args, fail=True) -def test_analytes_invalid_bbox_format(): - args = _make_tds_args("wqp") - args.extend(["--bbox", "1 2 3"]) - _tester(analytes, args, fail=True) +# def test_analytes_invalid_bbox_format(): +# args = _make_tds_args("wqp") +# args.extend(["--bbox", "1 2 3"]) +# _tester(analytes, args, fail=True) -def test_analytes_valid_bbox_format(): - args = _make_tds_args("wqp") - args.extend(["--bbox", "1 2,3 4"]) - _tester(analytes, args) +# def test_analytes_valid_bbox_format(): +# args = _make_tds_args("wqp") +# args.extend(["--bbox", "1 2,3 4"]) +# _tester(analytes, args) -def test_analytes_invalid_county(): - args = _make_tds_args("wqp") - args.append("--county") - _tester(analytes, args, fail=True) +# def test_analytes_invalid_county(): +# args = _make_tds_args("wqp") +# args.append("--county") +# _tester(analytes, args, fail=True) -def test_analytes_invalid_county_name(): - args = _make_tds_args("wqp") - args.extend(["--county", "foo"]) - _tester(analytes, args, fail=True) +# def test_analytes_invalid_county_name(): +# args = _make_tds_args("wqp") +# args.extend(["--county", "foo"]) +# _tester(analytes, args, fail=True) -def test_waterlevels_date_range_YMD(): - args = _make_wl_args() - args.extend(["--start-date", "2020-01-01", "--end-date", "2020-05-01"]) - _tester(waterlevels, args) +# def test_waterlevels_date_range_YMD(): +# args = _make_wl_args() +# args.extend(["--start-date", "2020-01-01", "--end-date", "2020-05-01"]) +# _tester(waterlevels, args) -def test_waterlevels_date_range_YM(): - args = _make_wl_args() - args.extend(["--start-date", "2020-01", "--end-date", "2020-05"]) - _tester(waterlevels, args) +# def test_waterlevels_date_range_YM(): +# args = _make_wl_args() +# args.extend(["--start-date", "2020-01", "--end-date", "2020-05"]) +# _tester(waterlevels, args) -def 
test_waterlevels_date_range_Y(): - args = _make_wl_args() - args.extend(["--start-date", "2020", "--end-date", "2021"]) - _tester(waterlevels, args) +# def test_waterlevels_date_range_Y(): +# args = _make_wl_args() +# args.extend(["--start-date", "2020", "--end-date", "2021"]) +# _tester(waterlevels, args) -def test_waterlevels_invalid_start(): - args = _make_wl_args() - args.extend(["--start-date", "x-01-01", "--end-date", "2019-05-01"]) - _tester(waterlevels, args, fail=True) +# def test_waterlevels_invalid_start(): +# args = _make_wl_args() +# args.extend(["--start-date", "x-01-01", "--end-date", "2019-05-01"]) +# _tester(waterlevels, args, fail=True) -def test_waterlevels_invalid_end(): - args = _make_wl_args() - args.extend(["--start-date", "2020-01-01", "--end-date", "x-05-01"]) - _tester(waterlevels, args, fail=True) +# def test_waterlevels_invalid_end(): +# args = _make_wl_args() +# args.extend(["--start-date", "2020-01-01", "--end-date", "x-05-01"]) +# _tester(waterlevels, args, fail=True) # diff --git a/tests/archived/test_unifier.py b/tests/archived/test_unifier.py index 3947ef6..319c762 100644 --- a/tests/archived/test_unifier.py +++ b/tests/archived/test_unifier.py @@ -13,461 +13,461 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# =============================================================================== -import datetime -import os - -import pytest -import shapely.wkt - -from backend.config import Config -from backend.connectors.ckan import HONDO_RESOURCE_ID -from backend.unifier import unify_analytes, unify_waterlevels - - -def config_factory(): - cfg = Config() - cfg.county = "eddy" - cfg.bbox = "-104.5 32.5,-104 33" - cfg.start_date = "2020-01-01" - cfg.end_date = "2024-5-01" - cfg.output_summary = False - - cfg.use_source_nmbgmr = False - cfg.use_source_wqp = False - cfg.use_source_iscsevenrivers = False - cfg.use_source_nwis = False - cfg.use_source_oseroswell = False - cfg.use_source_pvacd = False - cfg.use_source_bor = False - cfg.use_source_dwb = False - cfg.use_source_bernco = False - - cfg.site_limit = 10 - return cfg - - -@pytest.fixture -def waterlevel_summary_cfg(): - cfg = config_factory() - cfg.output_summary = True - return cfg - - -@pytest.fixture -def waterlevel_timeseries_cfg(): - cfg = config_factory() - cfg.output_summary = False - return cfg - - -@pytest.fixture -def analyte_summary_cfg(): - cfg = config_factory() - cfg.output_summary = True - cfg.analyte = "TDS" - return cfg - - -# def test_unify_analytes(cfg): +# import datetime +# import os + +# import pytest +# import shapely.wkt + +# from backend.config import Config +# from backend.connectors.ckan import HONDO_RESOURCE_ID +# from backend.unifier import unify_analytes, unify_waterlevels + + +# def config_factory(): +# cfg = Config() +# cfg.county = "eddy" +# cfg.bbox = "-104.5 32.5,-104 33" +# cfg.start_date = "2020-01-01" +# cfg.end_date = "2024-5-01" +# cfg.output_summary = False + +# cfg.use_source_nmbgmr = False +# cfg.use_source_wqp = False +# cfg.use_source_iscsevenrivers = False +# cfg.use_source_nwis = False +# cfg.use_source_oseroswell = False +# cfg.use_source_pvacd = False +# cfg.use_source_bor = False +# cfg.use_source_dwb = False +# cfg.use_source_bernco = False + +# cfg.site_limit = 10 +# 
return cfg + + +# @pytest.fixture +# def waterlevel_summary_cfg(): +# cfg = config_factory() +# cfg.output_summary = True +# return cfg + + +# @pytest.fixture +# def waterlevel_timeseries_cfg(): +# cfg = config_factory() +# cfg.output_summary = False +# return cfg + + +# @pytest.fixture +# def analyte_summary_cfg(): +# cfg = config_factory() +# cfg.output_summary = True +# cfg.analyte = "TDS" +# return cfg + + +# # def test_unify_analytes(cfg): +# # unify_analytes(cfg) + + +# def _setup(tmp_path, cfg, source, tag): +# d = tmp_path / tag +# d.mkdir() +# cfg.output_dir = str(d) +# for stag in ( +# "nmbgmr", +# "nwis", +# "pvacd", +# "bor", +# "dwb", +# "wqp", +# "iscsevenrivers", +# "oseroswell", +# "bernco", +# ): +# if stag == source: +# setattr(cfg, f"use_source_{stag}", True) +# return d + + +# def _setup_waterlevels(tmp_path, cfg, source): +# d = _setup(tmp_path, cfg, source, "waterlevels") +# unify_waterlevels(cfg) +# return d + + +# def _setup_analytes(tmp_path, cfg, source): +# d = _setup(tmp_path, cfg, source, "analyte") # unify_analytes(cfg) +# return d -def _setup(tmp_path, cfg, source, tag): - d = tmp_path / tag - d.mkdir() - cfg.output_dir = str(d) - for stag in ( - "nmbgmr", - "nwis", - "pvacd", - "bor", - "dwb", - "wqp", - "iscsevenrivers", - "oseroswell", - "bernco", - ): - if stag == source: - setattr(cfg, f"use_source_{stag}", True) - return d - - -def _setup_waterlevels(tmp_path, cfg, source): - d = _setup(tmp_path, cfg, source, "waterlevels") - unify_waterlevels(cfg) - return d +# def _test_analytes_summary(tmp_path, cfg, source): +# d = _setup_analytes(tmp_path, cfg, source) +# assert (d / "output.csv").is_file() -def _setup_analytes(tmp_path, cfg, source): - d = _setup(tmp_path, cfg, source, "analyte") - unify_analytes(cfg) - return d +# def _test_waterlevels_summary(tmp_path, cfg, source): +# d = _setup_waterlevels(tmp_path, cfg, source) +# assert (d / "output.csv").is_file() -def _test_analytes_summary(tmp_path, cfg, source): - d = 
_setup_analytes(tmp_path, cfg, source) - assert (d / "output.csv").is_file() +# def _test_waterlevels_timeseries( +# tmp_path, cfg, source, combined_flag=True, timeseries_flag=False +# ): +# d = _setup_waterlevels(tmp_path, cfg, source) +# combined = d / "output.combined.csv" +# timeseries = d / "output_timeseries" +# print(combined_flag) +# print("combined", combined.is_file(), combined_flag) +# assert combined.is_file() == combined_flag +# print("timeseries", timeseries.is_dir(), timeseries_flag) +# assert timeseries.is_dir() == timeseries_flag -def _test_waterlevels_summary(tmp_path, cfg, source): - d = _setup_waterlevels(tmp_path, cfg, source) - assert (d / "output.csv").is_file() +# return combined, timeseries -def _test_waterlevels_timeseries( - tmp_path, cfg, source, combined_flag=True, timeseries_flag=False -): - d = _setup_waterlevels(tmp_path, cfg, source) - combined = d / "output.combined.csv" - timeseries = d / "output_timeseries" - print(combined_flag) +# def _test_waterelevels_timeseries_date_range( +# tmp_path, cfg, source, timeseries_flag=True, combined_flag=False +# ): +# combined, timeseries = _test_waterlevels_timeseries( +# tmp_path, +# cfg, +# source, +# timeseries_flag=timeseries_flag, +# combined_flag=combined_flag, +# ) - print("combined", combined.is_file(), combined_flag) - assert combined.is_file() == combined_flag - print("timeseries", timeseries.is_dir(), timeseries_flag) - assert timeseries.is_dir() == timeseries_flag +# for p in timeseries.iterdir(): +# if os.path.basename(p) == "sites.csv": +# continue - return combined, timeseries +# with open(p, "r") as rfile: +# lines = rfile.readlines() +# for l in lines[1:]: +# vs = l.split(",") +# dd = vs[3] +# dd = datetime.datetime.strptime(dd, "%Y-%m-%d") +# assert dd.year >= 2020 and dd.year <= 2024 -def _test_waterelevels_timeseries_date_range( - tmp_path, cfg, source, timeseries_flag=True, combined_flag=False -): - combined, timeseries = _test_waterlevels_timeseries( - tmp_path, - cfg, - 
source, - timeseries_flag=timeseries_flag, - combined_flag=combined_flag, - ) +# def test_nwis_site_health_check(): +# from backend.connectors.usgs.source import NWISSiteSource - for p in timeseries.iterdir(): - if os.path.basename(p) == "sites.csv": - continue - - with open(p, "r") as rfile: - lines = rfile.readlines() - for l in lines[1:]: - vs = l.split(",") - dd = vs[3] - dd = datetime.datetime.strptime(dd, "%Y-%m-%d") - assert dd.year >= 2020 and dd.year <= 2024 - - -def test_nwis_site_health_check(): - from backend.connectors.usgs.source import NWISSiteSource - - n = NWISSiteSource() - assert n.health() - - -def test_nmbgmr_site_health_check(): - from backend.connectors.nmbgmr.source import NMBGMRSiteSource +# n = NWISSiteSource() +# assert n.health() - n = NMBGMRSiteSource() - assert n.health() +# def test_nmbgmr_site_health_check(): +# from backend.connectors.nmbgmr.source import NMBGMRSiteSource -def test_wqp_site_health_check(): - from backend.connectors.wqp.source import WQPSiteSource +# n = NMBGMRSiteSource() +# assert n.health() - n = WQPSiteSource() - assert n.health() +# def test_wqp_site_health_check(): +# from backend.connectors.wqp.source import WQPSiteSource -def test_bor_site_health_check(): - from backend.connectors.bor.source import BORSiteSource +# n = WQPSiteSource() +# assert n.health() - n = BORSiteSource() - assert n.health() +# def test_bor_site_health_check(): +# from backend.connectors.bor.source import BORSiteSource -def test_dwb_site_health_check(): - from backend.connectors.nmenv.source import DWBSiteSource +# n = BORSiteSource() +# assert n.health() - n = DWBSiteSource() - assert n.health() +# def test_dwb_site_health_check(): +# from backend.connectors.nmenv.source import DWBSiteSource -def test_isc_seven_rivers_site_health_check(): - from backend.connectors.isc_seven_rivers.source import ISCSevenRiversSiteSource +# n = DWBSiteSource() +# assert n.health() - n = ISCSevenRiversSiteSource() - assert n.health() +# def 
test_isc_seven_rivers_site_health_check(): +# from backend.connectors.isc_seven_rivers.source import ISCSevenRiversSiteSource -def test_ckan_site_health_check(): - from backend.connectors.ckan.source import OSERoswellSiteSource +# n = ISCSevenRiversSiteSource() +# assert n.health() - n = OSERoswellSiteSource(HONDO_RESOURCE_ID) - assert n.health() +# def test_ckan_site_health_check(): +# from backend.connectors.ckan.source import OSERoswellSiteSource -def test_pvacd_site_health_check(): - from backend.connectors.st2.source import PVACDSiteSource +# n = OSERoswellSiteSource(HONDO_RESOURCE_ID) +# assert n.health() - n = PVACDSiteSource() - assert n.health() +# def test_pvacd_site_health_check(): +# from backend.connectors.st2.source import PVACDSiteSource -def test_bernco_site_health_check(): - from backend.connectors.st2.source import BernCoSiteSource +# n = PVACDSiteSource() +# assert n.health() - n = BernCoSiteSource() - assert n.health() +# def test_bernco_site_health_check(): +# from backend.connectors.st2.source import BernCoSiteSource -# def test_ose_roswell_site_health_check(): -# from backend.connectors.ose_roswell.source import OSESiteSource -# n = OSESiteSource() +# n = BernCoSiteSource() # assert n.health() -# Source tests ======================================================================================================== -def test_source_bounds_nmbgmr(): - from backend.unifier import get_source_bounds - from backend.connectors import NM_STATE_BOUNDING_POLYGON - - sourcekey = "nmbgmr" - bounds = get_source_bounds(sourcekey) - assert bounds - assert bounds.is_valid - assert bounds.geom_type == "Polygon" - assert bounds == NM_STATE_BOUNDING_POLYGON - - -def test_source_bounds_is_seven_rivers(): - from backend.unifier import get_source_bounds - from backend.connectors import ISC_SEVEN_RIVERS_BOUNDING_POLYGON +# # def test_ose_roswell_site_health_check(): +# # from backend.connectors.ose_roswell.source import OSESiteSource +# # n = OSESiteSource() +# # 
assert n.health() + + +# # Source tests ======================================================================================================== +# def test_source_bounds_nmbgmr(): +# from backend.unifier import get_source_bounds +# from backend.connectors import NM_STATE_BOUNDING_POLYGON + +# sourcekey = "nmbgmr" +# bounds = get_source_bounds(sourcekey) +# assert bounds +# assert bounds.is_valid +# assert bounds.geom_type == "Polygon" +# assert bounds == NM_STATE_BOUNDING_POLYGON + + +# def test_source_bounds_is_seven_rivers(): +# from backend.unifier import get_source_bounds +# from backend.connectors import ISC_SEVEN_RIVERS_BOUNDING_POLYGON + +# sourcekey = "iscsevenrivers" +# bounds = get_source_bounds(sourcekey) +# assert bounds +# assert bounds.is_valid +# assert bounds.geom_type == "Polygon" +# assert bounds == ISC_SEVEN_RIVERS_BOUNDING_POLYGON - sourcekey = "iscsevenrivers" - bounds = get_source_bounds(sourcekey) - assert bounds - assert bounds.is_valid - assert bounds.geom_type == "Polygon" - assert bounds == ISC_SEVEN_RIVERS_BOUNDING_POLYGON +# def test_source_bounds_oser(): +# from backend.unifier import get_source_bounds +# from backend.connectors import ( +# OSE_ROSWELL_HONDO_BOUNDING_POLYGON, +# OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, +# OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, +# ) -def test_source_bounds_oser(): - from backend.unifier import get_source_bounds - from backend.connectors import ( - OSE_ROSWELL_HONDO_BOUNDING_POLYGON, - OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, - OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, - ) +# sourcekey = "oseroswell" +# bounds = get_source_bounds(sourcekey) +# assert bounds +# assert bounds.is_valid +# assert bounds.geom_type == "GeometryCollection" +# assert bounds == shapely.GeometryCollection( +# [ +# OSE_ROSWELL_HONDO_BOUNDING_POLYGON, +# OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, +# OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, +# ] +# ) - sourcekey = "oseroswell" - bounds = get_source_bounds(sourcekey) - assert bounds - 
assert bounds.is_valid - assert bounds.geom_type == "GeometryCollection" - assert bounds == shapely.GeometryCollection( - [ - OSE_ROSWELL_HONDO_BOUNDING_POLYGON, - OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, - OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, - ] - ) +# def test_sources_socorro(tmp_path): +# cfg = Config() +# cfg.county = "socorro" -def test_sources_socorro(tmp_path): - cfg = Config() - cfg.county = "socorro" +# from backend.unifier import get_sources - from backend.unifier import get_sources +# sources = get_sources(cfg) +# assert sources +# assert len(sources) == 2 +# assert sorted([s.__class__.__name__ for s in sources]) == sorted( +# ["NMBGMRSiteSource", "NWISSiteSource"] +# ) - sources = get_sources(cfg) - assert sources - assert len(sources) == 2 - assert sorted([s.__class__.__name__ for s in sources]) == sorted( - ["NMBGMRSiteSource", "NWISSiteSource"] - ) +# def test_sources_eddy_dtw(tmp_path): +# cfg = Config() +# cfg.county = "eddy" -def test_sources_eddy_dtw(tmp_path): - cfg = Config() - cfg.county = "eddy" +# from backend.unifier import get_sources - from backend.unifier import get_sources - - sources = get_sources(cfg) - assert sources - assert len(sources) == 5 - assert sorted([s.__class__.__name__ for s in sources]) == sorted( - [ - "ISCSevenRiversSiteSource", - "NMBGMRSiteSource", - "OSERoswellSiteSource", - "PVACDSiteSource", - "NWISSiteSource", - ] - ) +# sources = get_sources(cfg) +# assert sources +# assert len(sources) == 5 +# assert sorted([s.__class__.__name__ for s in sources]) == sorted( +# [ +# "ISCSevenRiversSiteSource", +# "NMBGMRSiteSource", +# "OSERoswellSiteSource", +# "PVACDSiteSource", +# "NWISSiteSource", +# ] +# ) -def test_sources_eddy_tds(tmp_path): - cfg = Config() - cfg.county = "eddy" - cfg.analyte = "TDS" +# def test_sources_eddy_tds(tmp_path): +# cfg = Config() +# cfg.county = "eddy" +# cfg.analyte = "TDS" - from backend.unifier import get_sources +# from backend.unifier import get_sources - sources = get_sources(cfg) - 
assert sources - assert len(sources) == 5 - assert sorted([s.__class__.__name__ for s in sources]) == sorted( - [ - "BORSiteSource", - "DWBSiteSource", - "ISCSevenRiversSiteSource", - "NMBGMRSiteSource", - "WQPSiteSource", - ] - ) - - -# Waterlevel Summary tests =========================================================================================== -def test_unify_waterlevels_bernco_summary(tmp_path, waterlevel_summary_cfg): - waterlevel_summary_cfg.county = "bernalillo" - waterlevel_summary_cfg.bbox = None - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "bernco") - - -def test_unify_waterlevels_nwis_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nwis") - - -def test_unify_waterlevels_amp_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nmbgmr") - - -def test_unify_waterlevels_pvacd_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "pvacd") - - -def test_unify_waterlevels_isc_seven_rivers_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "iscsevenrivers") - - -def test_unify_waterlevels_ose_roswell_summary(tmp_path, waterlevel_summary_cfg): - _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "oseroswell") - - -# Waterlevel timeseries tests ========================================================================================= -def test_unify_waterlevels_nwis_timeseries(tmp_path, waterlevel_timeseries_cfg): - # there are one or more locations within the bounding box that have only - # one record, so there is a combined file - _test_waterlevels_timeseries( - tmp_path, - waterlevel_timeseries_cfg, - "nwis", - combined_flag=True, - timeseries_flag=True, - ) +# sources = get_sources(cfg) +# assert sources +# assert len(sources) == 5 +# assert sorted([s.__class__.__name__ for s in sources]) == sorted( +# [ +# 
"BORSiteSource", +# "DWBSiteSource", +# "ISCSevenRiversSiteSource", +# "NMBGMRSiteSource", +# "WQPSiteSource", +# ] +# ) + + +# # Waterlevel Summary tests =========================================================================================== +# def test_unify_waterlevels_bernco_summary(tmp_path, waterlevel_summary_cfg): +# waterlevel_summary_cfg.county = "bernalillo" +# waterlevel_summary_cfg.bbox = None +# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "bernco") + + +# def test_unify_waterlevels_nwis_summary(tmp_path, waterlevel_summary_cfg): +# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nwis") + + +# def test_unify_waterlevels_amp_summary(tmp_path, waterlevel_summary_cfg): +# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nmbgmr") + + +# def test_unify_waterlevels_pvacd_summary(tmp_path, waterlevel_summary_cfg): +# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "pvacd") + + +# def test_unify_waterlevels_isc_seven_rivers_summary(tmp_path, waterlevel_summary_cfg): +# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "iscsevenrivers") + + +# def test_unify_waterlevels_ose_roswell_summary(tmp_path, waterlevel_summary_cfg): +# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "oseroswell") + + +# # Waterlevel timeseries tests ========================================================================================= +# def test_unify_waterlevels_nwis_timeseries(tmp_path, waterlevel_timeseries_cfg): +# # there are one or more locations within the bounding box that have only +# # one record, so there is a combined file +# _test_waterlevels_timeseries( +# tmp_path, +# waterlevel_timeseries_cfg, +# "nwis", +# combined_flag=True, +# timeseries_flag=True, +# ) -def test_unify_waterlevels_amp_timeseries(tmp_path, waterlevel_timeseries_cfg): - _test_waterlevels_timeseries(tmp_path, waterlevel_timeseries_cfg, "nmbgmr") +# def test_unify_waterlevels_amp_timeseries(tmp_path, 
waterlevel_timeseries_cfg): +# _test_waterlevels_timeseries(tmp_path, waterlevel_timeseries_cfg, "nmbgmr") -def test_unify_waterlevels_pvacd_timeseries(tmp_path, waterlevel_timeseries_cfg): - # all locations within the bounding box have more than one record - # so there is no combined file - _test_waterlevels_timeseries( - tmp_path, - waterlevel_timeseries_cfg, - "pvacd", - combined_flag=False, - timeseries_flag=True, - ) +# def test_unify_waterlevels_pvacd_timeseries(tmp_path, waterlevel_timeseries_cfg): +# # all locations within the bounding box have more than one record +# # so there is no combined file +# _test_waterlevels_timeseries( +# tmp_path, +# waterlevel_timeseries_cfg, +# "pvacd", +# combined_flag=False, +# timeseries_flag=True, +# ) -def test_unify_waterlevels_isc_seven_rivers_timeseries( - tmp_path, waterlevel_timeseries_cfg -): - # all locations within the bounding box have more than one record - # so there is no combined file - _test_waterlevels_timeseries( - tmp_path, - waterlevel_timeseries_cfg, - "iscsevenrivers", - combined_flag=False, - timeseries_flag=True, - ) +# def test_unify_waterlevels_isc_seven_rivers_timeseries( +# tmp_path, waterlevel_timeseries_cfg +# ): +# # all locations within the bounding box have more than one record +# # so there is no combined file +# _test_waterlevels_timeseries( +# tmp_path, +# waterlevel_timeseries_cfg, +# "iscsevenrivers", +# combined_flag=False, +# timeseries_flag=True, +# ) -def test_unify_waterlevels_ose_roswell_timeseries(tmp_path, waterlevel_timeseries_cfg): - _test_waterlevels_timeseries( - tmp_path, waterlevel_timeseries_cfg, "oseroswell", timeseries_flag=True - ) +# def test_unify_waterlevels_ose_roswell_timeseries(tmp_path, waterlevel_timeseries_cfg): +# _test_waterlevels_timeseries( +# tmp_path, waterlevel_timeseries_cfg, "oseroswell", timeseries_flag=True +# ) -# Waterlevel summary date range tests ================================================================================= -def 
test_waterlevels_nwis_summary_date_range(tmp_path, waterlevel_summary_cfg): - d = _setup_waterlevels(tmp_path, waterlevel_summary_cfg, "nwis") - assert (d / "output.csv").is_file() +# # Waterlevel summary date range tests ================================================================================= +# def test_waterlevels_nwis_summary_date_range(tmp_path, waterlevel_summary_cfg): +# d = _setup_waterlevels(tmp_path, waterlevel_summary_cfg, "nwis") +# assert (d / "output.csv").is_file() -# Waterlevel timeseries date range ==================================================================================== -def test_waterlevels_nwis_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): - # there are one or more locations within the bounding box and date range - # that have only one record, so there is a combined file - _test_waterelevels_timeseries_date_range( - tmp_path, - waterlevel_timeseries_cfg, - "nwis", - timeseries_flag=True, - combined_flag=True, - ) +# # Waterlevel timeseries date range ==================================================================================== +# def test_waterlevels_nwis_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): +# # there are one or more locations within the bounding box and date range +# # that have only one record, so there is a combined file +# _test_waterelevels_timeseries_date_range( +# tmp_path, +# waterlevel_timeseries_cfg, +# "nwis", +# timeseries_flag=True, +# combined_flag=True, +# ) -def test_waterlevels_isc_seven_rivers_timeseries_date_range( - tmp_path, waterlevel_timeseries_cfg -): - # all locations within the bounding box and date rangehave more than one - # record so there is no combined file - _test_waterelevels_timeseries_date_range( - tmp_path, - waterlevel_timeseries_cfg, - "iscsevenrivers", - timeseries_flag=True, - combined_flag=False, - ) +# def test_waterlevels_isc_seven_rivers_timeseries_date_range( +# tmp_path, waterlevel_timeseries_cfg +# ): +# # all locations within the 
bounding box and date rangehave more than one +# # record so there is no combined file +# _test_waterelevels_timeseries_date_range( +# tmp_path, +# waterlevel_timeseries_cfg, +# "iscsevenrivers", +# timeseries_flag=True, +# combined_flag=False, +# ) -def test_waterlevels_pvacd_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): - # all locations within the bounding box and date rangehave more than one - # record so there is no combined file - _test_waterelevels_timeseries_date_range( - tmp_path, - waterlevel_timeseries_cfg, - "pvacd", - timeseries_flag=True, - combined_flag=False, - ) - - -# Analyte summary tests =============================================================================================== -def test_unify_analytes_wqp_summary(tmp_path, analyte_summary_cfg): - _test_analytes_summary(tmp_path, analyte_summary_cfg, "wqp") - - -def test_unify_analytes_amp_summary(tmp_path, analyte_summary_cfg): - _test_analytes_summary(tmp_path, analyte_summary_cfg, "nmbgmr") - - -def test_unify_analytes_bor_summary(tmp_path, analyte_summary_cfg): - # BOR locations are found within Otero County - analyte_summary_cfg.county = "otero" - analyte_summary_cfg.bbox = None - _test_analytes_summary(tmp_path, analyte_summary_cfg, "bor") +# def test_waterlevels_pvacd_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): +# # all locations within the bounding box and date rangehave more than one +# # record so there is no combined file +# _test_waterelevels_timeseries_date_range( +# tmp_path, +# waterlevel_timeseries_cfg, +# "pvacd", +# timeseries_flag=True, +# combined_flag=False, +# ) + + +# # Analyte summary tests =============================================================================================== +# def test_unify_analytes_wqp_summary(tmp_path, analyte_summary_cfg): +# _test_analytes_summary(tmp_path, analyte_summary_cfg, "wqp") + + +# def test_unify_analytes_amp_summary(tmp_path, analyte_summary_cfg): +# _test_analytes_summary(tmp_path, 
analyte_summary_cfg, "nmbgmr") + + +# def test_unify_analytes_bor_summary(tmp_path, analyte_summary_cfg): +# # BOR locations are found within Otero County +# analyte_summary_cfg.county = "otero" +# analyte_summary_cfg.bbox = None +# _test_analytes_summary(tmp_path, analyte_summary_cfg, "bor") -def test_unify_analytes_isc_seven_rivers_summary(tmp_path, analyte_summary_cfg): - _test_analytes_summary(tmp_path, analyte_summary_cfg, "iscsevenrivers") - - -def test_unify_analytes_dwb_summary(tmp_path, analyte_summary_cfg): - _test_analytes_summary(tmp_path, analyte_summary_cfg, "dwb") +# def test_unify_analytes_isc_seven_rivers_summary(tmp_path, analyte_summary_cfg): +# _test_analytes_summary(tmp_path, analyte_summary_cfg, "iscsevenrivers") + + +# def test_unify_analytes_dwb_summary(tmp_path, analyte_summary_cfg): +# _test_analytes_summary(tmp_path, analyte_summary_cfg, "dwb") # ============= EOF ============================================= From 2155fbd3438f9120f8129da5f5b2212dd0de8014 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 7 Apr 2025 10:19:04 -0600 Subject: [PATCH 070/143] type hint BaseTestClass class attributes --- tests/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 52c810e..f650d55 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,6 +1,7 @@ from logging import shutdown as logger_shutdown from pathlib import Path import pytest +from typing import Optional from backend.config import Config, SOURCE_KEYS from backend.constants import WATERLEVELS @@ -24,13 +25,12 @@ def recursively_clean_directory(path): class BaseTestClass: + parameter: str + units: str + agency: str - parameter = None - units = None - agency = None - - # set set_limit for tests - site_limit = 3 + # set site_limit for tests + site_limit: int = 3 @pytest.fixture(autouse=True) def setup(self): From f90a1c4cf1b5d0769d1165d193f5f9954bc2afa1 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 
7 Apr 2025 10:44:13 -0600 Subject: [PATCH 071/143] mypy typing --- backend/connectors/nmose/source.py | 22 +++++++++++----------- backend/unifier.py | 2 ++ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 92e6fb3..0f1ded0 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -1,4 +1,4 @@ -import os +from typing import List,Dict, Tuple from shapely import wkt from backend.connectors import NM_STATE_BOUNDING_POLYGON @@ -20,10 +20,10 @@ class NMOSEPODSiteSource(BaseSiteSource): """ transformer_klass = NMOSEPODSiteTransformer - chunk_size = 5000 + chunk_size: int = 5000 bounding_polygon = NM_STATE_BOUNDING_POLYGON - def get_records(self, *args, **kw) -> dict: + def get_records(self, *args, **kw) -> List[Dict]: config = self.config params = {} # if config.has_bounds(): @@ -37,25 +37,25 @@ def get_records(self, *args, **kw) -> dict: # if config.end_date: # params["endDt"] = config.end_dt.date().isoformat() - url = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" + url: str = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" - params["where"] = ( + params["where"]: Tuple = ( "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" ) - params["outFields"] = ( + params["outFields"]: Tuple = ( "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county,pod_name,pod_nbr,pod_suffix,pod_file" ) - params["outSR"] = 4326 - params["f"] = "json" - params["resultRecordCount"] = self.chunk_size - params["resultOffset"] = 0 + params["outSR"]: int = 4326 + params["f"]: str = "json" + params["resultRecordCount"]: int = self.chunk_size + params["resultOffset"]: int = 0 if 
config.has_bounds(): wkt = config.bounding_wkt() params["geometry"] = wkt_to_arcgis_json(wkt) params["geometryType"] = "esriGeometryPolygon" - records = [] + records: List = [] i = 1 while 1: rs = self._execute_json_request(url, params, tag="features") diff --git a/backend/unifier.py b/backend/unifier.py index c6a8311..5628746 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -39,6 +39,8 @@ def health_check(source: BaseSiteSource) -> bool: source = get_source(source) if source: return bool(source.health()) + else: + return None def unify_analytes(config): From 91af8ca39f82115ed6dec3711530ce666dfba542 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 7 Apr 2025 16:45:40 +0000 Subject: [PATCH 072/143] Formatting changes --- backend/bounding_polygons.py | 2 +- backend/connectors/nmose/source.py | 22 ++++++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/backend/bounding_polygons.py b/backend/bounding_polygons.py index 8445077..8a9ccd6 100644 --- a/backend/bounding_polygons.py +++ b/backend/bounding_polygons.py @@ -162,7 +162,7 @@ def get_county_polygon(name, as_wkt=True): _warning(f"Invalid state. 
{state}") -def get_state_polygon(state: str, buffer: int | None =None): +def get_state_polygon(state: str, buffer: int | None = None): statefp = _statelookup(state) if statefp: obj = _get_cached_object( diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 0f1ded0..005d5ca 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -1,4 +1,4 @@ -from typing import List,Dict, Tuple +from typing import List, Dict, Tuple from shapely import wkt from backend.connectors import NM_STATE_BOUNDING_POLYGON @@ -37,14 +37,20 @@ def get_records(self, *args, **kw) -> List[Dict]: # if config.end_date: # params["endDt"] = config.end_dt.date().isoformat() - url: str = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" - - params["where"]: Tuple = ( - "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" - ) - params["outFields"]: Tuple = ( - "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county,pod_name,pod_nbr,pod_suffix,pod_file" + url: str = ( + "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" ) + + params[ + "where" + ]: ( + Tuple + ) = "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" + params[ + "outFields" + ]: ( + Tuple + ) = "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county,pod_name,pod_nbr,pod_suffix,pod_file" params["outSR"]: int = 4326 params["f"]: str = "json" params["resultRecordCount"]: int = self.chunk_size From bcfdaf7d8b67fde9fd80de7aa3452426196513e5 Mon Sep 17 00:00:00 2001 From: jross Date: Wed, 9 Apr 
2025 13:19:08 -0600 Subject: [PATCH 073/143] added additional fields to pods site export --- backend/connectors/nmose/source.py | 8 ++++++-- backend/connectors/nmose/transformer.py | 10 +++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 53fe355..4fc03e4 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -43,8 +43,12 @@ def get_records(self, *args, **kw) -> dict: url = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" - params['where'] = "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" - params["outFields"] = "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county,pod_name,pod_nbr,pod_suffix,pod_file" + # params['where'] = "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS', + # 'G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" + #pods = 157127 + params['where'] = "pod_status = 'ACT' AND pod_basin NOT IN ('SP', 'SD', 'LWD')" + params["outFields"] = ("OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status," + "pod_name,pod_nbr,pod_suffix,pod_file,depth_well,aquifer,elevation") params["outSR"] = 4326 params["f"] = "json" params["resultRecordCount"] = self.chunk_size diff --git a/backend/connectors/nmose/transformer.py b/backend/connectors/nmose/transformer.py index 2ed99e6..7f6997f 100644 --- a/backend/connectors/nmose/transformer.py +++ b/backend/connectors/nmose/transformer.py @@ -24,12 +24,12 @@ def _transform(self, record) -> dict: # "name": record["station_nm"], "latitude": geometry["y"], "longitude": geometry["x"], - # "elevation": 
elevation, - # "elevation_units": "ft", + "elevation": properties['elevation'], + "elevation_units": "ft", # "horizontal_datum": datum, # "vertical_datum": record["alt_datum_cd"], - # "aquifer": record["nat_aqfr_cd"], - # "well_depth": record["well_depth_va"], - # "well_depth_units": "ft", + "aquifer": properties["aquifer"], + "well_depth": properties["depth_well"], + "well_depth_units": "ft", } return rec \ No newline at end of file From 53e29a12baf6588bcd7904bf16374192969b3c51 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 9 Apr 2025 16:07:54 -0600 Subject: [PATCH 074/143] Work on mypy type checking --- backend/connectors/nmose/source.py | 12 ++++++------ backend/source.py | 7 ++----- backend/unifier.py | 2 +- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index 0f1ded0..a78d8d6 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -39,16 +39,16 @@ def get_records(self, *args, **kw) -> List[Dict]: url: str = "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" - params["where"]: Tuple = ( + params["where"] = ( "pod_status = 'ACT' AND pod_basin IN ('A','B','C','CC','CD','CL','CP','CR','CT','E','FS','G','GSF','H', 'HA','HC','HS','HU','J','L','LA','LRG','LV','M','MR','NH','P','PL','PN','RA','RG','S','SB','SJ','SS','T','TU','UP','VV')" ) - params["outFields"]: Tuple = ( + params["outFields"] = ( "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county,pod_name,pod_nbr,pod_suffix,pod_file" ) - params["outSR"]: int = 4326 - params["f"]: str = "json" - params["resultRecordCount"]: int = self.chunk_size - params["resultOffset"]: int = 0 + params["outSR"] = 4326 + params["f"] = "json" + params["resultRecordCount"] = self.chunk_size + params["resultOffset"] = 0 if config.has_bounds(): wkt = config.bounding_wkt() diff --git a/backend/source.py b/backend/source.py index 
5ca8c9b..5884861 100644 --- a/backend/source.py +++ b/backend/source.py @@ -19,7 +19,7 @@ import httpx import shapely.wkt from shapely import MultiPoint -from typing import Union, List, Callable +from typing import Union, List, Callable, Dict from backend.constants import ( MILLIGRAMS_PER_LITER, @@ -289,7 +289,7 @@ def read(self, *args, **kw) -> list: # Methods That Need to be Implemented For Each Source # ========================================================================== - def get_records(self, *args, **kw) -> dict: + def get_records(self, *args, **kw) -> List[Dict]: """ Returns records as a dictionary, where the keys are site ids and the values are site or parameter records. @@ -837,9 +837,6 @@ def _extract_site_records(self, records: dict, site_record: dict) -> list: list a list of records for the site """ - if site_record.chunk_size == 1: - return records - raise NotImplementedError( f"{self.__class__.__name__} Must implement _extract_site_records" ) diff --git a/backend/unifier.py b/backend/unifier.py index 5628746..880369f 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -22,7 +22,7 @@ from backend.source import BaseSiteSource -def health_check(source: BaseSiteSource) -> bool: +def health_check(source: BaseSiteSource) -> bool | None: """ Determines if data can be returned from the source (if it is healthy) From 82eaf5342fc5212f7b7deeefc6a9a6133364d4ad Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 9 Apr 2025 16:36:31 -0600 Subject: [PATCH 075/143] Work on mypy type checking --- backend/transformer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/transformer.py b/backend/transformer.py index b993578..701866a 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -34,6 +34,7 @@ EARLIEST, LATEST, ) +from backend.config import Config from backend.geo_utils import datum_transform, ALLOWED_DATUMS from backend.logger import Loggable from backend.record import ( @@ -132,7 +133,7 @@ def 
convert_units( source_parameter_name: str, die_parameter_name: str, dt: str | None = None, -) -> tuple[float, float, str]: +) -> tuple[float, float | None, str]: """ Converts the following units for any parameter value: @@ -331,7 +332,7 @@ class BaseTransformer(Loggable): """ _cached_polygon = None - config = None + config: Config = None check_contained = True # ========================================================================== @@ -347,6 +348,7 @@ def do_transform( | AnalyteSummaryRecord | WaterLevelSummaryRecord | SummaryRecord + | None ): """ Transforms a record, site or parameter, into a standardized format. @@ -667,7 +669,7 @@ def _get_record_klass(self): class SiteTransformer(BaseTransformer): - def _get_record_klass(self) -> SiteRecord: + def _get_record_klass(self) -> type[SiteRecord]: """ Returns the SiteRecord class to use for the transformer for all site records @@ -786,7 +788,7 @@ def _transform_latest_record(self, record, site_id): class WaterLevelTransformer(ParameterTransformer): - def _get_record_klass(self) -> WaterLevelRecord | WaterLevelSummaryRecord: + def _get_record_klass(self) -> type[WaterLevelRecord] | type[WaterLevelSummaryRecord]: """ Returns the WaterLevelRecord class to use for the transformer for water level records if config.output_summary is False, otherwise From 761e4edca248ac8ed7852bf2f23d2c51cef847ff Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 9 Apr 2025 22:37:41 +0000 Subject: [PATCH 076/143] Formatting changes --- backend/transformer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/transformer.py b/backend/transformer.py index 701866a..4d9a1c4 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -788,7 +788,9 @@ def _transform_latest_record(self, record, site_id): class WaterLevelTransformer(ParameterTransformer): - def _get_record_klass(self) -> type[WaterLevelRecord] | type[WaterLevelSummaryRecord]: + def _get_record_klass( + self, + ) -> type[WaterLevelRecord] | 
type[WaterLevelSummaryRecord]: """ Returns the WaterLevelRecord class to use for the transformer for water level records if config.output_summary is False, otherwise From d90479b5d886f4eb887edc6923927dc36ff90719 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 10 Apr 2025 09:05:29 -0600 Subject: [PATCH 077/143] mypy.ini configuration file --- backend/transformer.py | 4 +- mypy.ini | 3 + tests/archived/test_cli.py | 624 +++++++++++++-------------- tests/archived/test_unifier.py | 758 ++++++++++++++++----------------- 4 files changed, 696 insertions(+), 693 deletions(-) create mode 100644 mypy.ini diff --git a/backend/transformer.py b/backend/transformer.py index 4d9a1c4..0c3796d 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -34,7 +34,6 @@ EARLIEST, LATEST, ) -from backend.config import Config from backend.geo_utils import datum_transform, ALLOWED_DATUMS from backend.logger import Loggable from backend.record import ( @@ -332,7 +331,7 @@ class BaseTransformer(Loggable): """ _cached_polygon = None - config: Config = None + config = None check_contained = True # ========================================================================== @@ -396,6 +395,7 @@ def do_transform( # _transform needs to be implemented by each SiteTransformer # _transform is already implemented in each ParameterTransformer record = self._transform(inrecord, *args, **kw) + print(type(record)) if not record: return None diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..380b366 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,3 @@ +[mypy] +ignore_missing_imports = True +exclude = ^(venv|.github|.mypy_cache|.pytest_cache|nmuwd.egg-info|__pycache__|build|tests/archived) \ No newline at end of file diff --git a/tests/archived/test_cli.py b/tests/archived/test_cli.py index ba8240e..6fa2baa 100644 --- a/tests/archived/test_cli.py +++ b/tests/archived/test_cli.py @@ -13,395 +13,395 @@ # See the License for the specific language governing permissions and # limitations 
under the License. # =============================================================================== -# import os +import os -# from click.testing import CliRunner -# from frontend.cli import analytes, waterlevels +from click.testing import CliRunner +from frontend.cli import analytes, waterlevels -# def _tester(function, args, fail=False): -# runner = CliRunner() -# print(f"invoked with {args}") -# result = runner.invoke(function, args) -# print(f"result.exit_code={result.exit_code}") -# print(f"result.output=\n{result.output}") +def _tester(function, args, fail=False): + runner = CliRunner() + print(f"invoked with {args}") + result = runner.invoke(function, args) + print(f"result.exit_code={result.exit_code}") + print(f"result.output=\n{result.output}") -# if fail: -# assert result.exit_code != 0 -# else: -# assert result.exit_code == 0 + if fail: + assert result.exit_code != 0 + else: + assert result.exit_code == 0 -# def _make_args(source): -# args = [] -# if source: -# nosources = [ -# f -# for f in ( -# "--no-amp", -# "--no-nwis", -# "--no-pvacd", -# "--no-bor", -# "--no-dwb", -# "--no-wqp", -# "--no-isc-seven-rivers", -# "--no-ckan", -# ) -# if f != f"--no-{source}" -# ] -# args += nosources +def _make_args(source): + args = [] + if source: + nosources = [ + f + for f in ( + "--no-amp", + "--no-nwis", + "--no-pvacd", + "--no-bor", + "--no-dwb", + "--no-wqp", + "--no-isc-seven-rivers", + "--no-ckan", + ) + if f != f"--no-{source}" + ] + args += nosources -# args += ["--site-limit", 10, "--dry"] + args += ["--site-limit", 10, "--dry"] -# return args + return args -# def _make_tds_args(source): -# return ["TDS"] + _make_args(source) +def _make_tds_args(source): + return ["TDS"] + _make_args(source) -# def _make_wl_args(source=None): -# return _make_args(source) +def _make_wl_args(source=None): + return _make_args(source) -# def test_waterlevels_nwis(): -# args = _make_wl_args("nwis") -# _tester(waterlevels, args) +def test_waterlevels_nwis(): + args = 
_make_wl_args("nwis") + _tester(waterlevels, args) -# def test_waterlevels_pvacd(): -# args = _make_wl_args("pvacd") -# _tester(waterlevels, args) +def test_waterlevels_pvacd(): + args = _make_wl_args("pvacd") + _tester(waterlevels, args) -# def test_waterlevels_nmbgmr(): -# args = _make_wl_args("nmbgmr") -# _tester(waterlevels, args) +def test_waterlevels_nmbgmr(): + args = _make_wl_args("nmbgmr") + _tester(waterlevels, args) -# def test_waterlevels_isc_seven_rivers(): -# args = _make_wl_args("iscsevenrivers") -# _tester(waterlevels, args) +def test_waterlevels_isc_seven_rivers(): + args = _make_wl_args("iscsevenrivers") + _tester(waterlevels, args) -# def test_waterlevels_invalid_source(): -# args = _make_wl_args() -# args.append("--no-foo") -# _tester(waterlevels, args, fail=True) +def test_waterlevels_invalid_source(): + args = _make_wl_args() + args.append("--no-foo") + _tester(waterlevels, args, fail=True) -# def test_waterlevels_invalid_bbox(): -# args = _make_wl_args() -# args.append("--bbox") -# _tester(waterlevels, args, fail=True) +def test_waterlevels_invalid_bbox(): + args = _make_wl_args() + args.append("--bbox") + _tester(waterlevels, args, fail=True) -# def test_waterlevels_invalid_bbox_format(): -# args = _make_wl_args() -# args.extend(["--bbox", "1 2 3"]) -# _tester(waterlevels, args, fail=True) +def test_waterlevels_invalid_bbox_format(): + args = _make_wl_args() + args.extend(["--bbox", "1 2 3"]) + _tester(waterlevels, args, fail=True) -# def test_waterlevels_valid_bbox_format(): -# args = _make_wl_args() -# args.extend(["--bbox", "1 2,3 4"]) -# _tester(waterlevels, args) +def test_waterlevels_valid_bbox_format(): + args = _make_wl_args() + args.extend(["--bbox", "1 2,3 4"]) + _tester(waterlevels, args) -# def test_waterlevels_invalid_county(): -# args = _make_wl_args() -# args.append("--county") -# _tester(waterlevels, args, fail=True) +def test_waterlevels_invalid_county(): + args = _make_wl_args() + args.append("--county") + 
_tester(waterlevels, args, fail=True) -# def test_waterlevels_invalid_county_name(): -# args = _make_wl_args() -# args.extend(["--county", "foo"]) -# _tester(waterlevels, args, fail=True) +def test_waterlevels_invalid_county_name(): + args = _make_wl_args() + args.extend(["--county", "foo"]) + _tester(waterlevels, args, fail=True) -# # Analyte Tests ======================================================= -# def test_analytes_wqp(): -# args = _make_tds_args("wqp") -# _tester(analytes, args) +# Analyte Tests ======================================================= +def test_analytes_wqp(): + args = _make_tds_args("wqp") + _tester(analytes, args) -# def test_analytes_bor(): -# args = _make_tds_args("bor") -# _tester(analytes, args) +def test_analytes_bor(): + args = _make_tds_args("bor") + _tester(analytes, args) -# def test_analytes_amp(): -# args = _make_tds_args("amp") -# _tester(analytes, args) +def test_analytes_amp(): + args = _make_tds_args("amp") + _tester(analytes, args) -# def test_analytes_dwb(): -# args = _make_tds_args("dwb") -# _tester(analytes, args) +def test_analytes_dwb(): + args = _make_tds_args("dwb") + _tester(analytes, args) -# def test_analytes_isc_seven_rivers(): -# args = _make_tds_args("isc-seven-rivers") -# _tester(analytes, args) +def test_analytes_isc_seven_rivers(): + args = _make_tds_args("isc-seven-rivers") + _tester(analytes, args) -# def test_analytes_invalid_analyte(): -# args = _make_args("wqp") -# args[0] = "Foo" -# _tester(analytes, args, fail=True) +def test_analytes_invalid_analyte(): + args = _make_args("wqp") + args[0] = "Foo" + _tester(analytes, args, fail=True) -# def test_analytes_invalid_source(): -# args = _make_tds_args("wqp") -# args.append("--no-foo") -# _tester(analytes, args, fail=True) +def test_analytes_invalid_source(): + args = _make_tds_args("wqp") + args.append("--no-foo") + _tester(analytes, args, fail=True) -# def test_analytes_invalid_bbox(): -# args = _make_tds_args("wqp") -# args.append("--bbox") -# 
_tester(analytes, args, fail=True) +def test_analytes_invalid_bbox(): + args = _make_tds_args("wqp") + args.append("--bbox") + _tester(analytes, args, fail=True) -# def test_analytes_invalid_bbox_format(): -# args = _make_tds_args("wqp") -# args.extend(["--bbox", "1 2 3"]) -# _tester(analytes, args, fail=True) +def test_analytes_invalid_bbox_format(): + args = _make_tds_args("wqp") + args.extend(["--bbox", "1 2 3"]) + _tester(analytes, args, fail=True) -# def test_analytes_valid_bbox_format(): -# args = _make_tds_args("wqp") -# args.extend(["--bbox", "1 2,3 4"]) -# _tester(analytes, args) +def test_analytes_valid_bbox_format(): + args = _make_tds_args("wqp") + args.extend(["--bbox", "1 2,3 4"]) + _tester(analytes, args) -# def test_analytes_invalid_county(): -# args = _make_tds_args("wqp") -# args.append("--county") -# _tester(analytes, args, fail=True) +def test_analytes_invalid_county(): + args = _make_tds_args("wqp") + args.append("--county") + _tester(analytes, args, fail=True) -# def test_analytes_invalid_county_name(): -# args = _make_tds_args("wqp") -# args.extend(["--county", "foo"]) -# _tester(analytes, args, fail=True) +def test_analytes_invalid_county_name(): + args = _make_tds_args("wqp") + args.extend(["--county", "foo"]) + _tester(analytes, args, fail=True) -# def test_waterlevels_date_range_YMD(): -# args = _make_wl_args() -# args.extend(["--start-date", "2020-01-01", "--end-date", "2020-05-01"]) -# _tester(waterlevels, args) +def test_waterlevels_date_range_YMD(): + args = _make_wl_args() + args.extend(["--start-date", "2020-01-01", "--end-date", "2020-05-01"]) + _tester(waterlevels, args) -# def test_waterlevels_date_range_YM(): -# args = _make_wl_args() -# args.extend(["--start-date", "2020-01", "--end-date", "2020-05"]) -# _tester(waterlevels, args) +def test_waterlevels_date_range_YM(): + args = _make_wl_args() + args.extend(["--start-date", "2020-01", "--end-date", "2020-05"]) + _tester(waterlevels, args) -# def test_waterlevels_date_range_Y(): 
-# args = _make_wl_args() -# args.extend(["--start-date", "2020", "--end-date", "2021"]) -# _tester(waterlevels, args) +def test_waterlevels_date_range_Y(): + args = _make_wl_args() + args.extend(["--start-date", "2020", "--end-date", "2021"]) + _tester(waterlevels, args) -# def test_waterlevels_invalid_start(): -# args = _make_wl_args() -# args.extend(["--start-date", "x-01-01", "--end-date", "2019-05-01"]) -# _tester(waterlevels, args, fail=True) +def test_waterlevels_invalid_start(): + args = _make_wl_args() + args.extend(["--start-date", "x-01-01", "--end-date", "2019-05-01"]) + _tester(waterlevels, args, fail=True) -# def test_waterlevels_invalid_end(): -# args = _make_wl_args() -# args.extend(["--start-date", "2020-01-01", "--end-date", "x-05-01"]) -# _tester(waterlevels, args, fail=True) +def test_waterlevels_invalid_end(): + args = _make_wl_args() + args.extend(["--start-date", "2020-01-01", "--end-date", "x-05-01"]) + _tester(waterlevels, args, fail=True) -# -# def _tester(source, func, county, bbox, args=None): -# runner = CliRunner() -# -# nosources = [ -# f -# for f in ( -# "--no-amp", -# "--no-nwis", -# "--no-st2", -# "--no-bor", -# "--no-dwb", -# "--no-wqp", -# "--no-isc-seven-rivers", -# "--no-ckan", -# ) -# if f != f"--no-{source}" -# ] -# -# dargs = nosources + ["--site-limit", 10] -# -# if args: -# args += dargs -# else: -# args = dargs -# -# if county: -# args.extend(("--county", county)) -# elif bbox: -# args.extend(("--bbox", bbox)) -# -# print(" ".join([str(f) for f in args])) -# result = runner.invoke(func, args) -# -# return result +def _tester(source, func, county, bbox, args=None): + runner = CliRunner() + + nosources = [ + f + for f in ( + "--no-amp", + "--no-nwis", + "--no-st2", + "--no-bor", + "--no-dwb", + "--no-wqp", + "--no-isc-seven-rivers", + "--no-ckan", + ) + if f != f"--no-{source}" + ] + + dargs = nosources + ["--site-limit", 10] + + if args: + args += dargs + else: + args = dargs + + if county: + args.extend(("--county", 
county)) + elif bbox: + args.extend(("--bbox", bbox)) + + print(" ".join([str(f) for f in args])) + result = runner.invoke(func, args) + + return result + + +def _summary_tester(source, func, county=None, bbox=None, args=None): + if not (county or bbox): + county = "eddy" + + runner = CliRunner() + # with runner.isolated_filesystem(): + # result = _tester(source, func, county, bbox, args) + # assert result.exit_code == 0 + # assert os.path.isfile("output.csv") + + +def _timeseries_tester( + source, + func, + combined_flag=True, + timeseries_flag=True, + county=None, + bbox=None, + args=None, +): + if args is None: + args = [] + # runner = CliRunner() + # with runner.isolated_filesystem(): + # result = _tester(source, func, county, bbox, args=args + ["--timeseries"]) + # assert result.exit_code == 0 + # print("combined", os.path.isfile("output.combined.csv"), combined_flag) + # assert os.path.isfile("output.combined.csv") == combined_flag + # print("timeseries", os.path.isdir("output_timeseries"), timeseries_flag) + # assert os.path.isdir("output_timeseries") == timeseries_flag + + +# ====== Analyte Tests ======================================================= +def _analyte_summary_tester(key): + _summary_tester(key, analytes, args=["TDS"]) + + +def _analyte_county_tester(source, **kw): + _timeseries_tester(source, analytes, args=["TDS"], county="eddy", **kw) + + +def test_unify_analytes_amp(): + _analyte_county_tester("amp", timeseries_flag=False) + + +def test_unify_analytes_wqp(): + _analyte_county_tester("wqp") + + +def test_unify_analytes_bor(): + _analyte_county_tester("bor", combined_flag=False) + + +def test_unify_analytes_isc_seven_rivers(): + _analyte_county_tester("isc-seven-rivers") + + +def test_unify_analytes_dwb(): + _analyte_county_tester("dwb", timeseries_flag=False) + + +def test_unify_analytes_wqp_summary(): + _analyte_summary_tester("wqp") + + +def test_unify_analytes_bor_summary(): + _analyte_summary_tester("bor") -# def _summary_tester(source, 
func, county=None, bbox=None, args=None): -# if not (county or bbox): -# county = "eddy" -# -# runner = CliRunner() -# # with runner.isolated_filesystem(): -# # result = _tester(source, func, county, bbox, args) -# # assert result.exit_code == 0 -# # assert os.path.isfile("output.csv") -# -# -# def _timeseries_tester( -# source, -# func, -# combined_flag=True, -# timeseries_flag=True, -# county=None, -# bbox=None, -# args=None, -# ): -# if args is None: -# args = [] -# # runner = CliRunner() -# # with runner.isolated_filesystem(): -# # result = _tester(source, func, county, bbox, args=args + ["--timeseries"]) -# # assert result.exit_code == 0 -# # print("combined", os.path.isfile("output.combined.csv"), combined_flag) -# # assert os.path.isfile("output.combined.csv") == combined_flag -# # print("timeseries", os.path.isdir("output_timeseries"), timeseries_flag) -# # assert os.path.isdir("output_timeseries") == timeseries_flag -# -# -# # ====== Analyte Tests ======================================================= -# def _analyte_summary_tester(key): -# _summary_tester(key, analytes, args=["TDS"]) -# -# -# def _analyte_county_tester(source, **kw): -# _timeseries_tester(source, analytes, args=["TDS"], county="eddy", **kw) -# -# -# def test_unify_analytes_amp(): -# _analyte_county_tester("amp", timeseries_flag=False) -# -# -# def test_unify_analytes_wqp(): -# _analyte_county_tester("wqp") -# -# -# def test_unify_analytes_bor(): -# _analyte_county_tester("bor", combined_flag=False) -# -# -# def test_unify_analytes_isc_seven_rivers(): -# _analyte_county_tester("isc-seven-rivers") -# -# -# def test_unify_analytes_dwb(): -# _analyte_county_tester("dwb", timeseries_flag=False) -# -# -# def test_unify_analytes_wqp_summary(): -# _analyte_summary_tester("wqp") -# -# -# def test_unify_analytes_bor_summary(): -# _analyte_summary_tester("bor") -# -# -# def test_unify_analytes_amp_summary(): -# _analyte_summary_tester("amp") -# -# -# def test_unify_analytes_dwb_summary(): -# 
_analyte_summary_tester("dwb") -# -# -# def test_unify_analytes_isc_seven_rivers_summary(): -# _analyte_summary_tester("isc-seven-rivers") + +def test_unify_analytes_amp_summary(): + _analyte_summary_tester("amp") + + +def test_unify_analytes_dwb_summary(): + _analyte_summary_tester("dwb") + + +def test_unify_analytes_isc_seven_rivers_summary(): + _analyte_summary_tester("isc-seven-rivers") # ====== End Analyte Tests ======================================================= # ====== Water Level Tests ======================================================= -# def _waterlevel_county_tester(source, **kw): -# _timeseries_tester(source, waterlevels, county="eddy", **kw) -# -# -# def _waterlevel_bbox_tester(source, **kw): -# _timeseries_tester(source, waterlevels, bbox="-104.5 32.5,-104 33", **kw) +def _waterlevel_county_tester(source, **kw): + _timeseries_tester(source, waterlevels, county="eddy", **kw) -# -# def test_unify_waterlevels_nwis(): -# _waterlevel_county_tester("nwis", timeseries_flag=False) -# -# -# def test_unify_waterlevels_amp(): -# _waterlevel_county_tester("amp", timeseries_flag=False) -# -# -# def test_unify_waterlevels_st2(): -# _waterlevel_county_tester("st2", combined_flag=False) -# -# -# def test_unify_waterlevels_isc_seven_rivers(): -# _waterlevel_county_tester("isc-seven-rivers") -# -# -# def test_unify_waterlevels_ckan(): -# _waterlevel_county_tester("ckan") -# -# -# def test_unify_waterlevels_nwis_summary(): -# _summary_tester("nwis", waterlevels) -# -# -# def test_unify_waterlevels_amp_summary(): -# _summary_tester("amp", waterlevels) -# -# -# def test_unify_waterlevels_st2_summary(): -# _summary_tester("st2", waterlevels) -# -# -# def test_unify_waterlevels_isc_seven_rivers_summary(): -# _summary_tester("isc-seven-rivers", waterlevels) -# -# -# def test_unify_waterlevels_nwis_bbox(): -# _waterlevel_bbox_tester("nwis", timeseries_flag=False) -# -# -# def test_unify_waterlevels_amp_bbox(): -# _waterlevel_bbox_tester("amp") -# -# -# def 
test_unify_waterlevels_st2_bbox(): -# _waterlevel_bbox_tester("st2", combined_flag=False) -# -# -# def test_unify_waterlevels_isc_seven_rivers_bbox(): -# _waterlevel_bbox_tester("isc-seven-rivers", combined_flag=False) -# -# -# def test_unify_waterlevels_ckan_bbox(): -# _waterlevel_bbox_tester("ckan") + +def _waterlevel_bbox_tester(source, **kw): + _timeseries_tester(source, waterlevels, bbox="-104.5 32.5,-104 33", **kw) + + +def test_unify_waterlevels_nwis(): + _waterlevel_county_tester("nwis", timeseries_flag=False) + + +def test_unify_waterlevels_amp(): + _waterlevel_county_tester("amp", timeseries_flag=False) + + +def test_unify_waterlevels_st2(): + _waterlevel_county_tester("st2", combined_flag=False) + + +def test_unify_waterlevels_isc_seven_rivers(): + _waterlevel_county_tester("isc-seven-rivers") + + +def test_unify_waterlevels_ckan(): + _waterlevel_county_tester("ckan") + + +def test_unify_waterlevels_nwis_summary(): + _summary_tester("nwis", waterlevels) + + +def test_unify_waterlevels_amp_summary(): + _summary_tester("amp", waterlevels) + + +def test_unify_waterlevels_st2_summary(): + _summary_tester("st2", waterlevels) + + +def test_unify_waterlevels_isc_seven_rivers_summary(): + _summary_tester("isc-seven-rivers", waterlevels) + + +def test_unify_waterlevels_nwis_bbox(): + _waterlevel_bbox_tester("nwis", timeseries_flag=False) + + +def test_unify_waterlevels_amp_bbox(): + _waterlevel_bbox_tester("amp") + + +def test_unify_waterlevels_st2_bbox(): + _waterlevel_bbox_tester("st2", combined_flag=False) + + +def test_unify_waterlevels_isc_seven_rivers_bbox(): + _waterlevel_bbox_tester("isc-seven-rivers", combined_flag=False) + + +def test_unify_waterlevels_ckan_bbox(): + _waterlevel_bbox_tester("ckan") # ====== End Water Level Tests ======================================================= diff --git a/tests/archived/test_unifier.py b/tests/archived/test_unifier.py index 319c762..3947ef6 100644 --- a/tests/archived/test_unifier.py +++ 
b/tests/archived/test_unifier.py @@ -13,461 +13,461 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== -# import datetime -# import os - -# import pytest -# import shapely.wkt - -# from backend.config import Config -# from backend.connectors.ckan import HONDO_RESOURCE_ID -# from backend.unifier import unify_analytes, unify_waterlevels - - -# def config_factory(): -# cfg = Config() -# cfg.county = "eddy" -# cfg.bbox = "-104.5 32.5,-104 33" -# cfg.start_date = "2020-01-01" -# cfg.end_date = "2024-5-01" -# cfg.output_summary = False - -# cfg.use_source_nmbgmr = False -# cfg.use_source_wqp = False -# cfg.use_source_iscsevenrivers = False -# cfg.use_source_nwis = False -# cfg.use_source_oseroswell = False -# cfg.use_source_pvacd = False -# cfg.use_source_bor = False -# cfg.use_source_dwb = False -# cfg.use_source_bernco = False - -# cfg.site_limit = 10 -# return cfg - - -# @pytest.fixture -# def waterlevel_summary_cfg(): -# cfg = config_factory() -# cfg.output_summary = True -# return cfg - - -# @pytest.fixture -# def waterlevel_timeseries_cfg(): -# cfg = config_factory() -# cfg.output_summary = False -# return cfg - - -# @pytest.fixture -# def analyte_summary_cfg(): -# cfg = config_factory() -# cfg.output_summary = True -# cfg.analyte = "TDS" -# return cfg - - -# # def test_unify_analytes(cfg): -# # unify_analytes(cfg) - - -# def _setup(tmp_path, cfg, source, tag): -# d = tmp_path / tag -# d.mkdir() -# cfg.output_dir = str(d) -# for stag in ( -# "nmbgmr", -# "nwis", -# "pvacd", -# "bor", -# "dwb", -# "wqp", -# "iscsevenrivers", -# "oseroswell", -# "bernco", -# ): -# if stag == source: -# setattr(cfg, f"use_source_{stag}", True) -# return d - - -# def _setup_waterlevels(tmp_path, cfg, source): -# d = _setup(tmp_path, cfg, source, "waterlevels") -# unify_waterlevels(cfg) -# return d - - -# def _setup_analytes(tmp_path, cfg, source): -# 
d = _setup(tmp_path, cfg, source, "analyte") +import datetime +import os + +import pytest +import shapely.wkt + +from backend.config import Config +from backend.connectors.ckan import HONDO_RESOURCE_ID +from backend.unifier import unify_analytes, unify_waterlevels + + +def config_factory(): + cfg = Config() + cfg.county = "eddy" + cfg.bbox = "-104.5 32.5,-104 33" + cfg.start_date = "2020-01-01" + cfg.end_date = "2024-5-01" + cfg.output_summary = False + + cfg.use_source_nmbgmr = False + cfg.use_source_wqp = False + cfg.use_source_iscsevenrivers = False + cfg.use_source_nwis = False + cfg.use_source_oseroswell = False + cfg.use_source_pvacd = False + cfg.use_source_bor = False + cfg.use_source_dwb = False + cfg.use_source_bernco = False + + cfg.site_limit = 10 + return cfg + + +@pytest.fixture +def waterlevel_summary_cfg(): + cfg = config_factory() + cfg.output_summary = True + return cfg + + +@pytest.fixture +def waterlevel_timeseries_cfg(): + cfg = config_factory() + cfg.output_summary = False + return cfg + + +@pytest.fixture +def analyte_summary_cfg(): + cfg = config_factory() + cfg.output_summary = True + cfg.analyte = "TDS" + return cfg + + +# def test_unify_analytes(cfg): # unify_analytes(cfg) -# return d -# def _test_analytes_summary(tmp_path, cfg, source): -# d = _setup_analytes(tmp_path, cfg, source) -# assert (d / "output.csv").is_file() +def _setup(tmp_path, cfg, source, tag): + d = tmp_path / tag + d.mkdir() + cfg.output_dir = str(d) + for stag in ( + "nmbgmr", + "nwis", + "pvacd", + "bor", + "dwb", + "wqp", + "iscsevenrivers", + "oseroswell", + "bernco", + ): + if stag == source: + setattr(cfg, f"use_source_{stag}", True) + return d -# def _test_waterlevels_summary(tmp_path, cfg, source): -# d = _setup_waterlevels(tmp_path, cfg, source) -# assert (d / "output.csv").is_file() +def _setup_waterlevels(tmp_path, cfg, source): + d = _setup(tmp_path, cfg, source, "waterlevels") + unify_waterlevels(cfg) + return d -# def _test_waterlevels_timeseries( -# 
tmp_path, cfg, source, combined_flag=True, timeseries_flag=False -# ): -# d = _setup_waterlevels(tmp_path, cfg, source) -# combined = d / "output.combined.csv" -# timeseries = d / "output_timeseries" -# print(combined_flag) +def _setup_analytes(tmp_path, cfg, source): + d = _setup(tmp_path, cfg, source, "analyte") + unify_analytes(cfg) + return d -# print("combined", combined.is_file(), combined_flag) -# assert combined.is_file() == combined_flag -# print("timeseries", timeseries.is_dir(), timeseries_flag) -# assert timeseries.is_dir() == timeseries_flag -# return combined, timeseries +def _test_analytes_summary(tmp_path, cfg, source): + d = _setup_analytes(tmp_path, cfg, source) + assert (d / "output.csv").is_file() -# def _test_waterelevels_timeseries_date_range( -# tmp_path, cfg, source, timeseries_flag=True, combined_flag=False -# ): -# combined, timeseries = _test_waterlevels_timeseries( -# tmp_path, -# cfg, -# source, -# timeseries_flag=timeseries_flag, -# combined_flag=combined_flag, -# ) +def _test_waterlevels_summary(tmp_path, cfg, source): + d = _setup_waterlevels(tmp_path, cfg, source) + assert (d / "output.csv").is_file() -# for p in timeseries.iterdir(): -# if os.path.basename(p) == "sites.csv": -# continue -# with open(p, "r") as rfile: -# lines = rfile.readlines() -# for l in lines[1:]: -# vs = l.split(",") -# dd = vs[3] -# dd = datetime.datetime.strptime(dd, "%Y-%m-%d") -# assert dd.year >= 2020 and dd.year <= 2024 +def _test_waterlevels_timeseries( + tmp_path, cfg, source, combined_flag=True, timeseries_flag=False +): + d = _setup_waterlevels(tmp_path, cfg, source) + combined = d / "output.combined.csv" + timeseries = d / "output_timeseries" + print(combined_flag) + print("combined", combined.is_file(), combined_flag) + assert combined.is_file() == combined_flag + print("timeseries", timeseries.is_dir(), timeseries_flag) + assert timeseries.is_dir() == timeseries_flag -# def test_nwis_site_health_check(): -# from backend.connectors.usgs.source 
import NWISSiteSource + return combined, timeseries -# n = NWISSiteSource() -# assert n.health() +def _test_waterelevels_timeseries_date_range( + tmp_path, cfg, source, timeseries_flag=True, combined_flag=False +): + combined, timeseries = _test_waterlevels_timeseries( + tmp_path, + cfg, + source, + timeseries_flag=timeseries_flag, + combined_flag=combined_flag, + ) -# def test_nmbgmr_site_health_check(): -# from backend.connectors.nmbgmr.source import NMBGMRSiteSource + for p in timeseries.iterdir(): + if os.path.basename(p) == "sites.csv": + continue -# n = NMBGMRSiteSource() -# assert n.health() + with open(p, "r") as rfile: + lines = rfile.readlines() + for l in lines[1:]: + vs = l.split(",") + dd = vs[3] + dd = datetime.datetime.strptime(dd, "%Y-%m-%d") + assert dd.year >= 2020 and dd.year <= 2024 -# def test_wqp_site_health_check(): -# from backend.connectors.wqp.source import WQPSiteSource +def test_nwis_site_health_check(): + from backend.connectors.usgs.source import NWISSiteSource -# n = WQPSiteSource() -# assert n.health() + n = NWISSiteSource() + assert n.health() -# def test_bor_site_health_check(): -# from backend.connectors.bor.source import BORSiteSource +def test_nmbgmr_site_health_check(): + from backend.connectors.nmbgmr.source import NMBGMRSiteSource -# n = BORSiteSource() -# assert n.health() + n = NMBGMRSiteSource() + assert n.health() -# def test_dwb_site_health_check(): -# from backend.connectors.nmenv.source import DWBSiteSource +def test_wqp_site_health_check(): + from backend.connectors.wqp.source import WQPSiteSource -# n = DWBSiteSource() -# assert n.health() + n = WQPSiteSource() + assert n.health() -# def test_isc_seven_rivers_site_health_check(): -# from backend.connectors.isc_seven_rivers.source import ISCSevenRiversSiteSource +def test_bor_site_health_check(): + from backend.connectors.bor.source import BORSiteSource -# n = ISCSevenRiversSiteSource() -# assert n.health() + n = BORSiteSource() + assert n.health() -# def 
test_ckan_site_health_check(): -# from backend.connectors.ckan.source import OSERoswellSiteSource +def test_dwb_site_health_check(): + from backend.connectors.nmenv.source import DWBSiteSource -# n = OSERoswellSiteSource(HONDO_RESOURCE_ID) -# assert n.health() + n = DWBSiteSource() + assert n.health() -# def test_pvacd_site_health_check(): -# from backend.connectors.st2.source import PVACDSiteSource +def test_isc_seven_rivers_site_health_check(): + from backend.connectors.isc_seven_rivers.source import ISCSevenRiversSiteSource -# n = PVACDSiteSource() -# assert n.health() + n = ISCSevenRiversSiteSource() + assert n.health() -# def test_bernco_site_health_check(): -# from backend.connectors.st2.source import BernCoSiteSource +def test_ckan_site_health_check(): + from backend.connectors.ckan.source import OSERoswellSiteSource + + n = OSERoswellSiteSource(HONDO_RESOURCE_ID) + assert n.health() -# n = BernCoSiteSource() -# assert n.health() +def test_pvacd_site_health_check(): + from backend.connectors.st2.source import PVACDSiteSource + + n = PVACDSiteSource() + assert n.health() + + +def test_bernco_site_health_check(): + from backend.connectors.st2.source import BernCoSiteSource + + n = BernCoSiteSource() + assert n.health() + + +# def test_ose_roswell_site_health_check(): +# from backend.connectors.ose_roswell.source import OSESiteSource +# n = OSESiteSource() +# assert n.health() -# # def test_ose_roswell_site_health_check(): -# # from backend.connectors.ose_roswell.source import OSESiteSource -# # n = OSESiteSource() -# # assert n.health() - - -# # Source tests ======================================================================================================== -# def test_source_bounds_nmbgmr(): -# from backend.unifier import get_source_bounds -# from backend.connectors import NM_STATE_BOUNDING_POLYGON - -# sourcekey = "nmbgmr" -# bounds = get_source_bounds(sourcekey) -# assert bounds -# assert bounds.is_valid -# assert bounds.geom_type == "Polygon" -# assert 
bounds == NM_STATE_BOUNDING_POLYGON - - -# def test_source_bounds_is_seven_rivers(): -# from backend.unifier import get_source_bounds -# from backend.connectors import ISC_SEVEN_RIVERS_BOUNDING_POLYGON -# sourcekey = "iscsevenrivers" -# bounds = get_source_bounds(sourcekey) -# assert bounds -# assert bounds.is_valid -# assert bounds.geom_type == "Polygon" -# assert bounds == ISC_SEVEN_RIVERS_BOUNDING_POLYGON +# Source tests ======================================================================================================== +def test_source_bounds_nmbgmr(): + from backend.unifier import get_source_bounds + from backend.connectors import NM_STATE_BOUNDING_POLYGON + + sourcekey = "nmbgmr" + bounds = get_source_bounds(sourcekey) + assert bounds + assert bounds.is_valid + assert bounds.geom_type == "Polygon" + assert bounds == NM_STATE_BOUNDING_POLYGON + + +def test_source_bounds_is_seven_rivers(): + from backend.unifier import get_source_bounds + from backend.connectors import ISC_SEVEN_RIVERS_BOUNDING_POLYGON + sourcekey = "iscsevenrivers" + bounds = get_source_bounds(sourcekey) + assert bounds + assert bounds.is_valid + assert bounds.geom_type == "Polygon" + assert bounds == ISC_SEVEN_RIVERS_BOUNDING_POLYGON -# def test_source_bounds_oser(): -# from backend.unifier import get_source_bounds -# from backend.connectors import ( -# OSE_ROSWELL_HONDO_BOUNDING_POLYGON, -# OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, -# OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, -# ) -# sourcekey = "oseroswell" -# bounds = get_source_bounds(sourcekey) -# assert bounds -# assert bounds.is_valid -# assert bounds.geom_type == "GeometryCollection" -# assert bounds == shapely.GeometryCollection( -# [ -# OSE_ROSWELL_HONDO_BOUNDING_POLYGON, -# OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, -# OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, -# ] -# ) +def test_source_bounds_oser(): + from backend.unifier import get_source_bounds + from backend.connectors import ( + OSE_ROSWELL_HONDO_BOUNDING_POLYGON, + 
OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, + OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, + ) + sourcekey = "oseroswell" + bounds = get_source_bounds(sourcekey) + assert bounds + assert bounds.is_valid + assert bounds.geom_type == "GeometryCollection" + assert bounds == shapely.GeometryCollection( + [ + OSE_ROSWELL_HONDO_BOUNDING_POLYGON, + OSE_ROSWELL_FORT_SUMNER_BOUNDING_POLYGON, + OSE_ROSWELL_ROSWELL_BOUNDING_POLYGON, + ] + ) -# def test_sources_socorro(tmp_path): -# cfg = Config() -# cfg.county = "socorro" -# from backend.unifier import get_sources +def test_sources_socorro(tmp_path): + cfg = Config() + cfg.county = "socorro" -# sources = get_sources(cfg) -# assert sources -# assert len(sources) == 2 -# assert sorted([s.__class__.__name__ for s in sources]) == sorted( -# ["NMBGMRSiteSource", "NWISSiteSource"] -# ) + from backend.unifier import get_sources + sources = get_sources(cfg) + assert sources + assert len(sources) == 2 + assert sorted([s.__class__.__name__ for s in sources]) == sorted( + ["NMBGMRSiteSource", "NWISSiteSource"] + ) -# def test_sources_eddy_dtw(tmp_path): -# cfg = Config() -# cfg.county = "eddy" -# from backend.unifier import get_sources +def test_sources_eddy_dtw(tmp_path): + cfg = Config() + cfg.county = "eddy" -# sources = get_sources(cfg) -# assert sources -# assert len(sources) == 5 -# assert sorted([s.__class__.__name__ for s in sources]) == sorted( -# [ -# "ISCSevenRiversSiteSource", -# "NMBGMRSiteSource", -# "OSERoswellSiteSource", -# "PVACDSiteSource", -# "NWISSiteSource", -# ] -# ) + from backend.unifier import get_sources + + sources = get_sources(cfg) + assert sources + assert len(sources) == 5 + assert sorted([s.__class__.__name__ for s in sources]) == sorted( + [ + "ISCSevenRiversSiteSource", + "NMBGMRSiteSource", + "OSERoswellSiteSource", + "PVACDSiteSource", + "NWISSiteSource", + ] + ) -# def test_sources_eddy_tds(tmp_path): -# cfg = Config() -# cfg.county = "eddy" -# cfg.analyte = "TDS" +def test_sources_eddy_tds(tmp_path): + cfg 
= Config() + cfg.county = "eddy" + cfg.analyte = "TDS" -# from backend.unifier import get_sources + from backend.unifier import get_sources -# sources = get_sources(cfg) -# assert sources -# assert len(sources) == 5 -# assert sorted([s.__class__.__name__ for s in sources]) == sorted( -# [ -# "BORSiteSource", -# "DWBSiteSource", -# "ISCSevenRiversSiteSource", -# "NMBGMRSiteSource", -# "WQPSiteSource", -# ] -# ) - - -# # Waterlevel Summary tests =========================================================================================== -# def test_unify_waterlevels_bernco_summary(tmp_path, waterlevel_summary_cfg): -# waterlevel_summary_cfg.county = "bernalillo" -# waterlevel_summary_cfg.bbox = None -# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "bernco") - - -# def test_unify_waterlevels_nwis_summary(tmp_path, waterlevel_summary_cfg): -# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nwis") - - -# def test_unify_waterlevels_amp_summary(tmp_path, waterlevel_summary_cfg): -# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nmbgmr") - - -# def test_unify_waterlevels_pvacd_summary(tmp_path, waterlevel_summary_cfg): -# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "pvacd") - - -# def test_unify_waterlevels_isc_seven_rivers_summary(tmp_path, waterlevel_summary_cfg): -# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "iscsevenrivers") - - -# def test_unify_waterlevels_ose_roswell_summary(tmp_path, waterlevel_summary_cfg): -# _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "oseroswell") - - -# # Waterlevel timeseries tests ========================================================================================= -# def test_unify_waterlevels_nwis_timeseries(tmp_path, waterlevel_timeseries_cfg): -# # there are one or more locations within the bounding box that have only -# # one record, so there is a combined file -# _test_waterlevels_timeseries( -# tmp_path, -# waterlevel_timeseries_cfg, -# 
"nwis", -# combined_flag=True, -# timeseries_flag=True, -# ) + sources = get_sources(cfg) + assert sources + assert len(sources) == 5 + assert sorted([s.__class__.__name__ for s in sources]) == sorted( + [ + "BORSiteSource", + "DWBSiteSource", + "ISCSevenRiversSiteSource", + "NMBGMRSiteSource", + "WQPSiteSource", + ] + ) + + +# Waterlevel Summary tests =========================================================================================== +def test_unify_waterlevels_bernco_summary(tmp_path, waterlevel_summary_cfg): + waterlevel_summary_cfg.county = "bernalillo" + waterlevel_summary_cfg.bbox = None + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "bernco") + + +def test_unify_waterlevels_nwis_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nwis") + + +def test_unify_waterlevels_amp_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "nmbgmr") + + +def test_unify_waterlevels_pvacd_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "pvacd") + + +def test_unify_waterlevels_isc_seven_rivers_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "iscsevenrivers") + + +def test_unify_waterlevels_ose_roswell_summary(tmp_path, waterlevel_summary_cfg): + _test_waterlevels_summary(tmp_path, waterlevel_summary_cfg, "oseroswell") + + +# Waterlevel timeseries tests ========================================================================================= +def test_unify_waterlevels_nwis_timeseries(tmp_path, waterlevel_timeseries_cfg): + # there are one or more locations within the bounding box that have only + # one record, so there is a combined file + _test_waterlevels_timeseries( + tmp_path, + waterlevel_timeseries_cfg, + "nwis", + combined_flag=True, + timeseries_flag=True, + ) -# def test_unify_waterlevels_amp_timeseries(tmp_path, 
waterlevel_timeseries_cfg): -# _test_waterlevels_timeseries(tmp_path, waterlevel_timeseries_cfg, "nmbgmr") +def test_unify_waterlevels_amp_timeseries(tmp_path, waterlevel_timeseries_cfg): + _test_waterlevels_timeseries(tmp_path, waterlevel_timeseries_cfg, "nmbgmr") -# def test_unify_waterlevels_pvacd_timeseries(tmp_path, waterlevel_timeseries_cfg): -# # all locations within the bounding box have more than one record -# # so there is no combined file -# _test_waterlevels_timeseries( -# tmp_path, -# waterlevel_timeseries_cfg, -# "pvacd", -# combined_flag=False, -# timeseries_flag=True, -# ) +def test_unify_waterlevels_pvacd_timeseries(tmp_path, waterlevel_timeseries_cfg): + # all locations within the bounding box have more than one record + # so there is no combined file + _test_waterlevels_timeseries( + tmp_path, + waterlevel_timeseries_cfg, + "pvacd", + combined_flag=False, + timeseries_flag=True, + ) -# def test_unify_waterlevels_isc_seven_rivers_timeseries( -# tmp_path, waterlevel_timeseries_cfg -# ): -# # all locations within the bounding box have more than one record -# # so there is no combined file -# _test_waterlevels_timeseries( -# tmp_path, -# waterlevel_timeseries_cfg, -# "iscsevenrivers", -# combined_flag=False, -# timeseries_flag=True, -# ) +def test_unify_waterlevels_isc_seven_rivers_timeseries( + tmp_path, waterlevel_timeseries_cfg +): + # all locations within the bounding box have more than one record + # so there is no combined file + _test_waterlevels_timeseries( + tmp_path, + waterlevel_timeseries_cfg, + "iscsevenrivers", + combined_flag=False, + timeseries_flag=True, + ) -# def test_unify_waterlevels_ose_roswell_timeseries(tmp_path, waterlevel_timeseries_cfg): -# _test_waterlevels_timeseries( -# tmp_path, waterlevel_timeseries_cfg, "oseroswell", timeseries_flag=True -# ) +def test_unify_waterlevels_ose_roswell_timeseries(tmp_path, waterlevel_timeseries_cfg): + _test_waterlevels_timeseries( + tmp_path, waterlevel_timeseries_cfg, "oseroswell", 
timeseries_flag=True + ) -# # Waterlevel summary date range tests ================================================================================= -# def test_waterlevels_nwis_summary_date_range(tmp_path, waterlevel_summary_cfg): -# d = _setup_waterlevels(tmp_path, waterlevel_summary_cfg, "nwis") -# assert (d / "output.csv").is_file() +# Waterlevel summary date range tests ================================================================================= +def test_waterlevels_nwis_summary_date_range(tmp_path, waterlevel_summary_cfg): + d = _setup_waterlevels(tmp_path, waterlevel_summary_cfg, "nwis") + assert (d / "output.csv").is_file() -# # Waterlevel timeseries date range ==================================================================================== -# def test_waterlevels_nwis_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): -# # there are one or more locations within the bounding box and date range -# # that have only one record, so there is a combined file -# _test_waterelevels_timeseries_date_range( -# tmp_path, -# waterlevel_timeseries_cfg, -# "nwis", -# timeseries_flag=True, -# combined_flag=True, -# ) +# Waterlevel timeseries date range ==================================================================================== +def test_waterlevels_nwis_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): + # there are one or more locations within the bounding box and date range + # that have only one record, so there is a combined file + _test_waterelevels_timeseries_date_range( + tmp_path, + waterlevel_timeseries_cfg, + "nwis", + timeseries_flag=True, + combined_flag=True, + ) -# def test_waterlevels_isc_seven_rivers_timeseries_date_range( -# tmp_path, waterlevel_timeseries_cfg -# ): -# # all locations within the bounding box and date rangehave more than one -# # record so there is no combined file -# _test_waterelevels_timeseries_date_range( -# tmp_path, -# waterlevel_timeseries_cfg, -# "iscsevenrivers", -# timeseries_flag=True, -# 
combined_flag=False, -# ) +def test_waterlevels_isc_seven_rivers_timeseries_date_range( + tmp_path, waterlevel_timeseries_cfg +): + # all locations within the bounding box and date rangehave more than one + # record so there is no combined file + _test_waterelevels_timeseries_date_range( + tmp_path, + waterlevel_timeseries_cfg, + "iscsevenrivers", + timeseries_flag=True, + combined_flag=False, + ) -# def test_waterlevels_pvacd_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): -# # all locations within the bounding box and date rangehave more than one -# # record so there is no combined file -# _test_waterelevels_timeseries_date_range( -# tmp_path, -# waterlevel_timeseries_cfg, -# "pvacd", -# timeseries_flag=True, -# combined_flag=False, -# ) - - -# # Analyte summary tests =============================================================================================== -# def test_unify_analytes_wqp_summary(tmp_path, analyte_summary_cfg): -# _test_analytes_summary(tmp_path, analyte_summary_cfg, "wqp") - - -# def test_unify_analytes_amp_summary(tmp_path, analyte_summary_cfg): -# _test_analytes_summary(tmp_path, analyte_summary_cfg, "nmbgmr") - - -# def test_unify_analytes_bor_summary(tmp_path, analyte_summary_cfg): -# # BOR locations are found within Otero County -# analyte_summary_cfg.county = "otero" -# analyte_summary_cfg.bbox = None -# _test_analytes_summary(tmp_path, analyte_summary_cfg, "bor") +def test_waterlevels_pvacd_timeseries_date_range(tmp_path, waterlevel_timeseries_cfg): + # all locations within the bounding box and date rangehave more than one + # record so there is no combined file + _test_waterelevels_timeseries_date_range( + tmp_path, + waterlevel_timeseries_cfg, + "pvacd", + timeseries_flag=True, + combined_flag=False, + ) + + +# Analyte summary tests =============================================================================================== +def test_unify_analytes_wqp_summary(tmp_path, analyte_summary_cfg): + 
_test_analytes_summary(tmp_path, analyte_summary_cfg, "wqp") + + +def test_unify_analytes_amp_summary(tmp_path, analyte_summary_cfg): + _test_analytes_summary(tmp_path, analyte_summary_cfg, "nmbgmr") + + +def test_unify_analytes_bor_summary(tmp_path, analyte_summary_cfg): + # BOR locations are found within Otero County + analyte_summary_cfg.county = "otero" + analyte_summary_cfg.bbox = None + _test_analytes_summary(tmp_path, analyte_summary_cfg, "bor") -# def test_unify_analytes_isc_seven_rivers_summary(tmp_path, analyte_summary_cfg): -# _test_analytes_summary(tmp_path, analyte_summary_cfg, "iscsevenrivers") - - -# def test_unify_analytes_dwb_summary(tmp_path, analyte_summary_cfg): -# _test_analytes_summary(tmp_path, analyte_summary_cfg, "dwb") +def test_unify_analytes_isc_seven_rivers_summary(tmp_path, analyte_summary_cfg): + _test_analytes_summary(tmp_path, analyte_summary_cfg, "iscsevenrivers") + + +def test_unify_analytes_dwb_summary(tmp_path, analyte_summary_cfg): + _test_analytes_summary(tmp_path, analyte_summary_cfg, "dwb") # ============= EOF ============================================= From ccc5cad52869f8facc0e46f7745ee60c9e61eceb Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 10 Apr 2025 16:56:32 -0600 Subject: [PATCH 078/143] Comment out debugging print statements --- backend/unifier.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/unifier.py b/backend/unifier.py index 880369f..40a6296 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -194,9 +194,9 @@ def _site_wrapper(site_source, parameter_source, persister, config): if sites_with_records_count >= site_limit: # remove any extra sites that were gathered. 
removes 0 if site_limit is not exceeded num_sites_to_remove = sites_with_records_count - site_limit - print( - f"removing {num_sites_to_remove} to avoid exceeding the site limit" - ) + # print( + # f"removing {num_sites_to_remove} to avoid exceeding the site limit" + # ) # if sites_with_records_count == sit_limit then num_sites_to_remove = 0 # and calling list[:0] will retur an empty list, so subtract From 64928747efe867cbc3c46409f2622e332a78d2e4 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 10 Apr 2025 16:57:13 -0600 Subject: [PATCH 079/143] remove debugging print statements --- backend/unifier.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/unifier.py b/backend/unifier.py index 40a6296..ce03a3d 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -180,16 +180,16 @@ def _site_wrapper(site_source, parameter_source, persister, config): persister.sites.append(site) if site_limit: - print( - "sites_with_records_count:", - sites_with_records_count, - "|", - "site_limit:", - site_limit, - "|", - "chunk_size:", - site_source.chunk_size, - ) + # print( + # "sites_with_records_count:", + # sites_with_records_count, + # "|", + # "site_limit:", + # site_limit, + # "|", + # "chunk_size:", + # site_source.chunk_size, + # ) if sites_with_records_count >= site_limit: # remove any extra sites that were gathered. 
removes 0 if site_limit is not exceeded From f359dc2544eefc007384b6e5d7788ace050058af Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 10 Apr 2025 17:10:02 -0600 Subject: [PATCH 080/143] mypy type checking --- backend/config.py | 11 ----- backend/source.py | 40 ++++++++---------- backend/transformer.py | 93 +++++++++++++++++++++--------------------- 3 files changed, 64 insertions(+), 80 deletions(-) diff --git a/backend/config.py b/backend/config.py index 9f0f2d7..25f61fd 100644 --- a/backend/config.py +++ b/backend/config.py @@ -15,7 +15,6 @@ # =============================================================================== import os import sys -import time from datetime import datetime, timedelta import shapely.wkt @@ -29,15 +28,6 @@ NMBGMRAnalyteSource, ) from .connectors.bor.source import BORSiteSource, BORAnalyteSource -from .connectors.ckan import ( - HONDO_RESOURCE_ID, - FORT_SUMNER_RESOURCE_ID, - ROSWELL_RESOURCE_ID, -) -from .connectors.ckan.source import ( - OSERoswellSiteSource, - OSERoswellWaterLevelSource, -) from .connectors.nmenv.source import DWBSiteSource, DWBAnalyteSource from .connectors.nmose.source import NMOSEPODSiteSource from .constants import ( @@ -67,7 +57,6 @@ ISCSevenRiversAnalyteSource, ) from .connectors.st2.source import ( - ST2SiteSource, PVACDSiteSource, PVACDWaterLevelSource, EBIDSiteSource, diff --git a/backend/source.py b/backend/source.py index 5884861..5d8196a 100644 --- a/backend/source.py +++ b/backend/source.py @@ -15,19 +15,13 @@ # =============================================================================== from json import JSONDecodeError -import click import httpx import shapely.wkt from shapely import MultiPoint from typing import Union, List, Callable, Dict from backend.constants import ( - MILLIGRAMS_PER_LITER, FEET, - METERS, - PARTS_PER_MILLION, - DTW, - DTW_UNITS, DT_MEASURED, PARAMETER_NAME, PARAMETER_UNITS, @@ -36,7 +30,6 @@ LATEST, ) from backend.logger import Loggable -from backend.persister import 
BasePersister, CSVPersister from backend.record import ( AnalyteRecord, AnalyteSummaryRecord, @@ -106,7 +99,10 @@ def func(x): return sorted(records, key=func)[0] elif bookend == LATEST: return sorted(records, key=func)[-1] - + else: + raise ValueError( + f"Invalid bookend {bookend}. Must be either {EARLIEST} or {LATEST}" + ) def get_analyte_search_param(parameter: str, mapping: dict) -> str: """ @@ -178,11 +174,9 @@ class BaseSource(Loggable): """ transformer_klass = BaseTransformer - config = None - def __init__(self, config=None): + def __init__(self): self.transformer = self.transformer_klass() - self.set_config(config) super().__init__() @property @@ -205,7 +199,7 @@ def discover(self, *args, **kw): # Methods Already Implemented # ========================================================================== - def _execute_text_request(self, url: str, params=None, **kw) -> str: + def _execute_text_request(self, url: str, params: dict | None = None, **kw) -> str: """ Executes a get request to the provided url and returns the text response. @@ -235,8 +229,8 @@ def _execute_text_request(self, url: str, params=None, **kw) -> str: return "" def _execute_json_request( - self, url: str, params: dict = None, tag: str = None, **kw - ) -> dict: + self, url: str, params: dict | None = None, tag: str | None = None, **kw + ) -> dict | None: """ Executes a get request to the provided url and returns the json response. @@ -268,18 +262,18 @@ def _execute_json_request( return obj except JSONDecodeError: self.warn(f"service responded but with no data. 
\n{resp.text}") - return [] + return None else: self.warn(f"service responded with status {resp.status_code}") self.warn(f"service responded with text {resp.text}") self.warn(f"service at url: {resp.url}") - return [] + return None # ========================================================================== # Methods Implemented in BaseSiteSource and BaseParameterSource # ========================================================================== - def read(self, *args, **kw) -> list: + def read(self, *args, **kw) -> list | None: """ Returns the records. Implemented in BaseSiteSource and BaseAnalyteSource """ @@ -437,7 +431,7 @@ def intersects(self, wkt: str) -> bool: return True - def read(self, *args, **kw) -> List[SiteRecord]: + def read(self, *args, **kw) -> List[SiteRecord] | None: """ Returns a list of transformed site records. Calls self.get_records, which needs to be implemented for each source @@ -454,6 +448,7 @@ def read(self, *args, **kw) -> List[SiteRecord]: return self._transform_sites(records) else: self.warn("No site records returned") + return None def _transform_sites(self, records: list) -> List[SiteRecord]: """ @@ -479,7 +474,7 @@ def _transform_sites(self, records: list) -> List[SiteRecord]: self.log(f"processed nrecords={len(transformed_records)}") return transformed_records - def chunks(self, records: list, chunk_size: int = None) -> list: + def chunks(self, records: list, chunk_size: int | None = None) -> list: """ Returns a list of records split into lists of size chunk_size. 
If chunk_size less than 1 then the records are not split @@ -613,13 +608,13 @@ def _extract_latest_record(self, records: list) -> dict: return self._extract_terminal_record(records, bookend=LATEST) def read( - self, site_record: SiteRecord, use_summarize: bool, start_ind: int, end_ind: int + self, site_record: SiteRecord | list, use_summarize: bool, start_ind: int, end_ind: int ) -> List[ AnalyteRecord | AnalyteSummaryRecord | WaterLevelRecord | WaterLevelSummaryRecord - ]: + ] | None: """ Returns a list of transformed parameter records. Transformed parameter records are standardized so that all of the records have the same format. They are @@ -772,6 +767,7 @@ def read( name = ",".join(names) self.warn(f"{name}: No records found") + return None # ========================================================================== # Methods Implemented in BaseAnalyteSource and BaseWaterLevelSource @@ -820,7 +816,7 @@ def _get_output_units(self) -> str: # Methods That Need to be Implemented For Each Source # ========================================================================== - def _extract_site_records(self, records: dict, site_record: dict) -> list: + def _extract_site_records(self, records: list[dict], site_record: dict) -> list: """ Returns all records for a single site as a list of records (which are dictionaries). 
diff --git a/backend/transformer.py b/backend/transformer.py index 0c3796d..904e895 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -394,56 +394,55 @@ def do_transform( """ # _transform needs to be implemented by each SiteTransformer # _transform is already implemented in each ParameterTransformer - record = self._transform(inrecord, *args, **kw) - print(type(record)) - if not record: + transformed_record = self._transform(inrecord, *args, **kw) + if not transformed_record: return None # ensure that a site or summary record is contained within the boundaing polygon - if "longitude" in record and "latitude" in record: - if not self.contained(record["longitude"], record["latitude"]): + if "longitude" in transformed_record and "latitude" in transformed_record: + if not self.contained(transformed_record["longitude"], transformed_record["latitude"]): self.warn( - f"Skipping site {record['id']}. It is not within the defined geographic bounds" + f"Skipping site {transformed_record['id']}. 
It is not within the defined geographic bounds" ) return None - self._post_transform(record, *args, **kw) + self._post_transform(transformed_record, *args, **kw) # standardize datetime - dt = record.get(DT_MEASURED) + dt = transformed_record.get(DT_MEASURED) if dt: - d, t = standardize_datetime(dt, record["id"]) - record["date_measured"] = d - record["time_measured"] = t + d, t = standardize_datetime(dt, transformed_record["id"]) + transformed_record["date_measured"] = d + transformed_record["time_measured"] = t else: - mrd = record.get("latest_datetime") + mrd = transformed_record.get("latest_datetime") if mrd: - d, t = standardize_datetime(mrd, record["id"]) - record["date_measured"] = d - record["time_measured"] = t + d, t = standardize_datetime(mrd, transformed_record["id"]) + transformed_record["date_measured"] = d + transformed_record["time_measured"] = t # convert to proper record type # a record klass holds the original record's data as a dictionary, and has methods to update the record's data and get the record's data klass = self._get_record_klass() - record = klass(record) + klassed_record = klass(transformed_record) # update the record's geographic information and well data if it is a SiteRecord or SummaryRecord # transforms the horizontal datum and lon/lat coordinates to WGS84 # transforms the elevation and well depth units to the output unit specified in the config # transforms the well depth and well depth units to the output unit specified in the config - if isinstance(record, (SiteRecord, SummaryRecord)): - y = float(record.latitude) - x = float(record.longitude) + if isinstance(klassed_record, (SiteRecord, SummaryRecord)): + y = float(klassed_record.latitude) + x = float(klassed_record.longitude) if x == 0 or y == 0: - self.warn(f"Skipping site {record.id}. Latitude or Longitude is 0") + self.warn(f"Skipping site {klassed_record.id}. 
Latitude or Longitude is 0") return None - input_horizontal_datum = record.horizontal_datum + input_horizontal_datum = klassed_record.horizontal_datum if input_horizontal_datum not in ALLOWED_DATUMS: self.warn( - f"Skipping site {record.id}. Datum {input_horizontal_datum} cannot be processed" + f"Skipping site {klassed_record.id}. Datum {input_horizontal_datum} cannot be processed" ) return None @@ -464,43 +463,43 @@ def do_transform( if not self.in_nm(lng, lat): self.warn( - f"Skipping site {record.id}. Coordinates {x}, {y} with datum {input_horizontal_datum} are not within 25km of New Mexico" + f"Skipping site {klassed_record.id}. Coordinates {x}, {y} with datum {input_horizontal_datum} are not within 25km of New Mexico" ) return None - record.update(latitude=lat) - record.update(longitude=lng) - record.update(horizontal_datum=datum) + klassed_record.update(latitude=lat) + klassed_record.update(longitude=lng) + klassed_record.update(horizontal_datum=datum) elevation, elevation_unit = transform_length_units( - record.elevation, - record.elevation_units, + klassed_record.elevation, + klassed_record.elevation_units, output_elevation_units, ) - record.update(elevation=elevation) - record.update(elevation_units=elevation_unit) + klassed_record.update(elevation=elevation) + klassed_record.update(elevation_units=elevation_unit) well_depth, well_depth_unit = transform_length_units( - record.well_depth, - record.well_depth_units, + klassed_record.well_depth, + klassed_record.well_depth_units, well_depth_units, ) - record.update(well_depth=well_depth) - record.update(well_depth_units=well_depth_unit) + klassed_record.update(well_depth=well_depth) + klassed_record.update(well_depth_units=well_depth_unit) # update the units to the output unit for analyte records # this is done after converting the units to the output unit for the analyte records # convert the parameter value to the output unit specified in the config - elif isinstance(record, (AnalyteRecord, 
WaterLevelRecord)): - if isinstance(record, AnalyteRecord): + elif isinstance(klassed_record, (AnalyteRecord, WaterLevelRecord)): + if isinstance(klassed_record, AnalyteRecord): output_units = self.config.analyte_output_units else: output_units = self.config.waterlevel_output_units - source_result = record.parameter_value - source_unit = record.source_parameter_units - dt = record.date_measured - source_name = record.source_parameter_name + source_result = klassed_record.parameter_value + source_unit = klassed_record.source_parameter_units + dt = klassed_record.date_measured + source_name = klassed_record.source_parameter_name conversion_factor = None # conversion factor will remain None if record is kept for time series and cannot be converted, such as non-detects warning_msg = "" try: @@ -516,21 +515,21 @@ def do_transform( msg = f"{warning_msg} for {record.id}" self.warn(msg) except TypeError: - msg = f"Keeping {source_result} for {record.id} on {record.date_measured} for time series data" + msg = f"Keeping {source_result} for {record.id} on {klassed_record.date_measured} for time series data" self.warn(msg) converted_result = source_result except ValueError: - msg = f"Keeping {source_result} for {record.id} on {record.date_measured} for time series data" + msg = f"Keeping {source_result} for {record.id} on {klassed_record.date_measured} for time series data" self.warn(msg) converted_result = source_result if warning_msg == "": - record.update(conversion_factor=conversion_factor) - record.update(parameter_value=converted_result) + klassed_record.update(conversion_factor=conversion_factor) + klassed_record.update(parameter_value=converted_result) else: - record = None + klassed_record = None - return record + return klassed_record def in_nm(self, lng: float | int | str, lat: float | int | str) -> bool: """ @@ -819,7 +818,7 @@ def _get_parameter_name_and_units(self) -> tuple: class AnalyteTransformer(ParameterTransformer): - def _get_record_klass(self) -> 
AnalyteRecord | AnalyteSummaryRecord: + def _get_record_klass(self) -> type[AnalyteRecord] | type[AnalyteSummaryRecord]: """ Returns the AnalyteRecord class to use for the transformer for water level records if config.output_summary is False, otherwise From 1ff9d6e3ff62346bbb88d7b737bc8ddd76933136 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Thu, 10 Apr 2025 17:10:58 -0600 Subject: [PATCH 081/143] mypy type hinting --- backend/transformer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/transformer.py b/backend/transformer.py index 904e895..c9eafd8 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -512,14 +512,14 @@ def do_transform( dt, ) if warning_msg != "": - msg = f"{warning_msg} for {record.id}" + msg = f"{warning_msg} for {klassed_record.id}" self.warn(msg) except TypeError: - msg = f"Keeping {source_result} for {record.id} on {klassed_record.date_measured} for time series data" + msg = f"Keeping {source_result} for {klassed_record.id} on {klassed_record.date_measured} for time series data" self.warn(msg) converted_result = source_result except ValueError: - msg = f"Keeping {source_result} for {record.id} on {klassed_record.date_measured} for time series data" + msg = f"Keeping {source_result} for {klassed_record.id} on {klassed_record.date_measured} for time series data" self.warn(msg) converted_result = source_result From dad5e50dd9bdc049b401c0c18d76cea63de43d78 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 11 Apr 2025 10:05:52 -0600 Subject: [PATCH 082/143] Finished mypy type checking for tests in GitHub Actions --- backend/connectors/nmose/source.py | 9 ++++++--- backend/source.py | 2 +- backend/transformer.py | 14 +++++++++++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index df80f7c..249c7db 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -1,4 +1,4 @@ 
-from typing import List, Dict, Tuple +from typing import List, Dict, Any from shapely import wkt from backend.connectors import NM_STATE_BOUNDING_POLYGON @@ -25,7 +25,7 @@ class NMOSEPODSiteSource(BaseSiteSource): def get_records(self, *args, **kw) -> List[Dict]: config = self.config - params = {} + params: Dict[str, Any] = {} # if config.has_bounds(): # bbox = config.bbox_bounding_points() # params["bBox"] = ",".join([str(b) for b in bbox]) @@ -61,7 +61,10 @@ def get_records(self, *args, **kw) -> List[Dict]: i = 1 while 1: rs = self._execute_json_request(url, params, tag="features") - records.extend(rs) + if rs is None: + continue + else: + records.extend(rs) params["resultOffset"] += self.chunk_size if len(rs) < self.chunk_size: break diff --git a/backend/source.py b/backend/source.py index 5d8196a..c10aaf6 100644 --- a/backend/source.py +++ b/backend/source.py @@ -185,7 +185,7 @@ def tag(self): def set_config(self, config): self.config = config - self.transformer.config = config + self.transformer.set_config(config) def check(self, *args, **kw): return True diff --git a/backend/transformer.py b/backend/transformer.py index c9eafd8..80e8721 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -331,13 +331,25 @@ class BaseTransformer(Loggable): """ _cached_polygon = None - config = None + # config = None check_contained = True # ========================================================================== # Methods Already Implemented # ========================================================================== + def set_config(self, config): + """ + Sets the config for the transformer. Called in BaseSource.set_config() + to set the config for both the source and the transformer. 
+ + Parameters + -------- + config: Config + The config to set for the transformer + """ + self.config = config + def do_transform( self, inrecord: dict, *args, **kw ) -> ( From fbd9b7595d9b059962e1cd4a2fc2485b70e5cf5e Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 11 Apr 2025 10:31:14 -0600 Subject: [PATCH 083/143] mypy fix --- backend/source.py | 5 ++++- tests/{source_tests => test_sources}/__init__.py | 0 tests/{source_tests => test_sources}/test_bernco.py | 0 tests/{source_tests => test_sources}/test_bor.py | 0 tests/{source_tests => test_sources}/test_cabq.py | 0 tests/{source_tests => test_sources}/test_ebid.py | 0 tests/{source_tests => test_sources}/test_nmbgmr.py | 0 tests/{source_tests => test_sources}/test_nmed_dwb.py | 0 .../test_nmose_isc_seven_rivers.py | 0 tests/{source_tests => test_sources}/test_nmose_roswell.py | 0 tests/{source_tests => test_sources}/test_nwis.py | 0 tests/{source_tests => test_sources}/test_pvacd.py | 0 tests/{source_tests => test_sources}/test_wqp.py | 0 13 files changed, 4 insertions(+), 1 deletion(-) rename tests/{source_tests => test_sources}/__init__.py (100%) rename tests/{source_tests => test_sources}/test_bernco.py (100%) rename tests/{source_tests => test_sources}/test_bor.py (100%) rename tests/{source_tests => test_sources}/test_cabq.py (100%) rename tests/{source_tests => test_sources}/test_ebid.py (100%) rename tests/{source_tests => test_sources}/test_nmbgmr.py (100%) rename tests/{source_tests => test_sources}/test_nmed_dwb.py (100%) rename tests/{source_tests => test_sources}/test_nmose_isc_seven_rivers.py (100%) rename tests/{source_tests => test_sources}/test_nmose_roswell.py (100%) rename tests/{source_tests => test_sources}/test_nwis.py (100%) rename tests/{source_tests => test_sources}/test_pvacd.py (100%) rename tests/{source_tests => test_sources}/test_wqp.py (100%) diff --git a/backend/source.py b/backend/source.py index c10aaf6..f81b26f 100644 --- a/backend/source.py +++ b/backend/source.py @@ 
-816,7 +816,7 @@ def _get_output_units(self) -> str: # Methods That Need to be Implemented For Each Source # ========================================================================== - def _extract_site_records(self, records: list[dict], site_record: dict) -> list: + def _extract_site_records(self, records: list[dict], site_record) -> list: """ Returns all records for a single site as a list of records (which are dictionaries). @@ -833,6 +833,9 @@ def _extract_site_records(self, records: list[dict], site_record: dict) -> list: list a list of records for the site """ + if site_record.chunk_size == 1: + return records + raise NotImplementedError( f"{self.__class__.__name__} Must implement _extract_site_records" ) diff --git a/tests/source_tests/__init__.py b/tests/test_sources/__init__.py similarity index 100% rename from tests/source_tests/__init__.py rename to tests/test_sources/__init__.py diff --git a/tests/source_tests/test_bernco.py b/tests/test_sources/test_bernco.py similarity index 100% rename from tests/source_tests/test_bernco.py rename to tests/test_sources/test_bernco.py diff --git a/tests/source_tests/test_bor.py b/tests/test_sources/test_bor.py similarity index 100% rename from tests/source_tests/test_bor.py rename to tests/test_sources/test_bor.py diff --git a/tests/source_tests/test_cabq.py b/tests/test_sources/test_cabq.py similarity index 100% rename from tests/source_tests/test_cabq.py rename to tests/test_sources/test_cabq.py diff --git a/tests/source_tests/test_ebid.py b/tests/test_sources/test_ebid.py similarity index 100% rename from tests/source_tests/test_ebid.py rename to tests/test_sources/test_ebid.py diff --git a/tests/source_tests/test_nmbgmr.py b/tests/test_sources/test_nmbgmr.py similarity index 100% rename from tests/source_tests/test_nmbgmr.py rename to tests/test_sources/test_nmbgmr.py diff --git a/tests/source_tests/test_nmed_dwb.py b/tests/test_sources/test_nmed_dwb.py similarity index 100% rename from 
tests/source_tests/test_nmed_dwb.py rename to tests/test_sources/test_nmed_dwb.py diff --git a/tests/source_tests/test_nmose_isc_seven_rivers.py b/tests/test_sources/test_nmose_isc_seven_rivers.py similarity index 100% rename from tests/source_tests/test_nmose_isc_seven_rivers.py rename to tests/test_sources/test_nmose_isc_seven_rivers.py diff --git a/tests/source_tests/test_nmose_roswell.py b/tests/test_sources/test_nmose_roswell.py similarity index 100% rename from tests/source_tests/test_nmose_roswell.py rename to tests/test_sources/test_nmose_roswell.py diff --git a/tests/source_tests/test_nwis.py b/tests/test_sources/test_nwis.py similarity index 100% rename from tests/source_tests/test_nwis.py rename to tests/test_sources/test_nwis.py diff --git a/tests/source_tests/test_pvacd.py b/tests/test_sources/test_pvacd.py similarity index 100% rename from tests/source_tests/test_pvacd.py rename to tests/test_sources/test_pvacd.py diff --git a/tests/source_tests/test_wqp.py b/tests/test_sources/test_wqp.py similarity index 100% rename from tests/source_tests/test_wqp.py rename to tests/test_sources/test_wqp.py From b6b368b07c38478104284755682bcbfc5b98baeb Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 11 Apr 2025 14:03:24 -0600 Subject: [PATCH 084/143] rename BaseTestClass to BaseSourceTestClass for clarity --- tests/__init__.py | 6 +++++- tests/test_sources/test_bernco.py | 4 ++-- tests/test_sources/test_bor.py | 4 ++-- tests/test_sources/test_cabq.py | 4 ++-- tests/test_sources/test_ebid.py | 4 ++-- tests/test_sources/test_nmbgmr.py | 6 +++--- tests/test_sources/test_nmed_dwb.py | 4 ++-- tests/test_sources/test_nmose_isc_seven_rivers.py | 6 +++--- tests/test_sources/test_nmose_roswell.py | 4 ++-- tests/test_sources/test_nwis.py | 4 ++-- tests/test_sources/test_pvacd.py | 4 ++-- tests/test_sources/test_wqp.py | 6 +++--- 12 files changed, 30 insertions(+), 26 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index f650d55..7a32897 100644 --- 
a/tests/__init__.py +++ b/tests/__init__.py @@ -24,7 +24,7 @@ def recursively_clean_directory(path): path.rmdir() -class BaseTestClass: +class BaseSourceTestClass: parameter: str units: str agency: str @@ -97,6 +97,10 @@ def _check_timeseries_file(self, timeseries_dir, timeseries_file_name): headers = f.readline().strip().split(",") assert headers == PARAMETER_RECORD_HEADERS + @pytest.mark.skip(reason="Not implemented yet") + def test_bounds(self): + pass + def test_health(self): # do a health check for the agency source = self.config.all_site_sources()[0][0] diff --git a/tests/test_sources/test_bernco.py b/tests/test_sources/test_bernco.py index b100481..16290c8 100644 --- a/tests/test_sources/test_bernco.py +++ b/tests/test_sources/test_bernco.py @@ -1,8 +1,8 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestBernCoWaterlevels(BaseTestClass): +class TestBernCoWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS units = FEET diff --git a/tests/test_sources/test_bor.py b/tests/test_sources/test_bor.py index 77bf325..623491b 100644 --- a/tests/test_sources/test_bor.py +++ b/tests/test_sources/test_bor.py @@ -1,8 +1,8 @@ from backend.constants import CALCIUM, MILLIGRAMS_PER_LITER -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestBoRAnalyte(BaseTestClass): +class TestBoRAnalyte(BaseSourceTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER diff --git a/tests/test_sources/test_cabq.py b/tests/test_sources/test_cabq.py index b430d3a..da0324b 100644 --- a/tests/test_sources/test_cabq.py +++ b/tests/test_sources/test_cabq.py @@ -1,8 +1,8 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestCABQWaterlevels(BaseTestClass): +class TestCABQWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS units = FEET diff --git a/tests/test_sources/test_ebid.py 
b/tests/test_sources/test_ebid.py index fa69e00..1ddbeda 100644 --- a/tests/test_sources/test_ebid.py +++ b/tests/test_sources/test_ebid.py @@ -1,8 +1,8 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestEBIDWaterlevels(BaseTestClass): +class TestEBIDWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS units = FEET diff --git a/tests/test_sources/test_nmbgmr.py b/tests/test_sources/test_nmbgmr.py index 4643bf0..1d86731 100644 --- a/tests/test_sources/test_nmbgmr.py +++ b/tests/test_sources/test_nmbgmr.py @@ -1,15 +1,15 @@ from backend.constants import WATERLEVELS, CALCIUM, MILLIGRAMS_PER_LITER, FEET -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestNMBGMRWaterlevels(BaseTestClass): +class TestNMBGMRWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS units = FEET agency = "nmbgmr_amp" -class TestNMBGMRAnalyte(BaseTestClass): +class TestNMBGMRAnalyte(BaseSourceTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER diff --git a/tests/test_sources/test_nmed_dwb.py b/tests/test_sources/test_nmed_dwb.py index fff8a6e..5b35718 100644 --- a/tests/test_sources/test_nmed_dwb.py +++ b/tests/test_sources/test_nmed_dwb.py @@ -1,8 +1,8 @@ from backend.constants import CALCIUM, MILLIGRAMS_PER_LITER -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestNMEDDWBAnalyte(BaseTestClass): +class TestNMEDDWBAnalyte(BaseSourceTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER diff --git a/tests/test_sources/test_nmose_isc_seven_rivers.py b/tests/test_sources/test_nmose_isc_seven_rivers.py index a0a5d28..6a4b021 100644 --- a/tests/test_sources/test_nmose_isc_seven_rivers.py +++ b/tests/test_sources/test_nmose_isc_seven_rivers.py @@ -1,15 +1,15 @@ from backend.constants import WATERLEVELS, CALCIUM, FEET, MILLIGRAMS_PER_LITER -from tests import BaseTestClass +from tests import BaseSourceTestClass -class 
TestNMOSEISCSevenRiversWaterlevels(BaseTestClass): +class TestNMOSEISCSevenRiversWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS units = FEET agency = "nmose_isc_seven_rivers" -class TestNMOSEISCSevenRiversAnalyte(BaseTestClass): +class TestNMOSEISCSevenRiversAnalyte(BaseSourceTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER diff --git a/tests/test_sources/test_nmose_roswell.py b/tests/test_sources/test_nmose_roswell.py index 4c1bd6b..3c21f4d 100644 --- a/tests/test_sources/test_nmose_roswell.py +++ b/tests/test_sources/test_nmose_roswell.py @@ -1,8 +1,8 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestNMOSERoswellWaterlevels(BaseTestClass): +class TestNMOSERoswellWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS units = FEET diff --git a/tests/test_sources/test_nwis.py b/tests/test_sources/test_nwis.py index 493b801..ff74edc 100644 --- a/tests/test_sources/test_nwis.py +++ b/tests/test_sources/test_nwis.py @@ -1,8 +1,8 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestNWISWaterlevels(BaseTestClass): +class TestNWISWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS units = FEET diff --git a/tests/test_sources/test_pvacd.py b/tests/test_sources/test_pvacd.py index edf5d48..6f5e551 100644 --- a/tests/test_sources/test_pvacd.py +++ b/tests/test_sources/test_pvacd.py @@ -1,8 +1,8 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestPVACDWaterlevels(BaseTestClass): +class TestPVACDWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS units = FEET diff --git a/tests/test_sources/test_wqp.py b/tests/test_sources/test_wqp.py index 49e61d9..10f9ea2 100644 --- a/tests/test_sources/test_wqp.py +++ b/tests/test_sources/test_wqp.py @@ -1,15 +1,15 @@ from backend.constants import WATERLEVELS, 
CALCIUM, MILLIGRAMS_PER_LITER, FEET -from tests import BaseTestClass +from tests import BaseSourceTestClass -class TestWQPWaterlevels(BaseTestClass): +class TestWQPWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS units = FEET agency = "wqp" -class TestWQPAnalyte(BaseTestClass): +class TestWQPAnalyte(BaseSourceTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER From 76f77dc45328e241b064bc1947c2f131ad5f1fa7 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 11 Apr 2025 14:17:47 -0600 Subject: [PATCH 085/143] Update pytest skip messages for clarity --- tests/__init__.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 7a32897..9e23564 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,7 @@ from logging import shutdown as logger_shutdown from pathlib import Path import pytest -from typing import Optional +from shapely import Geometry from backend.config import Config, SOURCE_KEYS from backend.constants import WATERLEVELS @@ -28,6 +28,7 @@ class BaseSourceTestClass: parameter: str units: str agency: str + bounds: Geometry # set site_limit for tests site_limit: int = 3 @@ -97,10 +98,6 @@ def _check_timeseries_file(self, timeseries_dir, timeseries_file_name): headers = f.readline().strip().split(",") assert headers == PARAMETER_RECORD_HEADERS - @pytest.mark.skip(reason="Not implemented yet") - def test_bounds(self): - pass - def test_health(self): # do a health check for the agency source = self.config.all_site_sources()[0][0] @@ -165,22 +162,26 @@ def test_timeseries_separated(self): for timeseries_file in timeseries_dir.iterdir(): self._check_timeseries_file(timeseries_dir, timeseries_file.name) - @pytest.mark.skip(reason="Not implemented yet") + @pytest.mark.skip(reason="test_date_range not implemented yet") def test_date_range(self): pass - @pytest.mark.skip(reason="Not implemented yet") + @pytest.mark.skip(reason="test_bounds not implemented yet") 
+ def test_bounds(self): + pass + + @pytest.mark.skip(reason="test_wkt not implemented yet") def test_wkt(self): pass - @pytest.mark.skip(reason="Not implemented yet") + @pytest.mark.skip(reason="test_county not implemented yet") def test_county(self): pass - @pytest.mark.skip(reason="Not implemented yet") + @pytest.mark.skip(reason="test_huc not implemented yet") def test_huc(self): pass - @pytest.mark.skip(reason="Not implemented yet") + @pytest.mark.skip(reason="test_bbox not implemented yet") def text_bbox(self): pass From d4cd969c032c45c65b0011bfa714caa08c794665 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 11 Apr 2025 14:30:48 -0600 Subject: [PATCH 086/143] rearrange testing folder --- tests/__init__.py | 187 ------------------ tests/test_sources/__init__.py | 187 ++++++++++++++++++ tests/test_sources/test_bernco.py | 2 +- tests/test_sources/test_bor.py | 2 +- tests/test_sources/test_cabq.py | 2 +- tests/test_sources/test_ebid.py | 2 +- tests/test_sources/test_nmbgmr.py | 2 +- tests/test_sources/test_nmed_dwb.py | 2 +- .../test_nmose_isc_seven_rivers.py | 2 +- tests/test_sources/test_nmose_roswell.py | 2 +- tests/test_sources/test_nwis.py | 2 +- tests/test_sources/test_pvacd.py | 2 +- tests/test_sources/test_wqp.py | 2 +- 13 files changed, 198 insertions(+), 198 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 9e23564..e69de29 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,187 +0,0 @@ -from logging import shutdown as logger_shutdown -from pathlib import Path -import pytest -from shapely import Geometry - -from backend.config import Config, SOURCE_KEYS -from backend.constants import WATERLEVELS -from backend.logger import setup_logging -from backend.record import SummaryRecord, SiteRecord, ParameterRecord -from backend.unifier import unify_analytes, unify_waterlevels - -SUMMARY_RECORD_HEADERS = list(SummaryRecord.keys) -SITE_RECORD_HEADERS = list(SiteRecord.keys) -PARAMETER_RECORD_HEADERS = list(ParameterRecord.keys)
- - -def recursively_clean_directory(path): - """Recursively delete all files and directories in the given path.""" - for item in path.iterdir(): - if item.is_dir(): - recursively_clean_directory(item) - else: - item.unlink() - path.rmdir() - - -class BaseSourceTestClass: - parameter: str - units: str - agency: str - bounds: Geometry - - # set site_limit for tests - site_limit: int = 3 - - @pytest.fixture(autouse=True) - def setup(self): - # SETUP CODE ---------------------------------------------------------- - # 1: setup test/config attributes - self.config = Config() - for agency in SOURCE_KEYS: - setattr(self.config, f"use_source_{agency}", False) - setattr(self.config, "site_limit", self.site_limit) - setattr(self.config, "parameter", self.parameter) - setattr(self.config, "units", self.units) - setattr(self.config, f"use_source_{self.agency}", True) - self.config.finalize() - - # 2: initiate logger - setup_logging(path=self.config.output_path) - - # RUN TESTS ------------------------------------------------------------ - yield - - # UNIVERSAL ASSERTIONS ------------------------------------------------- - # 1: log file exists - log_path = Path(self.config.output_path) / "die.log" - assert log_path.exists() - - # TEARDOWN CODE -------------------------------------------------------- - # 1: close logger to delete log file - logger_shutdown() - - # 2: delete newly created dirs and files - path_to_clean = Path(self.config.output_path) - print(f"Cleaning and removing {path_to_clean}") - recursively_clean_directory(path_to_clean) - - # reset test attributes - self.dirs_to_delete = [] - self.config = None - self.unifier = None - - def _run_unifier(self): - if self.parameter == WATERLEVELS: - unify_waterlevels(self.config) - else: - unify_analytes(self.config) - - def _check_sites_file(self): - sites_file = Path(self.config.output_path) / "sites.csv" - assert sites_file.exists() - - with open(sites_file, "r") as f: - headers = f.readline().strip().split(",") - assert 
headers == SITE_RECORD_HEADERS - - # +1 for the header - with open(sites_file, "r") as f: - lines = f.readlines() - assert len(lines) == self.site_limit + 1 - - def _check_timeseries_file(self, timeseries_dir, timeseries_file_name): - timeseries_file = Path(timeseries_dir) / timeseries_file_name - assert timeseries_file.exists() - - with open(timeseries_file, "r") as f: - headers = f.readline().strip().split(",") - assert headers == PARAMETER_RECORD_HEADERS - - def test_health(self): - # do a health check for the agency - source = self.config.all_site_sources()[0][0] - assert source.health() - - def test_summary(self): - # Arrange -------------------------------------------------------------- - self.config.output_summary = True - self.config.report() - - # Act ------------------------------------------------------------------ - self._run_unifier() - - # Assert --------------------------------------------------------------- - # Check the summary file - summary_file = Path(self.config.output_path) / "summary.csv" - assert summary_file.exists() - - # Check the column headers - with open(summary_file, "r") as f: - headers = f.readline().strip().split(",") - assert headers == SUMMARY_RECORD_HEADERS - - # +1 for the header - with open(summary_file, "r") as f: - lines = f.readlines() - assert len(lines) == self.site_limit + 1 - - def test_timeseries_unified(self): - # Arrange -------------------------------------------------------------- - self.config.output_timeseries_unified = True - self.config.report() - - # Act ------------------------------------------------------------------ - self._run_unifier() - - # Assert --------------------------------------------------------------- - # Check the sites file - self._check_sites_file() - - # Check the timeseries file - timeseries_dir = Path(self.config.output_path) - timeseries_file_name = "timeseries_unified.csv" - self._check_timeseries_file(timeseries_dir, timeseries_file_name) - - def test_timeseries_separated(self): - # 
Arrange -------------------------------------------------------------- - self.config.output_timeseries_separated = True - self.config.report() - - # Act ------------------------------------------------------------------ - self._run_unifier() - - # Assert --------------------------------------------------------------- - # Check the sites file - self._check_sites_file() - - # Check the timeseries files - timeseries_dir = Path(self.config.output_path) / "timeseries" - assert len([f for f in timeseries_dir.iterdir()]) == self.site_limit - - for timeseries_file in timeseries_dir.iterdir(): - self._check_timeseries_file(timeseries_dir, timeseries_file.name) - - @pytest.mark.skip(reason="test_date_range not implemented yet") - def test_date_range(self): - pass - - @pytest.mark.skip(reason="test_bounds not implemented yet") - def test_bounds(self): - pass - - @pytest.mark.skip(reason="test_wkt not implemented yet") - def test_wkt(self): - pass - - @pytest.mark.skip(reason="test_county not implemented yet") - def test_county(self): - pass - - @pytest.mark.skip(reason="test_huc not implemented yet") - def test_huc(self): - pass - - @pytest.mark.skip(reason="test_bbox not implemented yet") - def text_bbox(self): - pass diff --git a/tests/test_sources/__init__.py b/tests/test_sources/__init__.py index e69de29..9e23564 100644 --- a/tests/test_sources/__init__.py +++ b/tests/test_sources/__init__.py @@ -0,0 +1,187 @@ +from logging import shutdown as logger_shutdown +from pathlib import Path +import pytest +from shapely import Geometry + +from backend.config import Config, SOURCE_KEYS +from backend.constants import WATERLEVELS +from backend.logger import setup_logging +from backend.record import SummaryRecord, SiteRecord, ParameterRecord +from backend.unifier import unify_analytes, unify_waterlevels + +SUMMARY_RECORD_HEADERS = list(SummaryRecord.keys) +SITE_RECORD_HEADERS = list(SiteRecord.keys) +PARAMETER_RECORD_HEADERS = list(ParameterRecord.keys) + + +def 
recursively_clean_directory(path): + """Recursively delete all files and directories in the given path.""" + for item in path.iterdir(): + if item.is_dir(): + recursively_clean_directory(item) + else: + item.unlink() + path.rmdir() + + +class BaseSourceTestClass: + parameter: str + units: str + agency: str + bounds: Geometry + + # set site_limit for tests + site_limit: int = 3 + + @pytest.fixture(autouse=True) + def setup(self): + # SETUP CODE ---------------------------------------------------------- + # 1: setup test/config attributes + self.config = Config() + for agency in SOURCE_KEYS: + setattr(self.config, f"use_source_{agency}", False) + setattr(self.config, "site_limit", self.site_limit) + setattr(self.config, "parameter", self.parameter) + setattr(self.config, "units", self.units) + setattr(self.config, f"use_source_{self.agency}", True) + self.config.finalize() + + # 2: initiate logger + setup_logging(path=self.config.output_path) + + # RUN TESTS ------------------------------------------------------------ + yield + + # UNIVERSAL ASSERTIONS ------------------------------------------------- + # 1: log file exists + log_path = Path(self.config.output_path) / "die.log" + assert log_path.exists() + + # TEARDOWN CODE -------------------------------------------------------- + # 1: close logger to delete log file + logger_shutdown() + + # 2: delete newly created dirs and files + path_to_clean = Path(self.config.output_path) + print(f"Cleaning and removing {path_to_clean}") + recursively_clean_directory(path_to_clean) + + # reset test attributes + self.dirs_to_delete = [] + self.config = None + self.unifier = None + + def _run_unifier(self): + if self.parameter == WATERLEVELS: + unify_waterlevels(self.config) + else: + unify_analytes(self.config) + + def _check_sites_file(self): + sites_file = Path(self.config.output_path) / "sites.csv" + assert sites_file.exists() + + with open(sites_file, "r") as f: + headers = f.readline().strip().split(",") + assert headers 
== SITE_RECORD_HEADERS + + # +1 for the header + with open(sites_file, "r") as f: + lines = f.readlines() + assert len(lines) == self.site_limit + 1 + + def _check_timeseries_file(self, timeseries_dir, timeseries_file_name): + timeseries_file = Path(timeseries_dir) / timeseries_file_name + assert timeseries_file.exists() + + with open(timeseries_file, "r") as f: + headers = f.readline().strip().split(",") + assert headers == PARAMETER_RECORD_HEADERS + + def test_health(self): + # do a health check for the agency + source = self.config.all_site_sources()[0][0] + assert source.health() + + def test_summary(self): + # Arrange -------------------------------------------------------------- + self.config.output_summary = True + self.config.report() + + # Act ------------------------------------------------------------------ + self._run_unifier() + + # Assert --------------------------------------------------------------- + # Check the summary file + summary_file = Path(self.config.output_path) / "summary.csv" + assert summary_file.exists() + + # Check the column headers + with open(summary_file, "r") as f: + headers = f.readline().strip().split(",") + assert headers == SUMMARY_RECORD_HEADERS + + # +1 for the header + with open(summary_file, "r") as f: + lines = f.readlines() + assert len(lines) == self.site_limit + 1 + + def test_timeseries_unified(self): + # Arrange -------------------------------------------------------------- + self.config.output_timeseries_unified = True + self.config.report() + + # Act ------------------------------------------------------------------ + self._run_unifier() + + # Assert --------------------------------------------------------------- + # Check the sites file + self._check_sites_file() + + # Check the timeseries file + timeseries_dir = Path(self.config.output_path) + timeseries_file_name = "timeseries_unified.csv" + self._check_timeseries_file(timeseries_dir, timeseries_file_name) + + def test_timeseries_separated(self): + # Arrange 
-------------------------------------------------------------- + self.config.output_timeseries_separated = True + self.config.report() + + # Act ------------------------------------------------------------------ + self._run_unifier() + + # Assert --------------------------------------------------------------- + # Check the sites file + self._check_sites_file() + + # Check the timeseries files + timeseries_dir = Path(self.config.output_path) / "timeseries" + assert len([f for f in timeseries_dir.iterdir()]) == self.site_limit + + for timeseries_file in timeseries_dir.iterdir(): + self._check_timeseries_file(timeseries_dir, timeseries_file.name) + + @pytest.mark.skip(reason="test_date_range not implemented yet") + def test_date_range(self): + pass + + @pytest.mark.skip(reason="test_bounds not implemented yet") + def test_bounds(self): + pass + + @pytest.mark.skip(reason="test_wkt not implemented yet") + def test_wkt(self): + pass + + @pytest.mark.skip(reason="test_county not implemented yet") + def test_county(self): + pass + + @pytest.mark.skip(reason="test_huc not implemented yet") + def test_huc(self): + pass + + @pytest.mark.skip(reason="test_bbox not implemented yet") + def text_bbox(self): + pass diff --git a/tests/test_sources/test_bernco.py b/tests/test_sources/test_bernco.py index 16290c8..48004a9 100644 --- a/tests/test_sources/test_bernco.py +++ b/tests/test_sources/test_bernco.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestBernCoWaterlevels(BaseSourceTestClass): diff --git a/tests/test_sources/test_bor.py b/tests/test_sources/test_bor.py index 623491b..003391d 100644 --- a/tests/test_sources/test_bor.py +++ b/tests/test_sources/test_bor.py @@ -1,5 +1,5 @@ from backend.constants import CALCIUM, MILLIGRAMS_PER_LITER -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class 
TestBoRAnalyte(BaseSourceTestClass): diff --git a/tests/test_sources/test_cabq.py b/tests/test_sources/test_cabq.py index da0324b..9f3ff3c 100644 --- a/tests/test_sources/test_cabq.py +++ b/tests/test_sources/test_cabq.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestCABQWaterlevels(BaseSourceTestClass): diff --git a/tests/test_sources/test_ebid.py b/tests/test_sources/test_ebid.py index 1ddbeda..aa38894 100644 --- a/tests/test_sources/test_ebid.py +++ b/tests/test_sources/test_ebid.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestEBIDWaterlevels(BaseSourceTestClass): diff --git a/tests/test_sources/test_nmbgmr.py b/tests/test_sources/test_nmbgmr.py index 1d86731..90bba2c 100644 --- a/tests/test_sources/test_nmbgmr.py +++ b/tests/test_sources/test_nmbgmr.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, CALCIUM, MILLIGRAMS_PER_LITER, FEET -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestNMBGMRWaterlevels(BaseSourceTestClass): diff --git a/tests/test_sources/test_nmed_dwb.py b/tests/test_sources/test_nmed_dwb.py index 5b35718..2a27be3 100644 --- a/tests/test_sources/test_nmed_dwb.py +++ b/tests/test_sources/test_nmed_dwb.py @@ -1,5 +1,5 @@ from backend.constants import CALCIUM, MILLIGRAMS_PER_LITER -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestNMEDDWBAnalyte(BaseSourceTestClass): diff --git a/tests/test_sources/test_nmose_isc_seven_rivers.py b/tests/test_sources/test_nmose_isc_seven_rivers.py index 6a4b021..55b345e 100644 --- a/tests/test_sources/test_nmose_isc_seven_rivers.py +++ b/tests/test_sources/test_nmose_isc_seven_rivers.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, CALCIUM, FEET,
MILLIGRAMS_PER_LITER -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestNMOSEISCSevenRiversWaterlevels(BaseSourceTestClass): diff --git a/tests/test_sources/test_nmose_roswell.py b/tests/test_sources/test_nmose_roswell.py index 3c21f4d..585090f 100644 --- a/tests/test_sources/test_nmose_roswell.py +++ b/tests/test_sources/test_nmose_roswell.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestNMOSERoswellWaterlevels(BaseSourceTestClass): diff --git a/tests/test_sources/test_nwis.py b/tests/test_sources/test_nwis.py index ff74edc..b7bf272 100644 --- a/tests/test_sources/test_nwis.py +++ b/tests/test_sources/test_nwis.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestNWISWaterlevels(BaseSourceTestClass): diff --git a/tests/test_sources/test_pvacd.py b/tests/test_sources/test_pvacd.py index 6f5e551..715acf7 100644 --- a/tests/test_sources/test_pvacd.py +++ b/tests/test_sources/test_pvacd.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, FEET -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestPVACDWaterlevels(BaseSourceTestClass): diff --git a/tests/test_sources/test_wqp.py b/tests/test_sources/test_wqp.py index 10f9ea2..4f8437e 100644 --- a/tests/test_sources/test_wqp.py +++ b/tests/test_sources/test_wqp.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, CALCIUM, MILLIGRAMS_PER_LITER, FEET -from tests import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestWQPWaterlevels(BaseSourceTestClass): From 6cef8c456b081210c1bdacbf57ed05d4892ecdf7 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 11 Apr 2025 16:30:15 -0600 Subject: [PATCH 087/143] Work on CLI tests --- 
tests/test_cli/__init__.py | 70 +++++++++++++++++++++++++++++++++++ tests/test_cli/test_nmbgmr.py | 13 +++++++ 2 files changed, 83 insertions(+) create mode 100644 tests/test_cli/__init__.py create mode 100644 tests/test_cli/test_nmbgmr.py diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py new file mode 100644 index 0000000..7a8da3e --- /dev/null +++ b/tests/test_cli/__init__.py @@ -0,0 +1,70 @@ +from click.testing import CliRunner +import pytest +from typing import List, Any + +from backend.config import SOURCE_KEYS +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM +) +from frontend.cli import weave + +class BaseCLITestClass(): + + runner: CliRunner + agency: str + no_agencies: List[str] = [] + + @pytest.fixture(autouse=True) + def setup(self): + # SETUP CODE ----------------------------------------------------------- + self.runner = CliRunner() + + # turn off all sources except for the one being tested + for source in SOURCE_KEYS: + if source == self.agency: + continue + else: + source_with_dash = source.replace("_", "-") + self.no_agencies.append(f"--no-{source_with_dash}") + + + # RUN TESTS ------------------------------------------------------------ + yield + + # TEARDOWN CODE --------------------------------------------------------- + self.no_agencies = [] + + def _test_weave(self, parameter, output): + # Arrange + arguments = [ + parameter, + f"--output {output}", + "--dry" + ] + + arguments.extend(self.no_agencies) + + print(arguments) + + # Act + result = self.runner.invoke(weave, arguments) + print(result.output) + print(result.__dict__) + + # Assert + assert result.exit_code == 0 \ No newline at end of file diff --git a/tests/test_cli/test_nmbgmr.py b/tests/test_cli/test_nmbgmr.py new file mode 100644 index 0000000..faa8e47 --- /dev/null +++ b/tests/test_cli/test_nmbgmr.py @@ 
-0,0 +1,13 @@ +from tests.test_cli import BaseCLITestClass + +class TestNMBGMRCLI(BaseCLITestClass): + """Test the CLI for the NMBGMR source.""" + + agency = "nmbgmr-amp" + + def test_weave(self): + # Test the weave command for NMBGMR + self._test_weave( + parameter="waterlevels", + output="summary" + ) \ No newline at end of file From 4adafb1f51483a0061ca541b71cbebe5eace74f3 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 11 Apr 2025 16:30:37 -0600 Subject: [PATCH 088/143] Comment out test dev in progress --- tests/test_cli/test_nmbgmr.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_cli/test_nmbgmr.py b/tests/test_cli/test_nmbgmr.py index faa8e47..9a5a1b8 100644 --- a/tests/test_cli/test_nmbgmr.py +++ b/tests/test_cli/test_nmbgmr.py @@ -5,9 +5,9 @@ class TestNMBGMRCLI(BaseCLITestClass): agency = "nmbgmr-amp" - def test_weave(self): - # Test the weave command for NMBGMR - self._test_weave( - parameter="waterlevels", - output="summary" - ) \ No newline at end of file + # def test_weave(self): + # # Test the weave command for NMBGMR + # self._test_weave( + # parameter="waterlevels", + # output="summary" + # ) \ No newline at end of file From d2b6d6c5b327715d7b2c1f79f78b0a3c0b9b412c Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Fri, 11 Apr 2025 22:47:46 +0000 Subject: [PATCH 089/143] Formatting changes --- backend/source.py | 24 ++++++++++++++++-------- backend/transformer.py | 8 ++++++-- tests/archived/test_cli.py | 1 - tests/test_cli/__init__.py | 14 +++++--------- tests/test_cli/test_nmbgmr.py | 3 ++- 5 files changed, 29 insertions(+), 21 deletions(-) diff --git a/backend/source.py b/backend/source.py index f81b26f..5189258 100644 --- a/backend/source.py +++ b/backend/source.py @@ -102,7 +102,8 @@ def func(x): else: raise ValueError( f"Invalid bookend {bookend}. 
Must be either {EARLIEST} or {LATEST}" - ) + ) + def get_analyte_search_param(parameter: str, mapping: dict) -> str: """ @@ -608,13 +609,20 @@ def _extract_latest_record(self, records: list) -> dict: return self._extract_terminal_record(records, bookend=LATEST) def read( - self, site_record: SiteRecord | list, use_summarize: bool, start_ind: int, end_ind: int - ) -> List[ - AnalyteRecord - | AnalyteSummaryRecord - | WaterLevelRecord - | WaterLevelSummaryRecord - ] | None: + self, + site_record: SiteRecord | list, + use_summarize: bool, + start_ind: int, + end_ind: int, + ) -> ( + List[ + AnalyteRecord + | AnalyteSummaryRecord + | WaterLevelRecord + | WaterLevelSummaryRecord + ] + | None + ): """ Returns a list of transformed parameter records. Transformed parameter records are standardized so that all of the records have the same format. They are diff --git a/backend/transformer.py b/backend/transformer.py index 80e8721..cb3afe5 100644 --- a/backend/transformer.py +++ b/backend/transformer.py @@ -412,7 +412,9 @@ def do_transform( # ensure that a site or summary record is contained within the boundaing polygon if "longitude" in transformed_record and "latitude" in transformed_record: - if not self.contained(transformed_record["longitude"], transformed_record["latitude"]): + if not self.contained( + transformed_record["longitude"], transformed_record["latitude"] + ): self.warn( f"Skipping site {transformed_record['id']}. It is not within the defined geographic bounds" ) @@ -447,7 +449,9 @@ def do_transform( x = float(klassed_record.longitude) if x == 0 or y == 0: - self.warn(f"Skipping site {klassed_record.id}. Latitude or Longitude is 0") + self.warn( + f"Skipping site {klassed_record.id}. 
Latitude or Longitude is 0" + ) return None input_horizontal_datum = klassed_record.horizontal_datum diff --git a/tests/archived/test_cli.py b/tests/archived/test_cli.py index 6fa2baa..3e53924 100644 --- a/tests/archived/test_cli.py +++ b/tests/archived/test_cli.py @@ -218,7 +218,6 @@ def test_waterlevels_invalid_end(): _tester(waterlevels, args, fail=True) - def _tester(source, func, county, bbox, args=None): runner = CliRunner() diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index 7a8da3e..e9ae2f1 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -19,11 +19,12 @@ SODIUM, SULFATE, TDS, - URANIUM + URANIUM, ) from frontend.cli import weave -class BaseCLITestClass(): + +class BaseCLITestClass: runner: CliRunner agency: str @@ -41,7 +42,6 @@ def setup(self): else: source_with_dash = source.replace("_", "-") self.no_agencies.append(f"--no-{source_with_dash}") - # RUN TESTS ------------------------------------------------------------ yield @@ -51,11 +51,7 @@ def setup(self): def _test_weave(self, parameter, output): # Arrange - arguments = [ - parameter, - f"--output {output}", - "--dry" - ] + arguments = [parameter, f"--output {output}", "--dry"] arguments.extend(self.no_agencies) @@ -67,4 +63,4 @@ def _test_weave(self, parameter, output): print(result.__dict__) # Assert - assert result.exit_code == 0 \ No newline at end of file + assert result.exit_code == 0 diff --git a/tests/test_cli/test_nmbgmr.py b/tests/test_cli/test_nmbgmr.py index 9a5a1b8..6928473 100644 --- a/tests/test_cli/test_nmbgmr.py +++ b/tests/test_cli/test_nmbgmr.py @@ -1,5 +1,6 @@ from tests.test_cli import BaseCLITestClass + class TestNMBGMRCLI(BaseCLITestClass): """Test the CLI for the NMBGMR source.""" @@ -10,4 +11,4 @@ class TestNMBGMRCLI(BaseCLITestClass): # self._test_weave( # parameter="waterlevels", # output="summary" - # ) \ No newline at end of file + # ) From b4f4f2dee2400a4e2c8de561f917c3cd69b0ad8f Mon Sep 17 00:00:00 2001 From: Jacob Brown 
Date: Mon, 14 Apr 2025 08:00:26 -0700 Subject: [PATCH 090/143] fix import error --- tests/test_sources/test_ebid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sources/test_ebid.py b/tests/test_sources/test_ebid.py index aa38894..6adfd6f 100644 --- a/tests/test_sources/test_ebid.py +++ b/tests/test_sources/test_ebid.py @@ -1,5 +1,5 @@ from backend.constants import WATERLEVELS, FEET -from teststest_sources import BaseSourceTestClass +from tests.test_sources import BaseSourceTestClass class TestEBIDWaterlevels(BaseSourceTestClass): From cae4227730d5af2a90d22d5bf0ac2f5629190941 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 14 Apr 2025 12:12:37 -0700 Subject: [PATCH 091/143] put cleanup function in base dir for reuse --- tests/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..9316fba 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,8 @@ +def recursively_clean_directory(path): + """Recursively delete all files and directories in the given path.""" + for item in path.iterdir(): + if item.is_dir(): + recursively_clean_directory(item) + else: + item.unlink() + path.rmdir() \ No newline at end of file From 1b42e2df341004a389d4b87688aa57c7b485b639 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 14 Apr 2025 12:12:53 -0700 Subject: [PATCH 092/143] rename nmbgmr to nmbgmr_amp for clarity/consistency --- tests/test_sources/__init__.py | 11 +---------- .../{test_nmbgmr.py => test_nmbgmr_amp.py} | 0 2 files changed, 1 insertion(+), 10 deletions(-) rename tests/test_sources/{test_nmbgmr.py => test_nmbgmr_amp.py} (100%) diff --git a/tests/test_sources/__init__.py b/tests/test_sources/__init__.py index 9e23564..a18dd94 100644 --- a/tests/test_sources/__init__.py +++ b/tests/test_sources/__init__.py @@ -8,22 +8,13 @@ from backend.logger import setup_logging from backend.record import SummaryRecord, SiteRecord, ParameterRecord from backend.unifier 
import unify_analytes, unify_waterlevels +from tests import recursively_clean_directory SUMMARY_RECORD_HEADERS = list(SummaryRecord.keys) SITE_RECORD_HEADERS = list(SiteRecord.keys) PARAMETER_RECORD_HEADERS = list(ParameterRecord.keys) -def recursively_clean_directory(path): - """Recursively delete all files and directories in the given path.""" - for item in path.iterdir(): - if item.is_dir(): - recursively_clean_directory(item) - else: - item.unlink() - path.rmdir() - - class BaseSourceTestClass: parameter: str units: str diff --git a/tests/test_sources/test_nmbgmr.py b/tests/test_sources/test_nmbgmr_amp.py similarity index 100% rename from tests/test_sources/test_nmbgmr.py rename to tests/test_sources/test_nmbgmr_amp.py From 685f242490e532904b223108cb8559301a37c80f Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 14 Apr 2025 12:13:00 -0700 Subject: [PATCH 093/143] CLI tests for all sources and ways to set config --- tests/test_cli/__init__.py | 271 ++++++++++++++++-- tests/test_cli/test_bernco.py | 43 +++ tests/test_cli/test_cabq.py | 43 +++ tests/test_cli/test_ebid.py | 43 +++ tests/test_cli/test_nmbgmr.py | 14 - tests/test_cli/test_nmbgmr_amp.py | 44 +++ tests/test_cli/test_nmed_dwb.py | 43 +++ tests/test_cli/test_nmose_isc_seven_rivers.py | 44 +++ tests/test_cli/test_nmose_roswell.py | 43 +++ tests/test_cli/test_nwis.py | 43 +++ tests/test_cli/test_pvacd.py | 43 +++ tests/test_cli/test_wqp.py | 43 +++ 12 files changed, 684 insertions(+), 33 deletions(-) create mode 100644 tests/test_cli/test_bernco.py create mode 100644 tests/test_cli/test_cabq.py create mode 100644 tests/test_cli/test_ebid.py delete mode 100644 tests/test_cli/test_nmbgmr.py create mode 100644 tests/test_cli/test_nmbgmr_amp.py create mode 100644 tests/test_cli/test_nmed_dwb.py create mode 100644 tests/test_cli/test_nmose_isc_seven_rivers.py create mode 100644 tests/test_cli/test_nmose_roswell.py create mode 100644 tests/test_cli/test_nwis.py create mode 100644 
tests/test_cli/test_pvacd.py create mode 100644 tests/test_cli/test_wqp.py diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index e9ae2f1..0ede68e 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -1,6 +1,8 @@ from click.testing import CliRunner +from logging import shutdown as logger_shutdown +from pathlib import Path import pytest -from typing import List, Any +from typing import List from backend.config import SOURCE_KEYS from backend.constants import ( @@ -22,45 +24,276 @@ URANIUM, ) from frontend.cli import weave +from tests import recursively_clean_directory class BaseCLITestClass: runner: CliRunner agency: str - no_agencies: List[str] = [] + agency_reports_parameter: dict + output_dir: Path @pytest.fixture(autouse=True) def setup(self): - # STEUP CODE ----------------------------------------------------------- + # SETUP CODE ----------------------------------------------------------- self.runner = CliRunner() - # turn off all sources except for the one being tested - for source in SOURCE_KEYS: - if source == self.agency: - continue - else: - source_with_dash = source.replace("_", "-") - self.no_agencies.append(f"--no-{source_with_dash}") - # RUN TESTS ------------------------------------------------------------ yield # TEARDOWN CODE --------------------------------------------------------- - self.no_agencies = [] + logger_shutdown() + recursively_clean_directory(self.output_dir) - def _test_weave(self, parameter, output): + def _test_weave( + self, + parameter: str, + output: str, + site_limit: int = 4, + start_date: str = "1990-08-10", + end_date: str = "1990-08-11", + bbox: str | None = None, + county: str | None = None, + wkt: str | None = None, + ): # Arrange - arguments = [parameter, f"--output {output}", "--dry"] + # turn off all sources except for the one being tested + no_agencies = [] + for source in SOURCE_KEYS: + source_with_dash = source.replace("_", "-") + if source_with_dash == self.agency: + 
continue + else: + no_agencies.append(f"--no-{source_with_dash}") - arguments.extend(self.no_agencies) + geographic_filter_name: str | None = None + geographic_filter_value: str | None = None + if bbox: + geographic_filter_name = "bbox" + geographic_filter_value = bbox + elif county: + geographic_filter_name = "county" + geographic_filter_value = county + elif wkt: + geographic_filter_name = "wkt" + geographic_filter_value = wkt + + arguments = [ + parameter, + "--output", + output, + "--dry", + "--site-limit", + site_limit, + "--start-date", + start_date, + "--end-date", + end_date, + ] - print(arguments) + if geographic_filter_name and geographic_filter_value: + arguments.extend([f"--{geographic_filter_name}", geographic_filter_value]) + + arguments.extend(no_agencies) # Act - result = self.runner.invoke(weave, arguments) - print(result.output) - print(result.__dict__) + result = self.runner.invoke(weave, arguments, standalone_mode=False) # Assert assert result.exit_code == 0 + + """ + For the config, check that + + 0. (set output dir to clean up tests results even in event of failure) + 1. The parameter is set correctly + 2. The agencies are set correctly + 3. The output types are set correctly + 4. The site limit is set correctly + 5. The dry is set correctly + 6. The start date is set correctly + 7. The end date is set correctly + 8. 
The geographic filter is set correctly + """ + config = result.return_value + + # 0 + self.output_dir = Path(config.output_path) + + # 1 + assert getattr(config, "parameter") == parameter + + # 2 + agency_with_underscore = self.agency.replace("-", "_") + if self.agency_reports_parameter[parameter]: + assert getattr(config, f"use_source_{agency_with_underscore}") is True + else: + assert getattr(config, f"use_source_{agency_with_underscore}") is False + + for no_agency in no_agencies: + no_agency_with_underscore = no_agency.replace("--no-", "").replace("-", "_") + assert getattr(config, f"use_source_{no_agency_with_underscore}") is False + + # 3 + output_types = ["summary", "timeseries_unified", "timeseries_separated"] + for output_type in output_types: + if output_type == output: + assert getattr(config, f"output_{output_type}") is True + else: + assert getattr(config, f"output_{output_type}") is False + + # 4 + assert getattr(config, "site_limit") == 4 + + # 5 + assert getattr(config, "dry") is True + + # 6 + assert getattr(config, "start_date") == start_date + + # 7 + assert getattr(config, "end_date") == end_date + + # 8 + if geographic_filter_name and geographic_filter_value: + for _geographic_filter_name in ["bbox", "county", "wkt"]: + if _geographic_filter_name == geographic_filter_name: + assert getattr(config, _geographic_filter_name) == geographic_filter_value + else: + assert getattr(config, _geographic_filter_name) == "" + + def test_weave_summary(self): + self._test_weave( + parameter=WATERLEVELS, + output="summary" + ) + + def test_weave_timeseries_unified(self): + self._test_weave( + parameter=WATERLEVELS, + output="timeseries_unified" + ) + + def test_weave_timeseries_separated(self): + self._test_weave( + parameter=WATERLEVELS, + output="timeseries_separated" + ) + + def test_weave_bbox(self): + self._test_weave( + parameter=WATERLEVELS, + output="summary", + bbox="32.0,-106.0,36.0,-102.0" + ) + + def test_weave_county(self): + self._test_weave( + 
parameter=WATERLEVELS, + output="summary", + county="Bernalillo" + ) + + def test_weave_wkt(self): + self._test_weave( + parameter=WATERLEVELS, + output="summary", + wkt="POLYGON((-106.0 32.0, -102.0 32.0, -102.0 36.0, -106.0 36.0, -106.0 32.0))" + ) + + def test_weave_waterlevels(self): + self._test_weave( + parameter=WATERLEVELS, + output="summary" + ) + + def test_weave_arsenic(self): + self._test_weave( + parameter=ARSENIC, + output="summary" + ) + + def test_weave_bicarbonate(self): + self._test_weave( + parameter=BICARBONATE, + output="summary" + ) + + def test_weave_calcium(self): + self._test_weave( + parameter=CALCIUM, + output="summary" + ) + + def test_weave_carbonate(self): + self._test_weave( + parameter=CARBONATE, + output="summary" + ) + + def test_weave_chloride(self): + self._test_weave( + parameter=CHLORIDE, + output="summary" + ) + + def test_weave_fluoride(self): + self._test_weave( + parameter=FLUORIDE, + output="summary" + ) + + def test_weave_magnesium(self): + self._test_weave( + parameter=MAGNESIUM, + output="summary" + ) + + def test_weave_nitrate(self): + self._test_weave( + parameter=NITRATE, + output="summary" + ) + + def test_weave_ph(self): + self._test_weave( + parameter=PH, + output="summary" + ) + + def test_weave_potassium(self): + self._test_weave( + parameter=POTASSIUM, + output="summary" + ) + + def test_weave_silica(self): + self._test_weave( + parameter=SILICA, + output="summary" + ) + + def test_weave_sodium(self): + self._test_weave( + parameter=SODIUM, + output="summary" + ) + + def test_weave_sulfate(self): + self._test_weave( + parameter=SULFATE, + output="summary" + ) + + def test_weave_tds(self): + self._test_weave( + parameter=TDS, + output="summary" + ) + + def test_weave_uranium(self): + self._test_weave( + parameter=URANIUM, + output="summary" + ) \ No newline at end of file diff --git a/tests/test_cli/test_bernco.py b/tests/test_cli/test_bernco.py new file mode 100644 index 0000000..c6e4031 --- /dev/null +++ 
b/tests/test_cli/test_bernco.py @@ -0,0 +1,43 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + +class TestBernCoCLI(BaseCLITestClass): + + agency = "bernco" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: False, + CALCIUM: False, + CARBONATE: False, + CHLORIDE: False, + FLUORIDE: False, + MAGNESIUM: False, + NITRATE: False, + PH: False, + POTASSIUM: False, + SILICA: False, + SODIUM: False, + SULFATE: False, + TDS: False, + URANIUM: False, + } + + diff --git a/tests/test_cli/test_cabq.py b/tests/test_cli/test_cabq.py new file mode 100644 index 0000000..5e96a07 --- /dev/null +++ b/tests/test_cli/test_cabq.py @@ -0,0 +1,43 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + +class TestCABQCLI(BaseCLITestClass): + + agency = "cabq" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: False, + CALCIUM: False, + CARBONATE: False, + CHLORIDE: False, + FLUORIDE: False, + MAGNESIUM: False, + NITRATE: False, + PH: False, + POTASSIUM: False, + SILICA: False, + SODIUM: False, + SULFATE: False, + TDS: False, + URANIUM: False, + } + + diff --git a/tests/test_cli/test_ebid.py b/tests/test_cli/test_ebid.py new file mode 100644 index 0000000..1f88692 --- /dev/null +++ b/tests/test_cli/test_ebid.py @@ -0,0 +1,43 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + +class 
TestEBIDCLI(BaseCLITestClass): + + agency = "ebid" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: False, + CALCIUM: False, + CARBONATE: False, + CHLORIDE: False, + FLUORIDE: False, + MAGNESIUM: False, + NITRATE: False, + PH: False, + POTASSIUM: False, + SILICA: False, + SODIUM: False, + SULFATE: False, + TDS: False, + URANIUM: False, + } + + diff --git a/tests/test_cli/test_nmbgmr.py b/tests/test_cli/test_nmbgmr.py deleted file mode 100644 index 6928473..0000000 --- a/tests/test_cli/test_nmbgmr.py +++ /dev/null @@ -1,14 +0,0 @@ -from tests.test_cli import BaseCLITestClass - - -class TestNMBGMRCLI(BaseCLITestClass): - """Test the CLI for the NMBGMR source.""" - - agency = "nmbgmr-amp" - - # def test_weave(self): - # # Test the weave command for NMBGMR - # self._test_weave( - # parameter="waterlevels", - # output="summary" - # ) diff --git a/tests/test_cli/test_nmbgmr_amp.py b/tests/test_cli/test_nmbgmr_amp.py new file mode 100644 index 0000000..b582045 --- /dev/null +++ b/tests/test_cli/test_nmbgmr_amp.py @@ -0,0 +1,44 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + + +class TestNMBGMRCLI(BaseCLITestClass): + + agency = "nmbgmr-amp" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: True, + BICARBONATE: True, + CALCIUM: True, + CARBONATE: True, + CHLORIDE: True, + FLUORIDE: True, + MAGNESIUM: True, + NITRATE: True, + PH: True, + POTASSIUM: True, + SILICA: True, + SODIUM: True, + SULFATE: True, + TDS: True, + URANIUM: True, + } + + diff --git a/tests/test_cli/test_nmed_dwb.py b/tests/test_cli/test_nmed_dwb.py new file mode 100644 index 0000000..4e8660c --- /dev/null +++ b/tests/test_cli/test_nmed_dwb.py @@ -0,0 +1,43 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, 
+ CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + +class TestNMEDDWBCLI(BaseCLITestClass): + + agency = "nmed-dwb" + agency_reports_parameter = { + WATERLEVELS: False, + ARSENIC: True, + BICARBONATE: True, + CALCIUM: True, + CARBONATE: False, + CHLORIDE: True, + FLUORIDE: True, + MAGNESIUM: True, + NITRATE: True, + PH: True, + POTASSIUM: True, + SILICA: True, + SODIUM: True, + SULFATE: True, + TDS: True, + URANIUM: True, + } + + diff --git a/tests/test_cli/test_nmose_isc_seven_rivers.py b/tests/test_cli/test_nmose_isc_seven_rivers.py new file mode 100644 index 0000000..2ab6bc1 --- /dev/null +++ b/tests/test_cli/test_nmose_isc_seven_rivers.py @@ -0,0 +1,44 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + + +class TestNMOSEISCSevenRiversCLI(BaseCLITestClass): + + agency = "nmose-isc-seven-rivers" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: True, + CALCIUM: True, + CARBONATE: False, + CHLORIDE: True, + FLUORIDE: True, + MAGNESIUM: True, + NITRATE: True, + PH: True, + POTASSIUM: True, + SILICA: True, + SODIUM: True, + SULFATE: True, + TDS: True, + URANIUM: False, + } + + diff --git a/tests/test_cli/test_nmose_roswell.py b/tests/test_cli/test_nmose_roswell.py new file mode 100644 index 0000000..1fafcc8 --- /dev/null +++ b/tests/test_cli/test_nmose_roswell.py @@ -0,0 +1,43 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + +class TestNMOSERoswellCLI(BaseCLITestClass): + + agency 
= "nmose-roswell" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: False, + CALCIUM: False, + CARBONATE: False, + CHLORIDE: False, + FLUORIDE: False, + MAGNESIUM: False, + NITRATE: False, + PH: False, + POTASSIUM: False, + SILICA: False, + SODIUM: False, + SULFATE: False, + TDS: False, + URANIUM: False, + } + + diff --git a/tests/test_cli/test_nwis.py b/tests/test_cli/test_nwis.py new file mode 100644 index 0000000..3f5dd55 --- /dev/null +++ b/tests/test_cli/test_nwis.py @@ -0,0 +1,43 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + +class TestNWISCLI(BaseCLITestClass): + + agency = "nwis" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: False, + CALCIUM: False, + CARBONATE: False, + CHLORIDE: False, + FLUORIDE: False, + MAGNESIUM: False, + NITRATE: False, + PH: False, + POTASSIUM: False, + SILICA: False, + SODIUM: False, + SULFATE: False, + TDS: False, + URANIUM: False, + } + + diff --git a/tests/test_cli/test_pvacd.py b/tests/test_cli/test_pvacd.py new file mode 100644 index 0000000..167e8d6 --- /dev/null +++ b/tests/test_cli/test_pvacd.py @@ -0,0 +1,43 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + +class TestPVACDCLI(BaseCLITestClass): + + agency = "pvacd" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: False, + BICARBONATE: False, + CALCIUM: False, + CARBONATE: False, + CHLORIDE: False, + FLUORIDE: False, + MAGNESIUM: False, + NITRATE: False, + PH: False, + POTASSIUM: False, + SILICA: False, + SODIUM: False, + SULFATE: False, + TDS: False, + 
URANIUM: False, + } + + diff --git a/tests/test_cli/test_wqp.py b/tests/test_cli/test_wqp.py new file mode 100644 index 0000000..3ef021c --- /dev/null +++ b/tests/test_cli/test_wqp.py @@ -0,0 +1,43 @@ +from backend.constants import ( + WATERLEVELS, + ARSENIC, + BICARBONATE, + CALCIUM, + CARBONATE, + CHLORIDE, + FLUORIDE, + MAGNESIUM, + NITRATE, + PH, + POTASSIUM, + SILICA, + SODIUM, + SULFATE, + TDS, + URANIUM, +) +from tests.test_cli import BaseCLITestClass + +class TestWQPCLI(BaseCLITestClass): + + agency = "wqp" + agency_reports_parameter = { + WATERLEVELS: True, + ARSENIC: True, + BICARBONATE: True, + CALCIUM: True, + CARBONATE: True, + CHLORIDE: True, + FLUORIDE: True, + MAGNESIUM: True, + NITRATE: True, + PH: True, + POTASSIUM: True, + SILICA: True, + SODIUM: True, + SULFATE: True, + TDS: True, + URANIUM: True, + } + + From 22c3be07fb6fbfcbcfa242ef4914c1a70151a0cd Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 14 Apr 2025 12:13:31 -0700 Subject: [PATCH 094/143] keep bbox str to be consistent with other geographic filters --- backend/config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/config.py b/backend/config.py index 25f61fd..a1ce0e8 100644 --- a/backend/config.py +++ b/backend/config.py @@ -109,7 +109,7 @@ class Config(Loggable): end_date: str = "" # spatial - bbox: dict # dict or str + bbox: str = "" county: str = "" wkt: str = "" @@ -155,7 +155,6 @@ def __init__(self, model=None, payload=None): # need to initialize logger super().__init__() - self.bbox = {} if model: if model.wkt: self.wkt = model.wkt From c4e0d29d129b5686c74544bfde66032249373f2f Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 14 Apr 2025 12:13:58 -0700 Subject: [PATCH 095/143] add wkt spatial option --- frontend/cli.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 425fdfa..f724874 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -130,6 
+130,11 @@ def cli(): default="", help="New Mexico county name", ), + click.option( + "--wkt", + default="", + help="Well known text (WKT) representation of a geometry. For example, 'POLYGON((x1 y1, x2 y2, x3 y3, x1 y1))'", + ) ] DEBUG_OPTIONS = [ click.option( @@ -227,6 +232,7 @@ def weave( start_date, end_date, bbox, + wkt, county, no_bernco, no_bor, @@ -249,7 +255,7 @@ def weave( """ parameter = weave # instantiate config and set up parameter - config = setup_config(f"{parameter}", bbox, county, site_limit, dry) + config = setup_config(f"{parameter}", bbox, wkt, county, site_limit, dry) config.parameter = parameter # output type @@ -288,18 +294,17 @@ def weave( config.finalize() # setup logging here so that the path can be set to config.output_path setup_logging(path=config.output_path) - + + config.report() if not dry: - config.report() # prompt user to continue if not click.confirm("Do you want to continue?", default=True): return - - if parameter.lower() == "waterlevels": - unify_waterlevels(config) - else: - unify_analytes(config) - + if parameter.lower() == "waterlevels": + unify_waterlevels(config) + else: + unify_analytes(config) + return config @cli.command() @add_options(SPATIAL_OPTIONS) @@ -308,6 +313,7 @@ def weave( @add_options(DEBUG_OPTIONS) def wells( bbox, + wkt, county, output_dir, no_bernco, @@ -330,7 +336,7 @@ def wells( Get locations """ - config = setup_config("sites", bbox, county, site_limit, dry) + config = setup_config("sites", bbox, wkt, county, site_limit, dry) config_agencies = [ "bernco", "bor", @@ -370,7 +376,7 @@ def wells( required=True, ) @add_options(SPATIAL_OPTIONS) -def sources(sources, bbox, county): +def sources(sources, bbox, wkt, county): """ List available sources """ @@ -381,6 +387,8 @@ def sources(sources, bbox, county): config.county = county elif bbox: config.bbox = bbox + elif wkt: + config.wkt = wkt parameter = sources config.parameter = parameter @@ -394,7 +402,7 @@ def sources(sources, bbox, county): click.echo(s) 
-def setup_config(tag, bbox, county, site_limit, dry): +def setup_config(tag, bbox, wkt, county, site_limit, dry): config = Config() if county: click.echo(f"Getting {tag} for county {county}") @@ -403,6 +411,9 @@ def setup_config(tag, bbox, county, site_limit, dry): click.echo(f"Getting {tag} for bounding box {bbox}") # bbox = -105.396826 36.219290, -106.024162 35.384307 config.bbox = bbox + elif wkt: + click.echo(f"Getting {tag} for WKT {wkt}") + config.wkt = wkt config.site_limit = site_limit config.dry = dry From a1bc4d04a8981004f4e587f5daf3e181811cef11 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 14 Apr 2025 12:14:16 -0700 Subject: [PATCH 096/143] code cleanup --- backend/unifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/unifier.py b/backend/unifier.py index ce03a3d..7872743 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -115,7 +115,7 @@ def _perister_factory(config): def _site_wrapper(site_source, parameter_source, persister, config): - + try: # TODO: fully develop checks/discoveries below # if not site_source.check(): From 08525c2108cdd387be28377ca0bde94b850145f6 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 14 Apr 2025 13:32:23 -0700 Subject: [PATCH 097/143] mypy fix --- frontend/cli.py | 2 +- tests/test_cli/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index f724874..abf6ed4 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -415,7 +415,7 @@ def setup_config(tag, bbox, wkt, county, site_limit, dry): click.echo(f"Getting {tag} for WKT {wkt}") config.wkt = wkt - config.site_limit = site_limit + config.site_limit = int(site_limit) config.dry = dry return config diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index 0ede68e..6a7eb7c 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -85,7 +85,7 @@ def _test_weave( output, "--dry", "--site-limit", - site_limit, + str(site_limit), 
"--start-date", start_date, "--end-date", From ea23c47653dbc50413d96dc9b5f1011d1ea5bd0d Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Mon, 14 Apr 2025 20:33:26 +0000 Subject: [PATCH 098/143] Formatting changes --- backend/config.py | 2 +- backend/unifier.py | 2 +- frontend/cli.py | 5 +- tests/__init__.py | 2 +- tests/test_cli/__init__.py | 140 +++++------------- tests/test_cli/test_bernco.py | 3 +- tests/test_cli/test_cabq.py | 3 +- tests/test_cli/test_ebid.py | 3 +- tests/test_cli/test_nmbgmr_amp.py | 4 +- tests/test_cli/test_nmed_dwb.py | 3 +- tests/test_cli/test_nmose_isc_seven_rivers.py | 4 +- tests/test_cli/test_nmose_roswell.py | 3 +- tests/test_cli/test_nwis.py | 3 +- tests/test_cli/test_pvacd.py | 3 +- tests/test_cli/test_wqp.py | 3 +- 15 files changed, 56 insertions(+), 127 deletions(-) diff --git a/backend/config.py b/backend/config.py index a1ce0e8..0ec1dd7 100644 --- a/backend/config.py +++ b/backend/config.py @@ -109,7 +109,7 @@ class Config(Loggable): end_date: str = "" # spatial - bbox: str = "" + bbox: str = "" county: str = "" wkt: str = "" diff --git a/backend/unifier.py b/backend/unifier.py index 7872743..ce03a3d 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -115,7 +115,7 @@ def _perister_factory(config): def _site_wrapper(site_source, parameter_source, persister, config): - + try: # TODO: fully develop checks/discoveries below # if not site_source.check(): diff --git a/frontend/cli.py b/frontend/cli.py index abf6ed4..3dcad53 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -134,7 +134,7 @@ def cli(): "--wkt", default="", help="Well known text (WKT) representation of a geometry. 
For example, 'POLYGON((x1 y1, x2 y2, x3 y3, x1 y1))'", - ) + ), ] DEBUG_OPTIONS = [ click.option( @@ -294,7 +294,7 @@ def weave( config.finalize() # setup logging here so that the path can be set to config.output_path setup_logging(path=config.output_path) - + config.report() if not dry: # prompt user to continue @@ -306,6 +306,7 @@ def weave( unify_analytes(config) return config + @cli.command() @add_options(SPATIAL_OPTIONS) @add_options(PERSISTER_OPTIONS) diff --git a/tests/__init__.py b/tests/__init__.py index 9316fba..bcf9e80 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -5,4 +5,4 @@ def recursively_clean_directory(path): recursively_clean_directory(item) else: item.unlink() - path.rmdir() \ No newline at end of file + path.rmdir() diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index 6a7eb7c..84923b8 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -47,16 +47,16 @@ def setup(self): recursively_clean_directory(self.output_dir) def _test_weave( - self, - parameter: str, - output: str, - site_limit: int = 4, - start_date: str = "1990-08-10", - end_date: str = "1990-08-11", - bbox: str | None = None, - county: str | None = None, - wkt: str | None = None, - ): + self, + parameter: str, + output: str, + site_limit: int = 4, + start_date: str = "1990-08-10", + end_date: str = "1990-08-11", + bbox: str | None = None, + county: str | None = None, + wkt: str | None = None, + ): # Arrange # turn off all sources except for the one being tested no_agencies = [] @@ -78,7 +78,7 @@ def _test_weave( elif wkt: geographic_filter_name = "wkt" geographic_filter_value = wkt - + arguments = [ parameter, "--output", @@ -119,14 +119,14 @@ def _test_weave( config = result.return_value # 0 - self.output_dir = Path(config.output_path) + self.output_dir = Path(config.output_path) # 1 assert getattr(config, "parameter") == parameter # 2 agency_with_underscore = self.agency.replace("-", "_") - if 
self.agency_reports_parameter[parameter]: + if self.agency_reports_parameter[parameter]: assert getattr(config, f"use_source_{agency_with_underscore}") is True else: assert getattr(config, f"use_source_{agency_with_underscore}") is False @@ -134,7 +134,7 @@ def _test_weave( for no_agency in no_agencies: no_agency_with_underscore = no_agency.replace("--no-", "").replace("-", "_") assert getattr(config, f"use_source_{no_agency_with_underscore}") is False - + # 3 output_types = ["summary", "timeseries_unified", "timeseries_separated"] for output_type in output_types: @@ -159,141 +159,81 @@ def _test_weave( if geographic_filter_name and geographic_filter_value: for _geographic_filter_name in ["bbox", "county", "wkt"]: if _geographic_filter_name == geographic_filter_name: - assert getattr(config, _geographic_filter_name) == geographic_filter_value + assert ( + getattr(config, _geographic_filter_name) + == geographic_filter_value + ) else: assert getattr(config, _geographic_filter_name) == "" def test_weave_summary(self): - self._test_weave( - parameter=WATERLEVELS, - output="summary" - ) + self._test_weave(parameter=WATERLEVELS, output="summary") def test_weave_timeseries_unified(self): - self._test_weave( - parameter=WATERLEVELS, - output="timeseries_unified" - ) + self._test_weave(parameter=WATERLEVELS, output="timeseries_unified") def test_weave_timeseries_separated(self): - self._test_weave( - parameter=WATERLEVELS, - output="timeseries_separated" - ) + self._test_weave(parameter=WATERLEVELS, output="timeseries_separated") def test_weave_bbox(self): self._test_weave( - parameter=WATERLEVELS, - output="summary", - bbox="32.0,-106.0,36.0,-102.0" + parameter=WATERLEVELS, output="summary", bbox="32.0,-106.0,36.0,-102.0" ) def test_weave_county(self): - self._test_weave( - parameter=WATERLEVELS, - output="summary", - county="Bernalillo" - ) + self._test_weave(parameter=WATERLEVELS, output="summary", county="Bernalillo") def test_weave_wkt(self): self._test_weave( 
parameter=WATERLEVELS, output="summary", - wkt="POLYGON((-106.0 32.0, -102.0 32.0, -102.0 36.0, -106.0 36.0, -106.0 32.0))" + wkt="POLYGON((-106.0 32.0, -102.0 32.0, -102.0 36.0, -106.0 36.0, -106.0 32.0))", ) def test_weave_waterlevels(self): - self._test_weave( - parameter=WATERLEVELS, - output="summary" - ) + self._test_weave(parameter=WATERLEVELS, output="summary") def test_weave_arsenic(self): - self._test_weave( - parameter=ARSENIC, - output="summary" - ) + self._test_weave(parameter=ARSENIC, output="summary") def test_weave_bicarbonate(self): - self._test_weave( - parameter=BICARBONATE, - output="summary" - ) + self._test_weave(parameter=BICARBONATE, output="summary") def test_weave_calcium(self): - self._test_weave( - parameter=CALCIUM, - output="summary" - ) + self._test_weave(parameter=CALCIUM, output="summary") def test_weave_carbonate(self): - self._test_weave( - parameter=CARBONATE, - output="summary" - ) + self._test_weave(parameter=CARBONATE, output="summary") def test_weave_chloride(self): - self._test_weave( - parameter=CHLORIDE, - output="summary" - ) + self._test_weave(parameter=CHLORIDE, output="summary") def test_weave_fluoride(self): - self._test_weave( - parameter=FLUORIDE, - output="summary" - ) + self._test_weave(parameter=FLUORIDE, output="summary") def test_weave_magnesium(self): - self._test_weave( - parameter=MAGNESIUM, - output="summary" - ) + self._test_weave(parameter=MAGNESIUM, output="summary") def test_weave_nitrate(self): - self._test_weave( - parameter=NITRATE, - output="summary" - ) + self._test_weave(parameter=NITRATE, output="summary") def test_weave_ph(self): - self._test_weave( - parameter=PH, - output="summary" - ) + self._test_weave(parameter=PH, output="summary") def test_weave_potassium(self): - self._test_weave( - parameter=POTASSIUM, - output="summary" - ) + self._test_weave(parameter=POTASSIUM, output="summary") def test_weave_silica(self): - self._test_weave( - parameter=SILICA, - output="summary" - ) + 
self._test_weave(parameter=SILICA, output="summary") def test_weave_sodium(self): - self._test_weave( - parameter=SODIUM, - output="summary" - ) + self._test_weave(parameter=SODIUM, output="summary") def test_weave_sulfate(self): - self._test_weave( - parameter=SULFATE, - output="summary" - ) + self._test_weave(parameter=SULFATE, output="summary") def test_weave_tds(self): - self._test_weave( - parameter=TDS, - output="summary" - ) + self._test_weave(parameter=TDS, output="summary") def test_weave_uranium(self): - self._test_weave( - parameter=URANIUM, - output="summary" - ) \ No newline at end of file + self._test_weave(parameter=URANIUM, output="summary") diff --git a/tests/test_cli/test_bernco.py b/tests/test_cli/test_bernco.py index c6e4031..331ed26 100644 --- a/tests/test_cli/test_bernco.py +++ b/tests/test_cli/test_bernco.py @@ -18,6 +18,7 @@ ) from tests.test_cli import BaseCLITestClass + class TestBernCoCLI(BaseCLITestClass): agency = "bernco" @@ -39,5 +40,3 @@ class TestBernCoCLI(BaseCLITestClass): TDS: False, URANIUM: False, } - - diff --git a/tests/test_cli/test_cabq.py b/tests/test_cli/test_cabq.py index 5e96a07..1748975 100644 --- a/tests/test_cli/test_cabq.py +++ b/tests/test_cli/test_cabq.py @@ -18,6 +18,7 @@ ) from tests.test_cli import BaseCLITestClass + class TestCABQCLI(BaseCLITestClass): agency = "cabq" @@ -39,5 +40,3 @@ class TestCABQCLI(BaseCLITestClass): TDS: False, URANIUM: False, } - - diff --git a/tests/test_cli/test_ebid.py b/tests/test_cli/test_ebid.py index 1f88692..76429f1 100644 --- a/tests/test_cli/test_ebid.py +++ b/tests/test_cli/test_ebid.py @@ -18,6 +18,7 @@ ) from tests.test_cli import BaseCLITestClass + class TestEBIDCLI(BaseCLITestClass): agency = "ebid" @@ -39,5 +40,3 @@ class TestEBIDCLI(BaseCLITestClass): TDS: False, URANIUM: False, } - - diff --git a/tests/test_cli/test_nmbgmr_amp.py b/tests/test_cli/test_nmbgmr_amp.py index b582045..df4ea49 100644 --- a/tests/test_cli/test_nmbgmr_amp.py +++ 
b/tests/test_cli/test_nmbgmr_amp.py @@ -39,6 +39,4 @@ class TestNMBGMRCLI(BaseCLITestClass): SULFATE: True, TDS: True, URANIUM: True, - } - - + } diff --git a/tests/test_cli/test_nmed_dwb.py b/tests/test_cli/test_nmed_dwb.py index 4e8660c..edd9d68 100644 --- a/tests/test_cli/test_nmed_dwb.py +++ b/tests/test_cli/test_nmed_dwb.py @@ -18,6 +18,7 @@ ) from tests.test_cli import BaseCLITestClass + class TestNMEDDWBCLI(BaseCLITestClass): agency = "nmed-dwb" @@ -39,5 +40,3 @@ class TestNMEDDWBCLI(BaseCLITestClass): TDS: True, URANIUM: True, } - - diff --git a/tests/test_cli/test_nmose_isc_seven_rivers.py b/tests/test_cli/test_nmose_isc_seven_rivers.py index 2ab6bc1..0f99e70 100644 --- a/tests/test_cli/test_nmose_isc_seven_rivers.py +++ b/tests/test_cli/test_nmose_isc_seven_rivers.py @@ -39,6 +39,4 @@ class TestNMOSEISCSevenRiversCLI(BaseCLITestClass): SULFATE: True, TDS: True, URANIUM: False, - } - - + } diff --git a/tests/test_cli/test_nmose_roswell.py b/tests/test_cli/test_nmose_roswell.py index 1fafcc8..0c2be39 100644 --- a/tests/test_cli/test_nmose_roswell.py +++ b/tests/test_cli/test_nmose_roswell.py @@ -18,6 +18,7 @@ ) from tests.test_cli import BaseCLITestClass + class TestNMOSERoswellCLI(BaseCLITestClass): agency = "nmose-roswell" @@ -39,5 +40,3 @@ class TestNMOSERoswellCLI(BaseCLITestClass): TDS: False, URANIUM: False, } - - diff --git a/tests/test_cli/test_nwis.py b/tests/test_cli/test_nwis.py index 3f5dd55..0fd236a 100644 --- a/tests/test_cli/test_nwis.py +++ b/tests/test_cli/test_nwis.py @@ -18,6 +18,7 @@ ) from tests.test_cli import BaseCLITestClass + class TestNWISCLI(BaseCLITestClass): agency = "nwis" @@ -39,5 +40,3 @@ class TestNWISCLI(BaseCLITestClass): TDS: False, URANIUM: False, } - - diff --git a/tests/test_cli/test_pvacd.py b/tests/test_cli/test_pvacd.py index 167e8d6..041c9a9 100644 --- a/tests/test_cli/test_pvacd.py +++ b/tests/test_cli/test_pvacd.py @@ -18,6 +18,7 @@ ) from tests.test_cli import BaseCLITestClass + class 
TestPVACDCLI(BaseCLITestClass): agency = "pvacd" @@ -39,5 +40,3 @@ class TestPVACDCLI(BaseCLITestClass): TDS: False, URANIUM: False, } - - diff --git a/tests/test_cli/test_wqp.py b/tests/test_cli/test_wqp.py index 3ef021c..f3beb7b 100644 --- a/tests/test_cli/test_wqp.py +++ b/tests/test_cli/test_wqp.py @@ -18,6 +18,7 @@ ) from tests.test_cli import BaseCLITestClass + class TestWQPCLI(BaseCLITestClass): agency = "wqp" @@ -39,5 +40,3 @@ class TestWQPCLI(BaseCLITestClass): TDS: True, URANIUM: True, } - - From 3620df46ed104b84cbb798f50a840cd6d507700d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 14 Apr 2025 13:42:48 -0700 Subject: [PATCH 099/143] bump version to 0.8.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9f43e06..f06990d 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.8.1", + version="0.8.2", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 0cf98b4ae98e6bb90ae0891d970f4c2553b85f8f Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 14 Apr 2025 16:13:48 -0700 Subject: [PATCH 100/143] PR 43 review changes --- frontend/cli.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/frontend/cli.py b/frontend/cli.py index 3dcad53..04e2949 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -215,7 +215,7 @@ def _add_options(func): @cli.command() @click.argument( - "weave", + "parameter", type=click.Choice(PARAMETER_OPTIONS, case_sensitive=False), required=True, ) @@ -226,7 +226,7 @@ def _add_options(func): @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def weave( - weave, + parameter, output, output_dir, start_date, @@ -253,9 +253,8 @@ def weave( """ Get parameter timeseries or summary data """ - parameter = weave # instantiate config and set up parameter - config = setup_config(f"{parameter}", bbox, wkt, county, site_limit, dry) + config = 
setup_config(parameter, bbox, wkt, county, site_limit, dry) config.parameter = parameter # output type @@ -416,7 +415,10 @@ def setup_config(tag, bbox, wkt, county, site_limit, dry): click.echo(f"Getting {tag} for WKT {wkt}") config.wkt = wkt - config.site_limit = int(site_limit) + if site_limit: + config.site_limit = int(site_limit) + else: + config.site_limit = None config.dry = dry return config From 6cc5a15244867e0edc3210c6643b78a836e6f1de Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 15 Apr 2025 16:52:50 -0700 Subject: [PATCH 101/143] Implement GeoJSON for sites & summary --- backend/config.py | 5 +-- backend/persister.py | 100 ++++++++++++++++++++++++++++--------------- backend/record.py | 9 ++-- backend/unifier.py | 46 ++++++++++---------- frontend/cli.py | 36 +++++++--------- setup.py | 2 +- 6 files changed, 112 insertions(+), 86 deletions(-) diff --git a/backend/config.py b/backend/config.py index 0ec1dd7..d59a8ea 100644 --- a/backend/config.py +++ b/backend/config.py @@ -142,15 +142,13 @@ class Config(Loggable): output_summary: bool = False output_timeseries_unified: bool = False output_timeseries_separated: bool = False + output_site_file_type: str = "csv" latest_water_level_only: bool = False analyte_output_units: str = MILLIGRAMS_PER_LITER waterlevel_output_units: str = FEET - use_csv: bool = True - use_geojson: bool = False - def __init__(self, model=None, payload=None): # need to initialize logger super().__init__() @@ -417,6 +415,7 @@ def _report_attributes(title, attrs): "output_summary", "output_timeseries_unified", "output_timeseries_separated", + "output_site_file_type", "output_horizontal_datum", "output_elevation_units", ), diff --git a/backend/persister.py b/backend/persister.py index a89572c..d7fb56e 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -17,9 +17,12 @@ import io import os import shutil +from pprint import pprint +import json import pandas as pd import geopandas as gpd +from shapely import Point from 
backend.logger import Loggable @@ -34,9 +37,7 @@ class BasePersister(Loggable): Class to persist the data to a file or cloud storage. If persisting to a file, the output directory is created by config._make_output_path() """ - - extension: str - # output_id: str + add_extension: str = "csv" def __init__(self): self.records = [] @@ -75,7 +76,7 @@ def dump_timeseries_unified(self, path: str): path = os.path.join(path, "timeseries_unified") path = self.add_extension(path) self.log(f"dumping unified timeseries to {os.path.abspath(path)}") - self._dump_timeseries_unified(path, self.timeseries) + self._dump_timeseries(path, self.timeseries) else: self.log("no timeseries records to dump", fg="red") @@ -85,21 +86,16 @@ def dump_timeseries_separated(self, path: str): # the individual site timeseries will be dumped timeseries_path = os.path.join(path, "timeseries") self._make_output_directory(timeseries_path) - for site, records in self.timeseries: - path = os.path.join(timeseries_path, str(site.id).replace(" ", "_")) + for records in self.timeseries: + site_id = records[0].id + path = os.path.join(timeseries_path, str(site_id).replace(" ", "_")) path = self.add_extension(path) - self.log(f"dumping {site.id} to {os.path.abspath(path)}") - self._write(path, records) - else: - self.log("no timeseries records to dump", fg="red") + self.log(f"dumping {site_id} to {os.path.abspath(path)}") - def save(self, path: str): - if self.records: - path = self.add_extension(path) - self.log(f"saving to {path}") - self._write(path, self.records) + list_of_records = [records] + self._dump_timeseries(path, list_of_records) else: - self.log("no records to save", fg="red") + self.log("no timeseries records to dump", fg="red") def add_extension(self, path: str): if not self.extension: @@ -111,15 +107,14 @@ def add_extension(self, path: str): def _write(self, path: str, records): raise NotImplementedError - - def _dump_timeseries_unified(self, path: str, timeseries: list): + + def 
_dump_timeseries(self, path: str, timeseries: list): raise NotImplementedError def _make_output_directory(self, output_directory: str): os.mkdir(output_directory) - -def write_file(path, func, records): +def write_csv_file(path, func, records): with open(path, "w", newline="") as f: func(csv.writer(f), records) @@ -130,10 +125,16 @@ def write_memory(path, func, records): return f.getvalue() -def dump_timeseries_unified(writer, timeseries): +def dump_timeseries(writer, timeseries: list[list]): + """ + Dumps timeseries records to a CSV file. The timeseries must be a list of + lists, where each inner list contains the records for a single site. In the case + of timeseries separated, the inner list will contain the records for a single site + and this function will be called multiple times, once for each site. + """ headers_have_not_been_written = True - for i, (site, records) in enumerate(timeseries): - for j, record in enumerate(records): + for i, records in enumerate(timeseries): + for record in records: if i == 0 and headers_have_not_been_written: writer.writerow(record.keys) headers_have_not_been_written = False @@ -192,7 +193,7 @@ def _add_content(self, path: str, content: str): self._content.append((path, content)) def _dump_timeseries_unified(self, path: str, timeseries: list): - content = write_memory(path, dump_timeseries_unified, timeseries) + content = write_memory(path, dump_timeseries, timeseries) self._add_content(path, content) @@ -200,24 +201,53 @@ class CSVPersister(BasePersister): extension = "csv" def _write(self, path: str, records: list): - write_file(path, dump_sites, records) + write_csv_file(path, dump_sites, records) - def _dump_timeseries_unified(self, path: str, timeseries: list): - write_file(path, dump_timeseries_unified, timeseries) + def _dump_timeseries(self, path: str, timeseries: list): + write_csv_file(path, dump_timeseries, timeseries) class GeoJSONPersister(BasePersister): extension = "geojson" def _write(self, path: str, records: 
list): - r0 = records[0] - df = pd.DataFrame([r.to_row() for r in records], columns=r0.keys) - - gdf = gpd.GeoDataFrame( - df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs="EPSG:4326" - ) - gdf.to_file(path, driver="GeoJSON") - + feature_collection = { + "type": "FeatureCollection", + "features": [], + } + + features = [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [record.get("longitude"), record.get("latitude"), record.get("elevation")], + }, + "properties": {k: record.get(k) for k in record.keys if k not in ["latitude", "longitude", "elevation"]}, + } + for record in records + ] + feature_collection["features"].extend(features) + + + with open(path, "w") as f: + json.dump(feature_collection, f, indent=4) + + + def _get_gdal_type(self, dtype): + """ + Map pandas dtypes to GDAL-compatible types for the schema. + """ + if pd.api.types.is_integer_dtype(dtype): + return "int" + elif pd.api.types.is_float_dtype(dtype): + return "float" + elif pd.api.types.is_string_dtype(dtype): + return "str" + elif pd.api.types.is_datetime64_any_dtype(dtype): + return "datetime" + else: + return "str" # Default to string for unsupported types # class ST2Persister(BasePersister): # extension = "st2" diff --git a/backend/record.py b/backend/record.py index 5cfe5e9..3185363 100644 --- a/backend/record.py +++ b/backend/record.py @@ -30,10 +30,8 @@ def to_csv(self): def __init__(self, payload): self._payload = payload - - def to_row(self): - - def get(attr): + + def get(self, attr): # v = self._payload.get(attr) # if v is None and self.defaults: # v = self.defaults.get(attr) @@ -64,7 +62,8 @@ def get(attr): break return v - return [get(k) for k in self.keys] + def to_row(self): + return [self.get(k) for k in self.keys] def update(self, **kw): self._payload.update(kw) diff --git a/backend/unifier.py b/backend/unifier.py index ce03a3d..4539f11 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -100,9 +100,9 @@ def 
_perister_factory(config): persister_klass = CSVPersister if config.use_cloud_storage: persister_klass = CloudStoragePersister - elif config.use_csv: + elif config.output_site_file_type == "csv": persister_klass = CSVPersister - elif config.use_geojson: + elif config.output_site_file_type == "geojson": persister_klass = GeoJSONPersister return persister_klass() @@ -114,7 +114,7 @@ def _perister_factory(config): # persister.save(config.output_path) -def _site_wrapper(site_source, parameter_source, persister, config): +def _site_wrapper(site_source, parameter_source, sites_summary_persister, timeseries_persister, config): try: # TODO: fully develop checks/discoveries below @@ -142,7 +142,7 @@ def _site_wrapper(site_source, parameter_source, persister, config): first_flag = True if config.sites_only: - persister.sites.extend(sites) + sites_summary_persister.sites.extend(sites) else: for site_records in site_source.chunks(sites): if type(site_records) == list: @@ -159,7 +159,7 @@ def _site_wrapper(site_source, parameter_source, persister, config): site_records, use_summarize, start_ind, end_ind ) if summary_records: - persister.records.extend(summary_records) + sites_summary_persister.records.extend(summary_records) sites_with_records_count += len(summary_records) else: continue @@ -176,8 +176,8 @@ def _site_wrapper(site_source, parameter_source, persister, config): sites_with_records_count += len(results) for site, records in results: - persister.timeseries.append((site, records)) - persister.sites.append(site) + timeseries_persister.timeseries.append(records) + sites_summary_persister.sites.append(site) if site_limit: # print( @@ -203,15 +203,15 @@ def _site_wrapper(site_source, parameter_source, persister, config): # num_sites_to_remove from the length of the list # to remove the last num_sites_to_remove sites if use_summarize: - persister.records = persister.records[ - : len(persister.records) - num_sites_to_remove + sites_summary_persister.records = 
sites_summary_persister.records[ + : len(sites_summary_persister.records) - num_sites_to_remove ] else: - persister.timeseries = persister.timeseries[ - : len(persister.timeseries) - num_sites_to_remove + timeseries_persister.timeseries = timeseries_persister.timeseries[ + : len(timeseries_persister.timeseries) - num_sites_to_remove ] - persister.sites = persister.sites[ - : len(persister.sites) - num_sites_to_remove + sites_summary_persister.sites = sites_summary_persister.sites[ + : len(sites_summary_persister.sites) - num_sites_to_remove ] break @@ -227,22 +227,24 @@ def _unify_parameter( config, sources, ): - persister = _perister_factory(config) + sites_summary_persister = _perister_factory(config) + timeseries_persister = CSVPersister() for site_source, parameter_source in sources: - _site_wrapper(site_source, parameter_source, persister, config) + _site_wrapper(site_source, parameter_source, sites_summary_persister, timeseries_persister, config) if config.output_summary: - persister.dump_summary(config.output_path) + sites_summary_persister.dump_summary(config.output_path) elif config.output_timeseries_unified: - persister.dump_timeseries_unified(config.output_path) - persister.dump_sites(config.output_path) + timeseries_persister.dump_timeseries_unified(config.output_path) + sites_summary_persister.dump_sites(config.output_path) elif config.sites_only: - persister.dump_sites(config.output_path) + sites_summary_persister.dump_sites(config.output_path) else: # config.output_timeseries_separated - persister.dump_timeseries_separated(config.output_path) - persister.dump_sites(config.output_path) + timeseries_persister.dump_timeseries_separated(config.output_path) + sites_summary_persister.dump_sites(config.output_path) - persister.finalize(config.output_name) + timeseries_persister.finalize(config.output_name) + sites_summary_persister.finalize(config.output_name) def get_sources_in_polygon(polygon): diff --git a/frontend/cli.py b/frontend/cli.py index 
04e2949..c0c4f15 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -169,24 +169,6 @@ def cli(): help="End date in the form 'YYYY', 'YYYY-MM', 'YYYY-MM-DD', 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'", ), ] - -TIMESERIES_OPTIONS = [ - click.option( - "--separated_timeseries", - is_flag=True, - default=False, - show_default=True, - help="Output separate timeseries files for every site", - ), - click.option( - "--unified_timeseries", - is_flag=True, - default=False, - show_default=True, - help="Output single timeseries file, which includes all sites", - ), -] - OUTPUT_OPTIONS = [ click.option( "--output", @@ -195,6 +177,7 @@ def cli(): help="Output summary file, single unified timeseries file, or separated timeseries files", ), ] + PERSISTER_OPTIONS = [ click.option( "--output-dir", @@ -203,6 +186,15 @@ def cli(): ) ] +SITE_OUTPUT_TYPE_OPTIONS = [ + click.option( + "--site-output-type", + type=click.Choice(["csv", "geojson"]), + default="csv", + help="Output file format for sites (csv or geoson). 
Default is csv", + ) +] + def add_options(options): def _add_options(func): @@ -225,6 +217,7 @@ def _add_options(func): @add_options(SPATIAL_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) +@add_options(SITE_OUTPUT_TYPE_OPTIONS) def weave( parameter, output, @@ -249,12 +242,13 @@ def weave( site_limit, dry, yes, + site_output_type, ): """ Get parameter timeseries or summary data """ # instantiate config and set up parameter - config = setup_config(parameter, bbox, wkt, county, site_limit, dry) + config = setup_config(parameter, bbox, wkt, county, site_limit, dry, site_output_type) config.parameter = parameter # output type @@ -402,7 +396,7 @@ def sources(sources, bbox, wkt, county): click.echo(s) -def setup_config(tag, bbox, wkt, county, site_limit, dry): +def setup_config(tag, bbox, wkt, county, site_limit, dry, site_output_type="csv"): config = Config() if county: click.echo(f"Getting {tag} for county {county}") @@ -421,6 +415,8 @@ def setup_config(tag, bbox, wkt, county, site_limit, dry): config.site_limit = None config.dry = dry + config.output_site_file_type = site_output_type + return config diff --git a/setup.py b/setup.py index f06990d..05b81a8 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.8.2", + version="0.9.0", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 60ef303437b5beae83564cb82a4315dfa510a59d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 15 Apr 2025 17:28:00 -0700 Subject: [PATCH 102/143] geojson output for sites and summary --- backend/config.py | 4 +- backend/unifier.py | 4 +- frontend/cli.py | 10 +-- tests/test_cli/__init__.py | 25 ++++++ tests/test_sources/__init__.py | 141 ++++++++++++++++++++++++++------- 5 files changed, 147 insertions(+), 37 deletions(-) diff --git a/backend/config.py b/backend/config.py index d59a8ea..de2bc64 100644 --- a/backend/config.py +++ b/backend/config.py @@ -142,7 +142,7 @@ 
class Config(Loggable): output_summary: bool = False output_timeseries_unified: bool = False output_timeseries_separated: bool = False - output_site_file_type: str = "csv" + site_file_type: str = "csv" latest_water_level_only: bool = False @@ -415,7 +415,7 @@ def _report_attributes(title, attrs): "output_summary", "output_timeseries_unified", "output_timeseries_separated", - "output_site_file_type", + "site_file_type", "output_horizontal_datum", "output_elevation_units", ), diff --git a/backend/unifier.py b/backend/unifier.py index 4539f11..9e4e653 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -100,9 +100,9 @@ def _perister_factory(config): persister_klass = CSVPersister if config.use_cloud_storage: persister_klass = CloudStoragePersister - elif config.output_site_file_type == "csv": + elif config.site_file_type == "csv": persister_klass = CSVPersister - elif config.output_site_file_type == "geojson": + elif config.site_file_type == "geojson": persister_klass = GeoJSONPersister return persister_klass() diff --git a/frontend/cli.py b/frontend/cli.py index c0c4f15..cd9e180 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -188,7 +188,7 @@ def cli(): SITE_OUTPUT_TYPE_OPTIONS = [ click.option( - "--site-output-type", + "--site-file-type", type=click.Choice(["csv", "geojson"]), default="csv", help="Output file format for sites (csv or geoson). 
Default is csv", @@ -242,13 +242,13 @@ def weave( site_limit, dry, yes, - site_output_type, + site_file_type, ): """ Get parameter timeseries or summary data """ # instantiate config and set up parameter - config = setup_config(parameter, bbox, wkt, county, site_limit, dry, site_output_type) + config = setup_config(parameter, bbox, wkt, county, site_limit, dry, site_file_type) config.parameter = parameter # output type @@ -396,7 +396,7 @@ def sources(sources, bbox, wkt, county): click.echo(s) -def setup_config(tag, bbox, wkt, county, site_limit, dry, site_output_type="csv"): +def setup_config(tag, bbox, wkt, county, site_limit, dry, site_file_type="csv"): config = Config() if county: click.echo(f"Getting {tag} for county {county}") @@ -415,7 +415,7 @@ def setup_config(tag, bbox, wkt, county, site_limit, dry, site_output_type="csv" config.site_limit = None config.dry = dry - config.output_site_file_type = site_output_type + config.site_file_type = site_file_type return config diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index 84923b8..0903403 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -50,6 +50,7 @@ def _test_weave( self, parameter: str, output: str, + site_output_type: str = "csv", site_limit: int = 4, start_date: str = "1990-08-10", end_date: str = "1990-08-11", @@ -92,6 +93,13 @@ def _test_weave( end_date, ] + if site_output_type == "csv": + arguments.append("--site-file-type") + arguments.append(site_output_type) + elif site_output_type == "geojson": + arguments.append("--site-file-type") + arguments.append(site_output_type) + if geographic_filter_name and geographic_filter_value: arguments.extend([f"--{geographic_filter_name}", geographic_filter_value]) @@ -115,6 +123,7 @@ def _test_weave( 6. The start date is set correctly 7. The end date is set correctly 8. The geographic filter is set correctly + 9. 
The site output type is set correctly """ config = result.return_value @@ -166,6 +175,12 @@ def _test_weave( else: assert getattr(config, _geographic_filter_name) == "" + # 9 + if site_output_type == "csv": + assert getattr(config, "site_file_type") == "csv" + elif site_output_type == "geojson": + assert getattr(config, "site_file_type") == "geojson" + def test_weave_summary(self): self._test_weave(parameter=WATERLEVELS, output="summary") @@ -175,6 +190,16 @@ def test_weave_timeseries_unified(self): def test_weave_timeseries_separated(self): self._test_weave(parameter=WATERLEVELS, output="timeseries_separated") + def test_weave_csv(self): + self._test_weave( + parameter=WATERLEVELS, output="summary", site_output_type="csv" + ) + + def test_weave_geojson(self): + self._test_weave( + parameter=WATERLEVELS, output="summary", site_output_type="geojson" + ) + def test_weave_bbox(self): self._test_weave( parameter=WATERLEVELS, output="summary", bbox="32.0,-106.0,36.0,-102.0" diff --git a/tests/test_sources/__init__.py b/tests/test_sources/__init__.py index a18dd94..d020b90 100644 --- a/tests/test_sources/__init__.py +++ b/tests/test_sources/__init__.py @@ -1,3 +1,4 @@ +import json from logging import shutdown as logger_shutdown from pathlib import Path import pytest @@ -10,8 +11,14 @@ from backend.unifier import unify_analytes, unify_waterlevels from tests import recursively_clean_directory -SUMMARY_RECORD_HEADERS = list(SummaryRecord.keys) -SITE_RECORD_HEADERS = list(SiteRecord.keys) +EXCLUDED_GEOJSON_KEYS = ["latitude", "longitude", "elevation"] + +SUMMARY_RECORD_CSV_HEADERS = list(SummaryRecord.keys) +SUMMARY_RECORD_GEOJSON_KEYS = [k for k in SUMMARY_RECORD_CSV_HEADERS if k not in EXCLUDED_GEOJSON_KEYS] + +SITE_RECORD_CSV_HEADERS = list(SiteRecord.keys) +SITE_RECORD_GEOJSON_KEYS = [k for k in SITE_RECORD_CSV_HEADERS if k not in EXCLUDED_GEOJSON_KEYS] + PARAMETER_RECORD_HEADERS = list(ParameterRecord.keys) @@ -68,18 +75,57 @@ def _run_unifier(self): else: 
unify_analytes(self.config) - def _check_sites_file(self): - sites_file = Path(self.config.output_path) / "sites.csv" - assert sites_file.exists() + def _check_summary_file(self, extension: str): + summary_file = Path(self.config.output_path) / f"summary.{extension}" + assert summary_file.exists() - with open(sites_file, "r") as f: - headers = f.readline().strip().split(",") - assert headers == SITE_RECORD_HEADERS + if extension == "csv": + with open(summary_file, "r") as f: + headers = f.readline().strip().split(",") + assert headers == SUMMARY_RECORD_CSV_HEADERS + + # +1 for the header + with open(summary_file, "r") as f: + lines = f.readlines() + assert len(lines) == self.site_limit + 1 + elif extension == "geojson": + with open(summary_file, "r") as f: + summary = json.load(f) + assert len(summary["features"]) == self.site_limit + assert summary["type"] == "FeatureCollection" + for feature in summary["features"]: + assert feature["geometry"]["type"] == "Point" + assert len(feature["geometry"]["coordinates"]) == 3 + assert sorted(feature["properties"].keys()) == sorted(SUMMARY_RECORD_GEOJSON_KEYS) + assert summary["features"][0]["type"] == "Feature" + else: + raise ValueError(f"Unsupported file extension: {extension}") - # +1 for the header - with open(sites_file, "r") as f: - lines = f.readlines() - assert len(lines) == self.site_limit + 1 + def _check_sites_file(self, extension: str): + sites_file = Path(self.config.output_path) / f"sites.{extension}" + assert sites_file.exists() + + if extension == "csv": + with open(sites_file, "r") as f: + headers = f.readline().strip().split(",") + assert headers == SITE_RECORD_CSV_HEADERS + + # +1 for the header + with open(sites_file, "r") as f: + lines = f.readlines() + assert len(lines) == self.site_limit + 1 + elif extension == "geojson": + with open(sites_file, "r") as f: + sites = json.load(f) + assert len(sites["features"]) == self.site_limit + assert sites["type"] == "FeatureCollection" + for feature in 
sites["features"]: + assert feature["geometry"]["type"] == "Point" + assert len(feature["geometry"]["coordinates"]) == 3 + assert sorted(feature["properties"].keys()) == sorted(SITE_RECORD_GEOJSON_KEYS) + assert sites["features"][0]["type"] == "Feature" + else: + raise ValueError(f"Unsupported file extension: {extension}") def _check_timeseries_file(self, timeseries_dir, timeseries_file_name): timeseries_file = Path(timeseries_dir) / timeseries_file_name @@ -94,7 +140,7 @@ def test_health(self): source = self.config.all_site_sources()[0][0] assert source.health() - def test_summary(self): + def test_summary_csv(self): # Arrange -------------------------------------------------------------- self.config.output_summary = True self.config.report() @@ -103,21 +149,21 @@ def test_summary(self): self._run_unifier() # Assert --------------------------------------------------------------- - # Check the summary file - summary_file = Path(self.config.output_path) / "summary.csv" - assert summary_file.exists() + self._check_summary_file("csv") - # Check the column headers - with open(summary_file, "r") as f: - headers = f.readline().strip().split(",") - assert headers == SUMMARY_RECORD_HEADERS + def test_summary_geojson(self): + # Arrange -------------------------------------------------------------- + self.config.output_summary = True + self.config.site_file_type = "geojson" + self.config.report() - # +1 for the header - with open(summary_file, "r") as f: - lines = f.readlines() - assert len(lines) == self.site_limit + 1 + # Act ------------------------------------------------------------------ + self._run_unifier() - def test_timeseries_unified(self): + # Assert --------------------------------------------------------------- + self._check_summary_file("geojson") + + def test_timeseries_unified_csv(self): # Arrange -------------------------------------------------------------- self.config.output_timeseries_unified = True self.config.report() @@ -127,16 +173,54 @@ def 
test_timeseries_unified(self): # Assert --------------------------------------------------------------- # Check the sites file - self._check_sites_file() + self._check_sites_file("csv") # Check the timeseries file timeseries_dir = Path(self.config.output_path) timeseries_file_name = "timeseries_unified.csv" self._check_timeseries_file(timeseries_dir, timeseries_file_name) - def test_timeseries_separated(self): + def test_timeseries_unified_geojson(self): + # Arrange -------------------------------------------------------------- + self.config.output_timeseries_unified = True + self.config.site_file_type = "geojson" + self.config.report() + + # Act ------------------------------------------------------------------ + self._run_unifier() + + # Assert --------------------------------------------------------------- + # Check the sites file + self._check_sites_file("geojson") + + # Check the timeseries file + timeseries_dir = Path(self.config.output_path) + timeseries_file_name = "timeseries_unified.csv" + self._check_timeseries_file(timeseries_dir, timeseries_file_name) + + def test_timeseries_separated_csv(self): + # Arrange -------------------------------------------------------------- + self.config.output_timeseries_separated = True + self.config.report() + + # Act ------------------------------------------------------------------ + self._run_unifier() + + # Assert --------------------------------------------------------------- + # Check the sites file + self._check_sites_file("csv") + + # Check the timeseries files + timeseries_dir = Path(self.config.output_path) / "timeseries" + assert len([f for f in timeseries_dir.iterdir()]) == self.site_limit + + for timeseries_file in timeseries_dir.iterdir(): + self._check_timeseries_file(timeseries_dir, timeseries_file.name) + + def test_timeseries_separated_geojson(self): # Arrange -------------------------------------------------------------- self.config.output_timeseries_separated = True + self.config.site_file_type = 
"geojson" self.config.report() # Act ------------------------------------------------------------------ @@ -144,7 +228,7 @@ def test_timeseries_separated(self): # Assert --------------------------------------------------------------- # Check the sites file - self._check_sites_file() + self._check_sites_file("geojson") # Check the timeseries files timeseries_dir = Path(self.config.output_path) / "timeseries" @@ -152,6 +236,7 @@ def test_timeseries_separated(self): for timeseries_file in timeseries_dir.iterdir(): self._check_timeseries_file(timeseries_dir, timeseries_file.name) + @pytest.mark.skip(reason="test_date_range not implemented yet") def test_date_range(self): From 0f1d7272925609d543fcf5b504a7cb07a2e69ae1 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 16 Apr 2025 00:31:30 +0000 Subject: [PATCH 103/143] Formatting changes --- backend/persister.py | 21 ++++++++---- backend/record.py | 60 +++++++++++++++++----------------- backend/unifier.py | 39 +++++++++++++++------- tests/test_sources/__init__.py | 17 +++++++--- 4 files changed, 85 insertions(+), 52 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index d7fb56e..09d32e6 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -17,7 +17,7 @@ import io import os import shutil -from pprint import pprint +from pprint import pprint import json import pandas as pd @@ -37,6 +37,7 @@ class BasePersister(Loggable): Class to persist the data to a file or cloud storage. 
If persisting to a file, the output directory is created by config._make_output_path() """ + add_extension: str = "csv" def __init__(self): @@ -107,13 +108,14 @@ def add_extension(self, path: str): def _write(self, path: str, records): raise NotImplementedError - + def _dump_timeseries(self, path: str, timeseries: list): raise NotImplementedError def _make_output_directory(self, output_directory: str): os.mkdir(output_directory) + def write_csv_file(path, func, records): with open(path, "w", newline="") as f: func(csv.writer(f), records) @@ -221,19 +223,25 @@ def _write(self, path: str, records: list): "type": "Feature", "geometry": { "type": "Point", - "coordinates": [record.get("longitude"), record.get("latitude"), record.get("elevation")], + "coordinates": [ + record.get("longitude"), + record.get("latitude"), + record.get("elevation"), + ], + }, + "properties": { + k: record.get(k) + for k in record.keys + if k not in ["latitude", "longitude", "elevation"] }, - "properties": {k: record.get(k) for k in record.keys if k not in ["latitude", "longitude", "elevation"]}, } for record in records ] feature_collection["features"].extend(features) - with open(path, "w") as f: json.dump(feature_collection, f, indent=4) - def _get_gdal_type(self, dtype): """ Map pandas dtypes to GDAL-compatible types for the schema. 
@@ -249,6 +257,7 @@ def _get_gdal_type(self, dtype): else: return "str" # Default to string for unsupported types + # class ST2Persister(BasePersister): # extension = "st2" # diff --git a/backend/record.py b/backend/record.py index 3185363..61d9769 100644 --- a/backend/record.py +++ b/backend/record.py @@ -30,37 +30,37 @@ def to_csv(self): def __init__(self, payload): self._payload = payload - + def get(self, attr): - # v = self._payload.get(attr) - # if v is None and self.defaults: - # v = self.defaults.get(attr) - v = self.__getattr__(attr) - - field_sigfigs = [ - ("elevation", 2), - ("well_depth", 2), - ("latitude", 6), - ("longitude", 6), - ("min", 2), - ("max", 2), - ("mean", 2), - ] - - # both analyte and water level tables have the same fields, but the - # rounding should only occur for water level tables - if isinstance(self, WaterLevelRecord): - field_sigfigs.append((PARAMETER_VALUE, 2)) - - for field, sigfigs in field_sigfigs: - if v is not None and field == attr: - try: - v = round(v, sigfigs) - except TypeError as e: - print(field, attr) - raise e - break - return v + # v = self._payload.get(attr) + # if v is None and self.defaults: + # v = self.defaults.get(attr) + v = self.__getattr__(attr) + + field_sigfigs = [ + ("elevation", 2), + ("well_depth", 2), + ("latitude", 6), + ("longitude", 6), + ("min", 2), + ("max", 2), + ("mean", 2), + ] + + # both analyte and water level tables have the same fields, but the + # rounding should only occur for water level tables + if isinstance(self, WaterLevelRecord): + field_sigfigs.append((PARAMETER_VALUE, 2)) + + for field, sigfigs in field_sigfigs: + if v is not None and field == attr: + try: + v = round(v, sigfigs) + except TypeError as e: + print(field, attr) + raise e + break + return v def to_row(self): return [self.get(k) for k in self.keys] diff --git a/backend/unifier.py b/backend/unifier.py index 9e4e653..d90705c 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -114,7 +114,9 @@ def 
_perister_factory(config): # persister.save(config.output_path) -def _site_wrapper(site_source, parameter_source, sites_summary_persister, timeseries_persister, config): +def _site_wrapper( + site_source, parameter_source, sites_summary_persister, timeseries_persister, config +): try: # TODO: fully develop checks/discoveries below @@ -203,16 +205,25 @@ def _site_wrapper(site_source, parameter_source, sites_summary_persister, timese # num_sites_to_remove from the length of the list # to remove the last num_sites_to_remove sites if use_summarize: - sites_summary_persister.records = sites_summary_persister.records[ - : len(sites_summary_persister.records) - num_sites_to_remove - ] + sites_summary_persister.records = ( + sites_summary_persister.records[ + : len(sites_summary_persister.records) + - num_sites_to_remove + ] + ) else: - timeseries_persister.timeseries = timeseries_persister.timeseries[ - : len(timeseries_persister.timeseries) - num_sites_to_remove - ] - sites_summary_persister.sites = sites_summary_persister.sites[ - : len(sites_summary_persister.sites) - num_sites_to_remove - ] + timeseries_persister.timeseries = ( + timeseries_persister.timeseries[ + : len(timeseries_persister.timeseries) + - num_sites_to_remove + ] + ) + sites_summary_persister.sites = ( + sites_summary_persister.sites[ + : len(sites_summary_persister.sites) + - num_sites_to_remove + ] + ) break except BaseException: @@ -230,7 +241,13 @@ def _unify_parameter( sites_summary_persister = _perister_factory(config) timeseries_persister = CSVPersister() for site_source, parameter_source in sources: - _site_wrapper(site_source, parameter_source, sites_summary_persister, timeseries_persister, config) + _site_wrapper( + site_source, + parameter_source, + sites_summary_persister, + timeseries_persister, + config, + ) if config.output_summary: sites_summary_persister.dump_summary(config.output_path) diff --git a/tests/test_sources/__init__.py b/tests/test_sources/__init__.py index d020b90..78886bc 
100644 --- a/tests/test_sources/__init__.py +++ b/tests/test_sources/__init__.py @@ -14,10 +14,14 @@ EXCLUDED_GEOJSON_KEYS = ["latitude", "longitude", "elevation"] SUMMARY_RECORD_CSV_HEADERS = list(SummaryRecord.keys) -SUMMARY_RECORD_GEOJSON_KEYS = [k for k in SUMMARY_RECORD_CSV_HEADERS if k not in EXCLUDED_GEOJSON_KEYS] +SUMMARY_RECORD_GEOJSON_KEYS = [ + k for k in SUMMARY_RECORD_CSV_HEADERS if k not in EXCLUDED_GEOJSON_KEYS +] SITE_RECORD_CSV_HEADERS = list(SiteRecord.keys) -SITE_RECORD_GEOJSON_KEYS = [k for k in SITE_RECORD_CSV_HEADERS if k not in EXCLUDED_GEOJSON_KEYS] +SITE_RECORD_GEOJSON_KEYS = [ + k for k in SITE_RECORD_CSV_HEADERS if k not in EXCLUDED_GEOJSON_KEYS +] PARAMETER_RECORD_HEADERS = list(ParameterRecord.keys) @@ -96,7 +100,9 @@ def _check_summary_file(self, extension: str): for feature in summary["features"]: assert feature["geometry"]["type"] == "Point" assert len(feature["geometry"]["coordinates"]) == 3 - assert sorted(feature["properties"].keys()) == sorted(SUMMARY_RECORD_GEOJSON_KEYS) + assert sorted(feature["properties"].keys()) == sorted( + SUMMARY_RECORD_GEOJSON_KEYS + ) assert summary["features"][0]["type"] == "Feature" else: raise ValueError(f"Unsupported file extension: {extension}") @@ -122,7 +128,9 @@ def _check_sites_file(self, extension: str): for feature in sites["features"]: assert feature["geometry"]["type"] == "Point" assert len(feature["geometry"]["coordinates"]) == 3 - assert sorted(feature["properties"].keys()) == sorted(SITE_RECORD_GEOJSON_KEYS) + assert sorted(feature["properties"].keys()) == sorted( + SITE_RECORD_GEOJSON_KEYS + ) assert sites["features"][0]["type"] == "Feature" else: raise ValueError(f"Unsupported file extension: {extension}") @@ -236,7 +244,6 @@ def test_timeseries_separated_geojson(self): for timeseries_file in timeseries_dir.iterdir(): self._check_timeseries_file(timeseries_dir, timeseries_file.name) - @pytest.mark.skip(reason="test_date_range not implemented yet") def test_date_range(self): From 
34eeddc0064b51b6c43226f30e074b473c7f7537 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 16 Apr 2025 07:48:43 -0700 Subject: [PATCH 104/143] Comment out cache because of errors --- .github/workflows/cicd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 5ea32ac..8cc7a39 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -23,7 +23,7 @@ jobs: uses: actions/setup-python@v3 with: python-version: "3.10" - cache: "pip" + # cache: "pip" - name: Install dependencies run: | python -m pip install --upgrade pip From a047ba1dda1dcdc8db864801a99fd6d5ddac6153 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 16 Apr 2025 08:51:54 -0700 Subject: [PATCH 105/143] add well depth units to sites table output --- backend/record.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/record.py b/backend/record.py index 61d9769..0f8e34b 100644 --- a/backend/record.py +++ b/backend/record.py @@ -161,6 +161,7 @@ class SiteRecord(BaseRecord): "formation", "aquifer", "well_depth", + "well_depth_units", ) defaults: dict = { @@ -178,6 +179,7 @@ class SiteRecord(BaseRecord): "formation": "", "aquifer": "", "well_depth": None, + "well_depth_units": FEET, } From 0edebcd426bb327bd854fb206fec3f6b9df8dcff Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 16 Apr 2025 08:52:12 -0700 Subject: [PATCH 106/143] mypy fixes --- backend/persister.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 09d32e6..9cae709 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -38,7 +38,7 @@ class BasePersister(Loggable): If persisting to a file, the output directory is created by config._make_output_path() """ - add_extension: str = "csv" + extension: str = "csv" def __init__(self): self.records = [] @@ -213,11 +213,7 @@ class GeoJSONPersister(BasePersister): extension = "geojson" def _write(self, path: 
str, records: list): - feature_collection = { - "type": "FeatureCollection", - "features": [], - } - + features = [ { "type": "Feature", @@ -237,7 +233,10 @@ def _write(self, path: str, records: list): } for record in records ] - feature_collection["features"].extend(features) + feature_collection = { + "type": "FeatureCollection", + "features": features + } with open(path, "w") as f: json.dump(feature_collection, f, indent=4) From dd47369b86672146bf8adf7ab63f285c213efa2f Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 16 Apr 2025 16:28:38 +0000 Subject: [PATCH 107/143] Formatting changes --- backend/persister.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 9cae709..2288dc3 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -213,7 +213,7 @@ class GeoJSONPersister(BasePersister): extension = "geojson" def _write(self, path: str, records: list): - + features = [ { "type": "Feature", @@ -233,10 +233,7 @@ def _write(self, path: str, records: list): } for record in records ] - feature_collection = { - "type": "FeatureCollection", - "features": features - } + feature_collection = {"type": "FeatureCollection", "features": features} with open(path, "w") as f: json.dump(feature_collection, f, indent=4) From 1a0511dc49feb7f87dc427896c53d5c71fe65be7 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 16 Apr 2025 09:57:00 -0700 Subject: [PATCH 108/143] work on caching in workflow --- .github/workflows/cicd.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 77ce402..05fe71e 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -18,10 +18,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.10 - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: "3.10" cache: "pip" From 
d498f65779b769d7a4d7b86e06b45ddfdeb5ecad Mon Sep 17 00:00:00 2001 From: jross Date: Wed, 16 Apr 2025 11:25:08 -0600 Subject: [PATCH 109/143] add geoserver persister --- backend/__init__.py | 7 ++ backend/config.py | 74 ++++++++++++++------ backend/persister.py | 140 ++++++++++++++++++++++++++++++++----- backend/record.py | 75 +++++++++++--------- backend/unifier.py | 12 ++-- frontend/cli.py | 35 +++++++--- frontend/cronjob_worker.sh | 2 +- 7 files changed, 258 insertions(+), 87 deletions(-) diff --git a/backend/__init__.py b/backend/__init__.py index e69de29..2034a38 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class OutputFormat(str, Enum): + GEOJSON = "geojson" + CSV = "csv" + GEOSERVER = "geoserver" \ No newline at end of file diff --git a/backend/config.py b/backend/config.py index 306a82c..70ddc47 100644 --- a/backend/config.py +++ b/backend/config.py @@ -17,10 +17,12 @@ import sys import time from datetime import datetime, timedelta +from enum import Enum import shapely.wkt from backend.logging import Loggable +from . 
import OutputFormat from .bounding_polygons import get_county_polygon from .connectors.nmbgmr.source import ( @@ -90,6 +92,9 @@ def get_source(source): return klass() + + + class Config(Loggable): site_limit: int = 0 dry: bool = False @@ -138,14 +143,22 @@ class Config(Loggable): analyte_output_units: str = MILLIGRAMS_PER_LITER waterlevel_output_units: str = FEET - use_csv: bool = True - use_geojson: bool = False + # use_csv: bool = True + # use_geojson: bool = False - def __init__(self, model=None, payload=None): + output_format: OutputFormat = OutputFormat.CSV + + yes: bool = True + + def __init__(self, model=None, payload=None, path=None): # need to initialize logger super().__init__() self.bbox = {} + if path: + payload = self._load_from_yaml(path) + + self._payload = payload if model: if model.wkt: self.wkt = model.wkt @@ -159,22 +172,39 @@ def __init__(self, model=None, payload=None): for s in SOURCE_KEYS: setattr(self, f"use_source_{s}", s in model.sources) elif payload: - self.wkt = payload.get("wkt", "") - self.county = payload.get("county", "") - self.output_summary = payload.get("output_summary", False) - self.output_timeseries_unified = payload.get( - "output_timeseries_unified", False - ) - self.output_timeseries_separated = payload.get( - "output_timeseries_separated", False - ) - self.output_name = payload.get("output_name", "output") - self.start_date = payload.get("start_date", "") - self.end_date = payload.get("end_date", "") - self.parameter = payload.get("parameter", "") - - for s in SOURCE_KEYS: - setattr(self, f"use_source_{s}", s in payload.get("sources", [])) + sources = payload.get("sources", []) + if sources: + for sk in SOURCE_KEYS: + value = sources.get(sk) + if value is not None: + setattr(self, f"use_source_{sk}", value) + + for attr in ("wkt", "county", "bbox", + "output_summary", + "output_timeseries_unified", + "output_timeseries_separated", + "start_date", + "end_date", + "parameter", + "output_name", + "dry", + 
"latest_water_level_only", + "output_format", + "use_cloud_storage", + "yes"): + if attr in payload: + setattr(self, attr, payload[attr]) + + def _load_from_yaml(self, path): + import yaml + path = os.path.abspath(path) + if os.path.exists(path): + self.log(f"Loading config from {path}") + with open(path, "r") as f: + data = yaml.safe_load(f) + return data + else: + self.warn(f"Config file {path} not found") def finalize(self): self._update_output_units() @@ -337,6 +367,8 @@ def _report_attributes(title, attrs): "output_timeseries_separated", "output_horizontal_datum", "output_elevation_units", + "use_cloud_storage", + "output_format" ), ) @@ -455,5 +487,7 @@ def end_dt(self): def output_path(self): return os.path.join(self.output_dir, f"{self.output_name}") - + def get(self, attr): + if self._payload: + return self._payload.get(attr) # ============= EOF ============================================= diff --git a/backend/persister.py b/backend/persister.py index 38e8493..24498c6 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -20,7 +20,9 @@ import pandas as pd import geopandas as gpd +import psycopg2 +from backend import OutputFormat from backend.logging import Loggable try: @@ -38,10 +40,11 @@ class BasePersister(Loggable): extension: str # output_id: str - def __init__(self): + def __init__(self, config=None): self.records = [] self.timeseries = [] self.sites = [] + self.config = config super().__init__() # self.keys = record_klass.keys @@ -105,8 +108,14 @@ def add_extension(self, path: str): if not self.extension: raise NotImplementedError - if not path.endswith(self.extension): - path = f"{path}.{self.extension}" + ext = self.extension + if self.config.output_format == OutputFormat.CSV: + ext = "csv" + elif self.config.output_format == OutputFormat.GEOJSON: + ext = "geojson" + + if not path.endswith(ext): + path = f"{path}.{ext}" return path def _write(self, path: str, records): @@ -124,9 +133,9 @@ def write_file(path, func, records): 
func(csv.writer(f), records) -def write_memory(path, func, records): - f = io.StringIO() - func(csv.writer(f), records) +def write_memory(func, records, output_format=None): + f = io.BytesIO() + func(f, records, output_format) return f.getvalue() @@ -140,19 +149,113 @@ def dump_timeseries_unified(writer, timeseries): writer.writerow(record.to_row()) -def dump_sites(writer, records): - for i, site in enumerate(records): - if i == 0: - writer.writerow(site.keys) - writer.writerow(site.to_row()) +def dump_sites(filehandle, records, output_format): + if output_format == OutputFormat.CSV: + writer = csv.writer(filehandle) + for i, site in enumerate(records): + if i == 0: + writer.writerow(site.keys) + writer.writerow(site.to_row()) + else: + r0 = records[0] + df = pd.DataFrame([r.to_row() for r in records], columns=r0.keys) + + gdf = gpd.GeoDataFrame( + df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs="EPSG:4326" + ) + gdf.to_file(filehandle, driver="GeoJSON") + + +class GeoServerPersister(BasePersister): + def __init__(self, *args, **kwargs): + super(GeoServerPersister, self).__init__(*args, **kwargs) + self._connection = None + self._connect() + + def dump_sites(self, path: str): + if self.sites: + db = self.config.get('geoserver').get('db') + dbname = db.get('db_name') + self.log(f"dumping sites to {dbname}") + self._write_to_db(self.sites) + else: + self.log("no sites to dump", fg="red") + + def _connect(self): + """ + Connect to a PostgreSQL database on Cloud SQL. + """ + + db = self.config.get('geoserver').get('db') + try: + self._connection = psycopg2.connect( + dbname=db.get('dbname'), + user=db.get('user'), + password=db.get('password'), + host=db.get('host'), + port=db.get('port'), + ) + self.log("Successfully connected to the database.") + except psycopg2.Error as e: + self.log(f"Failed to connect to the database: {e}", fg="red") + + + def _write_to_db(self, records: list): + """ + Write records to a PostgreSQL database. 
+ """ + # if not self._connection: + # self._connect() + + + sources = {r.source for r in records} + with self._connection.cursor() as cursor: + for source in sources: + # upsert sources + sql = """INSERT INTO public.tbl_sources (name) VALUES (%s) ON CONFLICT (name) DO NOTHING""" + cursor.execute(sql, (source,)) + self._connection.commit() + + with self._connection.cursor() as cursor: + chunk_size = 100 # Adjust chunk size as needed + # Process records in chunks + keys= ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] + for i in range(0, len(records), chunk_size): + chunk = records[i:i + chunk_size] + print(f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}") + with self._connection.cursor() as cursor: + sql = """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) + VALUES (%s, %s, public.ST_SetSRID(public.ST_MakePoint(%s, %s), 4326), %s) + ON CONFLICT (name) DO UPDATE SET properties = EXCLUDED.properties;""" + values = [ + (record.name, record.to_dict(keys), record.longitude, record.latitude, record.source) + for record in chunk + ] + cursor.executemany(sql, values) + self._connection.commit() + # for record in records: + # sql = """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) VALUES (%s,%s, + # public.ST_SetSRID(public.ST_MakePoint(%s,%s), 4326), + # %s)""" + # # print(record) + # values = [record.name, record.properties, + # record.longitude, record.latitude, record.source] + # print(values) + # cursor.execute(sql, values) + # + # self._connection.commit() + + + + class CloudStoragePersister(BasePersister): extension = "csv" _content: list - def __init__(self): - super(CloudStoragePersister, self).__init__() + def __init__(self, *args, **kwargs): + super(CloudStoragePersister, self).__init__(*args, **kwargs) self._content = [] def finalize(self, output_name: str): @@ -177,22 +280,27 @@ def finalize(self, output_name: str): 
blob.upload_from_string(zip_buffer.getvalue()) else: path, cnt = self._content[0] + + #this is a hack. need a better way to specify the output path + dirname = os.path.basename(os.path.dirname(path)) + path = os.path.join(dirname, os.path.basename(path)) + blob = bucket.blob(path) - blob.upload_from_string(cnt) + blob.upload_from_string(cnt, content_type="application/json" if self.config.output_format == OutputFormat.GEOJSON else "text/csv") def _make_output_directory(self, output_directory: str): # prevent making root directory, because we are not saving to disk pass def _write(self, path: str, records: list): - content = write_memory(path, dump_sites, records) + content = write_memory(dump_sites, records, self.config.output_format) self._add_content(path, content) def _add_content(self, path: str, content: str): self._content.append((path, content)) def _dump_timeseries_unified(self, path: str, timeseries: list): - content = write_memory(path, dump_timeseries_unified, timeseries) + content = write_memory(dump_timeseries_unified, timeseries) self._add_content(path, content) diff --git a/backend/record.py b/backend/record.py index 8772edd..44ee1f7 100644 --- a/backend/record.py +++ b/backend/record.py @@ -31,44 +31,51 @@ def to_csv(self): def __init__(self, payload): self._payload = payload - def to_row(self): - - def get(attr): - # v = self._payload.get(attr) - # if v is None and self.defaults: - # v = self.defaults.get(attr) - v = self.__getattr__(attr) - - field_sigfigs = [ - ("elevation", 2), - ("well_depth", 2), - ("latitude", 6), - ("longitude", 6), - ("min", 2), - ("max", 2), - ("mean", 2), - ] - - # both analyte and water level tables have the same fields, but the - # rounding should only occur for water level tables - if isinstance(self, WaterLevelRecord): - field_sigfigs.append((PARAMETER_VALUE, 2)) - - for field, sigfigs in field_sigfigs: - if v is not None and field == attr: - try: - v = round(v, sigfigs) - except TypeError as e: - print(field, attr) - 
raise e - break - return v - - return [get(k) for k in self.keys] + def to_row(self, keys=None): + if keys is None: + keys = self.keys + + return [self._get_sigfig_formatted_value(k) for k in keys] + + def to_dict(self, keys=None): + if keys is None: + keys = self.keys + return {k: self._get_sigfig_formatted_value(k) for k in keys} def update(self, **kw): self._payload.update(kw) + def _get_sigfig_formatted_value(self, attr): + # v = self._payload.get(attr) + # if v is None and self.defaults: + # v = self.defaults.get(attr) + v = self.__getattr__(attr) + + field_sigfigs = [ + ("elevation", 2), + ("well_depth", 2), + ("latitude", 6), + ("longitude", 6), + ("min", 2), + ("max", 2), + ("mean", 2), + ] + + # both analyte and water level tables have the same fields, but the + # rounding should only occur for water level tables + if isinstance(self, WaterLevelRecord): + field_sigfigs.append((PARAMETER_VALUE, 2)) + + for field, sigfigs in field_sigfigs: + if v is not None and field == attr: + try: + v = round(v, sigfigs) + except TypeError as e: + print(field, attr) + raise e + break + return v + def __getattr__(self, attr): v = self._payload.get(attr) if v is None and self.defaults: diff --git a/backend/unifier.py b/backend/unifier.py index f31fa99..7fc1d2d 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -15,9 +15,9 @@ # =============================================================================== import shapely -from backend.config import Config, get_source +from backend.config import Config, get_source, OutputFormat from backend.logging import setup_logging -from backend.persister import CSVPersister, GeoJSONPersister, CloudStoragePersister +from backend.persister import CSVPersister, GeoJSONPersister, CloudStoragePersister, GeoServerPersister from backend.source import BaseSiteSource @@ -95,12 +95,14 @@ def _perister_factory(config): persister_klass = CSVPersister if config.use_cloud_storage: persister_klass = CloudStoragePersister - elif config.use_csv: 
+ elif config.output_format == OutputFormat.CSV: persister_klass = CSVPersister - elif config.use_geojson: + elif config.output_format == OutputFormat.GEOJSON: persister_klass = GeoJSONPersister + elif config.output_format == OutputFormat.GEOSERVER: + persister_klass = GeoServerPersister - return persister_klass() + return persister_klass(config) # def _unify_wrapper(config, func): diff --git a/frontend/cli.py b/frontend/cli.py index cf038ac..ac3484b 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -192,6 +192,14 @@ def cli(): ) ] +CONFIG_OPTIONS = [ + click.option( + "--config", + type=click.Path(exists=True), + default=None, + help="Path to config file. Default is config.yaml", + ), +] def add_options(options): def _add_options(func): @@ -208,6 +216,7 @@ def _add_options(func): type=click.Choice(PARAMETER_OPTIONS, case_sensitive=False), required=True, ) +@add_options(CONFIG_OPTIONS) @add_options(OUTPUT_OPTIONS) @add_options(PERSISTER_OPTIONS) @add_options(DT_OPTIONS) @@ -216,6 +225,7 @@ def _add_options(func): @add_options(DEBUG_OPTIONS) def weave( weave, + config_path, output, output_dir, start_date, @@ -241,7 +251,7 @@ def weave( """ parameter = weave # instantiate config and set up parameter - config = setup_config(f"{parameter}", bbox, county, site_limit, dry) + config = setup_config(f"{parameter}", config_path, bbox, county, site_limit, dry) config.parameter = parameter # # make sure config.output_name is properly set @@ -337,11 +347,13 @@ def weave( @cli.command() +@add_options(CONFIG_OPTIONS) @add_options(SPATIAL_OPTIONS) @add_options(PERSISTER_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) -def wells(bbox, county, +def sites(config, + bbox, county, output_dir, no_bernco, no_bor, @@ -361,22 +373,23 @@ def wells(bbox, county, Get locations """ - config = setup_config("sites", bbox, county, site_limit, dry) + config = setup_config("sites", config, bbox, county, site_limit, dry) config_agencies = ["bernco", "bor", "cabq", "ebid", 
"nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", - "wqp"] - lcs = locals() - for agency in config_agencies: - setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + "wqp", "nmose_pod"] + if config is None: + lcs = locals() + for agency in config_agencies: + setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + config.output_dir = output_dir config.sites_only = True - config.output_dir = output_dir config.finalize() # setup logging here so that the path can be set to config.output_path setup_logging(path=config.output_path) config.report() - if not yes: + if not yes and not config.yes: # prompt user to continue if not click.confirm("Do you want to continue?", default=True): return @@ -410,8 +423,8 @@ def sources(sources, bbox, county): click.echo(s) -def setup_config(tag, bbox, county, site_limit, dry): - config = Config() +def setup_config(tag, config_path, bbox, county, site_limit, dry): + config = Config(path=config_path) if county: click.echo(f"Getting {tag} for county {county}") config.county = county diff --git a/frontend/cronjob_worker.sh b/frontend/cronjob_worker.sh index 4a3925f..46b086a 100644 --- a/frontend/cronjob_worker.sh +++ b/frontend/cronjob_worker.sh @@ -1,3 +1,3 @@ -die weave \ No newline at end of file +die sites --config config.yaml \ No newline at end of file From dec648eaa954a6f98a3aa3e6b42ff5830ba5e915 Mon Sep 17 00:00:00 2001 From: jross Date: Wed, 16 Apr 2025 11:29:20 -0600 Subject: [PATCH 110/143] refactored geoserver persister --- backend/persister.py | 79 ---------------------------- backend/persisters/__init__.py | 10 ++++ backend/persisters/geoserver.py | 93 +++++++++++++++++++++++++++++++++ backend/unifier.py | 3 +- 4 files changed, 105 insertions(+), 80 deletions(-) create mode 100644 backend/persisters/__init__.py create mode 100644 backend/persisters/geoserver.py diff --git a/backend/persister.py b/backend/persister.py index 24498c6..fa5cfd0 100644 --- 
a/backend/persister.py +++ b/backend/persister.py @@ -166,85 +166,6 @@ def dump_sites(filehandle, records, output_format): gdf.to_file(filehandle, driver="GeoJSON") -class GeoServerPersister(BasePersister): - def __init__(self, *args, **kwargs): - super(GeoServerPersister, self).__init__(*args, **kwargs) - self._connection = None - self._connect() - - def dump_sites(self, path: str): - if self.sites: - db = self.config.get('geoserver').get('db') - dbname = db.get('db_name') - self.log(f"dumping sites to {dbname}") - self._write_to_db(self.sites) - else: - self.log("no sites to dump", fg="red") - - def _connect(self): - """ - Connect to a PostgreSQL database on Cloud SQL. - """ - - db = self.config.get('geoserver').get('db') - try: - self._connection = psycopg2.connect( - dbname=db.get('dbname'), - user=db.get('user'), - password=db.get('password'), - host=db.get('host'), - port=db.get('port'), - ) - self.log("Successfully connected to the database.") - except psycopg2.Error as e: - self.log(f"Failed to connect to the database: {e}", fg="red") - - - def _write_to_db(self, records: list): - """ - Write records to a PostgreSQL database. 
- """ - # if not self._connection: - # self._connect() - - - sources = {r.source for r in records} - with self._connection.cursor() as cursor: - for source in sources: - # upsert sources - sql = """INSERT INTO public.tbl_sources (name) VALUES (%s) ON CONFLICT (name) DO NOTHING""" - cursor.execute(sql, (source,)) - self._connection.commit() - - with self._connection.cursor() as cursor: - chunk_size = 100 # Adjust chunk size as needed - # Process records in chunks - keys= ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] - for i in range(0, len(records), chunk_size): - chunk = records[i:i + chunk_size] - print(f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}") - with self._connection.cursor() as cursor: - sql = """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) - VALUES (%s, %s, public.ST_SetSRID(public.ST_MakePoint(%s, %s), 4326), %s) - ON CONFLICT (name) DO UPDATE SET properties = EXCLUDED.properties;""" - values = [ - (record.name, record.to_dict(keys), record.longitude, record.latitude, record.source) - for record in chunk - ] - cursor.executemany(sql, values) - self._connection.commit() - # for record in records: - # sql = """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) VALUES (%s,%s, - # public.ST_SetSRID(public.ST_MakePoint(%s,%s), 4326), - # %s)""" - # # print(record) - # values = [record.name, record.properties, - # record.longitude, record.latitude, record.source] - # print(values) - # cursor.execute(sql, values) - # - # self._connection.commit() - diff --git a/backend/persisters/__init__.py b/backend/persisters/__init__.py new file mode 100644 index 0000000..28a0970 --- /dev/null +++ b/backend/persisters/__init__.py @@ -0,0 +1,10 @@ +# =============================================================================== +# Author: Jake Ross +# Copyright 2025 New Mexico Bureau of Geology & Mineral Resources +# Licensed under the Apache License, Version 2.0 
(the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 +# =============================================================================== + + +# ============= EOF ============================================= diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py new file mode 100644 index 0000000..10dd1c0 --- /dev/null +++ b/backend/persisters/geoserver.py @@ -0,0 +1,93 @@ +# =============================================================================== +# Author: Jake Ross +# Copyright 2025 New Mexico Bureau of Geology & Mineral Resources +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 +# =============================================================================== +import psycopg2 + +from backend.persister import BasePersister + + +class GeoServerPersister(BasePersister): + def __init__(self, *args, **kwargs): + super(GeoServerPersister, self).__init__(*args, **kwargs) + self._connection = None + self._connect() + + def dump_sites(self, path: str): + if self.sites: + db = self.config.get('geoserver').get('db') + dbname = db.get('db_name') + self.log(f"dumping sites to {dbname}") + self._write_to_db(self.sites) + else: + self.log("no sites to dump", fg="red") + + def _connect(self): + """ + Connect to a PostgreSQL database on Cloud SQL. 
+ """ + + db = self.config.get('geoserver').get('db') + try: + self._connection = psycopg2.connect( + dbname=db.get('dbname'), + user=db.get('user'), + password=db.get('password'), + host=db.get('host'), + port=db.get('port'), + ) + self.log("Successfully connected to the database.") + except psycopg2.Error as e: + self.log(f"Failed to connect to the database: {e}", fg="red") + + + def _write_to_db(self, records: list): + """ + Write records to a PostgreSQL database. + """ + # if not self._connection: + # self._connect() + + + sources = {r.source for r in records} + with self._connection.cursor() as cursor: + for source in sources: + # upsert sources + sql = """INSERT INTO public.tbl_sources (name) VALUES (%s) ON CONFLICT (name) DO NOTHING""" + cursor.execute(sql, (source,)) + self._connection.commit() + + with self._connection.cursor() as cursor: + chunk_size = 100 # Adjust chunk size as needed + # Process records in chunks + keys= ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] + for i in range(0, len(records), chunk_size): + chunk = records[i:i + chunk_size] + print(f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}") + with self._connection.cursor() as cursor: + sql = """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) + VALUES (%s, %s, public.ST_SetSRID(public.ST_MakePoint(%s, %s), 4326), %s) + ON CONFLICT (name) DO UPDATE SET properties = EXCLUDED.properties;""" + values = [ + (record.name, record.to_dict(keys), record.longitude, record.latitude, record.source) + for record in chunk + ] + cursor.executemany(sql, values) + self._connection.commit() + # for record in records: + # sql = """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) VALUES (%s,%s, + # public.ST_SetSRID(public.ST_MakePoint(%s,%s), 4326), + # %s)""" + # # print(record) + # values = [record.name, record.properties, + # record.longitude, record.latitude, record.source] + # print(values) + # 
cursor.execute(sql, values) + # + # self._connection.commit() + + +# ============= EOF ============================================= diff --git a/backend/unifier.py b/backend/unifier.py index 7fc1d2d..311e1d2 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -17,7 +17,8 @@ from backend.config import Config, get_source, OutputFormat from backend.logging import setup_logging -from backend.persister import CSVPersister, GeoJSONPersister, CloudStoragePersister, GeoServerPersister +from backend.persister import CSVPersister, GeoJSONPersister, CloudStoragePersister +from backend.persisters.geoserver import GeoServerPersister from backend.source import BaseSiteSource From 6d9ea6f9fda7d7e80be6846e6dcbc9660a747bd9 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 16 Apr 2025 14:34:32 -0700 Subject: [PATCH 111/143] Bulk well retrieval --- backend/connectors/nmbgmr/source.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index 8700a98..2b2fd62 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -82,19 +82,21 @@ def get_records(self): sites = self._execute_json_request( _make_url("locations"), params, tag="features", timeout=30 ) + pointids = [site["properties"]["point_id"] for site in sites] + wells = self._execute_json_request( + _make_url("wells"), params={"pointid": ",".join(pointids)}, tag="" + ) for site in sites: - print(f"Obtaining well data for {site['properties']['point_id']}") - well_data = self._execute_json_request( - _make_url("wells"), - params={"pointid": site["properties"]["point_id"]}, - tag="", - ) - site["properties"]["formation"] = well_data["formation"] - site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] - site["properties"]["well_depth_units"] = FEET - # site["properties"]["formation"] = None - # site["properties"]["well_depth"] = None - # 
site["properties"]["well_depth_units"] = FEET + pointid = site["properties"]["point_id"] + well_data = wells.get(pointid) + if well_data: + site["properties"]["formation"] = well_data["formation"] + site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] + site["properties"]["well_depth_units"] = FEET + else: + site["properties"]["formation"] = None + site["properties"]["well_depth"] = None + site["properties"]["well_depth_units"] = None return sites From 1bf8f3cd8b27a6dc3aefbf147763827de4e49e6f Mon Sep 17 00:00:00 2001 From: jross Date: Wed, 16 Apr 2025 16:54:29 -0600 Subject: [PATCH 112/143] added orm --- auto_worker_requirements.txt | 11 ++ backend/config.py | 4 +- backend/persisters/geoserver.py | 176 ++++++++++++++++++++++---------- 3 files changed, 137 insertions(+), 54 deletions(-) create mode 100644 auto_worker_requirements.txt diff --git a/auto_worker_requirements.txt b/auto_worker_requirements.txt new file mode 100644 index 0000000..cff7193 --- /dev/null +++ b/auto_worker_requirements.txt @@ -0,0 +1,11 @@ +flask +gunicorn +httpx +pandas +geopandas +frost_sta_client +google-cloud-storage +pytest +urllib3>=2.2.0,<3.0.0 +Geoalchemy2 +sqlalchemy \ No newline at end of file diff --git a/backend/config.py b/backend/config.py index 70ddc47..dcab09a 100644 --- a/backend/config.py +++ b/backend/config.py @@ -208,8 +208,10 @@ def _load_from_yaml(self, path): def finalize(self): self._update_output_units() + if self.output_format != OutputFormat.GEOSERVER: + self.update_output_name() + self.make_output_directory() - self.update_output_name() self.make_output_path() def all_site_sources(self): diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index 10dd1c0..40224fa 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -5,10 +5,58 @@ # You may not use this file except in compliance with the License. 
# You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 # =============================================================================== +import json +import os +import time +from itertools import groupby + import psycopg2 +from sqlalchemy.dialects.postgresql import JSONB, insert +from sqlalchemy.orm import declarative_base, sessionmaker, relationship from backend.persister import BasePersister +from sqlalchemy import Column, ForeignKey, create_engine, UUID, String, Integer +from geoalchemy2 import Geometry + +Base = declarative_base() +# dbname=db.get('dbname'), +# user=db.get('user'), +# password=db.get('password'), +# host=db.get('host'), +# port=db.get('port'), +def session_factory(connection: dict): + user = connection.get("user", "postgres") + password = connection.get("password", "") + host = connection.get("host", "localhost") + port = connection.get("port", 5432) + database = connection.get("dbname", "gis") + + url = f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}" + engine = create_engine(url) + SessionFactory = sessionmaker(autocommit=False, autoflush=False, bind=engine) + return SessionFactory + + +class Location(Base): + __tablename__ = "tbl_location" + + id = Column(Integer, primary_key=True, index=True) + name = Column(String) + data_source_uid = Column(String, index=True) + + properties = Column(JSONB) + geometry = Column(Geometry(geometry_type="POINT", srid=4326)) + source_slug = Column(String, ForeignKey("tbl_sources.name")) + + source = relationship("Sources", backref="locations") + + +class Sources(Base): + __tablename__ = "tbl_sources" + id = Column(Integer) + name = Column(String, primary_key=True, index=True) + class GeoServerPersister(BasePersister): def __init__(self, *args, **kwargs): @@ -29,65 +77,87 @@ def _connect(self): """ Connect to a PostgreSQL database on Cloud SQL. 
""" - - db = self.config.get('geoserver').get('db') - try: - self._connection = psycopg2.connect( - dbname=db.get('dbname'), - user=db.get('user'), - password=db.get('password'), - host=db.get('host'), - port=db.get('port'), - ) - self.log("Successfully connected to the database.") - except psycopg2.Error as e: - self.log(f"Failed to connect to the database: {e}", fg="red") - + sf = session_factory(self.config.get('geoserver').get('db')) + self._connection = sf() def _write_to_db(self, records: list): """ - Write records to a PostgreSQL database. + Write records to a PostgreSQL database in optimized chunks. """ - # if not self._connection: - # self._connect() + sources = {r.source for r in records} + # with self._connection.cursor() as cursor: + # # Upsert sources + # for source in sources: + # sql = """INSERT INTO public.tbl_sources (name) VALUES (%s) ON CONFLICT (name) DO NOTHING""" + # cursor.execute(sql, (source,)) + # self._connection.commit() + with self._connection as conn: + sql = insert(Sources).values([{"name": source} for source in sources]).on_conflict_do_nothing( + index_elements=[Sources.name],) + conn.execute(sql) + chunk_size = 1000 # Larger chunk size for fewer commits + keys = ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] - sources = {r.source for r in records} - with self._connection.cursor() as cursor: - for source in sources: - # upsert sources - sql = """INSERT INTO public.tbl_sources (name) VALUES (%s) ON CONFLICT (name) DO NOTHING""" - cursor.execute(sql, (source,)) - self._connection.commit() - - with self._connection.cursor() as cursor: - chunk_size = 100 # Adjust chunk size as needed - # Process records in chunks - keys= ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] - for i in range(0, len(records), chunk_size): - chunk = records[i:i + chunk_size] - print(f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}") - with self._connection.cursor() as cursor: - sql 
= """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) - VALUES (%s, %s, public.ST_SetSRID(public.ST_MakePoint(%s, %s), 4326), %s) - ON CONFLICT (name) DO UPDATE SET properties = EXCLUDED.properties;""" - values = [ - (record.name, record.to_dict(keys), record.longitude, record.latitude, record.source) - for record in chunk - ] - cursor.executemany(sql, values) - self._connection.commit() - # for record in records: - # sql = """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) VALUES (%s,%s, - # public.ST_SetSRID(public.ST_MakePoint(%s,%s), 4326), - # %s)""" - # # print(record) - # values = [record.name, record.properties, - # record.longitude, record.latitude, record.source] - # print(values) - # cursor.execute(sql, values) - # - # self._connection.commit() + newrecords = [] + records = sorted(records, key=lambda r: str(r.id)) + for name, gs in groupby(records, lambda r: str(r.id)): + gs = list(gs) + n = len(gs) + # print(f"Writing {n} records for {name}") + if n>1: + if n > len({r.source for r in gs}): + print("Duplicate source name found. Skipping...", name, [(r.name, r.source) for r in gs]) + continue + newrecords.extend(gs) + # break + # pass + # print("Duplicate source name found. 
Skipping...", name, [r.source for r in gs]) + # break + + + for i in range(0, len(newrecords), chunk_size): + chunk = newrecords[i:i + chunk_size] + print(f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}") + st = time.time() + + values = [ + { + "name": record.name, + "data_source_uid": record.id, + "properties": record.to_dict(keys), + "geometry": f"SRID=4326;POINT({record.longitude} {record.latitude})", + "source_slug": record.source, + } + for record in chunk + ] + + # stmt = insert(Location).values(values).on_conflict_do_nothing() + linsert = insert(Location) + stmt = linsert.values(values).on_conflict_do_update( + index_elements=[Location.data_source_uid], + set_={"properties": linsert.excluded.properties} + ) + + with self._connection as conn: + conn.execute(stmt) + conn.commit() + print('Chunk write time:', time.time() - st) + # # Pre-serialize properties to reduce processing time + # values = [ + # (record.name, json.dumps(record.to_dict(keys)), record.longitude, record.latitude, record.source) + # for record in chunk + # ] + # + # with self._connection.cursor() as cursor: + # sql = """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) + # VALUES (%s, %s, public.ST_SetSRID(public.ST_MakePoint(%s, %s), 4326), %s) + # ON CONFLICT (name) DO UPDATE SET properties = EXCLUDED.properties;""" + # cursor.executemany(sql, values) + # + # self._connection.commit() # Commit once per chunk + # print('Chunk write time:', time.time() - st) + # break # ============= EOF ============================================= From fe5600b293efd560943b6b9be8871a86c2153355 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 21 Apr 2025 09:29:53 -0600 Subject: [PATCH 113/143] added summary stats --- backend/persisters/geoserver.py | 194 +++++++++++++++++++++++++------- frontend/cli.py | 26 +++-- 2 files changed, 170 insertions(+), 50 deletions(-) diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index 
40224fa..a06b2ba 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -16,7 +16,7 @@ from backend.persister import BasePersister -from sqlalchemy import Column, ForeignKey, create_engine, UUID, String, Integer +from sqlalchemy import Column, ForeignKey, create_engine, UUID, String, Integer, Float, Date, Time from geoalchemy2 import Geometry Base = declarative_base() @@ -52,6 +52,37 @@ class Location(Base): source = relationship("Sources", backref="locations") +class Summary(Base): + __tablename__ = "tbl_summary" + + id = Column(Integer, primary_key=True, index=True) + name = Column(String) + data_source_uid = Column(String, index=True) + + properties = Column(JSONB) + geometry = Column(Geometry(geometry_type="POINT", srid=4326)) + source_slug = Column(String, ForeignKey("tbl_sources.name")) + parameter_slug = Column(String, ForeignKey("tbl_parameters.name")) + + source = relationship("Sources", backref="summaries") + + value = Column(Float) + nrecords = Column(Integer) + min = Column(Float) + max = Column(Float) + mean = Column(Float) + + most_recent_value = Column(Float) + most_recent_date = Column(Date) + most_recent_time = Column(Time) + + +class Parameters(Base): + __tablename__ = "tbl_parameters" + name = Column(String, primary_key=True, index=True) + units = Column(String) + + class Sources(Base): __tablename__ = "tbl_sources" id = Column(Integer) @@ -69,10 +100,19 @@ def dump_sites(self, path: str): db = self.config.get('geoserver').get('db') dbname = db.get('db_name') self.log(f"dumping sites to {dbname}") - self._write_to_db(self.sites) + self._write_to_sites(self.sites) else: self.log("no sites to dump", fg="red") + def dump_summary(self, path: str): + if self.records: + db = self.config.get('geoserver').get('db') + dbname = db.get('db_name') + self.log(f"dumping summary to {dbname}") + self._write_to_summary(self.records) + else: + self.log("no records to dump", fg="red") + def _connect(self): """ Connect to a PostgreSQL 
database on Cloud SQL. @@ -80,50 +120,84 @@ def _connect(self): sf = session_factory(self.config.get('geoserver').get('db')) self._connection = sf() - def _write_to_db(self, records: list): - """ - Write records to a PostgreSQL database in optimized chunks. - """ + def _write_sources(self, records: list): sources = {r.source for r in records} - # with self._connection.cursor() as cursor: - # # Upsert sources - # for source in sources: - # sql = """INSERT INTO public.tbl_sources (name) VALUES (%s) ON CONFLICT (name) DO NOTHING""" - # cursor.execute(sql, (source,)) - # self._connection.commit() with self._connection as conn: sql = insert(Sources).values([{"name": source} for source in sources]).on_conflict_do_nothing( index_elements=[Sources.name],) conn.execute(sql) + conn.commit() - chunk_size = 1000 # Larger chunk size for fewer commits + def _write_parameters(self): + with self._connection as conn: + sql = insert(Parameters).values([{"name": self.config.parameter, + "units": self.config.analyte_output_units}]).on_conflict_do_nothing( + index_elements=[Parameters.name],) + print(sql) + conn.execute(sql) + conn.commit() + + def _write_to_summary(self, records: list): + self._write_sources(records) + self._write_parameters() + for r in records: + print(r, [r.to_dict()]) keys = ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] + def make_stmt(chunk): + values = [ + { + "name": record.location, + "data_source_uid": record.id, + "properties": record.to_dict(keys), + "geometry": f"SRID=4326;POINT({record.longitude} {record.latitude})", + "source_slug": record.source, + "parameter_slug": self.config.parameter, + "nrecords": record.nrecords, + "min": record.min, + "max": record.max, + "mean": record.mean, + "most_recent_value": record.most_recent_value, + "most_recent_date": record.most_recent_date, + "most_recent_time": record.most_recent_time, + } + for record in chunk + ] + + linsert = insert(Summary) + return 
linsert.values(values).on_conflict_do_update( + index_elements=[Summary.data_source_uid], + set_={"properties": linsert.excluded.properties} + ) - newrecords = [] - records = sorted(records, key=lambda r: str(r.id)) - for name, gs in groupby(records, lambda r: str(r.id)): - gs = list(gs) - n = len(gs) - # print(f"Writing {n} records for {name}") - if n>1: - if n > len({r.source for r in gs}): - print("Duplicate source name found. Skipping...", name, [(r.name, r.source) for r in gs]) - continue - newrecords.extend(gs) - # break - # pass - # print("Duplicate source name found. Skipping...", name, [r.source for r in gs]) - # break - - - for i in range(0, len(newrecords), chunk_size): - chunk = newrecords[i:i + chunk_size] + self._chunk_insert(make_stmt, records) + + def _chunk_insert(self, make_stmt, records: list, chunk_size: int = 1000): + for i in range(0, len(records), chunk_size): + chunk = records[i:i + chunk_size] print(f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}") st = time.time() + stmt = make_stmt(chunk) + with self._connection as conn: + conn.execute(stmt) + conn.commit() + + print('Chunk write time:', time.time() - st) + + def _write_to_sites(self, records: list): + """ + Write records to a PostgreSQL database in optimized chunks. 
+ """ + + self._write_sources(records) + + keys = ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] + chunk_size = 1000 # Larger chunk size for fewer commits + + def make_stmt(chunk): values = [ { - "name": record.name, + "name": record.location, "data_source_uid": record.id, "properties": record.to_dict(keys), "geometry": f"SRID=4326;POINT({record.longitude} {record.latitude})", @@ -131,19 +205,61 @@ def _write_to_db(self, records: list): } for record in chunk ] - - # stmt = insert(Location).values(values).on_conflict_do_nothing() linsert = insert(Location) stmt = linsert.values(values).on_conflict_do_update( index_elements=[Location.data_source_uid], set_={"properties": linsert.excluded.properties} ) + return stmt - with self._connection as conn: - conn.execute(stmt) - conn.commit() + self._chunk_insert(make_stmt, records, chunk_size) - print('Chunk write time:', time.time() - st) + # + # newrecords = [] + # records = sorted(records, key=lambda r: str(r.id)) + # for name, gs in groupby(records, lambda r: str(r.id)): + # gs = list(gs) + # n = len(gs) + # # print(f"Writing {n} records for {name}") + # if n>1: + # if n > len({r.source for r in gs}): + # print("Duplicate source name found. Skipping...", name, [(r.name, r.source) for r in gs]) + # continue + # newrecords.extend(gs) + # # break + # # pass + # # print("Duplicate source name found. 
Skipping...", name, [r.source for r in gs]) + # # break + # + # + # for i in range(0, len(newrecords), chunk_size): + # chunk = newrecords[i:i + chunk_size] + # print(f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}") + # st = time.time() + # + # values = [ + # { + # "name": record.name, + # "data_source_uid": record.id, + # "properties": record.to_dict(keys), + # "geometry": f"SRID=4326;POINT({record.longitude} {record.latitude})", + # "source_slug": record.source, + # } + # for record in chunk + # ] + # + # # stmt = insert(Location).values(values).on_conflict_do_nothing() + # linsert = insert(Location) + # stmt = linsert.values(values).on_conflict_do_update( + # index_elements=[Location.data_source_uid], + # set_={"properties": linsert.excluded.properties} + # ) + # + # with self._connection as conn: + # conn.execute(stmt) + # conn.commit() + # + # print('Chunk write time:', time.time() - st) # # Pre-serialize properties to reduce processing time # values = [ diff --git a/frontend/cli.py b/frontend/cli.py index ac3484b..66a9ae4 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -194,7 +194,7 @@ def cli(): CONFIG_OPTIONS = [ click.option( - "--config", + "--config-path", type=click.Path(exists=True), default=None, help="Path to config file. 
Default is config.yaml", @@ -245,6 +245,7 @@ def weave( no_wqp, site_limit, dry, + yes ): """ Get parameter timeseries or summary data @@ -322,10 +323,11 @@ def weave( for agency in false_agencies: setattr(config, f"use_source_{agency}", False) - lcs = locals() - if config_agencies: - for agency in config_agencies: - setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + if config_path is None: + lcs = locals() + if config_agencies: + for agency in config_agencies: + setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) # dates config.start_date = start_date config.end_date = end_date @@ -336,9 +338,10 @@ def weave( if not dry: config.report() - # prompt user to continue - if not click.confirm("Do you want to continue?", default=True): - return + if not yes and not config.yes: + # prompt user to continue + if not click.confirm("Do you want to continue?", default=True): + return if parameter.lower() == "waterlevels": unify_waterlevels(config) @@ -352,7 +355,7 @@ def weave( @add_options(PERSISTER_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) -def sites(config, +def sites(config_path, bbox, county, output_dir, no_bernco, @@ -373,11 +376,12 @@ def sites(config, Get locations """ - config = setup_config("sites", config, bbox, county, site_limit, dry) + config = setup_config("sites", config_path, bbox, county, site_limit, dry) config_agencies = ["bernco", "bor", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", "wqp", "nmose_pod"] - if config is None: + + if config_path is None: lcs = locals() for agency in config_agencies: setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) From 1778deead8c31d2d5dad6fb1c668fba0dd5edfb4 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 21 Apr 2025 10:07:45 -0600 Subject: [PATCH 114/143] fixed most_recent_time --- backend/persisters/geoserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index a06b2ba..b0702a0 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -158,7 +158,7 @@ def make_stmt(chunk): "mean": record.mean, "most_recent_value": record.most_recent_value, "most_recent_date": record.most_recent_date, - "most_recent_time": record.most_recent_time, + "most_recent_time": record.most_recent_time if record.most_recent_time else None, } for record in chunk ] @@ -171,7 +171,7 @@ def make_stmt(chunk): self._chunk_insert(make_stmt, records) - def _chunk_insert(self, make_stmt, records: list, chunk_size: int = 1000): + def _chunk_insert(self, make_stmt, records: list, chunk_size: int = 10): for i in range(0, len(records), chunk_size): chunk = records[i:i + chunk_size] print(f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}") From d62447d19811b25dc101c1072b895fc70f813232 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 21 Apr 2025 10:14:21 -0600 Subject: [PATCH 115/143] changed most_recent to latest, added earliest --- backend/persisters/geoserver.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index b0702a0..bdfb0c8 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -72,9 +72,13 @@ class Summary(Base): max = Column(Float) mean = Column(Float) - most_recent_value = Column(Float) - most_recent_date = Column(Date) - most_recent_time = Column(Time) + latest_value = Column(Float) + latest_date = Column(Date) + latest_time = Column(Time) + + earliest_value = Column(Float) + earliest_date = Column(Date) + earliest_time = Column(Time) class Parameters(Base): @@ -156,9 +160,12 @@ def make_stmt(chunk): "min": record.min, "max": record.max, "mean": record.mean, - "most_recent_value": record.most_recent_value, - "most_recent_date": record.most_recent_date, - "most_recent_time": 
record.most_recent_time if record.most_recent_time else None, + "latest_value": record.latest_value, + "latest_date": record.latest_date, + "latest_time": record.latest_time if record.latest_time else None, + "earliest_value": record.earliest_value, + "earliest_date": record.earliest_date, + "earliest_time": record.earliest_time if record.earliest_time else None, } for record in chunk ] From 3c8a7512c8c3b41f6b9f7139f6f06c441d567706 Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 21 Apr 2025 10:30:18 -0600 Subject: [PATCH 116/143] fixed backend.logging->logger refactor --- backend/persister.py | 2 +- backend/unifier.py | 32 +------------------------------- 2 files changed, 2 insertions(+), 32 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index af41acd..d20e1ed 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -23,7 +23,7 @@ import psycopg2 from backend import OutputFormat -from backend.logging import Loggable +from backend.logger import Loggable try: diff --git a/backend/unifier.py b/backend/unifier.py index d096568..143f967 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -16,7 +16,7 @@ import shapely from backend.config import Config, get_source, OutputFormat -from backend.logging import setup_logging +from backend.logger import setup_logging from backend.constants import WATERLEVELS from backend.persister import CSVPersister, GeoJSONPersister, CloudStoragePersister from backend.persisters.geoserver import GeoServerPersister @@ -358,36 +358,6 @@ def waterlevel_unification_test(): unify_waterlevels(cfg) -def site_unification_test(): - cfg = Config() - # cfg.county = "chaves" - - cfg.output_summary = False - cfg.output_name = "sitesonly" - cfg.sites_only = True - # cfg.output_summary = True - # cfg.output_single_timeseries = True - - cfg.use_source_bernco = False - cfg.use_source_bor = False - cfg.use_source_cabq = False - cfg.use_source_ebid = False - cfg.use_source_nmbgmr_amp = False - cfg.use_source_nmed_dwb = 
False - cfg.use_source_nmose_isc_seven_rivers = False - cfg.use_source_nmose_roswell = False - cfg.use_source_nwis = False - cfg.use_source_pvacd = False - cfg.use_source_wqp = False - cfg.use_source_nmose_pod = True - - cfg.use_source_nmed_dwb = False - - cfg.finalize() - - unify_sites(cfg) - - def get_datastream(siteid): import httpx From f3552a4c4aaf015010ad98425f537f547659058f Mon Sep 17 00:00:00 2001 From: jross Date: Mon, 21 Apr 2025 16:38:34 -0600 Subject: [PATCH 117/143] added convex hull to geoserver --- backend/config.py | 2 +- backend/persisters/geoserver.py | 31 ++++++++++++++++++++++++++++++- frontend/cli.py | 11 +++++++---- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/backend/config.py b/backend/config.py index ea8fe1e..96d379d 100644 --- a/backend/config.py +++ b/backend/config.py @@ -68,7 +68,7 @@ ) from .connectors.usgs.source import NWISSiteSource, NWISWaterLevelSource from .connectors.wqp.source import WQPSiteSource, WQPAnalyteSource, WQPWaterLevelSource - +from .logger import Loggable SOURCE_DICT = { "bernco": BernCoSiteSource, diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index bdfb0c8..a6a38de 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -11,6 +11,8 @@ from itertools import groupby import psycopg2 +from shapely.geometry.multipoint import MultiPoint +from shapely.geometry.point import Point from sqlalchemy.dialects.postgresql import JSONB, insert from sqlalchemy.orm import declarative_base, sessionmaker, relationship @@ -91,6 +93,7 @@ class Sources(Base): __tablename__ = "tbl_sources" id = Column(Integer) name = Column(String, primary_key=True, index=True) + convex_hull = Column(Geometry(geometry_type="POLYGON", srid=4326)) class GeoServerPersister(BasePersister): @@ -132,6 +135,32 @@ def _write_sources(self, records: list): conn.execute(sql) conn.commit() + def _write_sources_with_convex_hull(self, records: list): + # sources = {r.source for r in 
records} + with self._connection as conn: + def key(r): + return str(r.source) + + records = sorted(records, key=key) + for source_name, group in groupby(records, key=key): + group = list(group) + # calculate convex hull for the source from the records + + # Create a MultiPoint object + points = MultiPoint([Point(record.longitude, record.latitude) for record in group]) + + # Calculate the convex hull + sinsert = insert(Sources) + print("Writing source", source_name, points.convex_hull) + sql = sinsert.values([{"name": source_name, + "convex_hull": points.convex_hull.wkt}]).on_conflict_do_update( + index_elements=[Sources.name], + set_={"convex_hull": sinsert.excluded.convex_hull}) + # sql = insert(Sources).values([{"name": source,} for source in sources]).on_conflict_do_nothing( + # index_elements=[Sources.name],) + conn.execute(sql) + conn.commit() + def _write_parameters(self): with self._connection as conn: sql = insert(Parameters).values([{"name": self.config.parameter, @@ -196,7 +225,7 @@ def _write_to_sites(self, records: list): Write records to a PostgreSQL database in optimized chunks. 
""" - self._write_sources(records) + self._write_sources_with_convex_hull(records) keys = ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] chunk_size = 1000 # Larger chunk size for fewer commits diff --git a/frontend/cli.py b/frontend/cli.py index 2146ab6..68d8d4a 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -243,6 +243,7 @@ def weave( end_date, bbox, county, + wkt, no_bernco, no_bor, no_cabq, @@ -250,6 +251,7 @@ def weave( no_nmbgmr_amp, no_nmed_dwb, no_nmose_isc_seven_rivers, + no_nmose_pod, no_nmose_roswell, no_nwis, no_pvacd, @@ -261,7 +263,7 @@ def weave( Get parameter timeseries or summary data """ # instantiate config and set up parameter - config = setup_config(parameter, config_path, bbox, county, site_limit, dry) + config = setup_config(parameter, config_path, bbox, county, wkt, site_limit, dry) config.parameter = parameter @@ -325,7 +327,7 @@ def weave( @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) def sites(config_path, - bbox, county, + bbox, county, wkt, output_dir, no_bernco, no_bor, @@ -334,6 +336,7 @@ def sites(config_path, no_nmbgmr_amp, no_nmed_dwb, no_nmose_isc_seven_rivers, + no_nmose_pod, no_nmose_roswell, no_nwis, no_pvacd, @@ -345,7 +348,7 @@ def sites(config_path, Get sites """ - config = setup_config("sites", config_path, bbox, county, site_limit, dry) + config = setup_config("sites", config_path, bbox, county, wkt, site_limit, dry) config_agencies = ["bernco", "bor", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", "wqp", "nmose_pod"] @@ -403,7 +406,7 @@ def sources(sources, bbox, wkt, county): click.echo(s) -def setup_config(tag, config_path, bbox, county, site_limit, dry): +def setup_config(tag, config_path, bbox, county, wkt, site_limit, dry): config = Config(path=config_path) if county: From d25716e794c8921fc188db02587e6c326ed9715d Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Wed, 23 Apr 2025 15:07:04 -0600 Subject: [PATCH 
118/143] fixes and updates after merging pre-production --- README.md | 7 ++- backend/config.py | 15 ++--- backend/persister.py | 70 ++++++++-------------- backend/unifier.py | 6 +- frontend/cli.py | 104 +++++++++++++++++++++------------ requirements.txt | 5 +- tests/test_cli/__init__.py | 22 +++---- tests/test_sources/__init__.py | 6 +- 8 files changed, 118 insertions(+), 117 deletions(-) diff --git a/README.md b/README.md index 715c1c1..0173df4 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,7 @@ A log of the inputs and processes, called `die.log`, is also saved to the output | formation | geologic formation in which the well terminates | string | N | | aquifer | aquifer from which the well draws water | string | N | | well_depth | depth of well | float | N | +| well_depth_units | units of well depth. Defaults to ft | string | N | **CABQ elevation is calculated as [elevation at top of casing] - [stickup height]; if stickup height < 0 the measuring point is assumed to be beneath the ground surface @@ -214,12 +215,12 @@ die sources {parameter} to print the sources that report that parameter to the terminal. -### Wells [In Development] +### Sites Use ``` -die wells +die sites ``` -to print wells to the terminal. 
+to export site information only \ No newline at end of file diff --git a/backend/config.py b/backend/config.py index 3808ee7..9c54e7c 100644 --- a/backend/config.py +++ b/backend/config.py @@ -68,6 +68,7 @@ ) from .connectors.usgs.source import NWISSiteSource, NWISWaterLevelSource from .connectors.wqp.source import WQPSiteSource, WQPAnalyteSource, WQPWaterLevelSource +from backend.logger import Loggable SOURCE_DICT = { @@ -143,17 +144,13 @@ class Config(Loggable): output_summary: bool = False output_timeseries_unified: bool = False output_timeseries_separated: bool = False - site_file_type: str = "csv" latest_water_level_only: bool = False analyte_output_units: str = MILLIGRAMS_PER_LITER waterlevel_output_units: str = FEET - # use_csv: bool = True - # use_geojson: bool = False - - output_format: OutputFormat = OutputFormat.CSV + sites_output_format: OutputFormat = OutputFormat.CSV yes: bool = True @@ -161,7 +158,6 @@ def __init__(self, model=None, payload=None, path=None): # need to initialize logger super().__init__() - self.bbox = {} if path: payload = self._load_from_yaml(path) @@ -197,7 +193,7 @@ def __init__(self, model=None, payload=None, path=None): "output_name", "dry", "latest_water_level_only", - "output_format", + "sites_output_format", "use_cloud_storage", "yes"): if attr in payload: @@ -289,7 +285,7 @@ def get_config_and_false_agencies(self): def finalize(self): self._update_output_units() - if self.output_format != OutputFormat.GEOSERVER: + if self.sites_output_format != OutputFormat.GEOSERVER: self.update_output_name() self.make_output_directory() @@ -448,11 +444,10 @@ def _report_attributes(title, attrs): "output_summary", "output_timeseries_unified", "output_timeseries_separated", - "site_file_type", "output_horizontal_datum", "output_elevation_units", "use_cloud_storage", - "output_format" + "sites_output_format" ), ) diff --git a/backend/persister.py b/backend/persister.py index 21c5781..efa76c4 100644 --- a/backend/persister.py +++ 
b/backend/persister.py @@ -130,6 +130,30 @@ def write_csv_file(path, func, records): with open(path, "w", newline="") as f: func(csv.writer(f), records) +def write_sites_geojson_file(path, records): + features = [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + record.get("longitude"), + record.get("latitude"), + record.get("elevation"), + ], + }, + "properties": { + k: record.get(k) + for k in record.keys + if k not in ["latitude", "longitude", "elevation"] + }, + } + for record in records + ] + feature_collection = {"type": "FeatureCollection", "features": features} + + with open(path, "w") as f: + json.dump(feature_collection, f, indent=4) def write_memory(func, records, output_format=None): f = io.BytesIO() @@ -169,12 +193,6 @@ def dump_sites(filehandle, records, output_format): ) gdf.to_file(filehandle, driver="GeoJSON") - - - - - - class CloudStoragePersister(BasePersister): extension = "csv" _content: list @@ -243,45 +261,7 @@ class GeoJSONPersister(BasePersister): extension = "geojson" def _write(self, path: str, records: list): - - features = [ - { - "type": "Feature", - "geometry": { - "type": "Point", - "coordinates": [ - record.get("longitude"), - record.get("latitude"), - record.get("elevation"), - ], - }, - "properties": { - k: record.get(k) - for k in record.keys - if k not in ["latitude", "longitude", "elevation"] - }, - } - for record in records - ] - feature_collection = {"type": "FeatureCollection", "features": features} - - with open(path, "w") as f: - json.dump(feature_collection, f, indent=4) - - def _get_gdal_type(self, dtype): - """ - Map pandas dtypes to GDAL-compatible types for the schema. 
- """ - if pd.api.types.is_integer_dtype(dtype): - return "int" - elif pd.api.types.is_float_dtype(dtype): - return "float" - elif pd.api.types.is_string_dtype(dtype): - return "str" - elif pd.api.types.is_datetime64_any_dtype(dtype): - return "datetime" - else: - return "str" # Default to string for unsupported types + write_sites_geojson_file(path, records) # class ST2Persister(BasePersister): diff --git a/backend/unifier.py b/backend/unifier.py index 639a823..44c49e0 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -394,14 +394,14 @@ def get_datastreams(): print(si, si.id, ds["@iot.id"]) -if __name__ == "__main__": +# if __name__ == "__main__": # test_waterlevel_unification() # root = logging.getLogger() # root.setLevel(logging.DEBUG) # shandler = logging.StreamHandler() # get_sources(Config()) - setup_logging() - site_unification_test() + # setup_logging() + # site_unification_test() # waterlevel_unification_test() # analyte_unification_test() # print(health_check("nwis")) diff --git a/frontend/cli.py b/frontend/cli.py index c621c0d..d7d3c01 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -17,6 +17,7 @@ import click +from backend import OutputFormat from backend.config import Config from backend.constants import PARAMETER_OPTIONS from backend.unifier import unify_sites, unify_waterlevels, unify_analytes @@ -178,7 +179,7 @@ def cli(): ), ] -PERSISTER_OPTIONS = [ +OUTPUT_DIR_OPTIONS = [ click.option( "--output-dir", default=".", @@ -186,15 +187,25 @@ def cli(): ) ] -SITE_OUTPUT_TYPE_OPTIONS = [ +SITES_OUTPUT_FORMATS = sorted([value for value in OutputFormat]) +SITES_OUTPUT_FORMAT_OPTIONS = [ click.option( - "--site-file-type", - type=click.Choice(["csv", "geojson"]), + "--sites_output_format", + type=click.Choice(SITES_OUTPUT_FORMATS), default="csv", - help="Output file format for sites (csv or geoson). Default is csv", + help=f"Output file format for sites: {SITES_OUTPUT_FORMATS}. 
Default is csv", ) ] +CONFIG_PATH_OPTIONS = [ + click.option( + "--config-path", + type=click.Path(exists=True), + default=None, + help="Path to config file. Default is config.yaml", + ), +] + def add_options(options): def _add_options(func): @@ -211,14 +222,14 @@ def _add_options(func): type=click.Choice(PARAMETER_OPTIONS, case_sensitive=False), required=True, ) -@add_options(CONFIG_OPTIONS) +@add_options(CONFIG_PATH_OPTIONS) @add_options(OUTPUT_OPTIONS) -@add_options(PERSISTER_OPTIONS) +@add_options(OUTPUT_DIR_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) -@add_options(SITE_OUTPUT_TYPE_OPTIONS) +@add_options(SITES_OUTPUT_FORMAT_OPTIONS) def weave( parameter, config_path, @@ -228,6 +239,7 @@ def weave( end_date, bbox, county, + wkt, no_bernco, no_bor, no_cabq, @@ -235,18 +247,29 @@ def weave( no_nmbgmr_amp, no_nmed_dwb, no_nmose_isc_seven_rivers, + no_nmose_pod, no_nmose_roswell, no_nwis, no_pvacd, no_wqp, site_limit, dry, - yes): + yes, + sites_output_format): """ Get parameter timeseries or summary data """ # instantiate config and set up parameter - config = setup_config(parameter, config_path, bbox, county, site_limit, dry, site_file_type) + config = setup_config( + tag=parameter, + config_path=config_path, + bbox=bbox, + county=county, + wkt=wkt, + site_limit=site_limit, + dry=dry, + sites_output_format=sites_output_format + ) config.parameter = parameter @@ -296,41 +319,48 @@ def weave( if not click.confirm("Do you want to continue?", default=True): return - if parameter.lower() == "waterlevels": - unify_waterlevels(config) - else: - unify_analytes(config) + if parameter.lower() == "waterlevels": + unify_waterlevels(config) + else: + unify_analytes(config) + return config @cli.command() -@add_options(CONFIG_OPTIONS) +@add_options(CONFIG_PATH_OPTIONS) @add_options(SPATIAL_OPTIONS) -@add_options(PERSISTER_OPTIONS) +@add_options(OUTPUT_DIR_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) 
@add_options(DEBUG_OPTIONS) -def sites(config_path, - bbox, county, - output_dir, - no_bernco, - no_bor, - no_cabq, - no_ebid, - no_nmbgmr_amp, - no_nmed_dwb, - no_nmose_isc_seven_rivers, - no_nmose_roswell, - no_nwis, - no_pvacd, - no_wqp, - site_limit, - dry, - yes): +@add_options(SITES_OUTPUT_FORMAT_OPTIONS) +def sites( + config_path, + bbox, + county, + wkt, + output_dir, + no_bernco, + no_bor, + no_cabq, + no_ebid, + no_nmbgmr_amp, + no_nmed_dwb, + no_nmose_isc_seven_rivers, + no_nmose_pod, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, + site_limit, + dry, + yes, + sites_output_format +): """ Get sites """ - - config = setup_config("sites", config_path, bbox, county, site_limit, dry) + config = setup_config("sites", config_path, bbox, county, wkt, site_limit, dry, sites_output_format) config_agencies = ["bernco", "bor", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", "wqp", "nmose_pod"] @@ -388,7 +418,7 @@ def sources(sources, bbox, wkt, county): click.echo(s) -def setup_config(tag, config_path, bbox, county, site_limit, dry, site_file_type="csv"): +def setup_config(tag, config_path, bbox, county, wkt, site_limit, dry, sites_output_format=OutputFormat.CSV): config = Config(path=config_path) if county: @@ -408,7 +438,7 @@ def setup_config(tag, config_path, bbox, county, site_limit, dry, site_file_type config.site_limit = None config.dry = dry - config.site_file_type = site_file_type + config.sites_output_format = sites_output_format return config diff --git a/requirements.txt b/requirements.txt index 4e9f7c5..6aea17c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,7 @@ geopandas frost_sta_client google-cloud-storage pytest -urllib3>=2.2.0,<3.0.0 \ No newline at end of file +urllib3>=2.2.0,<3.0.0 +Geoalchemy2 +sqlalchemy +psycopg2 \ No newline at end of file diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index 0903403..12c3084 100644 --- 
a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -50,7 +50,7 @@ def _test_weave( self, parameter: str, output: str, - site_output_type: str = "csv", + sites_output_format: str = "csv", site_limit: int = 4, start_date: str = "1990-08-10", end_date: str = "1990-08-11", @@ -91,20 +91,15 @@ def _test_weave( start_date, "--end-date", end_date, + "--sites_output_format", + sites_output_format ] - if site_output_type == "csv": - arguments.append("--site-file-type") - arguments.append(site_output_type) - elif site_output_type == "geojson": - arguments.append("--site-file-type") - arguments.append(site_output_type) - if geographic_filter_name and geographic_filter_value: arguments.extend([f"--{geographic_filter_name}", geographic_filter_value]) arguments.extend(no_agencies) - + # Act result = self.runner.invoke(weave, arguments, standalone_mode=False) @@ -176,10 +171,7 @@ def _test_weave( assert getattr(config, _geographic_filter_name) == "" # 9 - if site_output_type == "csv": - assert getattr(config, "site_file_type") == "csv" - elif site_output_type == "geojson": - assert getattr(config, "site_file_type") == "geojson" + assert getattr(config, "sites_output_format") == sites_output_format def test_weave_summary(self): self._test_weave(parameter=WATERLEVELS, output="summary") @@ -192,12 +184,12 @@ def test_weave_timeseries_separated(self): def test_weave_csv(self): self._test_weave( - parameter=WATERLEVELS, output="summary", site_output_type="csv" + parameter=WATERLEVELS, output="summary", sites_output_format="csv" ) def test_weave_geojson(self): self._test_weave( - parameter=WATERLEVELS, output="summary", site_output_type="geojson" + parameter=WATERLEVELS, output="summary", sites_output_format="geojson" ) def test_weave_bbox(self): diff --git a/tests/test_sources/__init__.py b/tests/test_sources/__init__.py index 78886bc..5e33ec0 100644 --- a/tests/test_sources/__init__.py +++ b/tests/test_sources/__init__.py @@ -162,7 +162,7 @@ def test_summary_csv(self): 
def test_summary_geojson(self): # Arrange -------------------------------------------------------------- self.config.output_summary = True - self.config.site_file_type = "geojson" + self.config.sites_output_format = "geojson" self.config.report() # Act ------------------------------------------------------------------ @@ -191,7 +191,7 @@ def test_timeseries_unified_csv(self): def test_timeseries_unified_geojson(self): # Arrange -------------------------------------------------------------- self.config.output_timeseries_unified = True - self.config.site_file_type = "geojson" + self.config.sites_output_format = "geojson" self.config.report() # Act ------------------------------------------------------------------ @@ -228,7 +228,7 @@ def test_timeseries_separated_csv(self): def test_timeseries_separated_geojson(self): # Arrange -------------------------------------------------------------- self.config.output_timeseries_separated = True - self.config.site_file_type = "geojson" + self.config.sites_output_format = "geojson" self.config.report() # Act ------------------------------------------------------------------ From 8ce5bb97ed79352fb42f8a4000d7a625bcfd4d5a Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Wed, 23 Apr 2025 21:08:19 +0000 Subject: [PATCH 119/143] Formatting changes --- backend/__init__.py | 2 +- backend/config.py | 38 +++++----- backend/connectors/nmose/source.py | 8 ++- backend/connectors/nmose/transformer.py | 2 +- backend/persister.py | 50 +++++++------ backend/persisters/geoserver.py | 95 +++++++++++++++++------- backend/unifier.py | 22 +++--- frontend/cli.py | 96 +++++++++++++++---------- tests/test_cli/__init__.py | 4 +- 9 files changed, 201 insertions(+), 116 deletions(-) diff --git a/backend/__init__.py b/backend/__init__.py index 2034a38..06a630a 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -4,4 +4,4 @@ class OutputFormat(str, Enum): GEOJSON = "geojson" CSV = "csv" - GEOSERVER = "geoserver" \ No newline at end of file + 
GEOSERVER = "geoserver" diff --git a/backend/config.py b/backend/config.py index 9c54e7c..f6d8642 100644 --- a/backend/config.py +++ b/backend/config.py @@ -99,9 +99,6 @@ def get_source(source): return klass() - - - class Config(Loggable): site_limit: int = 0 dry: bool = False @@ -183,24 +180,29 @@ def __init__(self, model=None, payload=None, path=None): if value is not None: setattr(self, f"use_source_{sk}", value) - for attr in ("wkt", "county", "bbox", - "output_summary", - "output_timeseries_unified", - "output_timeseries_separated", - "start_date", - "end_date", - "parameter", - "output_name", - "dry", - "latest_water_level_only", - "sites_output_format", - "use_cloud_storage", - "yes"): + for attr in ( + "wkt", + "county", + "bbox", + "output_summary", + "output_timeseries_unified", + "output_timeseries_separated", + "start_date", + "end_date", + "parameter", + "output_name", + "dry", + "latest_water_level_only", + "sites_output_format", + "use_cloud_storage", + "yes", + ): if attr in payload: setattr(self, attr, payload[attr]) def _load_from_yaml(self, path): import yaml + path = os.path.abspath(path) if os.path.exists(path): self.log(f"Loading config from {path}") @@ -447,7 +449,7 @@ def _report_attributes(title, attrs): "output_horizontal_datum", "output_elevation_units", "use_cloud_storage", - "sites_output_format" + "sites_output_format", ), ) @@ -570,4 +572,6 @@ def output_path(self): def get(self, attr): if self._payload: return self._payload.get(attr) + + # ============= EOF ============================================= diff --git a/backend/connectors/nmose/source.py b/backend/connectors/nmose/source.py index ad180ce..5def1bf 100644 --- a/backend/connectors/nmose/source.py +++ b/backend/connectors/nmose/source.py @@ -41,9 +41,11 @@ def get_records(self, *args, **kw) -> List[Dict]: "https://services2.arcgis.com/qXZbWTdPDbTjl7Dy/arcgis/rest/services/OSE_PODs/FeatureServer/0/query" ) - params['where'] = "pod_status = 'ACT' AND pod_basin NOT IN ('SP', 
'SD', 'LWD')" - params["outFields"] = ("OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county" - "pod_name,pod_nbr,pod_suffix,pod_file,depth_well,aquifer,elevation") + params["where"] = "pod_status = 'ACT' AND pod_basin NOT IN ('SP', 'SD', 'LWD')" + params["outFields"] = ( + "OBJECTID,pod_basin,pod_status,easting,northing,datum,utm_accura,status,county" + "pod_name,pod_nbr,pod_suffix,pod_file,depth_well,aquifer,elevation" + ) params["outSR"] = 4326 params["f"] = "json" diff --git a/backend/connectors/nmose/transformer.py b/backend/connectors/nmose/transformer.py index e3a2b64..8f26ebb 100644 --- a/backend/connectors/nmose/transformer.py +++ b/backend/connectors/nmose/transformer.py @@ -24,7 +24,7 @@ def _transform(self, record) -> dict: # "name": record["station_nm"], "latitude": geometry["y"], "longitude": geometry["x"], - "elevation": properties['elevation'], + "elevation": properties["elevation"], "elevation_units": "ft", # "horizontal_datum": datum, # "vertical_datum": record["alt_datum_cd"], diff --git a/backend/persister.py b/backend/persister.py index efa76c4..327202c 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -130,31 +130,33 @@ def write_csv_file(path, func, records): with open(path, "w", newline="") as f: func(csv.writer(f), records) + def write_sites_geojson_file(path, records): features = [ - { - "type": "Feature", - "geometry": { - "type": "Point", - "coordinates": [ - record.get("longitude"), - record.get("latitude"), - record.get("elevation"), - ], - }, - "properties": { - k: record.get(k) - for k in record.keys - if k not in ["latitude", "longitude", "elevation"] - }, - } - for record in records - ] + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + record.get("longitude"), + record.get("latitude"), + record.get("elevation"), + ], + }, + "properties": { + k: record.get(k) + for k in record.keys + if k not in ["latitude", "longitude", "elevation"] + }, + } + for record in 
records + ] feature_collection = {"type": "FeatureCollection", "features": features} with open(path, "w") as f: json.dump(feature_collection, f, indent=4) + def write_memory(func, records, output_format=None): f = io.BytesIO() func(f, records, output_format) @@ -193,6 +195,7 @@ def dump_sites(filehandle, records, output_format): ) gdf.to_file(filehandle, driver="GeoJSON") + class CloudStoragePersister(BasePersister): extension = "csv" _content: list @@ -224,12 +227,19 @@ def finalize(self, output_name: str): else: path, cnt = self._content[0] - #this is a hack. need a better way to specify the output path + # this is a hack. need a better way to specify the output path dirname = os.path.basename(os.path.dirname(path)) path = os.path.join(dirname, os.path.basename(path)) blob = bucket.blob(path) - blob.upload_from_string(cnt, content_type="application/json" if self.config.output_format == OutputFormat.GEOJSON else "text/csv") + blob.upload_from_string( + cnt, + content_type=( + "application/json" + if self.config.output_format == OutputFormat.GEOJSON + else "text/csv" + ), + ) def _make_output_directory(self, output_directory: str): # prevent making root directory, because we are not saving to disk diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index bdfb0c8..9fc261d 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -16,10 +16,22 @@ from backend.persister import BasePersister -from sqlalchemy import Column, ForeignKey, create_engine, UUID, String, Integer, Float, Date, Time +from sqlalchemy import ( + Column, + ForeignKey, + create_engine, + UUID, + String, + Integer, + Float, + Date, + Time, +) from geoalchemy2 import Geometry Base = declarative_base() + + # dbname=db.get('dbname'), # user=db.get('user'), # password=db.get('password'), @@ -101,8 +113,8 @@ def __init__(self, *args, **kwargs): def dump_sites(self, path: str): if self.sites: - db = self.config.get('geoserver').get('db') - dbname = 
db.get('db_name') + db = self.config.get("geoserver").get("db") + dbname = db.get("db_name") self.log(f"dumping sites to {dbname}") self._write_to_sites(self.sites) else: @@ -110,8 +122,8 @@ def dump_sites(self, path: str): def dump_summary(self, path: str): if self.records: - db = self.config.get('geoserver').get('db') - dbname = db.get('db_name') + db = self.config.get("geoserver").get("db") + dbname = db.get("db_name") self.log(f"dumping summary to {dbname}") self._write_to_summary(self.records) else: @@ -121,22 +133,38 @@ def _connect(self): """ Connect to a PostgreSQL database on Cloud SQL. """ - sf = session_factory(self.config.get('geoserver').get('db')) + sf = session_factory(self.config.get("geoserver").get("db")) self._connection = sf() def _write_sources(self, records: list): sources = {r.source for r in records} with self._connection as conn: - sql = insert(Sources).values([{"name": source} for source in sources]).on_conflict_do_nothing( - index_elements=[Sources.name],) + sql = ( + insert(Sources) + .values([{"name": source} for source in sources]) + .on_conflict_do_nothing( + index_elements=[Sources.name], + ) + ) conn.execute(sql) conn.commit() def _write_parameters(self): with self._connection as conn: - sql = insert(Parameters).values([{"name": self.config.parameter, - "units": self.config.analyte_output_units}]).on_conflict_do_nothing( - index_elements=[Parameters.name],) + sql = ( + insert(Parameters) + .values( + [ + { + "name": self.config.parameter, + "units": self.config.analyte_output_units, + } + ] + ) + .on_conflict_do_nothing( + index_elements=[Parameters.name], + ) + ) print(sql) conn.execute(sql) conn.commit() @@ -146,7 +174,14 @@ def _write_to_summary(self, records: list): self._write_parameters() for r in records: print(r, [r.to_dict()]) - keys = ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] + keys = [ + "usgs_site_id", + "alternate_site_id", + "formation", + "aquifer", + "well_depth", + ] + def 
make_stmt(chunk): values = [ { @@ -165,7 +200,9 @@ def make_stmt(chunk): "latest_time": record.latest_time if record.latest_time else None, "earliest_value": record.earliest_value, "earliest_date": record.earliest_date, - "earliest_time": record.earliest_time if record.earliest_time else None, + "earliest_time": ( + record.earliest_time if record.earliest_time else None + ), } for record in chunk ] @@ -173,15 +210,17 @@ def make_stmt(chunk): linsert = insert(Summary) return linsert.values(values).on_conflict_do_update( index_elements=[Summary.data_source_uid], - set_={"properties": linsert.excluded.properties} + set_={"properties": linsert.excluded.properties}, ) self._chunk_insert(make_stmt, records) def _chunk_insert(self, make_stmt, records: list, chunk_size: int = 10): for i in range(0, len(records), chunk_size): - chunk = records[i:i + chunk_size] - print(f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}") + chunk = records[i : i + chunk_size] + print( + f"Writing chunk {i // chunk_size + 1} of {len(records) // chunk_size + 1}" + ) st = time.time() stmt = make_stmt(chunk) @@ -189,7 +228,7 @@ def _chunk_insert(self, make_stmt, records: list, chunk_size: int = 10): conn.execute(stmt) conn.commit() - print('Chunk write time:', time.time() - st) + print("Chunk write time:", time.time() - st) def _write_to_sites(self, records: list): """ @@ -198,7 +237,13 @@ def _write_to_sites(self, records: list): self._write_sources(records) - keys = ["usgs_site_id", "alternate_site_id", "formation", "aquifer", "well_depth"] + keys = [ + "usgs_site_id", + "alternate_site_id", + "formation", + "aquifer", + "well_depth", + ] chunk_size = 1000 # Larger chunk size for fewer commits def make_stmt(chunk): @@ -215,7 +260,7 @@ def make_stmt(chunk): linsert = insert(Location) stmt = linsert.values(values).on_conflict_do_update( index_elements=[Location.data_source_uid], - set_={"properties": linsert.excluded.properties} + set_={"properties": 
linsert.excluded.properties}, ) return stmt @@ -268,11 +313,11 @@ def make_stmt(chunk): # # print('Chunk write time:', time.time() - st) - # # Pre-serialize properties to reduce processing time - # values = [ - # (record.name, json.dumps(record.to_dict(keys)), record.longitude, record.latitude, record.source) - # for record in chunk - # ] + # # Pre-serialize properties to reduce processing time + # values = [ + # (record.name, json.dumps(record.to_dict(keys)), record.longitude, record.latitude, record.source) + # for record in chunk + # ] # # with self._connection.cursor() as cursor: # sql = """INSERT INTO public.tbl_location (name, properties, geometry, source_slug) @@ -283,4 +328,6 @@ def make_stmt(chunk): # self._connection.commit() # Commit once per chunk # print('Chunk write time:', time.time() - st) # break + + # ============= EOF ============================================= diff --git a/backend/unifier.py b/backend/unifier.py index 44c49e0..5a1d6ae 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -395,16 +395,16 @@ def get_datastreams(): # if __name__ == "__main__": - # test_waterlevel_unification() - # root = logging.getLogger() - # root.setLevel(logging.DEBUG) - # shandler = logging.StreamHandler() - # get_sources(Config()) - # setup_logging() - # site_unification_test() - # waterlevel_unification_test() - # analyte_unification_test() - # print(health_check("nwis")) - # generate_site_bounds() +# test_waterlevel_unification() +# root = logging.getLogger() +# root.setLevel(logging.DEBUG) +# shandler = logging.StreamHandler() +# get_sources(Config()) +# setup_logging() +# site_unification_test() +# waterlevel_unification_test() +# analyte_unification_test() +# print(health_check("nwis")) +# generate_site_bounds() # ============= EOF ============================================= diff --git a/frontend/cli.py b/frontend/cli.py index d7d3c01..e8ac8c2 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -231,31 +231,32 @@ def _add_options(func): 
@add_options(DEBUG_OPTIONS) @add_options(SITES_OUTPUT_FORMAT_OPTIONS) def weave( - parameter, - config_path, - output, - output_dir, - start_date, - end_date, - bbox, - county, - wkt, - no_bernco, - no_bor, - no_cabq, - no_ebid, - no_nmbgmr_amp, - no_nmed_dwb, - no_nmose_isc_seven_rivers, - no_nmose_pod, - no_nmose_roswell, - no_nwis, - no_pvacd, - no_wqp, - site_limit, - dry, - yes, - sites_output_format): + parameter, + config_path, + output, + output_dir, + start_date, + end_date, + bbox, + county, + wkt, + no_bernco, + no_bor, + no_cabq, + no_ebid, + no_nmbgmr_amp, + no_nmed_dwb, + no_nmose_isc_seven_rivers, + no_nmose_pod, + no_nmose_roswell, + no_nwis, + no_pvacd, + no_wqp, + site_limit, + dry, + yes, + sites_output_format, +): """ Get parameter timeseries or summary data """ @@ -268,9 +269,9 @@ def weave( wkt=wkt, site_limit=site_limit, dry=dry, - sites_output_format=sites_output_format - ) - + sites_output_format=sites_output_format, + ) + config.parameter = parameter # output type @@ -303,7 +304,7 @@ def weave( lcs = locals() if config_agencies: for agency in config_agencies: - setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + setattr(config, f"use_source_{agency}", lcs.get(f"no_{agency}", False)) # dates config.start_date = start_date config.end_date = end_date @@ -325,7 +326,6 @@ def weave( unify_analytes(config) return config - @cli.command() @add_options(CONFIG_PATH_OPTIONS) @@ -355,20 +355,33 @@ def sites( site_limit, dry, yes, - sites_output_format + sites_output_format, ): """ Get sites """ - config = setup_config("sites", config_path, bbox, county, wkt, site_limit, dry, sites_output_format) - config_agencies = ["bernco", "bor", "cabq", "ebid", "nmbgmr_amp", "nmed_dwb", - "nmose_isc_seven_rivers", "nmose_roswell", "nwis", "pvacd", - "wqp", "nmose_pod"] + config = setup_config( + "sites", config_path, bbox, county, wkt, site_limit, dry, sites_output_format + ) + config_agencies = [ + "bernco", + "bor", + "cabq", + "ebid", + 
"nmbgmr_amp", + "nmed_dwb", + "nmose_isc_seven_rivers", + "nmose_roswell", + "nwis", + "pvacd", + "wqp", + "nmose_pod", + ] if config_path is None: lcs = locals() for agency in config_agencies: - setattr(config, f"use_source_{agency}", lcs.get(f'no_{agency}', False)) + setattr(config, f"use_source_{agency}", lcs.get(f"no_{agency}", False)) config.output_dir = output_dir config.sites_only = True @@ -418,7 +431,16 @@ def sources(sources, bbox, wkt, county): click.echo(s) -def setup_config(tag, config_path, bbox, county, wkt, site_limit, dry, sites_output_format=OutputFormat.CSV): +def setup_config( + tag, + config_path, + bbox, + county, + wkt, + site_limit, + dry, + sites_output_format=OutputFormat.CSV, +): config = Config(path=config_path) if county: diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index 12c3084..1cb0ba6 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -92,14 +92,14 @@ def _test_weave( "--end-date", end_date, "--sites_output_format", - sites_output_format + sites_output_format, ] if geographic_filter_name and geographic_filter_value: arguments.extend([f"--{geographic_filter_name}", geographic_filter_value]) arguments.extend(no_agencies) - + # Act result = self.runner.invoke(weave, arguments, standalone_mode=False) From cf2c0b3d3b5b0d14dbc20d404c79b1921d61670a Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 10:46:00 -0600 Subject: [PATCH 120/143] integration and unification of dev branches --- backend/config.py | 8 +- backend/persister.py | 190 +++++++++++++-------------------- backend/unifier.py | 122 ++++++++++----------- frontend/cli.py | 42 ++++---- tests/test_cli/__init__.py | 71 ++++++------ tests/test_sources/__init__.py | 8 +- 6 files changed, 197 insertions(+), 244 deletions(-) diff --git a/backend/config.py b/backend/config.py index f6d8642..6dd2e0e 100644 --- a/backend/config.py +++ b/backend/config.py @@ -147,7 +147,7 @@ class Config(Loggable): analyte_output_units: str = 
MILLIGRAMS_PER_LITER waterlevel_output_units: str = FEET - sites_output_format: OutputFormat = OutputFormat.CSV + output_format: str = OutputFormat.CSV yes: bool = True @@ -193,7 +193,7 @@ def __init__(self, model=None, payload=None, path=None): "output_name", "dry", "latest_water_level_only", - "sites_output_format", + "output_format", "use_cloud_storage", "yes", ): @@ -287,7 +287,7 @@ def get_config_and_false_agencies(self): def finalize(self): self._update_output_units() - if self.sites_output_format != OutputFormat.GEOSERVER: + if self.output_format != OutputFormat.GEOSERVER: self.update_output_name() self.make_output_directory() @@ -449,7 +449,7 @@ def _report_attributes(title, attrs): "output_horizontal_datum", "output_elevation_units", "use_cloud_storage", - "sites_output_format", + "output_format", ), ) diff --git a/backend/persister.py b/backend/persister.py index 327202c..b985e7c 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -32,7 +32,65 @@ try: from google.cloud import storage except ImportError: - print("google cloud storage not available") + print("google cloud storage not available") + + +def write_memory(func, records, output_format=None): + f = io.BytesIO() + func(f, records, output_format) + return f.getvalue() + + +def dump_timeseries(path, timeseries: list[list]): + """ + Dumps timeseries records to a CSV file. The timeseries must be a list of + lists, where each inner list contains the records for a single site. In the case + of timeseries separated, the inner list will contain the records for a single site + and this function will be called multiple times, once for each site. 
+ """ + with open(path, "w", newline="") as f: + writer = csv.writer(f) + headers_have_not_been_written = True + for i, records in enumerate(timeseries): + for record in records: + if i == 0 and headers_have_not_been_written: + writer.writerow(record.keys) + headers_have_not_been_written = False + writer.writerow(record.to_row()) + + +def dump_sites_summary(path, records, output_format: OutputFormat): + if output_format == OutputFormat.CSV: + with open(path, "w", newline="") as f: + writer = csv.writer(f) + for i, site in enumerate(records): + if i == 0: + writer.writerow(site.keys) + writer.writerow(site.to_row()) + else: + features = [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [ + getattr(record, "longitude"), + getattr(record, "latitude"), + getattr(record, "elevation"), + ], + }, + "properties": { + k: getattr(record, k) + for k in record.keys + if k not in ["latitude", "longitude", "elevation"] + }, + } + for record in records + ] + feature_collection = {"type": "FeatureCollection", "features": features} + + with open(path, "w") as f: + json.dump(feature_collection, f, indent=4) class BasePersister(Loggable): @@ -40,9 +98,6 @@ class BasePersister(Loggable): Class to persist the data to a file or cloud storage. 
If persisting to a file, the output directory is created by config._make_output_path() """ - - extension: str = "csv" - def __init__(self, config=None): self.records = [] self.timeseries = [] @@ -61,25 +116,25 @@ def finalize(self, output_name: str): def dump_sites(self, path: str): if self.sites: path = os.path.join(path, "sites") - path = self.add_extension(path) + path = self.add_extension(path, self.config.output_format) self.log(f"dumping sites to {os.path.abspath(path)}") - self._write(path, self.sites) + self._dump_sites_summary(path, self.sites, self.config.output_format) else: self.log("no sites to dump", fg="red") def dump_summary(self, path: str): if self.records: path = os.path.join(path, "summary") - path = self.add_extension(path) + path = self.add_extension(path, self.config.output_format) self.log(f"dumping summary to {os.path.abspath(path)}") - self._write(path, self.records) + self._dump_sites_summary(path, self.records, self.config.output_format) else: self.log("no records to dump", fg="red") def dump_timeseries_unified(self, path: str): if self.timeseries: path = os.path.join(path, "timeseries_unified") - path = self.add_extension(path) + path = self.add_extension(path, OutputFormat.CSV) self.log(f"dumping unified timeseries to {os.path.abspath(path)}") self._dump_timeseries(path, self.timeseries) else: @@ -94,7 +149,7 @@ def dump_timeseries_separated(self, path: str): for records in self.timeseries: site_id = records[0].id path = os.path.join(timeseries_path, str(site_id).replace(" ", "_")) - path = self.add_extension(path) + path = self.add_extension(path, OutputFormat.CSV) self.log(f"dumping {site_id} to {os.path.abspath(path)}") list_of_records = [records] @@ -102,100 +157,26 @@ def dump_timeseries_separated(self, path: str): else: self.log("no timeseries records to dump", fg="red") - def add_extension(self, path: str): - if not self.extension: + def add_extension(self, path: str, extension: OutputFormat): + if not extension: raise 
NotImplementedError - - ext = self.extension - if self.config.output_format == OutputFormat.CSV: - ext = "csv" - elif self.config.output_format == OutputFormat.GEOJSON: - ext = "geojson" + else: + ext = extension if not path.endswith(ext): path = f"{path}.{ext}" return path - def _write(self, path: str, records): - raise NotImplementedError + def _dump_sites_summary(self, path: str, records: list, output_format: OutputFormat): + dump_sites_summary(path, records, output_format) def _dump_timeseries(self, path: str, timeseries: list): - raise NotImplementedError + dump_timeseries(path, timeseries) def _make_output_directory(self, output_directory: str): os.mkdir(output_directory) -def write_csv_file(path, func, records): - with open(path, "w", newline="") as f: - func(csv.writer(f), records) - - -def write_sites_geojson_file(path, records): - features = [ - { - "type": "Feature", - "geometry": { - "type": "Point", - "coordinates": [ - record.get("longitude"), - record.get("latitude"), - record.get("elevation"), - ], - }, - "properties": { - k: record.get(k) - for k in record.keys - if k not in ["latitude", "longitude", "elevation"] - }, - } - for record in records - ] - feature_collection = {"type": "FeatureCollection", "features": features} - - with open(path, "w") as f: - json.dump(feature_collection, f, indent=4) - - -def write_memory(func, records, output_format=None): - f = io.BytesIO() - func(f, records, output_format) - return f.getvalue() - - -def dump_timeseries(writer, timeseries: list[list]): - """ - Dumps timeseries records to a CSV file. The timeseries must be a list of - lists, where each inner list contains the records for a single site. In the case - of timeseries separated, the inner list will contain the records for a single site - and this function will be called multiple times, once for each site. 
- """ - headers_have_not_been_written = True - for i, records in enumerate(timeseries): - for record in records: - if i == 0 and headers_have_not_been_written: - writer.writerow(record.keys) - headers_have_not_been_written = False - writer.writerow(record.to_row()) - - -def dump_sites(filehandle, records, output_format): - if output_format == OutputFormat.CSV: - writer = csv.writer(filehandle) - for i, site in enumerate(records): - if i == 0: - writer.writerow(site.keys) - writer.writerow(site.to_row()) - else: - r0 = records[0] - df = pd.DataFrame([r.to_row() for r in records], columns=r0.keys) - - gdf = gpd.GeoDataFrame( - df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs="EPSG:4326" - ) - gdf.to_file(filehandle, driver="GeoJSON") - - class CloudStoragePersister(BasePersister): extension = "csv" _content: list @@ -245,35 +226,18 @@ def _make_output_directory(self, output_directory: str): # prevent making root directory, because we are not saving to disk pass - def _write(self, path: str, records: list): - content = write_memory(dump_sites, records, self.config.output_format) - self._add_content(path, content) - def _add_content(self, path: str, content: str): self._content.append((path, content)) + def _dump_sites_summary(self, path: str, records: list): + content = write_memory(dump_sites_summary, records, self.config.output_format) + self._add_content(path, content) + + def _dump_timeseries_unified(self, path: str, timeseries: list): content = write_memory(path, dump_timeseries, timeseries) self._add_content(path, content) - -class CSVPersister(BasePersister): - extension = "csv" - - def _write(self, path: str, records: list): - write_csv_file(path, dump_sites, records) - - def _dump_timeseries(self, path: str, timeseries: list): - write_csv_file(path, dump_timeseries, timeseries) - - -class GeoJSONPersister(BasePersister): - extension = "geojson" - - def _write(self, path: str, records: list): - write_sites_geojson_file(path, records) - - # class 
ST2Persister(BasePersister): # extension = "st2" # diff --git a/backend/unifier.py b/backend/unifier.py index 5a1d6ae..15eef07 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -18,7 +18,7 @@ from backend.config import Config, get_source, OutputFormat from backend.logger import setup_logging from backend.constants import WATERLEVELS -from backend.persister import CSVPersister, GeoJSONPersister, CloudStoragePersister +from backend.persister import BasePersister from backend.persisters.geoserver import GeoServerPersister from backend.source import BaseSiteSource @@ -79,36 +79,36 @@ def unify_sites(config): return True -def _perister_factory(config): - """ - Determines the type of persister to use based on the configuration. The - persister types are: +# def _perister_factory(config): +# """ +# Determines the type of persister to use based on the configuration. The +# persister types are: - - CSVPersister - - CloudStoragePersister - - GeoJSONPersister +# - CSVPersister +# - CloudStoragePersister +# - GeoJSONPersister - Parameters - ------- - config: Config - The configuration object +# Parameters +# ------- +# config: Config +# The configuration object - Returns - ------- - Persister - The persister object to use - """ - persister_klass = CSVPersister - if config.use_cloud_storage: - persister_klass = CloudStoragePersister - elif config.output_format == OutputFormat.CSV: - persister_klass = CSVPersister - elif config.output_format == OutputFormat.GEOJSON: - persister_klass = GeoJSONPersister - elif config.output_format == OutputFormat.GEOSERVER: - persister_klass = GeoServerPersister +# Returns +# ------- +# Persister +# The persister object to use +# """ +# persister_klass = CSVPersister +# if config.use_cloud_storage: +# persister_klass = CloudStoragePersister +# elif config.output_format == OutputFormat.CSV: +# persister_klass = CSVPersister +# elif config.output_format == OutputFormat.GEOJSON: +# persister_klass = GeoJSONPersister +# elif 
config.output_format == OutputFormat.GEOSERVER: +# persister_klass = GeoServerPersister - return persister_klass(config) +# return persister_klass(config) # def _unify_wrapper(config, func): @@ -118,7 +118,7 @@ def _perister_factory(config): def _site_wrapper( - site_source, parameter_source, sites_summary_persister, timeseries_persister, config + site_source, parameter_source, persister, config ): try: @@ -147,7 +147,7 @@ def _site_wrapper( first_flag = True if config.sites_only: - sites_summary_persister.sites.extend(sites) + persister.sites.extend(sites) else: for site_records in site_source.chunks(sites): if type(site_records) == list: @@ -164,7 +164,7 @@ def _site_wrapper( site_records, use_summarize, start_ind, end_ind ) if summary_records: - sites_summary_persister.records.extend(summary_records) + persister.records.extend(summary_records) sites_with_records_count += len(summary_records) else: continue @@ -181,49 +181,35 @@ def _site_wrapper( sites_with_records_count += len(results) for site, records in results: - timeseries_persister.timeseries.append(records) - sites_summary_persister.sites.append(site) + persister.timeseries.append(records) + persister.sites.append(site) if site_limit: - # print( - # "sites_with_records_count:", - # sites_with_records_count, - # "|", - # "site_limit:", - # site_limit, - # "|", - # "chunk_size:", - # site_source.chunk_size, - # ) - if sites_with_records_count >= site_limit: # remove any extra sites that were gathered. 
removes 0 if site_limit is not exceeded num_sites_to_remove = sites_with_records_count - site_limit - # print( - # f"removing {num_sites_to_remove} to avoid exceeding the site limit" - # ) # if sites_with_records_count == sit_limit then num_sites_to_remove = 0 # and calling list[:0] will retur an empty list, so subtract # num_sites_to_remove from the length of the list # to remove the last num_sites_to_remove sites if use_summarize: - sites_summary_persister.records = ( - sites_summary_persister.records[ - : len(sites_summary_persister.records) + persister.records = ( + persister.records[ + : len(persister.records) - num_sites_to_remove ] ) else: - timeseries_persister.timeseries = ( - timeseries_persister.timeseries[ - : len(timeseries_persister.timeseries) + persister.timeseries = ( + persister.timeseries[ + : len(persister.timeseries) - num_sites_to_remove ] ) - sites_summary_persister.sites = ( - sites_summary_persister.sites[ - : len(sites_summary_persister.sites) + persister.sites = ( + persister.sites[ + : len(persister.sites) - num_sites_to_remove ] ) @@ -241,30 +227,32 @@ def _unify_parameter( config, sources, ): - sites_summary_persister = _perister_factory(config) - timeseries_persister = CSVPersister() + + if config.output_format == OutputFormat.GEOSERVER: + persister = GeoServerPersister(config) + else: + persister = BasePersister(config) + for site_source, parameter_source in sources: _site_wrapper( site_source, parameter_source, - sites_summary_persister, - timeseries_persister, + persister, config, ) if config.output_summary: - sites_summary_persister.dump_summary(config.output_path) + persister.dump_summary(config.output_path) elif config.output_timeseries_unified: - timeseries_persister.dump_timeseries_unified(config.output_path) - sites_summary_persister.dump_sites(config.output_path) + persister.dump_timeseries_unified(config.output_path) + persister.dump_sites(config.output_path) elif config.sites_only: - 
sites_summary_persister.dump_sites(config.output_path) + persister.dump_sites(config.output_path) else: # config.output_timeseries_separated - timeseries_persister.dump_timeseries_separated(config.output_path) - sites_summary_persister.dump_sites(config.output_path) + persister.dump_timeseries_separated(config.output_path) + persister.dump_sites(config.output_path) - timeseries_persister.finalize(config.output_name) - sites_summary_persister.finalize(config.output_name) + persister.finalize(config.output_name) def get_sources_in_polygon(polygon): diff --git a/frontend/cli.py b/frontend/cli.py index e8ac8c2..879e5d3 100644 --- a/frontend/cli.py +++ b/frontend/cli.py @@ -170,9 +170,9 @@ def cli(): help="End date in the form 'YYYY', 'YYYY-MM', 'YYYY-MM-DD', 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'", ), ] -OUTPUT_OPTIONS = [ +OUTPUT_TYPE_OPTIONS = [ click.option( - "--output", + "--output-type", type=click.Choice(["summary", "timeseries_unified", "timeseries_separated"]), required=True, help="Output summary file, single unified timeseries file, or separated timeseries files", @@ -187,13 +187,13 @@ def cli(): ) ] -SITES_OUTPUT_FORMATS = sorted([value for value in OutputFormat]) -SITES_OUTPUT_FORMAT_OPTIONS = [ +OUTPUT_FORMATS = sorted([value for value in OutputFormat]) +OUTPUT_FORMAT_OPTIONS = [ click.option( - "--sites_output_format", - type=click.Choice(SITES_OUTPUT_FORMATS), + "--output-format", + type=click.Choice(OUTPUT_FORMATS), default="csv", - help=f"Output file format for sites: {SITES_OUTPUT_FORMATS}. Default is csv", + help=f"Output file format for sites: {OUTPUT_FORMATS}. 
Default is csv", ) ] @@ -223,17 +223,17 @@ def _add_options(func): required=True, ) @add_options(CONFIG_PATH_OPTIONS) -@add_options(OUTPUT_OPTIONS) +@add_options(OUTPUT_TYPE_OPTIONS) @add_options(OUTPUT_DIR_OPTIONS) @add_options(DT_OPTIONS) @add_options(SPATIAL_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) -@add_options(SITES_OUTPUT_FORMAT_OPTIONS) +@add_options(OUTPUT_FORMAT_OPTIONS) def weave( parameter, config_path, - output, + output_type, output_dir, start_date, end_date, @@ -255,7 +255,7 @@ def weave( site_limit, dry, yes, - sites_output_format, + output_format, ): """ Get parameter timeseries or summary data @@ -269,26 +269,26 @@ def weave( wkt=wkt, site_limit=site_limit, dry=dry, - sites_output_format=sites_output_format, + output_format=output_format, ) config.parameter = parameter # output type - if output == "summary": + if output_type == "summary": summary = True timeseries_unified = False timeseries_separated = False - elif output == "timeseries_unified": + elif output_type == "timeseries_unified": summary = False timeseries_unified = True timeseries_separated = False - elif output == "timeseries_separated": + elif output_type == "timeseries_separated": summary = False timeseries_unified = False timeseries_separated = True else: - click.echo(f"Invalid output type: {output}") + click.echo(f"Invalid output type: {output_type}") return config.output_summary = summary @@ -333,7 +333,7 @@ def weave( @add_options(OUTPUT_DIR_OPTIONS) @add_options(ALL_SOURCE_OPTIONS) @add_options(DEBUG_OPTIONS) -@add_options(SITES_OUTPUT_FORMAT_OPTIONS) +@add_options(OUTPUT_FORMAT_OPTIONS) def sites( config_path, bbox, @@ -355,13 +355,13 @@ def sites( site_limit, dry, yes, - sites_output_format, + output_format, ): """ Get sites """ config = setup_config( - "sites", config_path, bbox, county, wkt, site_limit, dry, sites_output_format + "sites", config_path, bbox, county, wkt, site_limit, dry, output_format ) config_agencies = [ "bernco", @@ -439,7 
+439,7 @@ def setup_config( wkt, site_limit, dry, - sites_output_format=OutputFormat.CSV, + output_format=OutputFormat.CSV, ): config = Config(path=config_path) @@ -460,7 +460,7 @@ def setup_config( config.site_limit = None config.dry = dry - config.sites_output_format = sites_output_format + config.output_format = output_format.value return config diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index 1cb0ba6..b6e3f6a 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -49,8 +49,8 @@ def setup(self): def _test_weave( self, parameter: str, - output: str, - sites_output_format: str = "csv", + output_type: str, + output_format: str = "csv", site_limit: int = 4, start_date: str = "1990-08-10", end_date: str = "1990-08-11", @@ -82,8 +82,8 @@ def _test_weave( arguments = [ parameter, - "--output", - output, + "--output-type", + output_type, "--dry", "--site-limit", str(site_limit), @@ -91,8 +91,8 @@ def _test_weave( start_date, "--end-date", end_date, - "--sites_output_format", - sites_output_format, + "--output-format", + output_format, ] if geographic_filter_name and geographic_filter_value: @@ -102,6 +102,7 @@ def _test_weave( # Act result = self.runner.invoke(weave, arguments, standalone_mode=False) + print(result) # Assert assert result.exit_code == 0 @@ -141,11 +142,11 @@ def _test_weave( # 3 output_types = ["summary", "timeseries_unified", "timeseries_separated"] - for output_type in output_types: - if output_type == output: - assert getattr(config, f"output_{output_type}") is True + for ot in output_types: + if ot == output_type: + assert getattr(config, f"output_{ot}") is True else: - assert getattr(config, f"output_{output_type}") is False + assert getattr(config, f"output_{ot}") is False # 4 assert getattr(config, "site_limit") == 4 @@ -171,86 +172,86 @@ def _test_weave( assert getattr(config, _geographic_filter_name) == "" # 9 - assert getattr(config, "sites_output_format") == sites_output_format + assert 
getattr(config, "output_format") == output_format def test_weave_summary(self): - self._test_weave(parameter=WATERLEVELS, output="summary") + self._test_weave(parameter=WATERLEVELS, output_type="summary") def test_weave_timeseries_unified(self): - self._test_weave(parameter=WATERLEVELS, output="timeseries_unified") + self._test_weave(parameter=WATERLEVELS, output_type="timeseries_unified") def test_weave_timeseries_separated(self): - self._test_weave(parameter=WATERLEVELS, output="timeseries_separated") + self._test_weave(parameter=WATERLEVELS, output_type="timeseries_separated") def test_weave_csv(self): self._test_weave( - parameter=WATERLEVELS, output="summary", sites_output_format="csv" + parameter=WATERLEVELS, output_type="summary", output_format="csv" ) def test_weave_geojson(self): self._test_weave( - parameter=WATERLEVELS, output="summary", sites_output_format="geojson" + parameter=WATERLEVELS, output_type="summary", output_format="geojson" ) def test_weave_bbox(self): self._test_weave( - parameter=WATERLEVELS, output="summary", bbox="32.0,-106.0,36.0,-102.0" + parameter=WATERLEVELS, output_type="summary", bbox="32.0,-106.0,36.0,-102.0" ) def test_weave_county(self): - self._test_weave(parameter=WATERLEVELS, output="summary", county="Bernalillo") + self._test_weave(parameter=WATERLEVELS, output_type="summary", county="Bernalillo") def test_weave_wkt(self): self._test_weave( parameter=WATERLEVELS, - output="summary", + output_type="summary", wkt="POLYGON((-106.0 32.0, -102.0 32.0, -102.0 36.0, -106.0 36.0, -106.0 32.0))", ) def test_weave_waterlevels(self): - self._test_weave(parameter=WATERLEVELS, output="summary") + self._test_weave(parameter=WATERLEVELS, output_type="summary") def test_weave_arsenic(self): - self._test_weave(parameter=ARSENIC, output="summary") + self._test_weave(parameter=ARSENIC, output_type="summary") def test_weave_bicarbonate(self): - self._test_weave(parameter=BICARBONATE, output="summary") + self._test_weave(parameter=BICARBONATE, 
output_type="summary") def test_weave_calcium(self): - self._test_weave(parameter=CALCIUM, output="summary") + self._test_weave(parameter=CALCIUM, output_type="summary") def test_weave_carbonate(self): - self._test_weave(parameter=CARBONATE, output="summary") + self._test_weave(parameter=CARBONATE, output_type="summary") def test_weave_chloride(self): - self._test_weave(parameter=CHLORIDE, output="summary") + self._test_weave(parameter=CHLORIDE, output_type="summary") def test_weave_fluoride(self): - self._test_weave(parameter=FLUORIDE, output="summary") + self._test_weave(parameter=FLUORIDE, output_type="summary") def test_weave_magnesium(self): - self._test_weave(parameter=MAGNESIUM, output="summary") + self._test_weave(parameter=MAGNESIUM, output_type="summary") def test_weave_nitrate(self): - self._test_weave(parameter=NITRATE, output="summary") + self._test_weave(parameter=NITRATE, output_type="summary") def test_weave_ph(self): - self._test_weave(parameter=PH, output="summary") + self._test_weave(parameter=PH, output_type="summary") def test_weave_potassium(self): - self._test_weave(parameter=POTASSIUM, output="summary") + self._test_weave(parameter=POTASSIUM, output_type="summary") def test_weave_silica(self): - self._test_weave(parameter=SILICA, output="summary") + self._test_weave(parameter=SILICA, output_type="summary") def test_weave_sodium(self): - self._test_weave(parameter=SODIUM, output="summary") + self._test_weave(parameter=SODIUM, output_type="summary") def test_weave_sulfate(self): - self._test_weave(parameter=SULFATE, output="summary") + self._test_weave(parameter=SULFATE, output_type="summary") def test_weave_tds(self): - self._test_weave(parameter=TDS, output="summary") + self._test_weave(parameter=TDS, output_type="summary") def test_weave_uranium(self): - self._test_weave(parameter=URANIUM, output="summary") + self._test_weave(parameter=URANIUM, output_type="summary") diff --git a/tests/test_sources/__init__.py 
b/tests/test_sources/__init__.py index 5e33ec0..dd1b9fe 100644 --- a/tests/test_sources/__init__.py +++ b/tests/test_sources/__init__.py @@ -66,7 +66,7 @@ def setup(self): # 2: delete newly created dirs and files path_to_clean = Path(self.config.output_path) print(f"Cleaning and removing {path_to_clean}") - recursively_clean_directory(path_to_clean) + # recursively_clean_directory(path_to_clean) # reset test attributes self.dirs_to_delete = [] @@ -162,7 +162,7 @@ def test_summary_csv(self): def test_summary_geojson(self): # Arrange -------------------------------------------------------------- self.config.output_summary = True - self.config.sites_output_format = "geojson" + self.config.output_format = "geojson" self.config.report() # Act ------------------------------------------------------------------ @@ -191,7 +191,7 @@ def test_timeseries_unified_csv(self): def test_timeseries_unified_geojson(self): # Arrange -------------------------------------------------------------- self.config.output_timeseries_unified = True - self.config.sites_output_format = "geojson" + self.config.output_format = "geojson" self.config.report() # Act ------------------------------------------------------------------ @@ -228,7 +228,7 @@ def test_timeseries_separated_csv(self): def test_timeseries_separated_geojson(self): # Arrange -------------------------------------------------------------- self.config.output_timeseries_separated = True - self.config.sites_output_format = "geojson" + self.config.output_format = "geojson" self.config.report() # Act ------------------------------------------------------------------ From 7fe0fa057eaef93b100be4ac3dd121d33fdb4c24 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 10:51:18 -0600 Subject: [PATCH 121/143] remove dirs made by tests --- tests/test_sources/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sources/__init__.py b/tests/test_sources/__init__.py index dd1b9fe..34d0485 100644 --- 
a/tests/test_sources/__init__.py +++ b/tests/test_sources/__init__.py @@ -66,7 +66,7 @@ def setup(self): # 2: delete newly created dirs and files path_to_clean = Path(self.config.output_path) print(f"Cleaning and removing {path_to_clean}") - # recursively_clean_directory(path_to_clean) + recursively_clean_directory(path_to_clean) # reset test attributes self.dirs_to_delete = [] From 2c09d46ddd6724f5a5ec1eea386a709fd88aeb27 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Fri, 25 Apr 2025 16:52:58 +0000 Subject: [PATCH 122/143] Formatting changes --- backend/persister.py | 9 ++++++--- backend/unifier.py | 33 +++++++++++---------------------- tests/test_cli/__init__.py | 4 +++- 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index b985e7c..d4dc898 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -32,7 +32,7 @@ try: from google.cloud import storage except ImportError: - print("google cloud storage not available") + print("google cloud storage not available") def write_memory(func, records, output_format=None): @@ -98,6 +98,7 @@ class BasePersister(Loggable): Class to persist the data to a file or cloud storage. 
If persisting to a file, the output directory is created by config._make_output_path() """ + def __init__(self, config=None): self.records = [] self.timeseries = [] @@ -167,7 +168,9 @@ def add_extension(self, path: str, extension: OutputFormat): path = f"{path}.{ext}" return path - def _dump_sites_summary(self, path: str, records: list, output_format: OutputFormat): + def _dump_sites_summary( + self, path: str, records: list, output_format: OutputFormat + ): dump_sites_summary(path, records, output_format) def _dump_timeseries(self, path: str, timeseries: list): @@ -232,12 +235,12 @@ def _add_content(self, path: str, content: str): def _dump_sites_summary(self, path: str, records: list): content = write_memory(dump_sites_summary, records, self.config.output_format) self._add_content(path, content) - def _dump_timeseries_unified(self, path: str, timeseries: list): content = write_memory(path, dump_timeseries, timeseries) self._add_content(path, content) + # class ST2Persister(BasePersister): # extension = "st2" # diff --git a/backend/unifier.py b/backend/unifier.py index 15eef07..b070631 100644 --- a/backend/unifier.py +++ b/backend/unifier.py @@ -117,9 +117,7 @@ def unify_sites(config): # persister.save(config.output_path) -def _site_wrapper( - site_source, parameter_source, persister, config -): +def _site_wrapper(site_source, parameter_source, persister, config): try: # TODO: fully develop checks/discoveries below @@ -194,25 +192,16 @@ def _site_wrapper( # num_sites_to_remove from the length of the list # to remove the last num_sites_to_remove sites if use_summarize: - persister.records = ( - persister.records[ - : len(persister.records) - - num_sites_to_remove - ] - ) + persister.records = persister.records[ + : len(persister.records) - num_sites_to_remove + ] else: - persister.timeseries = ( - persister.timeseries[ - : len(persister.timeseries) - - num_sites_to_remove - ] - ) - persister.sites = ( - persister.sites[ - : len(persister.sites) - - 
num_sites_to_remove - ] - ) + persister.timeseries = persister.timeseries[ + : len(persister.timeseries) - num_sites_to_remove + ] + persister.sites = persister.sites[ + : len(persister.sites) - num_sites_to_remove + ] break except BaseException: @@ -227,7 +216,7 @@ def _unify_parameter( config, sources, ): - + if config.output_format == OutputFormat.GEOSERVER: persister = GeoServerPersister(config) else: diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index b6e3f6a..ecfaee0 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -199,7 +199,9 @@ def test_weave_bbox(self): ) def test_weave_county(self): - self._test_weave(parameter=WATERLEVELS, output_type="summary", county="Bernalillo") + self._test_weave( + parameter=WATERLEVELS, output_type="summary", county="Bernalillo" + ) def test_weave_wkt(self): self._test_weave( From 3ba04ee91324f02bf93895bce853a708d8a7e999 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 10:53:55 -0600 Subject: [PATCH 123/143] rearranged requirements to be alphabetical --- requirements.txt | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6aea17c..2d2a138 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,13 @@ flask +frost_sta_client +Geoalchemy2 +geopandas +google-cloud-storage gunicorn httpx +mypy pandas -geopandas -frost_sta_client -google-cloud-storage +psycopg2 pytest -urllib3>=2.2.0,<3.0.0 -Geoalchemy2 sqlalchemy -psycopg2 \ No newline at end of file +urllib3>=2.2.0,<3.0.0 \ No newline at end of file From 0aee4e73c95c4daa2f0229a950a369e2a6e13979 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 10:57:37 -0600 Subject: [PATCH 124/143] updated CHANGELOG --- CHANGELOG.md | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80bf398..583fa8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,23 @@ All notable 
changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased: 0.8.0 +## Unreleased: 0.9.0 + +### Added +- `--sites-only` flag to only retrieve site data +- `--output-format` flag to write out sites/summary tables as csv or geojson. + - options are `csv` or `geojson` +- NM OSE POD data for sites. + - can be removed from output with `--no-nmose-pod` +- `--output-dir` to change the output directory to a location other than `.` (the current working directory) + +### Changed +- `output` to `output-type` for CLI + +### Fixed +- a bug with `--site-limit`. it now exports the number of sets requested by the + +## 0.8.0 ### Added - water level for WQP From acf680a37b92bf4d2150d23732fae81e5bb0eabd Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 11:03:22 -0600 Subject: [PATCH 125/143] NMBGMR water level pagination --- backend/connectors/nmbgmr/source.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index 8c5db49..e16ecf7 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -46,8 +46,10 @@ def _make_url(endpoint): if os.getenv("DEBUG") == "1": - return f"http://localhost:8000/latest/{endpoint}" - return f"https://waterdata.nmt.edu//latest/{endpoint}" + url = f"http://localhost:8000/latest/{endpoint}" + else: + url = f"https://waterdata.nmt.edu/latest/{endpoint}" + return url class NMBGMRSiteSource(BaseSiteSource): From ad8b0f18d5d7e5dc7f329ac234ce6fc3ffefb6fb Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Fri, 25 Apr 2025 17:05:53 +0000 Subject: [PATCH 126/143] Formatting changes --- backend/connectors/nmbgmr/source.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/connectors/nmbgmr/source.py 
b/backend/connectors/nmbgmr/source.py index e16ecf7..bb45cab 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -170,7 +170,11 @@ def __repr__(self): def _clean_records(self, records): # remove records with no depth to water value - return [r for r in records if r["DepthToWaterBGS"] is not None and r["DateMeasured"] is not None] + return [ + r + for r in records + if r["DepthToWaterBGS"] is not None and r["DateMeasured"] is not None + ] def _extract_parameter_record(self, record, *args, **kw): record[PARAMETER_NAME] = DTW From de46920e63ec4b7a2ceab7ce6138d8287fdb839e Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 14:30:55 -0600 Subject: [PATCH 127/143] mypy fixes --- backend/config.py | 4 ++-- backend/persister.py | 4 ++-- backend/persisters/geoserver.py | 15 +++++---------- mypy.ini | 3 ++- requirements.txt | 4 +++- 5 files changed, 14 insertions(+), 16 deletions(-) diff --git a/backend/config.py b/backend/config.py index 6dd2e0e..56decd0 100644 --- a/backend/config.py +++ b/backend/config.py @@ -18,6 +18,8 @@ from datetime import datetime, timedelta from enum import Enum import shapely.wkt +import yaml + from . 
import OutputFormat from .bounding_polygons import get_county_polygon from .connectors.nmbgmr.source import ( @@ -201,8 +203,6 @@ def __init__(self, model=None, payload=None, path=None): setattr(self, attr, payload[attr]) def _load_from_yaml(self, path): - import yaml - path = os.path.abspath(path) if os.path.exists(path): self.log(f"Loading config from {path}") diff --git a/backend/persister.py b/backend/persister.py index d4dc898..23964ad 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -232,8 +232,8 @@ def _make_output_directory(self, output_directory: str): def _add_content(self, path: str, content: str): self._content.append((path, content)) - def _dump_sites_summary(self, path: str, records: list): - content = write_memory(dump_sites_summary, records, self.config.output_format) + def _dump_sites_summary(self, path: str, records: list, output_format: OutputFormat): + content = write_memory(dump_sites_summary, records, output_format) self._add_content(path, content) def _dump_timeseries_unified(self, path: str, timeseries: list): diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index 9fc261d..8885614 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -9,10 +9,11 @@ import os import time from itertools import groupby - +from typing import Type import psycopg2 from sqlalchemy.dialects.postgresql import JSONB, insert -from sqlalchemy.orm import declarative_base, sessionmaker, relationship +from sqlalchemy.orm import declarative_base, sessionmaker, relationship, Mapped + from backend.persister import BasePersister @@ -31,12 +32,6 @@ Base = declarative_base() - -# dbname=db.get('dbname'), -# user=db.get('user'), -# password=db.get('password'), -# host=db.get('host'), -# port=db.get('port'), def session_factory(connection: dict): user = connection.get("user", "postgres") password = connection.get("password", "") @@ -61,7 +56,7 @@ class Location(Base): geometry = 
Column(Geometry(geometry_type="POINT", srid=4326)) source_slug = Column(String, ForeignKey("tbl_sources.name")) - source = relationship("Sources", backref="locations") + source: Mapped["Sources"] = relationship("Sources", backref="locations", uselist=False) class Summary(Base): @@ -76,7 +71,7 @@ class Summary(Base): source_slug = Column(String, ForeignKey("tbl_sources.name")) parameter_slug = Column(String, ForeignKey("tbl_parameters.name")) - source = relationship("Sources", backref="summaries") + source: Mapped["Sources"] = relationship("Sources", backref="summaries", uselist=False) value = Column(Float) nrecords = Column(Integer) diff --git a/mypy.ini b/mypy.ini index 380b366..4904098 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,3 +1,4 @@ [mypy] ignore_missing_imports = True -exclude = ^(venv|.github|.mypy_cache|.pytest_cache|nmuwd.egg-info|__pycache__|build|tests/archived) \ No newline at end of file +exclude = ^(venv|.github|.mypy_cache|.pytest_cache|nmuwd.egg-info|__pycache__|build|tests/archived) +plugins = sqlalchemy.ext.mypy.plugin diff --git a/requirements.txt b/requirements.txt index 2d2a138..648458d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,7 @@ mypy pandas psycopg2 pytest -sqlalchemy +pyyaml +sqlalchemy[mypy] +types-pyyaml urllib3>=2.2.0,<3.0.0 \ No newline at end of file From ec4689e82bfc371d5712413440f45d0daa687756 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 14:50:56 -0600 Subject: [PATCH 128/143] populate nmbgmr well fields --- backend/connectors/nmbgmr/source.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index bb45cab..19311a9 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -87,18 +87,18 @@ def get_records(self): ) if not config.sites_only: for site in sites: - # print(f"Obtaining well data for {site['properties']['point_id']}") - # well_data 
= self._execute_json_request( - # _make_url("wells"), - # params={"pointid": site["properties"]["point_id"]}, - # tag="", - # ) - # site["properties"]["formation"] = well_data["formation"] - # site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] - # site["properties"]["well_depth_units"] = FEET - site["properties"]["formation"] = None - site["properties"]["well_depth"] = None + print(f"Obtaining well data for {site['properties']['point_id']}") + well_data = self._execute_json_request( + _make_url("wells"), + params={"pointid": site["properties"]["point_id"]}, + tag="", + ) + site["properties"]["formation"] = well_data["formation"] + site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] site["properties"]["well_depth_units"] = FEET + # site["properties"]["formation"] = None + # site["properties"]["well_depth"] = None + # site["properties"]["well_depth_units"] = FEET return sites From 8322949d9feb2441547a9acf138070933c102205 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Fri, 25 Apr 2025 20:53:17 +0000 Subject: [PATCH 129/143] Formatting changes --- backend/persister.py | 4 +++- backend/persisters/geoserver.py | 22 ++++++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/backend/persister.py b/backend/persister.py index 23964ad..bf3d11c 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -232,7 +232,9 @@ def _make_output_directory(self, output_directory: str): def _add_content(self, path: str, content: str): self._content.append((path, content)) - def _dump_sites_summary(self, path: str, records: list, output_format: OutputFormat): + def _dump_sites_summary( + self, path: str, records: list, output_format: OutputFormat + ): content = write_memory(dump_sites_summary, records, output_format) self._add_content(path, content) diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index b011c17..9b79f3b 100644 --- a/backend/persisters/geoserver.py +++ 
b/backend/persisters/geoserver.py @@ -34,6 +34,7 @@ Base = declarative_base() + def session_factory(connection: dict): user = connection.get("user", "postgres") password = connection.get("password", "") @@ -58,7 +59,9 @@ class Location(Base): geometry = Column(Geometry(geometry_type="POINT", srid=4326)) source_slug = Column(String, ForeignKey("tbl_sources.name")) - source: Mapped["Sources"] = relationship("Sources", backref="locations", uselist=False) + source: Mapped["Sources"] = relationship( + "Sources", backref="locations", uselist=False + ) class Summary(Base): @@ -73,7 +76,9 @@ class Summary(Base): source_slug = Column(String, ForeignKey("tbl_sources.name")) parameter_slug = Column(String, ForeignKey("tbl_parameters.name")) - source: Mapped["Sources"] = relationship("Sources", backref="summaries", uselist=False) + source: Mapped["Sources"] = relationship( + "Sources", backref="summaries", uselist=False + ) value = Column(Float) nrecords = Column(Integer) @@ -150,6 +155,7 @@ def _write_sources(self, records: list): def _write_sources_with_convex_hull(self, records: list): # sources = {r.source for r in records} with self._connection as conn: + def key(r): return str(r.source) @@ -159,15 +165,19 @@ def key(r): # calculate convex hull for the source from the records # Create a MultiPoint object - points = MultiPoint([Point(record.longitude, record.latitude) for record in group]) + points = MultiPoint( + [Point(record.longitude, record.latitude) for record in group] + ) # Calculate the convex hull sinsert = insert(Sources) print("Writing source", source_name, points.convex_hull) - sql = sinsert.values([{"name": source_name, - "convex_hull": points.convex_hull.wkt}]).on_conflict_do_update( + sql = sinsert.values( + [{"name": source_name, "convex_hull": points.convex_hull.wkt}] + ).on_conflict_do_update( index_elements=[Sources.name], - set_={"convex_hull": sinsert.excluded.convex_hull}) + set_={"convex_hull": sinsert.excluded.convex_hull}, + ) # sql = 
insert(Sources).values([{"name": source,} for source in sources]).on_conflict_do_nothing( # index_elements=[Sources.name],) conn.execute(sql) From f0c7973f02c38454490fc96f75e03ff961def3ba Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 14:55:07 -0600 Subject: [PATCH 130/143] mypy fix --- backend/persisters/geoserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index 9b79f3b..486e3a8 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -161,12 +161,12 @@ def key(r): records = sorted(records, key=key) for source_name, group in groupby(records, key=key): - group = list(group) + listed_group = list(group) # calculate convex hull for the source from the records # Create a MultiPoint object points = MultiPoint( - [Point(record.longitude, record.latitude) for record in group] + [Point(record.longitude, record.latitude) for record in listed_group] ) # Calculate the convex hull From e05ef53d2ae77341f167909a14878b3cf213fb15 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 14:56:19 -0600 Subject: [PATCH 131/143] variable name clarity --- backend/persisters/geoserver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index 486e3a8..3f09acb 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -161,12 +161,12 @@ def key(r): records = sorted(records, key=key) for source_name, group in groupby(records, key=key): - listed_group = list(group) + source_records = list(group) # calculate convex hull for the source from the records # Create a MultiPoint object points = MultiPoint( - [Point(record.longitude, record.latitude) for record in listed_group] + [Point(record.longitude, record.latitude) for record in source_records] ) # Calculate the convex hull From dab407b8347755f49663f8ac78b0729316ddfe9b Mon Sep 17 
00:00:00 2001 From: jacob-a-brown Date: Fri, 25 Apr 2025 20:56:41 +0000 Subject: [PATCH 132/143] Formatting changes --- backend/persisters/geoserver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index 486e3a8..f5e83ff 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -166,7 +166,10 @@ def key(r): # Create a MultiPoint object points = MultiPoint( - [Point(record.longitude, record.latitude) for record in listed_group] + [ + Point(record.longitude, record.latitude) + for record in listed_group + ] ) # Calculate the convex hull From 074f62edc0e4131481eca7ae831ef00ecc5931d0 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Fri, 25 Apr 2025 20:58:24 +0000 Subject: [PATCH 133/143] Formatting changes --- backend/persisters/geoserver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index 3f09acb..4461246 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -166,7 +166,10 @@ def key(r): # Create a MultiPoint object points = MultiPoint( - [Point(record.longitude, record.latitude) for record in source_records] + [ + Point(record.longitude, record.latitude) + for record in source_records + ] ) # Calculate the convex hull From 93a3878e7b60552472d0f2561afec4e89624daec Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 15:19:51 -0600 Subject: [PATCH 134/143] skip well data retrieval for NMBGMR until it can be retrieved in batches --- backend/__init__.py | 10 +++++++-- backend/connectors/nmbgmr/source.py | 30 +++++++++++++++------------ tests/test_cli/__init__.py | 1 - tests/test_sources/test_nmbgmr_amp.py | 17 ++++++++++++++- 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/backend/__init__.py b/backend/__init__.py index 06a630a..d531f86 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -1,7 
+1,13 @@ from enum import Enum - - +from os import environ class OutputFormat(str, Enum): GEOJSON = "geojson" CSV = "csv" GEOSERVER = "geoserver" + + +def get_bool_env_variable(var) -> bool: + if environ.get(var).lower() in ["true", "1", "yes"]: + return True + else: + return False \ No newline at end of file diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index 19311a9..ec26aae 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -15,6 +15,7 @@ # =============================================================================== import os +from backend import get_bool_env_variable from backend.connectors import NM_STATE_BOUNDING_POLYGON from backend.connectors.nmbgmr.transformer import ( NMBGMRSiteTransformer, @@ -87,19 +88,22 @@ def get_records(self): ) if not config.sites_only: for site in sites: - print(f"Obtaining well data for {site['properties']['point_id']}") - well_data = self._execute_json_request( - _make_url("wells"), - params={"pointid": site["properties"]["point_id"]}, - tag="", - ) - site["properties"]["formation"] = well_data["formation"] - site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] - site["properties"]["well_depth_units"] = FEET - # site["properties"]["formation"] = None - # site["properties"]["well_depth"] = None - # site["properties"]["well_depth_units"] = FEET - + if get_bool_env_variable("IS_TESTING_ENV"): + print(f"Skipping well data for {site['properties']['point_id']} for testing (until well data can be retrieved in batches)") + site["properties"]["formation"] = None + site["properties"]["well_depth"] = None + site["properties"]["well_depth_units"] = FEET + else: + print(f"Obtaining well data for {site['properties']['point_id']}") + well_data = self._execute_json_request( + _make_url("wells"), + params={"pointid": site["properties"]["point_id"]}, + tag="", + ) + site["properties"]["formation"] = well_data["formation"] + 
site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] + site["properties"]["well_depth_units"] = FEET + return sites diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py index ecfaee0..4d342ae 100644 --- a/tests/test_cli/__init__.py +++ b/tests/test_cli/__init__.py @@ -102,7 +102,6 @@ def _test_weave( # Act result = self.runner.invoke(weave, arguments, standalone_mode=False) - print(result) # Assert assert result.exit_code == 0 diff --git a/tests/test_sources/test_nmbgmr_amp.py b/tests/test_sources/test_nmbgmr_amp.py index 90bba2c..cb3e031 100644 --- a/tests/test_sources/test_nmbgmr_amp.py +++ b/tests/test_sources/test_nmbgmr_amp.py @@ -1,6 +1,21 @@ +import os +import pytest + from backend.constants import WATERLEVELS, CALCIUM, MILLIGRAMS_PER_LITER, FEET from tests.test_sources import BaseSourceTestClass +os.environ["IS_TESTING_ENV"] = "True" + +@pytest.fixture(autouse=True) +def setup(): + # SETUP CODE ----------------------------------------------------------- + os.environ["IS_TESTING_ENV"] = "True" + + # RUN TESTS ------------------------------------------------------------ + yield + + # TEARDOWN CODE --------------------------------------------------------- + os.environ["IS_TESTING_ENV"] = "False" class TestNMBGMRWaterlevels(BaseSourceTestClass): @@ -13,4 +28,4 @@ class TestNMBGMRAnalyte(BaseSourceTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER - agency = "nmbgmr_amp" + agency = "nmbgmr_amp" \ No newline at end of file From 373cc9d68ae2216177845d632f619150aabc9766 Mon Sep 17 00:00:00 2001 From: jacob-a-brown Date: Fri, 25 Apr 2025 21:21:30 +0000 Subject: [PATCH 135/143] Formatting changes --- backend/__init__.py | 4 +++- backend/connectors/nmbgmr/source.py | 6 ++++-- tests/test_sources/test_nmbgmr_amp.py | 4 +++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/backend/__init__.py b/backend/__init__.py index d531f86..5b3ab77 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -1,5 +1,7 @@ from enum 
import Enum from os import environ + + class OutputFormat(str, Enum): GEOJSON = "geojson" CSV = "csv" @@ -10,4 +12,4 @@ def get_bool_env_variable(var) -> bool: if environ.get(var).lower() in ["true", "1", "yes"]: return True else: - return False \ No newline at end of file + return False diff --git a/backend/connectors/nmbgmr/source.py b/backend/connectors/nmbgmr/source.py index ec26aae..d01cd11 100644 --- a/backend/connectors/nmbgmr/source.py +++ b/backend/connectors/nmbgmr/source.py @@ -89,7 +89,9 @@ def get_records(self): if not config.sites_only: for site in sites: if get_bool_env_variable("IS_TESTING_ENV"): - print(f"Skipping well data for {site['properties']['point_id']} for testing (until well data can be retrieved in batches)") + print( + f"Skipping well data for {site['properties']['point_id']} for testing (until well data can be retrieved in batches)" + ) site["properties"]["formation"] = None site["properties"]["well_depth"] = None site["properties"]["well_depth_units"] = FEET @@ -103,7 +105,7 @@ def get_records(self): site["properties"]["formation"] = well_data["formation"] site["properties"]["well_depth"] = well_data["well_depth_ftbgs"] site["properties"]["well_depth_units"] = FEET - + return sites diff --git a/tests/test_sources/test_nmbgmr_amp.py b/tests/test_sources/test_nmbgmr_amp.py index cb3e031..b56fd5b 100644 --- a/tests/test_sources/test_nmbgmr_amp.py +++ b/tests/test_sources/test_nmbgmr_amp.py @@ -6,6 +6,7 @@ os.environ["IS_TESTING_ENV"] = "True" + @pytest.fixture(autouse=True) def setup(): # SETUP CODE ----------------------------------------------------------- @@ -17,6 +18,7 @@ def setup(): # TEARDOWN CODE --------------------------------------------------------- os.environ["IS_TESTING_ENV"] = "False" + class TestNMBGMRWaterlevels(BaseSourceTestClass): parameter = WATERLEVELS @@ -28,4 +30,4 @@ class TestNMBGMRAnalyte(BaseSourceTestClass): parameter = CALCIUM units = MILLIGRAMS_PER_LITER - agency = "nmbgmr_amp" \ No newline at end of file + 
agency = "nmbgmr_amp" From 87e2225a38fda6a419f38814d041adf15e3907e4 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Fri, 25 Apr 2025 15:33:53 -0600 Subject: [PATCH 136/143] mypy fix --- backend/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/backend/__init__.py b/backend/__init__.py index 5b3ab77..804491c 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -8,8 +8,9 @@ class OutputFormat(str, Enum): GEOSERVER = "geoserver" -def get_bool_env_variable(var) -> bool: - if environ.get(var).lower() in ["true", "1", "yes"]: - return True - else: +def get_bool_env_variable(var: str) -> bool: + env_var = environ.get(var, None) + if env_var is None or env_var.strip().lower() not in ["true", "1", "yes"]: return False + else: + return True From a5d0b5987acf2d2a3290e16875a7c34cbe65cd1b Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 28 Apr 2025 08:29:21 -0600 Subject: [PATCH 137/143] default 'yes' to False to enable prompt --- backend/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index 56decd0..0b16ed7 100644 --- a/backend/config.py +++ b/backend/config.py @@ -151,7 +151,7 @@ class Config(Loggable): output_format: str = OutputFormat.CSV - yes: bool = True + yes: bool = False def __init__(self, model=None, payload=None, path=None): # need to initialize logger From a23d1d8bbca9191316c7b8dae4c00042e8f3dfb5 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 28 Apr 2025 13:55:46 -0600 Subject: [PATCH 138/143] add agency to nwis site numbers --- backend/connectors/usgs/source.py | 11 ++++++++--- backend/connectors/usgs/transformer.py | 6 +++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/backend/connectors/usgs/source.py b/backend/connectors/usgs/source.py index 25e4e87..cac4f2a 100644 --- a/backend/connectors/usgs/source.py +++ b/backend/connectors/usgs/source.py @@ -76,11 +76,12 @@ def parse_json(data): for location in data["timeSeries"]: 
site_code = location["sourceInfo"]["siteCode"][0]["value"] + agency = location["sourceInfo"]["siteCode"][0]["agencyCode"] source_parameter_name = location["variable"]["variableName"] source_parameter_units = location["variable"]["unit"]["unitCode"] for value in location["values"][0]["value"]: record = { - "site_code": site_code, + "site_id": f"{agency}-{site_code}", "source_parameter_name": source_parameter_name, "value": value["value"], "datetime_measured": value["dateTime"], @@ -150,12 +151,16 @@ def __repr__(self): return "NWISWaterLevelSource" def get_records(self, site_record): + # query sites with the agency, which need to be in the form of "{agency}:{site number}" + sites = make_site_list(site_record) + sites_with_colons = [s.replace("-", ":") for s in sites] + params = { "format": "json", "siteType": "GW", "siteStatus": "all", "parameterCd": "72019", - "sites": ",".join(make_site_list(site_record)), + "sites": ",".join(sites_with_colons), } config = self.config @@ -178,7 +183,7 @@ def get_records(self, site_record): return records def _extract_site_records(self, records, site_record): - return [ri for ri in records if ri["site_code"] == site_record.id] + return [ri for ri in records if ri["site_id"] == site_record.id] def _clean_records(self, records): return [ diff --git a/backend/connectors/usgs/transformer.py b/backend/connectors/usgs/transformer.py index 1f61cf5..379b8bd 100644 --- a/backend/connectors/usgs/transformer.py +++ b/backend/connectors/usgs/transformer.py @@ -32,9 +32,13 @@ def _transform(self, record): # if not self.contained(lng, lat): # return + agency = record["agency_cd"] + site_no = record["site_no"] + site_id = f"{agency}-{site_no}" + rec = { "source": "USGS-NWIS", - "id": record["site_no"], + "id": site_id, "name": record["station_nm"], "latitude": lat, "longitude": lng, From 89042a9d722ac7704bc151a9fe34ae0b078450b4 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 28 Apr 2025 17:17:31 -0600 Subject: [PATCH 139/143] ose pods are 
false agencies for all parameters --- backend/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/config.py b/backend/config.py index 0b16ed7..86ef36a 100644 --- a/backend/config.py +++ b/backend/config.py @@ -220,7 +220,6 @@ def get_config_and_false_agencies(self): "ebid", "nmbgmr_amp", "nmose_isc_seven_rivers", - "nmose_pod", "nmose_roswell", "nwis", "pvacd", From ada99cb875ebbeda454435d1da44be84ae5fda92 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 28 Apr 2025 17:18:00 -0600 Subject: [PATCH 140/143] bump to 0.9.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 05b81a8..abfc56f 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.9.0", + version="0.9.1", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 08a0aebe671ab67b686e90365e57619eb8262bb1 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 29 Apr 2025 08:18:14 -0600 Subject: [PATCH 141/143] update documentation --- CHANGELOG.md | 3 ++- README.md | 18 +++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 583fa8d..c92224a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,12 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased: 0.9.0 +## 0.9.2 ### Added - `--sites-only` flag to only retrieve site data - `--output-format` flag to write out sites/summary tables as csv or geojson. - options are `csv` or `geojson` + - timeseries data is always written to a csv - NM OSE POD data for sites. 
- can be removed from output with `--no-nmose-pod` - `--output-dir` to change the output directory to a location other than `.` (the current working directory) diff --git a/README.md b/README.md index 0173df4..0cb15ac 100644 --- a/README.md +++ b/README.md @@ -71,22 +71,22 @@ where `{parameter}` is the name of the parameter whose data is to be retrieved, | **pvacd** | X | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | | **wqp** | X | X | X | X | X | X | X | X | X | X | X | X | X | X | X | X | -### Output -The `--output` option is required and used to set the output type: +### Output Type +The `--output-type` option is required and used to set the output type: ``` ---output summary +--output-type summary ``` - A summary table consisting of location information as well as summary statistics for the parameter of interest for every location that has observations. ``` ---output timeseries_unified +--output-type timeseries_unified ``` - A single table consisting of time series data for all locations for the parameter of interest. - A single table of site data that contains information such as latitude, longitude, and elevation ``` ---output timeseries_separated +--output-type timeseries_separated ``` - Separate time series tables for all locations for the parameter of interest. 
- A single table of site data that contains information such as latitude, longitude, and elevation @@ -181,7 +181,7 @@ The Data Integration Engine enables the user to obtain groundwater level and gro - `--no-pvacd` to exclude Pecos Valley Artesian Convservancy District (PVACD) data - `--no-wqp` to exclude Water Quality Portal (WQP) data -### Geographic Filters +### Geographic Filters [In Development] The following flags can be used to geographically filter data: @@ -193,7 +193,11 @@ The following flags can be used to geographically filter data: -- bbox 'x1 y1, x2 y2' ``` -### Date Filters +``` +-- wkt {wkt polygon or multipolygon} +``` + +### Date Filters [In Development] The following flags can be used to filter by dates: From 9c533d3ef671d0959a57668405fd8e170328d285 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 29 Apr 2025 08:31:12 -0600 Subject: [PATCH 142/143] bump to 0.9.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index abfc56f..3e03efa 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ setup( name="nmuwd", - version="0.9.1", + version="0.9.2", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, From 330f1d4147b670f3241c3aaf7c5dacd506f412b4 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Tue, 29 Apr 2025 08:52:54 -0600 Subject: [PATCH 143/143] use requirements.txt for setup.py --- CHANGELOG.md | 2 +- backend/persister.py | 6 ------ backend/persisters/geoserver.py | 1 - requirements.txt | 1 - setup.py | 8 ++++++-- 5 files changed, 7 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c92224a..d6457f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), - `output` to `output-type` for CLI ### Fixed -- a bug with `--site-limit`. it now exports the number of sets requested by the +- a bug with `--site-limit`. 
it now exports the number of sites requested by the user ## 0.8.0 diff --git a/backend/persister.py b/backend/persister.py index bf3d11c..b470c2b 100644 --- a/backend/persister.py +++ b/backend/persister.py @@ -16,15 +16,9 @@ import csv import io import os -import shutil from pprint import pprint import json -import pandas as pd -import geopandas as gpd -import psycopg2 -from shapely import Point - from backend import OutputFormat from backend.logger import Loggable diff --git a/backend/persisters/geoserver.py b/backend/persisters/geoserver.py index 4461246..d8c07fc 100644 --- a/backend/persisters/geoserver.py +++ b/backend/persisters/geoserver.py @@ -10,7 +10,6 @@ import time from itertools import groupby from typing import Type -import psycopg2 from shapely.geometry.multipoint import MultiPoint from shapely.geometry.point import Point from sqlalchemy.dialects.postgresql import JSONB, insert diff --git a/requirements.txt b/requirements.txt index 648458d..50e80af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,5 @@ pandas psycopg2 pytest pyyaml -sqlalchemy[mypy] types-pyyaml urllib3>=2.2.0,<3.0.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 3e03efa..d3f855e 100644 --- a/setup.py +++ b/setup.py @@ -19,9 +19,13 @@ with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() +# Read dependencies from requirements.txt +with open("requirements.txt", "r", encoding="utf-8") as req_file: + requirements = req_file.read().splitlines() + setup( name="nmuwd", - version="0.9.2", + version="0.9.3", author="Jake Ross", description="New Mexico Water Data Integration Engine", long_description=long_description, @@ -31,7 +35,7 @@ "Programming Language :: Python :: 3", "Operating System :: OS Independent", ], - install_requires=["click", "httpx", "geopandas", "frost_sta_client"], + install_requires=requirements, entry_points={ "console_scripts": [ "die = frontend.cli:cli",