From 7e14b069e12c01e194e29412159b6eff81c59893 Mon Sep 17 00:00:00 2001 From: jakeross Date: Tue, 10 Mar 2026 09:17:43 -0600 Subject: [PATCH 1/2] Assess branch readiness for staging --- .github/app.template.yaml | 9 +- .github/workflows/CD_production.yml | 61 +++- .github/workflows/CD_staging.yml | 57 +++- AGENTS.MD | 7 + README.md | 13 +- alembic/env.py | 2 +- ...build_water_elevation_materialized_view.py | 199 +++++++++++ api/asset.py | 25 +- cli/cli.py | 4 +- core/app.py | 323 +++++++++++------- core/factory.py | 55 +++ core/initializers.py | 51 ++- core/permissions.py | 18 +- db/engine.py | 2 +- docker-compose.yml | 6 + entrypoint.sh | 12 +- main.py | 40 +-- schedule | 22 +- services/asset_helper.py | 8 +- services/env.py | 16 + services/gcs_helper.py | 14 +- services/util.py | 18 +- tests/conftest.py | 74 +++- tests/features/environment.py | 2 +- tests/integration/test_alembic_migrations.py | 29 ++ tests/test_lazy_admin.py | 19 ++ tests/test_request_timing.py | 43 +++ tests/test_thing.py | 10 +- transfers/backfill/backfill.py | 2 +- transfers/transfer.py | 2 +- 30 files changed, 857 insertions(+), 286 deletions(-) create mode 100644 alembic/versions/o8b9c0d1e2f3_rebuild_water_elevation_materialized_view.py create mode 100644 core/factory.py create mode 100644 services/env.py create mode 100644 tests/test_lazy_admin.py create mode 100644 tests/test_request_timing.py diff --git a/.github/app.template.yaml b/.github/app.template.yaml index 44df2f860..2ed7342a3 100644 --- a/.github/app.template.yaml +++ b/.github/app.template.yaml @@ -1,8 +1,13 @@ service: ${SERVICE_NAME} runtime: python313 -entrypoint: gunicorn -w 4 -k uvicorn.workers.UvicornWorker main:app -instance_class: F4 +entrypoint: ${ENTRYPOINT} service_account: "${CLOUD_SQL_USER}.gserviceaccount.com" +instance_class: F4 +inbound_services: + - warmup +automatic_scaling: + min_instances: ${MIN_INSTANCES} + max_instances: ${MAX_INSTANCES} handlers: - url: /.* secure: always diff --git a/.github/workflows/CD_production.yml b/.github/workflows/CD_production.yml index eb104d339..cf7924efd 100644 --- a/.github/workflows/CD_production.yml +++ b/.github/workflows/CD_production.yml @@ -8,7 +8,7 @@ permissions: contents: write jobs: - staging-deploy: + production-deploy: runs-on: ubuntu-latest environment: production @@ -64,9 +64,8 @@ jobs: sudo apt-get install -y gettext-base fi - - name: Render app.yaml + - name: Render App Engine configs env: - SERVICE_NAME: "ocotillo-api" ENVIRONMENT: "production" CLOUD_SQL_INSTANCE_NAME: "${{ secrets.CLOUD_SQL_INSTANCE_NAME }}" CLOUD_SQL_DATABASE: "${{ vars.CLOUD_SQL_DATABASE }}" @@ -87,25 +86,59 @@ jobs: SESSION_SECRET_KEY: "${{ secrets.SESSION_SECRET_KEY }}" APITALLY_CLIENT_ID: "${{ vars.APITALLY_CLIENT_ID }}" run: | + export MAX_INSTANCES="10" + export SERVICE_NAME="ocotillo-api" + export ENTRYPOINT="gunicorn -w 1 -k uvicorn.workers.UvicornWorker main:app" + export MIN_INSTANCES="0" envsubst < .github/app.template.yaml > app.yaml - name: Deploy to Google Cloud run: | - gcloud app deploy app.yaml --quiet --project ${{ vars.GCP_PROJECT_ID }} + gcloud app deploy \ + app.yaml \ + --quiet \ + --project ${{ vars.GCP_PROJECT_ID }} - # Clean up old versions - delete only the oldest version, one created and one destroyed - - name: Clean up oldest version + - name: Clean up oldest versions run: | - OLDEST_VERSION=$(gcloud app versions list --service=ocotillo-api --project=${{ vars.GCP_PROJECT_ID}} --format="value(id)" --sort-by="version.createTime" | head -n 1) - if [ ! -z "$OLDEST_VERSION" ]; then - echo "Deleting oldest version: $OLDEST_VERSION" - gcloud app versions delete $OLDEST_VERSION --service=ocotillo-api --project=${{ vars.GCP_PROJECT_ID }} --quiet - echo "Deleted oldest version: $OLDEST_VERSION" + SERVICE="ocotillo-api" + VERSIONS_JSON="$(gcloud app versions list --service="$SERVICE" --project=${{ vars.GCP_PROJECT_ID }} --format=json --sort-by="version.createTime" 2>/dev/null || printf '[]')" + export VERSIONS_JSON + DELETE_VERSION="$(python - <<'PY' + import json + import os + + versions = json.loads(os.environ.get("VERSIONS_JSON", "[]") or "[]") + if len(versions) <= 1: + print("") + raise SystemExit(0) + + def traffic_split(version): + for key in ("traffic_split", "trafficSplit"): + value = version.get(key) + if value is not None: + try: + return float(value) + except (TypeError, ValueError): + return 0.0 + return 0.0 + + for version in versions: + if traffic_split(version) == 0.0: + print(version.get("id", "")) + break + else: + print("") + PY + )" + if [ -n "$DELETE_VERSION" ]; then + echo "Deleting old non-serving version for $SERVICE: $DELETE_VERSION" + gcloud app versions delete "$DELETE_VERSION" --service="$SERVICE" --project=${{ vars.GCP_PROJECT_ID }} --quiet else - echo "No versions to delete" + echo "No old non-serving versions to delete for $SERVICE" fi - - name: Remove app.yaml + - name: Remove rendered configs run: | rm app.yaml @@ -118,5 +151,5 @@ jobs: # ":" are not alloed in git tags, so replace with "-" - name: Tag commit run: | - git tag -a "production-deploy-$(date -u +%Y-%m-%d)T$(date -u +%H-%M-%S%z)" -m "staging gcloud deployment: $(date -u +%Y-%m-%d)T$(date -u +%H:%M:%S%z)" + git tag -a "production-deploy-$(date -u +%Y-%m-%d)T$(date -u +%H-%M-%S%z)" -m "production gcloud deployment: $(date -u +%Y-%m-%d)T$(date -u +%H:%M:%S%z)" git push origin --tags diff --git a/.github/workflows/CD_staging.yml b/.github/workflows/CD_staging.yml index ec5cad81f..ed73059df 100644 --- a/.github/workflows/CD_staging.yml +++ b/.github/workflows/CD_staging.yml @@ -64,9 +64,8 @@ jobs: sudo apt-get install -y gettext-base fi - - name: Render app.yaml + - name: Render App Engine configs env: - SERVICE_NAME: "ocotillo-api-staging" ENVIRONMENT: "staging" CLOUD_SQL_INSTANCE_NAME: "${{ secrets.CLOUD_SQL_INSTANCE_NAME }}" CLOUD_SQL_DATABASE: "${{ vars.CLOUD_SQL_DATABASE }}" @@ -87,25 +86,59 @@ jobs: SESSION_SECRET_KEY: "${{ secrets.SESSION_SECRET_KEY }}" APITALLY_CLIENT_ID: "${{ vars.APITALLY_CLIENT_ID }}" run: | + export MAX_INSTANCES="10" + export SERVICE_NAME="ocotillo-api-staging" + export ENTRYPOINT="gunicorn -w 1 -k uvicorn.workers.UvicornWorker main:app" + export MIN_INSTANCES="0" envsubst < .github/app.template.yaml > app.yaml - name: Deploy to Google Cloud run: | - gcloud app deploy app.yaml --quiet --project ${{ vars.GCP_PROJECT_ID }} + gcloud app deploy \ + app.yaml \ + --quiet \ + --project ${{ vars.GCP_PROJECT_ID }} - # Clean up old versions - delete only the oldest version, one created and one destroyed - - name: Clean up oldest version + - name: Clean up oldest versions run: | - OLDEST_VERSION=$(gcloud app versions list --service=ocotillo-api-staging --project=${{ vars.GCP_PROJECT_ID}} --format="value(id)" --sort-by="version.createTime" | head -n 1) - if [ ! -z "$OLDEST_VERSION" ]; then - echo "Deleting oldest version: $OLDEST_VERSION" - gcloud app versions delete $OLDEST_VERSION --service=ocotillo-api-staging --project=${{ vars.GCP_PROJECT_ID }} --quiet - echo "Deleted oldest version: $OLDEST_VERSION" + SERVICE="ocotillo-api-staging" + VERSIONS_JSON="$(gcloud app versions list --service="$SERVICE" --project=${{ vars.GCP_PROJECT_ID }} --format=json --sort-by="version.createTime" 2>/dev/null || printf '[]')" + export VERSIONS_JSON + DELETE_VERSION="$(python - <<'PY' + import json + import os + + versions = json.loads(os.environ.get("VERSIONS_JSON", "[]") or "[]") + if len(versions) <= 1: + print("") + raise SystemExit(0) + + def traffic_split(version): + for key in ("traffic_split", "trafficSplit"): + value = version.get(key) + if value is not None: + try: + return float(value) + except (TypeError, ValueError): + return 0.0 + return 0.0 + + for version in versions: + if traffic_split(version) == 0.0: + print(version.get("id", "")) + break + else: + print("") + PY + )" + if [ -n "$DELETE_VERSION" ]; then + echo "Deleting old non-serving version for $SERVICE: $DELETE_VERSION" + gcloud app versions delete "$DELETE_VERSION" --service="$SERVICE" --project=${{ vars.GCP_PROJECT_ID }} --quiet else - echo "No versions to delete" + echo "No old non-serving versions to delete for $SERVICE" fi - - name: Remove app.yaml + - name: Remove rendered configs run: | rm app.yaml diff --git a/AGENTS.MD b/AGENTS.MD index ae0bc08da..afeebcd94 100644 --- a/AGENTS.MD +++ b/AGENTS.MD @@ -25,6 +25,13 @@ these transfers, keep the following rules in mind to avoid hour-long runs: - Data migrations should be safe to re-run without creating duplicate rows or corrupting data. - Use upserts or duplicate checks and update source fields only after successful inserts. +## 4. Do a cleanup and code analysis pass after code changes +- After completing any code modification, do a cleanup and code analysis pass adjusted to the size and risk of the change. +- Check for obvious regressions, dead code, inconsistent config/docs/tests, and adjacent issues introduced by the change. +- Fix any concrete issues you find in that pass instead of stopping at implementation. +- After code cleanup, run `black` on the touched Python files and run `flake8` on the touched Python files before wrapping up. +- Run targeted validation for the modified area after cleanup; use broader validation when the change affects shared boot, deploy, or database paths. + Following this playbook keeps ETL runs measured in seconds/minutes instead of hours. EOF ## Activate python venv diff --git a/README.md b/README.md index 7e35d3ec1..44ec7bcd4 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,10 @@ supports research, field operations, and public data delivery for the Bureau of ## 🗺️ OGC API - Features The API exposes OGC API - Features endpoints under `/ogcapi` using `pygeoapi`. +In App Engine deployments, `/admin` and `/ogcapi` are served from the same +application as the primary API. The service is intended to scale to zero +outside business hours and be kept warm during the workday with Cloud Scheduler +hits to `/_ah/warmup`. ### Landing & metadata @@ -147,7 +151,7 @@ Minimum vars to set in `.env` for local development: * `POSTGRES_HOST` (`localhost` for local psql/pytest against mapped Docker port) * `POSTGRES_PORT` (`5432`) * `MODE` (`development` recommended locally) -* `SESSION_SECRET_KEY` +* `SESSION_SECRET_KEY` (required if you want to use `/admin`) Auth-related vars (required when auth is enabled, optional when `AUTHENTIK_DISABLE_AUTHENTICATION=1`): * `AUTHENTIK_DISABLE_AUTHENTICATION` @@ -199,15 +203,18 @@ docker compose up --build Notes: * Requires Docker Desktop. -* By default, spins up two containers: `db` (PostGIS/PostgreSQL) and `app` (FastAPI API service). +* By default, spins up two containers: + * `db` for PostGIS/PostgreSQL + * `app` for the primary API, admin UI, and OGC API on `http://localhost:8000` * `db` initializes both application databases in the same Postgres service: * `ocotilloapi_dev` * `ocotilloapi_test` -* `alembic upgrade head` runs on app startup after `docker compose up`. +* `alembic upgrade head` runs in the `app` container on startup. * Compose uses hardcoded DB names: * dev: `ocotilloapi_dev` * test: `ocotilloapi_test` (created by init SQL in `docker/db/init/01-create-test-db.sql`) * The database listens on port `5432` both inside the container and on your host. Ensure `POSTGRES_PORT=5432` and `POSTGRES_DB=ocotilloapi_dev` in your `.env` to run local commands against the Docker dev DB (e.g., `uv run pytest`, `uv run python -m transfers.transfer`). +* `SESSION_SECRET_KEY` only needs to be set in `.env` if you plan to use `/admin`; without it, the API and `/ogcapi` still boot, but `/admin` will be unavailable. #### Staging Data diff --git a/alembic/env.py b/alembic/env.py index 62deed2df..944f00e11 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -7,7 +7,7 @@ from dotenv import load_dotenv from sqlalchemy import create_engine, engine_from_config, pool, text -from services.util import get_bool_env +from services.env import get_bool_env # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/alembic/versions/o8b9c0d1e2f3_rebuild_water_elevation_materialized_view.py b/alembic/versions/o8b9c0d1e2f3_rebuild_water_elevation_materialized_view.py new file mode 100644 index 000000000..390ae86fd --- /dev/null +++ b/alembic/versions/o8b9c0d1e2f3_rebuild_water_elevation_materialized_view.py @@ -0,0 +1,199 @@ +"""rebuild water elevation materialized view + +Revision ID: o8b9c0d1e2f3 +Revises: n7a8b9c0d1e2 +Create Date: 2026-03-10 15:30:00.000000 +""" + +from typing import Sequence, Union + +from alembic import op +from sqlalchemy import inspect, text + +# revision identifiers, used by Alembic. +revision: str = "o8b9c0d1e2f3" +down_revision: Union[str, Sequence[str], None] = "n7a8b9c0d1e2" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + +METERS_TO_FEET = 3.28084 + +LATEST_LOCATION_CTE = """ +SELECT DISTINCT ON (lta.thing_id) + lta.thing_id, + lta.location_id, + lta.effective_start +FROM location_thing_association AS lta +WHERE lta.effective_end IS NULL +ORDER BY lta.thing_id, lta.effective_start DESC +""".strip() + + +def _create_water_elevation_view() -> str: + return f""" + CREATE MATERIALIZED VIEW ogc_water_elevation_wells AS + WITH latest_location AS ( +{LATEST_LOCATION_CTE} + ), + ranked_obs AS ( + SELECT + fe.thing_id, + o.id AS observation_id, + o.observation_datetime, + CASE + WHEN lower(trim(o.unit)) IN ('m', 'meter', 'meters', 'metre', 'metres') THEN + (o.value * {METERS_TO_FEET}) - COALESCE(o.measuring_point_height, 0) + WHEN lower(trim(o.unit)) IN ('ft', 'foot', 'feet') THEN + o.value - COALESCE(o.measuring_point_height, 0) + ELSE + NULL + END AS depth_to_water_below_ground_surface + FROM observation AS o + JOIN sample AS s ON s.id = o.sample_id + JOIN field_activity AS fa ON fa.id = s.field_activity_id + JOIN field_event AS fe ON fe.id = fa.field_event_id + JOIN thing AS t ON t.id = fe.thing_id + WHERE + t.thing_type = 'water well' + AND fa.activity_type = 'groundwater level' + AND o.value IS NOT NULL + AND o.observation_datetime IS NOT NULL + AND lower(trim(o.unit)) IN ( + 'm', + 'meter', + 'meters', + 'metre', + 'metres', + 'ft', + 'foot', + 'feet' + ) + ), + latest_obs AS ( + SELECT + ro.*, + ROW_NUMBER() OVER ( + PARTITION BY ro.thing_id + ORDER BY ro.observation_datetime DESC, ro.observation_id DESC + ) AS rn + FROM ranked_obs AS ro + ) + SELECT + t.id AS id, + t.name, + t.thing_type, + lo.observation_id, + lo.observation_datetime, + l.elevation AS elevation_m, + lo.depth_to_water_below_ground_surface AS depth_to_water_below_ground_surface_ft, + ((l.elevation * {METERS_TO_FEET}) - lo.depth_to_water_below_ground_surface) + AS water_elevation_ft, + l.point + FROM latest_obs AS lo + JOIN thing AS t ON t.id = lo.thing_id + JOIN latest_location AS ll ON ll.thing_id = t.id + JOIN location AS l ON l.id = ll.location_id + WHERE lo.rn = 1 + """ + + +def _create_water_elevation_view_pre_feet_fix() -> str: + return f""" + CREATE MATERIALIZED VIEW ogc_water_elevation_wells AS + WITH latest_location AS ( +{LATEST_LOCATION_CTE} + ), + ranked_obs AS ( + SELECT + fe.thing_id, + o.id AS observation_id, + o.observation_datetime, + (o.value - COALESCE(o.measuring_point_height, 0)) + AS depth_to_water_below_ground_surface, + ROW_NUMBER() OVER ( + PARTITION BY fe.thing_id + ORDER BY o.observation_datetime DESC, o.id DESC + ) AS rn + FROM observation AS o + JOIN sample AS s ON s.id = o.sample_id + JOIN field_activity AS fa ON fa.id = s.field_activity_id + JOIN field_event AS fe ON fe.id = fa.field_event_id + JOIN thing AS t ON t.id = fe.thing_id + WHERE + t.thing_type = 'water well' + AND fa.activity_type = 'groundwater level' + AND o.value IS NOT NULL + AND o.observation_datetime IS NOT NULL + ) + SELECT + t.id AS id, + t.name, + t.thing_type, + ro.observation_id, + ro.observation_datetime, + l.elevation, + ro.depth_to_water_below_ground_surface, + ( + l.elevation - ro.depth_to_water_below_ground_surface + ) AS water_elevation, + l.point + FROM ranked_obs AS ro + JOIN thing AS t ON t.id = ro.thing_id + JOIN latest_location AS ll ON ll.thing_id = t.id + JOIN location AS l ON l.id = ll.location_id + WHERE ro.rn = 1 + """ + + +def _required_tables_present() -> None: + bind = op.get_bind() + inspector = inspect(bind) + existing_tables = set(inspector.get_table_names(schema="public")) + required_tables = { + "thing", + "location", + "location_thing_association", + "observation", + "sample", + "field_activity", + "field_event", + } + + if not required_tables.issubset(existing_tables): + missing = sorted(t for t in required_tables if t not in existing_tables) + raise RuntimeError( + "Cannot create ogc_water_elevation_wells. Missing required tables: " + + ", ".join(missing) + ) + + +def _rebuild(create_sql: str, comment: str) -> None: + op.execute(text("DROP MATERIALIZED VIEW IF EXISTS ogc_water_elevation_wells")) + op.execute(text(create_sql)) + op.execute( + text( + "COMMENT ON MATERIALIZED VIEW ogc_water_elevation_wells IS " f"'{comment}'" + ) + ) + op.execute( + text( + "CREATE UNIQUE INDEX ux_ogc_water_elevation_wells_id " + "ON ogc_water_elevation_wells (id)" + ) + ) + + +def upgrade() -> None: + _required_tables_present() + _rebuild( + _create_water_elevation_view(), + "Latest water elevation per well with explicit units: " + "elevation_m, depth_to_water_below_ground_surface_ft, water_elevation_ft.", + ) + + +def downgrade() -> None: + _rebuild( + _create_water_elevation_view_pre_feet_fix(), + "Latest water elevation per well (elevation minus depth to water below ground surface).", + ) diff --git a/api/asset.py b/api/asset.py index 6e5b8fde9..90afdf9c0 100644 --- a/api/asset.py +++ b/api/asset.py @@ -32,19 +32,18 @@ from schemas.asset import AssetResponse, CreateAsset, UpdateAsset from services.audit_helper import audit_add from services.crud_helper import model_patcher, model_deleter -from services.query_helper import simple_get_by_id -from services.gcs_helper import ( - get_storage_bucket, - gcs_upload, - gcs_remove, - check_asset_exists, - add_signed_url, -) from services.exceptions_helper import PydanticStyleException +from services.query_helper import simple_get_by_id router = APIRouter(prefix="/asset", tags=["asset"]) +def get_storage_bucket(): + from services.gcs_helper import get_storage_bucket as get_gcs_storage_bucket + + return get_gcs_storage_bucket() + + def database_error_handler(payload: CreateAsset, error: ProgrammingError) -> None: """ Handle errors raised by the database when adding or updating a asset. @@ -83,6 +82,8 @@ async def upload_asset( bucket=Depends(get_storage_bucket), file: UploadFile = File(...), ) -> dict: + from services.gcs_helper import gcs_upload + uri, blob_name = gcs_upload(file, bucket) return { "uri": uri, @@ -105,6 +106,8 @@ async def add_asset( # check to see if an asset entry already exists for # this storage path and thing_id + from services.gcs_helper import check_asset_exists + existing_asset = check_asset_exists(session, storage_path, thing_id=thing_id) if existing_asset: # If an asset already exists, return it @@ -161,6 +164,8 @@ async def list_assets( def transformer(records: list[Asset]): if thing_id is not None: + from services.gcs_helper import add_signed_url + bucket = get_storage_bucket() records = [add_signed_url(ai, bucket) for ai in records] return records @@ -178,6 +183,8 @@ async def get_asset( """ Retrieve an asset by its ID. """ + from services.gcs_helper import add_signed_url + asset = simple_get_by_id(session, Asset, asset_id) add_signed_url(asset, bucket) @@ -220,6 +227,8 @@ async def remove_asset( session: session_dependency, bucket=Depends(get_storage_bucket), ): + from services.gcs_helper import gcs_remove + asset = simple_get_by_id(session, Asset, asset_id) gcs_remove(asset.uri, bucket) diff --git a/cli/cli.py b/cli/cli.py index a7777fd1a..134c3538e 100644 --- a/cli/cli.py +++ b/cli/cli.py @@ -24,8 +24,8 @@ import typer from dotenv import load_dotenv -# CLI should honor local `.env` values, even if shell/container vars already exist. -load_dotenv(override=True) +# CLI should load `.env` defaults without clobbering an explicitly prepared environment. +load_dotenv(override=False) os.environ.setdefault("OCO_LOG_CONTEXT", "cli") cli = typer.Typer(help="Command line interface for managing the application.") diff --git a/core/app.py b/core/app.py index 978419f6e..43fd705ac 100644 --- a/core/app.py +++ b/core/app.py @@ -14,10 +14,14 @@ # limitations under the License. # =============================================================================== import os +import asyncio +import time +import logging from contextlib import asynccontextmanager from typing import AsyncGenerator from fastapi import FastAPI +from fastapi import Request from fastapi.openapi.docs import ( get_swagger_ui_html, get_swagger_ui_oauth2_redirect_html, @@ -26,6 +30,8 @@ from .settings import settings +logger = logging.getLogger(__name__) + @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: @@ -37,147 +43,204 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: seed_all(10, skip_if_exists=True) - yield - + app.state.instance_ready_at = time.perf_counter() + app.state.first_request_pending = True + logger.info( + "instance startup complete", + extra={ + "event": "instance_startup_complete", + "startup_ms": round( + (app.state.instance_ready_at - app.state.process_boot_started_at) + * 1000, + 2, + ), + }, + ) -app = FastAPI( - title="Sample Location API", - description="API for managing sample locations", - version=settings.version, - lifespan=lifespan, -) + yield -# --- full OpenAPI schema --- -def full_openapi(): - if app.openapi_schema: - return app.openapi_schema - schema = get_openapi( - title="Ocotillo API (Full)", +def create_base_app() -> FastAPI: + app = FastAPI( + title="Sample Location API", + description="API for managing sample locations", version=settings.version, - description="Full API schema (authorized users)", - routes=app.routes, - ) - app.openapi_schema = schema - return app.openapi_schema - - -# --- public OpenAPI schema --- -def public_openapi(): - schema = get_openapi( - title="Ocotillo API (Public)", - version="0.0.1", - description="Public API schema (anonymous users)", - routes=app.routes, + lifespan=lifespan, ) - - # Keep only operations where the endpoint function is marked public - new_paths = {} - for path, path_item in schema["paths"].items(): - new_methods = {} - for method, operation in path_item.items(): - # Recover the actual route handler - - route = next( + app.state.process_boot_started_at = time.perf_counter() + app.state.instance_ready_at = None + app.state.first_request_pending = True + app.state.request_timing_lock = asyncio.Lock() + + @app.middleware("http") + async def log_request_timing(request: Request, call_next): + request_started_at = time.perf_counter() + async with app.state.request_timing_lock: + is_first_request = app.state.first_request_pending + app.state.first_request_pending = False + status_code = 500 + try: + response = await call_next(request) + status_code = response.status_code + return response + finally: + request_duration_ms = round( + (time.perf_counter() - request_started_at) * 1000, 2 + ) + startup_ms = round( ( - r - for r in app.routes - if r.path == path and method.upper() in r.methods - ), - None, + (app.state.instance_ready_at or request_started_at) + - app.state.process_boot_started_at + ) + * 1000, + 2, + ) + uptime_before_request_ms = round( + ( + ( + request_started_at + - (app.state.instance_ready_at or request_started_at) + ) + ) + * 1000, + 2, + ) + request_kind = "cold" if is_first_request else "warm" + + logger.info( + "request timing", + extra={ + "event": "request_timing", + "request_kind": request_kind, + "method": request.method, + "path": request.url.path, + "status_code": status_code, + "request_duration_ms": request_duration_ms, + "startup_ms": startup_ms, + "uptime_before_request_ms": uptime_before_request_ms, + }, ) - if not route: - continue - - endpoint = getattr(route, "endpoint", None) - if getattr(endpoint, "_is_public", False): - # Strip security info for public docs - operation["security"] = [] - new_methods[method] = operation - - if new_methods: - new_paths[path] = new_methods - - schema["paths"] = new_paths - - # --- Collect all referenced schemas recursively --- - referenced = set() - - def collect_refs(obj): - if isinstance(obj, dict): - for k, v in obj.items(): - if ( - k == "$ref" - and isinstance(v, str) - and v.startswith("#/components/schemas/") - ): - referenced.add(v.split("/")[-1]) - else: - collect_refs(v) - elif isinstance(obj, list): - for item in obj: - collect_refs(item) - - # Step 1: Collect refs from paths - collect_refs(schema["paths"]) - - # Step 2: Recursively resolve inside components - visited = set() - to_visit = set(referenced) - - while to_visit: - name = to_visit.pop() - if name in visited: - continue - visited.add(name) - - model = schema.get("components", {}).get("schemas", {}).get(name) - if not model: - continue - - collect_refs(model) - # Add only new schemas we haven’t visited yet - to_visit |= referenced - visited - - # Step 3: Filter components.schemas to only referenced ones - if "components" in schema and "schemas" in schema["components"]: - schema["components"]["schemas"] = { - n: m for n, m in schema["components"]["schemas"].items() if n in referenced - } - - # 4. Drop security schemes entirely for the public spec - if "components" in schema and "securitySchemes" in schema["components"]: - schema["components"].pop("securitySchemes", None) - return schema - - -# set the public schema as the default -app.openapi = public_openapi - -CLIENT_ID = os.environ.get("AUTHENTIK_CLIENT_ID") - - -@app.get("/docs-auth", include_in_schema=False) -async def custom_swagger_ui(): - return get_swagger_ui_html( - openapi_url="/openapi-auth.json", - title="Swagger UI", - oauth2_redirect_url="/docs-auth/oauth2-redirect", - init_oauth={ - "clientId": CLIENT_ID, - "usePkceWithAuthorizationCodeGrant": True, # if you use PKCE - }, - ) + def full_openapi(): + if app.openapi_schema: + return app.openapi_schema + schema = get_openapi( + title="Ocotillo API (Full)", + version=settings.version, + description="Full API schema (authorized users)", + routes=app.routes, + ) + app.openapi_schema = schema + return app.openapi_schema -@app.get("/openapi-auth.json", include_in_schema=False) -async def get_openapi_auth(): - return full_openapi() + def public_openapi(): + schema = get_openapi( + title="Ocotillo API (Public)", + version="0.0.1", + description="Public API schema (anonymous users)", + routes=app.routes, + ) + + # Keep only operations where the endpoint function is marked public. + new_paths = {} + for path, path_item in schema["paths"].items(): + new_methods = {} + for method, operation in path_item.items(): + route = next( + ( + r + for r in app.routes + if r.path == path and method.upper() in r.methods + ), + None, + ) + if not route: + continue + + endpoint = getattr(route, "endpoint", None) + if getattr(endpoint, "_is_public", False): + operation["security"] = [] + new_methods[method] = operation + + if new_methods: + new_paths[path] = new_methods + + schema["paths"] = new_paths + + referenced = set() + + def collect_refs(obj): + if isinstance(obj, dict): + for key, value in obj.items(): + if ( + key == "$ref" + and isinstance(value, str) + and value.startswith("#/components/schemas/") + ): + referenced.add(value.split("/")[-1]) + else: + collect_refs(value) + elif isinstance(obj, list): + for item in obj: + collect_refs(item) + + collect_refs(schema["paths"]) + + visited = set() + to_visit = set(referenced) + while to_visit: + name = to_visit.pop() + if name in visited: + continue + visited.add(name) + model = schema.get("components", {}).get("schemas", {}).get(name) + if not model: + continue -@app.get("/docs-auth/oauth2-redirect", include_in_schema=False) -async def swagger_ui_redirect(): - return get_swagger_ui_oauth2_redirect_html() + collect_refs(model) + to_visit |= referenced - visited + + if "components" in schema and "schemas" in schema["components"]: + schema["components"]["schemas"] = { + name: model + for name, model in schema["components"]["schemas"].items() + if name in referenced + } + + if "components" in schema and "securitySchemes" in schema["components"]: + schema["components"].pop("securitySchemes", None) + return schema + + app.openapi = public_openapi + + client_id = os.environ.get("AUTHENTIK_CLIENT_ID") + + @app.get("/docs-auth", include_in_schema=False) + async def custom_swagger_ui(): + return get_swagger_ui_html( + openapi_url="/openapi-auth.json", + title="Swagger UI", + oauth2_redirect_url="/docs-auth/oauth2-redirect", + init_oauth={ + "clientId": client_id, + "usePkceWithAuthorizationCodeGrant": True, + }, + ) + + @app.get("/openapi-auth.json", include_in_schema=False) + async def get_openapi_auth(): + return full_openapi() + + @app.get("/docs-auth/oauth2-redirect", include_in_schema=False) + async def swagger_ui_redirect(): + return get_swagger_ui_oauth2_redirect_html() + + @app.get("/_ah/warmup", include_in_schema=False) + async def warmup(): + return {"status": "ok"} + + return app def public_route(func): diff --git a/core/factory.py b/core/factory.py new file mode 100644 index 000000000..69bcfba7e --- /dev/null +++ b/core/factory.py @@ -0,0 +1,55 @@ +import os + +from dotenv import load_dotenv + +from core.app import create_base_app +from core.initializers import ( + configure_apitally_middleware, + configure_cors_middleware, + configure_lazy_admin, + configure_session_middleware, + register_api_routes, +) + +_runtime_initialized = False + + +def initialize_runtime() -> None: + global _runtime_initialized + + if _runtime_initialized: + return + + load_dotenv(override=False) + dsn = os.environ.get("SENTRY_DSN") + if dsn: + import sentry_sdk + + sentry_sdk.init( + dsn=dsn, + traces_sample_rate=float( + os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.1") + ), + profiles_sample_rate=float( + os.environ.get("SENTRY_PROFILES_SAMPLE_RATE", "0.0") + ), + profile_lifecycle="trace", + send_default_pii=True, + ) + + _runtime_initialized = True + + +def create_api_app(): + initialize_runtime() + app = create_base_app() + register_api_routes(app) + from core.pygeoapi import mount_pygeoapi + + mount_pygeoapi(app) + if os.environ.get("SESSION_SECRET_KEY"): + configure_session_middleware(app) + configure_cors_middleware(app) + configure_apitally_middleware(app) + configure_lazy_admin(app) + return app diff --git a/core/initializers.py b/core/initializers.py index ba932b9b5..98da4e8ee 100644 --- a/core/initializers.py +++ b/core/initializers.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== +import asyncio import os from pathlib import Path @@ -20,6 +21,7 @@ from sqlalchemy import text, select from sqlalchemy.dialects.postgresql import insert from sqlalchemy.exc import DatabaseError +from starlette.responses import PlainTextResponse from db import Base from db.engine import session_ctx @@ -193,11 +195,10 @@ def init_lexicon(path: str = None) -> None: session.commit() -def register_routes(app): - if getattr(app.state, "routes_registered", False): +def register_api_routes(app): + if getattr(app.state, "api_routes_registered", False): return - from admin.auth_routes import router as admin_auth_router from api.group import router as group_router from api.contact import router as contact_router from api.location import router as location_router @@ -215,15 +216,12 @@ def register_routes(app): from api.search import router as search_router from api.geospatial import router as geospatial_router from api.ngwmn import router as ngwmn_router - from core.pygeoapi import mount_pygeoapi app.include_router(asset_router) - app.include_router(admin_auth_router) app.include_router(author_router) app.include_router(contact_router) app.include_router(geospatial_router) app.include_router(group_router) - mount_pygeoapi(app) app.include_router(lexicon_router) app.include_router(location_router) app.include_router(observation_router) @@ -234,11 +232,10 @@ def register_routes(app): app.include_router(thing_router) app.include_router(ngwmn_router) add_pagination(app) - app.state.routes_registered = True + app.state.api_routes_registered = True -def configure_middleware(app): - from starlette.middleware.cors import CORSMiddleware +def configure_session_middleware(app): from starlette.middleware.sessions import SessionMiddleware if not getattr(app.state, "session_middleware_configured", False): @@ -248,6 +245,10 @@ def configure_middleware(app): app.add_middleware(SessionMiddleware, secret_key=session_secret_key) app.state.session_middleware_configured = True + +def configure_cors_middleware(app): + from starlette.middleware.cors import CORSMiddleware + if not getattr(app.state, "cors_middleware_configured", False): app.add_middleware( CORSMiddleware, @@ -258,6 +259,8 @@ def configure_middleware(app): ) app.state.cors_middleware_configured = True + +def configure_apitally_middleware(app): apitally_client_id = os.environ.get("APITALLY_CLIENT_ID") if apitally_client_id and not getattr( app.state, "apitally_middleware_configured", False @@ -278,14 +281,44 @@ def configure_middleware(app): app.state.apitally_middleware_configured = True +def configure_middleware(app): + configure_session_middleware(app) + configure_cors_middleware(app) + configure_apitally_middleware(app) + + def configure_admin(app): if getattr(app.state, "admin_configured", False): return from admin import create_admin + from admin.auth_routes import router as admin_auth_router + app.include_router(admin_auth_router) create_admin(app) app.state.admin_configured = True +def configure_lazy_admin(app): + if getattr(app.state, "lazy_admin_configured", False): + return + + app.state.admin_configure_lock = asyncio.Lock() + + @app.middleware("http") + async def ensure_admin_initialized(request, call_next): + if request.url.path.startswith("/admin"): + if not getattr(app.state, "session_middleware_configured", False): + return PlainTextResponse( + "Admin requires SESSION_SECRET_KEY to be configured.", + status_code=503, + ) + async with app.state.admin_configure_lock: + if not getattr(app.state, "admin_configured", False): + configure_admin(app) + return await call_next(request) + + app.state.lazy_admin_configured = True + + # ============= EOF ============================================= diff --git a/core/permissions.py b/core/permissions.py index b5ce731ad..952e844f4 100644 --- a/core/permissions.py +++ b/core/permissions.py @@ -13,8 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # =============================================================================== -# import os import os +from functools import lru_cache from typing import Optional, List, Union, cast, Callable import httpx @@ -36,18 +36,20 @@ if AUTHENTIK_ISSUER and not auth_disabled: JWKS_URL = f"{AUTHENTIK_ISSUER}jwks/" - # Fetch JWKS (could also cache this) - def get_jwks(): - resp = httpx.get(JWKS_URL) - resp.raise_for_status() - return resp.json() - jwks = get_jwks() +@lru_cache(maxsize=1) +def get_jwks(): + if not AUTHENTIK_ISSUER or auth_disabled: + return {} + + resp = httpx.get(JWKS_URL, timeout=10.0) + resp.raise_for_status() + return resp.json() def get_public_key(token): unverified_header = jwt.get_unverified_header(token) - for key in jwks["keys"]: + for key in get_jwks().get("keys", []): if key["kid"] == unverified_header["kid"]: return RSAAlgorithm.from_jwk(key) raise HTTPException(status_code=401, detail="Invalid signing key") diff --git a/db/engine.py b/db/engine.py index 6e1bfd17e..eb8413065 100644 --- a/db/engine.py +++ b/db/engine.py @@ -29,7 +29,7 @@ ) from sqlalchemy.util import await_only -from services.util import get_bool_env +from services.env import get_bool_env # Load .env file - don't override env vars already set (e.g., by test framework) load_dotenv(override=False) diff --git a/docker-compose.yml b/docker-compose.yml index 9a557f827..5331fe3d8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -33,6 +33,12 @@ services: - POSTGRES_PORT=5432 - MODE=${MODE} - AUTHENTIK_DISABLE_AUTHENTICATION=${AUTHENTIK_DISABLE_AUTHENTICATION} + - SESSION_SECRET_KEY=${SESSION_SECRET_KEY} + - PYGEOAPI_POSTGRES_HOST=db + - PYGEOAPI_POSTGRES_PORT=5432 + - PYGEOAPI_POSTGRES_DB=ocotilloapi_dev + - PYGEOAPI_POSTGRES_USER=${POSTGRES_USER} + - PYGEOAPI_POSTGRES_PASSWORD=${POSTGRES_PASSWORD} ports: - 8000:8000 depends_on: diff --git a/entrypoint.sh b/entrypoint.sh index 3fd13d48c..18e0badc9 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -4,6 +4,9 @@ set -eu DB_HOST="${POSTGRES_HOST:-db}" DB_PORT="${POSTGRES_PORT:-5432}" DB_NAME="${POSTGRES_DB:-postgres}" +APP_MODULE="${APP_MODULE:-main:app}" +APP_PORT="${APP_PORT:-8000}" +RUN_MIGRATIONS="${RUN_MIGRATIONS:-true}" # Wait for PostgreSQL to be ready until PGPASSWORD="$POSTGRES_PASSWORD" pg_isready -h "$DB_HOST" -p "$DB_PORT" -U "$POSTGRES_USER" -d "$DB_NAME"; do @@ -12,7 +15,10 @@ until PGPASSWORD="$POSTGRES_PASSWORD" pg_isready -h "$DB_HOST" -p "$DB_PORT" -U done echo "PostgreSQL is ready!" -echo "Applying migrations..." -alembic upgrade head +if [ "$RUN_MIGRATIONS" = "true" ]; then + echo "Applying migrations..." + alembic upgrade head +fi + echo "Starting the application..." -uvicorn main:app --host 0.0.0.0 --port 8000 --reload +uvicorn "$APP_MODULE" --host 0.0.0.0 --port "$APP_PORT" --reload diff --git a/main.py b/main.py index fac816f26..5f2bcb6bf 100644 --- a/main.py +++ b/main.py @@ -1,43 +1,7 @@ -import os +from core.factory import create_api_app -from dotenv import load_dotenv -from core.initializers import configure_admin, configure_middleware, register_routes - -load_dotenv() -DSN = os.environ.get("SENTRY_DSN") - -if DSN: - import sentry_sdk - - sentry_sdk.init( - dsn=DSN, - # Set traces_sample_rate to 1.0 to capture 100% - # of transactions for performance monitoring. - traces_sample_rate=1.0, - # Set profiles_sample_rate to 1.0 to profile 100% - # of sampled transactions. - # We recommend adjusting this value in production. - profiles_sample_rate=1.0, - # Set profile_lifecycle to "trace" to automatically - # run the profiler on when there is an active transaction - profile_lifecycle="trace", - # Add data like request headers and IP for users, - # see https://docs.sentry.io/platforms/python/data-management/data-collected/ for more info - send_default_pii=True, - ) - - -def create_app(): - from core.app import app as core_app - - register_routes(core_app) - configure_middleware(core_app) - configure_admin(core_app) - return core_app - - -app = create_app() +app = create_api_app() if __name__ == "__main__": diff --git a/schedule b/schedule index cadb867ef..4b43cb086 100644 --- a/schedule +++ b/schedule @@ -1,9 +1,19 @@ +Use Cloud Scheduler to keep the primary API warm during business hours without +paying for a dedicated instance overnight. -this is used to add a schedule. -This schedule is used to keeping the api runingl - -gcloud scheduler jobs create http keep-alive-job \ - --schedule="*/10 8-18 * * 1-5" \ +Production: +gcloud scheduler jobs create http ocotillo-api-business-hours-warmup \ + --location=us-west4 \ + --schedule="*/5 8-18 * * 1-5" \ + --time-zone="America/Denver" \ --uri="https://ocotillo-api-dot-waterdatainitiative-271000.appspot.com/_ah/warmup" \ - --http-method=GET \ No newline at end of file + --http-method=GET + +Staging: +gcloud scheduler jobs create http ocotillo-api-staging-business-hours-warmup \ + --location=us-west4 \ + --schedule="*/5 8-18 * * 1-5" \ + --time-zone="America/Denver" \ + --uri="https://ocotillo-api-staging-dot-waterdatainitiative-271000.appspot.com/_ah/warmup" \ + --http-method=GET diff --git a/services/asset_helper.py b/services/asset_helper.py index 83c48509d..51a4654fb 100644 --- a/services/asset_helper.py +++ b/services/asset_helper.py @@ -5,19 +5,21 @@ # You may not use this file except in compliance with the License. # You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 # =============================================================================== -from typing import BinaryIO +from typing import TYPE_CHECKING, BinaryIO -from google.cloud.storage import Bucket from sqlalchemy.orm import Session from db import AssetThingAssociation, Thing, Asset from services.gcs_helper import gcs_upload +if TYPE_CHECKING: + from google.cloud.storage import Bucket + def upload_and_associate( session: Session, ff: BinaryIO, - bucket: Bucket, + bucket: "Bucket", thing: Thing, name: str, **asset_args, diff --git a/services/env.py b/services/env.py new file mode 100644 index 000000000..7bc06caa1 --- /dev/null +++ b/services/env.py @@ -0,0 +1,16 @@ +import os + + +def to_bool(value: str) -> bool | str: + """Convert common string environment values to booleans.""" + if isinstance(value, bool): + return value + if value.lower() in ("true", "1", "yes"): + return True + if value.lower() in ("false", "0", "no"): + return False + return value + + +def get_bool_env(key, default=False): + return to_bool(os.getenv(key, default)) diff --git a/services/gcs_helper.py b/services/gcs_helper.py index 4a45fa509..237af5cb5 100644 --- a/services/gcs_helper.py +++ b/services/gcs_helper.py @@ -20,7 +20,6 @@ from hashlib import md5 from fastapi import UploadFile -from google.oauth2 import service_account from sqlalchemy import select from core.settings import settings @@ -29,10 +28,11 @@ GCS_BUCKET_NAME = os.environ.get("GCS_BUCKET_NAME") GCS_BUCKET_BASE_URL = f"https://storage.cloud.google.com/{GCS_BUCKET_NAME}/uploads" -from google.cloud import storage +def get_storage_client(): + from google.cloud import storage + from google.oauth2 import service_account -def get_storage_client() -> storage.Client: if settings.mode == "production": key_base64 = os.environ.get("GCS_SERVICE_ACCOUNT_KEY") decoded = base64.b64decode(key_base64).decode("utf-8") @@ -51,7 +51,7 @@ def get_storage_client() -> storage.Client: return client -def get_storage_bucket(client=None, bucket: str = None) -> storage.Bucket: +def get_storage_bucket(client=None, bucket: str = None): if client is None: client = get_storage_client() @@ -70,7 +70,7 @@ def make_blob_name_and_uri(file): return blob_name, uri -def gcs_upload(file: UploadFile, bucket: storage.Bucket = None): +def gcs_upload(file: UploadFile, bucket=None): if bucket is None: bucket = get_storage_bucket() @@ -87,12 +87,12 @@ def gcs_upload(file: UploadFile, bucket: storage.Bucket = None): return uri, blob_name -def gcs_remove(uri: str, bucket: storage.Bucket): +def gcs_remove(uri: str, bucket): blob = bucket.blob(uri) blob.delete() -def add_signed_url(asset: Asset, bucket: storage.Bucket): +def add_signed_url(asset: Asset, bucket): asset.signed_url = bucket.blob(asset.storage_path).generate_signed_url( version="v4", expiration=datetime.timedelta(minutes=15), diff --git a/services/util.py b/services/util.py index 7a3df7eed..374666e90 100644 --- a/services/util.py +++ b/services/util.py @@ -1,9 +1,9 @@ import json import logging -import os import time from datetime import datetime from zoneinfo import ZoneInfo + import httpx import pyproj from shapely.ops import transform @@ -47,22 +47,6 @@ def _get_json( return None -def to_bool(value: str) -> bool | str: - """Convert a string to a boolean.""" - if isinstance(value, bool): - return value - if value.lower() in ("true", "1", "yes"): - return True - elif value.lower() in ("false", "0", "no"): - return False - - return value - - -def get_bool_env(key, default=False): - return to_bool(os.getenv(key, default)) - - def transform_srid(geometry, source_srid, target_srid): """ geometry must be a shapely geometry object, like Point, Polygon, or MultiPolygon diff --git a/tests/conftest.py b/tests/conftest.py index a5f037b61..2705e72a6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,10 +5,12 @@ from alembic import command from alembic.config import Config from dotenv import load_dotenv +from sqlalchemy import delete +from sqlalchemy import inspect as sa_inspect from core.initializers import init_lexicon, init_parameter from db import * -from db.engine import session_ctx +from db.engine import engine, session_ctx from db.initialization import ( recreate_public_schema, sync_search_vector_triggers, @@ -41,13 +43,31 @@ def _alembic_config() -> Config: def _reset_schema() -> None: + engine.dispose() with session_ctx() as session: recreate_public_schema(session) + engine.dispose() def _sync_search_vectors() -> None: + engine.dispose() with session_ctx() as session: sync_search_vector_triggers(session) + engine.dispose() + + +def _delete_if_present(session, obj) -> None: + if obj is None: + return + + state = sa_inspect(obj) + identity = state.identity + if identity is None: + return + + persistent = session.get(type(obj), identity[0] if len(identity) == 1 else identity) + if persistent is not None: + session.delete(persistent) @pytest.fixture(scope="session", autouse=True) @@ -55,6 +75,7 @@ def _setup_test_db(): """Reset schema once per session; tests share DB state, so keep isolation in fixtures.""" _reset_schema() command.upgrade(_alembic_config(), "head") + engine.dispose() _sync_search_vectors() init_lexicon() init_parameter() @@ -123,11 +144,11 @@ def location(): session.add(note) session.commit() session.refresh(loc) + location_id = loc.id yield loc - session.delete(note) - session.delete(loc) + session.execute(delete(Location).where(Location.id == location_id)) session.commit() @@ -141,8 +162,9 @@ def second_location(): ) session.add(location) session.commit() + location_id = location.id yield location - session.delete(location) + session.execute(delete(Location).where(Location.id == location_id)) session.commit() @@ -281,8 +303,9 @@ def second_well_screen(water_well_thing): ) session.add(screen) session.commit() + screen_id = screen.id yield screen - session.delete(screen) + session.execute(delete(WellScreen).where(WellScreen.id == screen_id)) session.commit() @@ -298,8 +321,9 @@ def thing_id_link(water_well_thing): ) session.add(id_link) session.commit() + link_id = id_link.id yield id_link - session.delete(id_link) + session.execute(delete(ThingIdLink).where(ThingIdLink.id == link_id)) session.commit() @@ -315,8 +339,9 @@ def second_thing_id_link(water_well_thing): ) session.add(id_link) session.commit() + link_id = id_link.id yield id_link - session.delete(id_link) + session.execute(delete(ThingIdLink).where(ThingIdLink.id == link_id)) session.commit() @@ -339,9 +364,14 @@ def spring_thing(location): assoc.thing_id = spring.id session.add(assoc) session.commit() + spring_id = spring.id yield spring - session.delete(spring) - session.delete(assoc) + session.execute( + delete(LocationThingAssociation).where( + LocationThingAssociation.thing_id == spring_id + ) + ) + session.execute(delete(Thing).where(Thing.id == spring_id)) session.commit() @@ -364,9 +394,14 @@ def second_spring_thing(location): assoc.thing_id = spring.id session.add(assoc) session.commit() + spring_id = spring.id yield spring - session.delete(spring) - session.delete(assoc) + session.execute( + delete(LocationThingAssociation).where( + LocationThingAssociation.thing_id == spring_id + ) + ) + session.execute(delete(Thing).where(Thing.id == spring_id)) session.commit() @@ -386,8 +421,9 @@ def sensor(): ) session.add(sensor) session.commit() + sensor_id = sensor.id yield sensor - session.delete(sensor) + session.execute(delete(Sensor).where(Sensor.id == sensor_id)) session.commit() @@ -407,8 +443,9 @@ def second_sensor(): ) session.add(sensor) session.commit() + sensor_id = sensor.id yield sensor - session.delete(sensor) + session.execute(delete(Sensor).where(Sensor.id == sensor_id)) session.commit() @@ -697,8 +734,13 @@ def asset_with_associated_thing(water_well_thing): session.refresh(association) yield asset - session.delete(asset) - session.delete(association) + session.execute( + delete(AssetThingAssociation).where( + AssetThingAssociation.asset_id == asset.id, + AssetThingAssociation.thing_id == water_well_thing.id, + ) + ) + session.execute(delete(Asset).where(Asset.id == asset.id)) session.commit() @@ -718,7 +760,7 @@ def second_asset(): session.commit() session.refresh(asset) yield asset - session.delete(asset) + session.execute(delete(Asset).where(Asset.id == asset.id)) session.commit() diff --git a/tests/features/environment.py b/tests/features/environment.py index 4f3a6d2b5..9813c38fc 100644 --- a/tests/features/environment.py +++ b/tests/features/environment.py @@ -53,7 +53,7 @@ ) from db.engine import session_ctx from db.initialization import recreate_public_schema, sync_search_vector_triggers -from services.util import get_bool_env +from services.env import get_bool_env def add_context_object_container(name): diff --git a/tests/integration/test_alembic_migrations.py b/tests/integration/test_alembic_migrations.py index 92036c779..67c8b6ce7 100644 --- a/tests/integration/test_alembic_migrations.py +++ b/tests/integration/test_alembic_migrations.py @@ -223,6 +223,35 @@ def test_postgis_extension_enabled(self): assert postgis == "postgis", "PostGIS extension not enabled" + def test_water_elevation_materialized_view_has_expected_columns(self): + """Water elevation materialized view should match the feet-normalized schema.""" + with session_ctx() as session: + result = session.execute( + text( + """ + SELECT attname + FROM pg_attribute + WHERE attrelid = 'ogc_water_elevation_wells'::regclass + AND attnum > 0 + AND NOT attisdropped + ORDER BY attnum + """ + ) + ) + columns = [row[0] for row in result.fetchall()] + + assert columns == [ + "id", + "name", + "thing_type", + "observation_id", + "observation_datetime", + "elevation_m", + "depth_to_water_below_ground_surface_ft", + "water_elevation_ft", + "point", + ] + # ============================================================================= # Foreign Key Integrity Tests diff --git a/tests/test_lazy_admin.py b/tests/test_lazy_admin.py new file mode 100644 index 000000000..5b70ed884 --- /dev/null +++ b/tests/test_lazy_admin.py @@ -0,0 +1,19 @@ +import os + +from core.factory import create_api_app +from fastapi.testclient import TestClient + + +def test_admin_is_lazy_loaded_on_first_admin_request(): + os.environ["SESSION_SECRET_KEY"] = "test-session-secret-key" + app = create_api_app() + + assert not any(route.path.startswith("/admin") for route in app.routes) + assert getattr(app.state, "admin_configured", False) is False + + with TestClient(app) as client: + response = client.get("/admin", follow_redirects=False) + + assert response.status_code in {200, 302, 307} + assert app.state.admin_configured is True + assert any(route.path.startswith("/admin") for route in app.routes) diff --git a/tests/test_request_timing.py b/tests/test_request_timing.py new file mode 100644 index 000000000..ae6b255ed --- /dev/null +++ b/tests/test_request_timing.py @@ -0,0 +1,43 @@ +import logging + +from fastapi.testclient import TestClient + +from core.app import create_base_app + + +def test_request_timing_logs_cold_then_warm(caplog): + app = create_base_app() + + @app.get("/ping") + async def ping(): + return {"status": "ok"} + + with caplog.at_level(logging.INFO, logger="core.app"): + with TestClient(app) as client: + assert client.get("/ping").status_code == 200 + assert client.get("/ping").status_code == 200 + + startup_logs = [ + record for record in caplog.records if record.msg == "instance startup complete" + ] + request_logs = [ + record for record in caplog.records if record.msg == "request timing" + ] + + assert len(startup_logs) == 1 + assert len(request_logs) == 2 + + assert startup_logs[0].event == "instance_startup_complete" + assert startup_logs[0].startup_ms >= 0 + + assert request_logs[0].request_kind == "cold" + assert request_logs[0].path == "/ping" + assert request_logs[0].status_code == 200 + assert request_logs[0].request_duration_ms >= 0 + assert request_logs[0].startup_ms >= 0 + + assert request_logs[1].request_kind == "warm" + assert request_logs[1].path == "/ping" + assert request_logs[1].status_code == 200 + assert request_logs[1].request_duration_ms >= 0 + assert request_logs[1].uptime_before_request_ms >= 0 diff --git a/tests/test_thing.py b/tests/test_thing.py index 6cba4800b..dac8c124e 100644 --- a/tests/test_thing.py +++ b/tests/test_thing.py @@ -16,6 +16,7 @@ from datetime import date, timezone import pytest +from sqlalchemy import delete from core.dependencies import ( admin_function, @@ -120,9 +121,12 @@ def test_measuring_point_properties_skip_null_history(): assert well.measuring_point_height == 2.5 assert well.measuring_point_description == "old mp" - session.delete(new_history) - session.delete(old_history) - session.delete(well) + session.execute( + delete(MeasuringPointHistory).where( + MeasuringPointHistory.thing_id == well.id + ) + ) + session.execute(delete(Thing).where(Thing.id == well.id)) session.commit() diff --git a/transfers/backfill/backfill.py b/transfers/backfill/backfill.py index fc7f50268..289f07d53 100644 --- a/transfers/backfill/backfill.py +++ b/transfers/backfill/backfill.py @@ -29,7 +29,7 @@ if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) -from services.util import get_bool_env +from services.env import get_bool_env from transfers.logger import logger diff --git a/transfers/transfer.py b/transfers/transfer.py index 49e36e9a9..419d4870a 100644 --- a/transfers/transfer.py +++ b/transfers/transfer.py @@ -52,7 +52,7 @@ from db.engine import session_ctx from db.initialization import recreate_public_schema, sync_search_vector_triggers -from services.util import get_bool_env +from services.env import get_bool_env from transfers.aquifer_system_transfer import transfer_aquifer_systems from transfers.geologic_formation_transfer import transfer_geologic_formations from transfers.permissions_transfer import transfer_permissions From c6c1997085495f3e1562769c227b2e613237f423 Mon Sep 17 00:00:00 2001 From: jirhiker <2035568+jirhiker@users.noreply.github.com> Date: Tue, 10 Mar 2026 15:18:09 +0000 Subject: [PATCH 2/2] Formatting changes --- main.py | 1 - tests/integration/test_alembic_migrations.py | 8 ++------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index 5f2bcb6bf..8a56d312e 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,5 @@ from core.factory import create_api_app - app = create_api_app() diff --git a/tests/integration/test_alembic_migrations.py b/tests/integration/test_alembic_migrations.py index 67c8b6ce7..5bba21c20 100644 --- a/tests/integration/test_alembic_migrations.py +++ b/tests/integration/test_alembic_migrations.py @@ -226,18 +226,14 @@ def test_postgis_extension_enabled(self): def test_water_elevation_materialized_view_has_expected_columns(self): """Water elevation materialized view should match the feet-normalized schema.""" with session_ctx() as session: - result = session.execute( - text( - """ + result = session.execute(text(""" SELECT attname FROM pg_attribute WHERE attrelid = 'ogc_water_elevation_wells'::regclass AND attnum > 0 AND NOT attisdropped ORDER BY attnum - """ - ) - ) + """)) columns = [row[0] for row in result.fetchall()] assert columns == [