From e261127bfd66d72d84f17d2a0560a82f2c400599 Mon Sep 17 00:00:00 2001 From: devsjc <47188100+devsjc@users.noreply.github.com> Date: Fri, 16 Jan 2026 16:29:33 +0000 Subject: [PATCH 1/3] feat(sql): Speed up LatestForecast via better indexing --- .../sql/migrations/00004_predictions.sql | 2 +- .../postgres/sql/queries/predictions.sql | 34 ++++++++++++------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/internal/server/postgres/sql/migrations/00004_predictions.sql b/internal/server/postgres/sql/migrations/00004_predictions.sql index dff9c19..d47c093 100644 --- a/internal/server/postgres/sql/migrations/00004_predictions.sql +++ b/internal/server/postgres/sql/migrations/00004_predictions.sql @@ -125,7 +125,7 @@ CREATE TABLE pred.forecasts ( PRIMARY KEY (forecast_uuid), UNIQUE (geometry_uuid, source_type_id, forecaster_id, init_time_utc) ); -CREATE INDEX ON pred.forecasts USING GIST (target_period); +CREATE INDEX idx_forecasts_filter ON pred.forecasts USING GIST (geometry_uuid, source_type_id, target_period); /* * Table to store predicted generation values. diff --git a/internal/server/postgres/sql/queries/predictions.sql b/internal/server/postgres/sql/queries/predictions.sql index ed50d17..f9ccbce 100644 --- a/internal/server/postgres/sql/queries/predictions.sql +++ b/internal/server/postgres/sql/queries/predictions.sql @@ -106,23 +106,33 @@ INSERT INTO pred.predicted_generation_values ( * and source type made by all forecasters. Only forecasts that are older than the pivot time * minus the specified horizon are considered. */ +WITH latest_forecasts AS ( + SELECT DISTINCT ON (f.forecaster_id) + f.forecast_uuid, + f.init_time_utc, + f.source_type_id, + f.geometry_uuid, + f.forecaster_id + FROM pred.forecasts AS f + WHERE f.geometry_uuid = $1 + AND f.source_type_id = $2 + AND f.init_time_utc + <= sqlc.arg(pivot_timestamp)::TIMESTAMP - MAKE_INTERVAL(mins => sqlc.arg(horizon_mins)::INTEGER) + AND f.target_period @> sqlc.arg(pivot_timestamp)::TIMESTAMP + ORDER BY + f.forecaster_id ASC, + f.forecast_uuid DESC +) +-- Only join to forecaster table to sort by name once forecasts have been filtered SELECT DISTINCT ON (fr.forecaster_name) - f.forecast_uuid, - f.init_time_utc, - f.source_type_id, - f.geometry_uuid, + lf.*, fr.forecaster_name, fr.forecaster_version, - UUIDV7_EXTRACT_TIMESTAMP(f.forecast_uuid) AS created_at_utc -FROM pred.forecasts AS f + UUIDV7_EXTRACT_TIMESTAMP(lf.forecast_uuid) AS created_at_utc +FROM latest_forecasts AS lf INNER JOIN pred.forecasters AS fr USING (forecaster_id) -WHERE f.geometry_uuid = $1 - AND f.source_type_id = $2 - AND f.init_time_utc <= sqlc.arg(pivot_timestamp)::TIMESTAMP - MAKE_INTERVAL(mins => sqlc.arg(horizon_mins)::INTEGER) - AND f.target_period @> sqlc.arg(pivot_timestamp)::TIMESTAMP ORDER BY - fr.forecaster_name ASC, - f.init_time_utc DESC; + fr.forecaster_name ASC; -- name: ListForecasts :many /* ListForecasts retrieves all the forecasts for a given location, source type, and forecaster From 89315bc48adbb235dc5b7c7fc2e7d9d06b69ca0a Mon Sep 17 00:00:00 2001 From: devsjc <47188100+devsjc@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:10:00 +0000 Subject: [PATCH 2/3] feat(sql): Enable pg_cron --- Makefile | 2 +- internal/server/postgres/infra/Containerfile | 3 +- .../server/postgres/infra/init_partman_bgw.sh | 53 ------------------- .../server/postgres/infra/init_postgis.sh | 17 ------ internal/server/postgres/package_test.go | 8 ++- .../sql/migrations/00002_locations.sql | 10 +++- .../sql/migrations/00003_observations.sql | 12 +++++ .../sql/migrations/00004_predictions.sql | 1 + 8 files changed, 30 insertions(+), 76 deletions(-) delete mode 100644 internal/server/postgres/infra/init_partman_bgw.sh delete mode 100644 internal/server/postgres/infra/init_postgis.sh diff --git a/Makefile b/Makefile index 9745d49..7aea39d 100644 --- a/Makefile +++ b/Makefile @@ -197,7 +197,7 @@ gen.proto.python: ${PROTOC} .PHONY: run.db # Run an instance of Postgres with the required extensions run.db: docker build -f internal/server/postgres/infra/Containerfile internal/server/postgres/infra -t data-platform-pgdb:local - docker run --rm -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres -p "5400:5432" data-platform-pgdb:local + docker run --rm -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres -p "5400:5432" data-platform-pgdb:local postgres -c 'shared_preload_libraries=pg_cron' -c 'cron.database_name=postgres' .PHONY: run.notebook # Run a python notebook to inspect the API run.notebook: gen.proto.python diff --git a/internal/server/postgres/infra/Containerfile b/internal/server/postgres/infra/Containerfile index b9b2a6c..be7f97e 100644 --- a/internal/server/postgres/infra/Containerfile +++ b/internal/server/postgres/infra/Containerfile @@ -6,7 +6,6 @@ RUN apt-get update \ && apt-get install -y --no-install-recommends \ postgresql-16-postgis-3 \ postgresql-16-postgis-3-scripts \ + && apt-get install -y --no-install-recommends postgresql-16-cron \ && rm -rf /var/lib/apt/lists/* -COPY init_postgis.sh /docker-entrypoint-initdb.d/10_init_postgis.sh -COPY init_partman_bgw.sh /docker-entrypoint-initdb.d/99_init_partman_bgw.sh diff --git a/internal/server/postgres/infra/init_partman_bgw.sh b/internal/server/postgres/infra/init_partman_bgw.sh deleted file mode 100644 index 2261112..0000000 --- a/internal/server/postgres/infra/init_partman_bgw.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash -# /docker-entrypoint-initdb.d/99_init_partman_bgw.sh - -# Exit immediately if a command exits with a non-zero status. -set -e - -echo "[+] Configuring pg_partman background worker (BGW) in postgresql.conf" - -CONF_FILE="${PGDATA}/postgresql.conf" -LIB_NAME='pg_partman_bgw' - -# Check if the library is already configured (idempotency) -if grep -Eq "^shared_preload_libraries\s*=\s*'.*${LIB_NAME}" "$CONF_FILE"; then - echo " '${LIB_NAME}' already found in shared_preload_libraries." -# Check if shared_preload_libraries line exists and is commented out -elif grep -q "^#shared_preload_libraries" "$CONF_FILE"; then - echo " Uncommenting shared_preload_libraries and adding '${LIB_NAME}'" - sed -ri "s!^#shared_preload_libraries\s*=\s*'?(.*?)'?\s*(#.*)?!shared_preload_libraries = '${LIB_NAME}' \2!" "$CONF_FILE" - echo " Successfully uncommented and set '${LIB_NAME}'." -# Check if shared_preload_libraries exists and is uncommented -elif grep -q "^shared_preload_libraries" "$CONF_FILE"; then - echo " Appending '${LIB_NAME}' to existing shared_preload_libraries..." - # Use the safer method: extract current value, build new value, replace line. - # 1. Extract current value (content inside the quotes) - current_val=$(grep "^shared_preload_libraries" "$CONF_FILE" | sed -n "s/^shared_preload_libraries\s*=\s*'\([^']*\)'.*/\1/p") - # 2. Construct new value - if [ -z "$current_val" ] || [ "$current_val" = '' ]; then - # If current value is empty, just set the new library - new_val="'${LIB_NAME}'" - else - # If current value exists, append the new library with a comma - new_val="'${current_val},${LIB_NAME}'" - fi - # 3. Replace the whole line using the new value - sed -ri "s!^shared_preload_libraries\s*=.*!shared_preload_libraries = ${new_val}!" "$CONF_FILE" - echo " Successfully appended '${LIB_NAME}'." -else - # shared_preload_libraries line does not exist, add it - echo " Adding shared_preload_libraries setting with '${LIB_NAME}'..." - echo "" >> "$CONF_FILE" # Add a newline for separation - echo "shared_preload_libraries = '${LIB_NAME}'" >> "$CONF_FILE" - echo " Successfully added '${LIB_NAME}'." -fi - -export PGUSER="$POSTGRES_USER" -# Load pg_partman into $POSTGRES_DB -for DB in "$POSTGRES_DB"; do - echo "Loading pg_partman extensions into $DB" - "${psql[@]}" --dbname="$DB" <<-'EOSQL' - CREATE SCHEMA IF NOT EXISTS partman; - CREATE EXTENSION IF NOT EXISTS pg_partman WITH SCHEMA partman; -EOSQL -done diff --git a/internal/server/postgres/infra/init_postgis.sh b/internal/server/postgres/infra/init_postgis.sh deleted file mode 100644 index cc9876f..0000000 --- a/internal/server/postgres/infra/init_postgis.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh -set -e -export PGUSER="$POSTGRES_USER" -# Create the 'template_postgis' template db -"${psql[@]}" <<- 'EOSQL' -CREATE DATABASE template_postgis IS_TEMPLATE true; -EOSQL -# Load PostGIS into both template_database and $POSTGRES_DB -for DB in template_postgis "$POSTGRES_DB"; do - echo "[+] Loading PostGIS extensions into $DB" - "${psql[@]}" --dbname="$DB" <<-'EOSQL' - CREATE EXTENSION IF NOT EXISTS postgis; - CREATE EXTENSION IF NOT EXISTS postgis_topology; - CREATE EXTENSION IF NOT EXISTS fuzzystrmatch; - CREATE EXTENSION IF NOT EXISTS postgis_tiger_geocoder; -EOSQL -done diff --git a/internal/server/postgres/package_test.go b/internal/server/postgres/package_test.go index 26ffd37..844c85e 100644 --- a/internal/server/postgres/package_test.go +++ b/internal/server/postgres/package_test.go @@ -58,7 +58,13 @@ func setupTestMain(ctx context.Context, m *testing.M) (code int, err error) { "POSTGRES_PASSWORD": "postgres", "POSTGRES_DB": "postgres", }, - Cmd: []string{"postgres", "-c", "fsync=off"}, + Cmd: []string{ + "postgres", + "-c", + "fsync=off", + "-c", + "shared_preload_libraries=pg_cron", + }, ExposedPorts: []string{"5432/tcp"}, WaitingFor: wait.ForAll( wait.ForLog( diff --git a/internal/server/postgres/sql/migrations/00002_locations.sql b/internal/server/postgres/sql/migrations/00002_locations.sql index 883e480..7463f2d 100644 --- a/internal/server/postgres/sql/migrations/00002_locations.sql +++ b/internal/server/postgres/sql/migrations/00002_locations.sql @@ -16,9 +16,12 @@ * One geometry can have multiple sources, e.g. the UK nation geometry can have solar, wind, etc. */ -CREATE SCHEMA loc; CREATE EXTENSION IF NOT EXISTS btree_gist; -CREATE EXTENSION IF NOT EXISTS postgis; +CREATE EXTENSION IF NOT EXISTS postgis WITH SCHEMA public; +CREATE SCHEMA IF NOT EXISTS topology; +CREATE EXTENSION IF NOT EXISTS postgis_topology WITH SCHEMA topology; + +CREATE SCHEMA loc; /*- Lookups -----------------------------------------------------------------------------------*/ @@ -164,3 +167,6 @@ CREATE INDEX ON loc.sources_mv USING gist (sys_period); -- +goose Down DROP SCHEMA loc CASCADE; +DROP EXTENSION IF EXISTS postgis_topology; +DROP EXTENSION IF EXISTS postgis; +DROP EXTENSION IF EXISTS btree_gist; diff --git a/internal/server/postgres/sql/migrations/00003_observations.sql b/internal/server/postgres/sql/migrations/00003_observations.sql index fa69247..5c00203 100644 --- a/internal/server/postgres/sql/migrations/00003_observations.sql +++ b/internal/server/postgres/sql/migrations/00003_observations.sql @@ -8,6 +8,10 @@ * with these providers provide access to the data in order to test the accuracy of predictions. */ +CREATE SCHEMA IF NOT EXISTS partman; +CREATE EXTENSION IF NOT EXISTS pg_partman WITH SCHEMA partman; +CREATE EXTENSION IF NOT EXISTS pg_cron; + CREATE SCHEMA obs; /*- Tables ----------------------------------------------------------------------------------*/ @@ -83,6 +87,14 @@ SET infinite_time_partitions = TRUE WHERE parent_table = 'obs.observed_generation_values'; SELECT partman.run_maintenance('obs.observed_generation_values'); +-- Schedule regular maintenance for the partitioned observed generation values table. +SELECT cron.schedule('partman-maintenance', '@hourly', $$CALL partman.run_maintenance_proc()$$); + -- +goose Down +SELECT cron.unschedule('partman-maintenance'); DROP SCHEMA obs CASCADE; + +DROP EXTENSION IF EXISTS pg_cron CASCADE; +DROP EXTENSION IF EXISTS pg_partman CASCADE; +DROP SCHEMA IF EXISTS partman CASCADE; diff --git a/internal/server/postgres/sql/migrations/00004_predictions.sql b/internal/server/postgres/sql/migrations/00004_predictions.sql index d47c093..59505a4 100644 --- a/internal/server/postgres/sql/migrations/00004_predictions.sql +++ b/internal/server/postgres/sql/migrations/00004_predictions.sql @@ -200,3 +200,4 @@ SELECT partman.run_maintenance('pred.predicted_generation_values'); -- +goose Down DROP SCHEMA pred CASCADE; + From c9122bf6c64d4c7e636c3a20fbc912308de8e9bf Mon Sep 17 00:00:00 2001 From: devsjc <47188100+devsjc@users.noreply.github.com> Date: Mon, 19 Jan 2026 16:43:12 +0000 Subject: [PATCH 3/3] fix(sql): Clean up cron table --- internal/server/postgres/sql/migrations/00003_observations.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/server/postgres/sql/migrations/00003_observations.sql b/internal/server/postgres/sql/migrations/00003_observations.sql index 5c00203..0a2bc8d 100644 --- a/internal/server/postgres/sql/migrations/00003_observations.sql +++ b/internal/server/postgres/sql/migrations/00003_observations.sql @@ -89,6 +89,7 @@ WHERE parent_table = 'obs.observed_generation_values'; SELECT partman.run_maintenance('obs.observed_generation_values'); -- Schedule regular maintenance for the partitioned observed generation values table. SELECT cron.schedule('partman-maintenance', '@hourly', $$CALL partman.run_maintenance_proc()$$); +SELECT cron.schedule('cron-details-cleanup', '0 12 * * *', $$DELETE FROM cron.job_run_details WHERE end_time < now() - interval '7 days'$$); -- +goose Down