diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a464c7..c89b3fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Removed +## [1.4.5] - 2026-05-06 + +### Changed + +- Updated IATI Design System to 4.9.0 + +### Fixed + +- Bug where the dataset's cached URLs were not being blanked after dataset expiry. (Resolves #137) +- Bug where `most_recent_head_attempt.error_occurred` was being set to `null` instead of `false`. (Resolves #136). + ## [1.4.4] - 2026-04-22 ### Added diff --git a/README.md b/README.md index 11a87e3..6da6280 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ The `.env` file is used when running things locally to store environment variabl Running the app successfully requires a Postgres database and a connection to an Azure blob storage account. There is a docker compose setup which can be used to start an instance of each service locally, that can be run with: -``` +```bash docker compose up -d ``` @@ -69,18 +69,35 @@ The example `.env` file (`.env-example`) is configured to use the above docker c Once the docker compose setup is running, you can run the dataset updater part of the app with (this will download the datasets and upload them to Azurite): -``` +```bash dotenv run python src/iati_bulk_data_service.py -- --operation checker --single-run --run-for-n-datasets=50 ``` You can run the zipper operation with: -``` +```bash dotenv run python src/iati_bulk_data_service.py -- --operation zipper --single-run ``` It will store the ZIP files in the directory defined in the `ZIP_WORKING_DIR` environment variable. 
+The full range of command line arguments is listed below: + +``` +usage: iati_bulk_data_service.py [-h] --operation {checker,zipper,registry-changes-processor} [--single-run] [--run-for-n-datasets RUN_FOR_N_DATASETS] [--run-for-single-reporting-org RUN_FOR_SINGLE_REPORTING_ORG] [--skip-safety] + +options: + -h, --help show this help message and exit + --operation {checker,zipper,registry-changes-processor} + Operation to run: checker, zipper, registry-changes-processor + --single-run Perform a single run, then exit + --run-for-n-datasets RUN_FOR_N_DATASETS + Run on the first N datasets from registration service (useful for testing) + --run-for-single-reporting-org RUN_FOR_SINGLE_REPORTING_ORG + Run only for the datasets belonging to the specified reporting org short name (useful for testing) + --skip-safety Skip safety checks during the run (useful for testing) +``` + To shutdown the docker compose setup, use (the Azure Service Bus emulator appears to be a bit sensitive to Ctrl-C shutdowns, so always best to shutdown with `docker compose down`): diff --git a/db-migrations/20260505_01_7kh1j.rollback.sql b/db-migrations/20260505_01_7kh1j.rollback.sql new file mode 100644 index 0000000..73e7585 --- /dev/null +++ b/db-migrations/20260505_01_7kh1j.rollback.sql @@ -0,0 +1,5 @@ +alter table iati_datasets + alter column most_recent_head_attempt_error_occurred drop default; + +alter table iati_datasets + alter column most_recent_get_attempt_error_occurred drop default; diff --git a/db-migrations/20260505_01_7kh1j.sql b/db-migrations/20260505_01_7kh1j.sql new file mode 100644 index 0000000..1e731e4 --- /dev/null +++ b/db-migrations/20260505_01_7kh1j.sql @@ -0,0 +1,16 @@ +-- +-- depends: 20250827_01_Dt6Ow + +alter table iati_datasets + alter column most_recent_head_attempt_error_occurred set default false; + +alter table iati_datasets + alter column most_recent_get_attempt_error_occurred set default false; + +update iati_datasets + set 
most_recent_head_attempt_error_occurred = false + where most_recent_head_attempt_error_occurred is null; + +update iati_datasets + set most_recent_get_attempt_error_occurred = false + where most_recent_get_attempt_error_occurred is null; diff --git a/pyproject.toml b/pyproject.toml index b7ab0e5..4a485dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "bulk-data-service" -version = "1.4.4" +version = "1.4.5" requires-python = ">= 3.12.6" readme = "README.md" dependencies = [ diff --git a/src/bulk_data_service/dataset.py b/src/bulk_data_service/dataset.py index d28370b..d4ac228 100644 --- a/src/bulk_data_service/dataset.py +++ b/src/bulk_data_service/dataset.py @@ -43,9 +43,11 @@ def create_empty_dataset() -> dict[str, Any]: empty_ds = { k: None for k in DATASET_REGISTRATION_FIELDS + DATASET_NON_REGISTRATION_FIELDS - } # type: dict[str, str | None] + } # type: dict[str, str | bool | None] empty_ds["most_recent_get_attempt_error_details"] = make_http_attempt_error_details() + empty_ds["most_recent_get_attempt_error_occurred"] = False empty_ds["most_recent_head_attempt_error_details"] = make_http_attempt_error_details() + empty_ds["most_recent_head_attempt_error_occurred"] = False return empty_ds diff --git a/src/bulk_data_service/dataset_remover.py b/src/bulk_data_service/dataset_remover.py index 3137183..c605321 100644 --- a/src/bulk_data_service/dataset_remover.py +++ b/src/bulk_data_service/dataset_remover.py @@ -77,6 +77,10 @@ def remove_download_for_expired_dataset( "last good download from Bulk Data Service".format(bds_dataset["id"], max_hours) ) + bds_dataset["last_known_good_dataset_cached_dataset_xml_url"] = None + bds_dataset["last_known_good_dataset_cached_dataset_xml_etag"] = None + bds_dataset["last_known_good_dataset_cached_dataset_zip_url"] = None + bds_dataset["last_known_good_dataset_cached_dataset_zip_etag"] = None bds_dataset["last_known_good_dataset_downloaded"] = None bds_dataset["last_known_good_dataset_hash"] = 
None bds_dataset["last_known_good_dataset_hash_excluding_generated_timestamp"] = None diff --git a/tests/integration/test_dataset_add.py b/tests/integration/test_dataset_add.py index c17226f..bbf47e4 100644 --- a/tests/integration/test_dataset_add.py +++ b/tests/integration/test_dataset_add.py @@ -101,6 +101,8 @@ def test_add_downloadable_dataset_for_various_encodings( check_most_recent_http_attempt_for_success("get", datasets_in_bds[dataset_id]) + assert datasets_in_bds[dataset_id]["most_recent_head_attempt_error_occurred"] is False + check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id]) check_dataset_fields( diff --git a/tests/integration/test_dataset_expiry.py b/tests/integration/test_dataset_expiry.py index d90bb8c..d847eb1 100644 --- a/tests/integration/test_dataset_expiry.py +++ b/tests/integration/test_dataset_expiry.py @@ -36,6 +36,11 @@ def test_dataset_expiry_after_72_hours_failed_downloads(get_and_clear_up_context dataset = datasets_in_bds[uuid.UUID("c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159")] assert len(datasets_in_bds) == 1 + + assert dataset["last_known_good_dataset_cached_dataset_xml_url"] is None + assert dataset["last_known_good_dataset_cached_dataset_xml_etag"] is None + assert dataset["last_known_good_dataset_cached_dataset_zip_url"] is None + assert dataset["last_known_good_dataset_cached_dataset_zip_etag"] is None assert dataset["last_known_good_dataset_downloaded"] is None assert dataset["last_known_good_dataset_hash"] is None assert dataset["last_known_good_dataset_hash_excluding_generated_timestamp"] is None diff --git a/tests/unit/test_dataset_registration.py b/tests/unit/test_dataset_registration.py index be1ae91..00c9d77 100644 --- a/tests/unit/test_dataset_registration.py +++ b/tests/unit/test_dataset_registration.py @@ -5,6 +5,7 @@ import pytest +from bulk_data_service.dataset import create_empty_dataset from dataset_registration.iati_registry_ckan import clean_datasets_metadata, convert_datasets_metadata @@ -42,6 +43,12 
@@ def test_incomplete_necessary_data_from_ckan(field_blanker, attribute_value): assert(len(ckan_datasets) == 0) +def test_create_empty_dataset_error_occurred_defaults_to_false(): + ds = create_empty_dataset() + assert ds["most_recent_head_attempt_error_occurred"] is False + assert ds["most_recent_get_attempt_error_occurred"] is False + + @pytest.mark.parametrize("resources_value", [None, [], {"url": None}]) def test_missing_url_from_ckan(resources_value): diff --git a/web/404.html b/web/404.html index 494ec48..d1f2bba 100644 --- a/web/404.html +++ b/web/404.html @@ -4,7 +4,7 @@ IATI Bulk Data Service - + diff --git a/web/index-template.html b/web/index-template.html index 874d784..5ca98a1 100644 --- a/web/index-template.html +++ b/web/index-template.html @@ -4,7 +4,7 @@ IATI Bulk Data Service - +