From d14296da03bc0634894bda7be69a78e502204236 Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Tue, 5 May 2026 15:28:59 +0100 Subject: [PATCH 1/9] test: make dataset expiry test check all fields --- tests/integration/test_dataset_expiry.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/test_dataset_expiry.py b/tests/integration/test_dataset_expiry.py index d90bb8c..d847eb1 100644 --- a/tests/integration/test_dataset_expiry.py +++ b/tests/integration/test_dataset_expiry.py @@ -36,6 +36,11 @@ def test_dataset_expiry_after_72_hours_failed_downloads(get_and_clear_up_context dataset = datasets_in_bds[uuid.UUID("c8a40aa5-9f31-4bcf-a36f-51c1fc2cc159")] assert len(datasets_in_bds) == 1 + + assert dataset["last_known_good_dataset_cached_dataset_xml_url"] is None + assert dataset["last_known_good_dataset_cached_dataset_xml_etag"] is None + assert dataset["last_known_good_dataset_cached_dataset_zip_url"] is None + assert dataset["last_known_good_dataset_cached_dataset_zip_etag"] is None assert dataset["last_known_good_dataset_downloaded"] is None assert dataset["last_known_good_dataset_hash"] is None assert dataset["last_known_good_dataset_hash_excluding_generated_timestamp"] is None From 312ed1cf3b7801cc088a17f57a8b4a4e161feeaf Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Tue, 5 May 2026 15:30:11 +0100 Subject: [PATCH 2/9] fix: ensure cache URL/ETag fields blanked expiry This commit ensures that when the cached copy of a dataset expires due to failure to download the dataset over the specified period of time that the cached URL and ETag fields are blanked out. 
Resolves https://github.com/IATI/bulk-data-service/issues/137 --- src/bulk_data_service/dataset_remover.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/bulk_data_service/dataset_remover.py b/src/bulk_data_service/dataset_remover.py index 3137183..c605321 100644 --- a/src/bulk_data_service/dataset_remover.py +++ b/src/bulk_data_service/dataset_remover.py @@ -77,6 +77,10 @@ def remove_download_for_expired_dataset( "last good download from Bulk Data Service".format(bds_dataset["id"], max_hours) ) + bds_dataset["last_known_good_dataset_cached_dataset_xml_url"] = None + bds_dataset["last_known_good_dataset_cached_dataset_xml_etag"] = None + bds_dataset["last_known_good_dataset_cached_dataset_zip_url"] = None + bds_dataset["last_known_good_dataset_cached_dataset_zip_etag"] = None bds_dataset["last_known_good_dataset_downloaded"] = None bds_dataset["last_known_good_dataset_hash"] = None bds_dataset["last_known_good_dataset_hash_excluding_generated_timestamp"] = None From d6eb1711c2a0781c2b54af935420bf763c3f5dd6 Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Wed, 6 May 2026 09:37:50 +0100 Subject: [PATCH 3/9] test: checks *_error_occurred flags created false This commit adds two tests which check that the creation of a dataset record sets the most_recent_*_attempt_error_occurred flags to false, and that after a successful check for a dataset the most_recent_head_attempt_error_occurred flag is false (the get flag was already checked). 
--- tests/integration/test_dataset_add.py | 2 ++ tests/unit/test_dataset_registration.py | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/tests/integration/test_dataset_add.py b/tests/integration/test_dataset_add.py index c17226f..bbf47e4 100644 --- a/tests/integration/test_dataset_add.py +++ b/tests/integration/test_dataset_add.py @@ -101,6 +101,8 @@ def test_add_downloadable_dataset_for_various_encodings( check_most_recent_http_attempt_for_success("get", datasets_in_bds[dataset_id]) + assert datasets_in_bds[dataset_id]["most_recent_head_attempt_error_occurred"] is False + check_last_known_good_dataset_values_are_set(datasets_in_bds[dataset_id]) check_dataset_fields( diff --git a/tests/unit/test_dataset_registration.py b/tests/unit/test_dataset_registration.py index be1ae91..00c9d77 100644 --- a/tests/unit/test_dataset_registration.py +++ b/tests/unit/test_dataset_registration.py @@ -5,6 +5,7 @@ import pytest +from bulk_data_service.dataset import create_empty_dataset from dataset_registration.iati_registry_ckan import clean_datasets_metadata, convert_datasets_metadata @@ -42,6 +43,12 @@ def test_incomplete_necessary_data_from_ckan(field_blanker, attribute_value): assert(len(ckan_datasets) == 0) +def test_create_empty_dataset_error_occurred_defaults_to_false(): + ds = create_empty_dataset() + assert ds["most_recent_head_attempt_error_occurred"] is False + assert ds["most_recent_get_attempt_error_occurred"] is False + + @pytest.mark.parametrize("resources_value", [None, [], {"url": None}]) def test_missing_url_from_ckan(resources_value): From 65f75a7056b1ba5e416ac7e85f0c87cfe650a38a Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Wed, 6 May 2026 09:41:18 +0100 Subject: [PATCH 4/9] fix: adds db migrations to set error flag columns This DB migration both sets a default of false for the error flags and sets all existing null values to false (if there are any existing values with null, it is because the relevant check 
hasn't been done over the given time frame, so no error has occurred). --- db-migrations/20260505_01_7kh1j.rollback.sql | 5 +++++ db-migrations/20260505_01_7kh1j.sql | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 db-migrations/20260505_01_7kh1j.rollback.sql create mode 100644 db-migrations/20260505_01_7kh1j.sql diff --git a/db-migrations/20260505_01_7kh1j.rollback.sql b/db-migrations/20260505_01_7kh1j.rollback.sql new file mode 100644 index 0000000..73e7585 --- /dev/null +++ b/db-migrations/20260505_01_7kh1j.rollback.sql @@ -0,0 +1,5 @@ +alter table iati_datasets + alter column most_recent_head_attempt_error_occurred drop default; + +alter table iati_datasets + alter column most_recent_get_attempt_error_occurred drop default; diff --git a/db-migrations/20260505_01_7kh1j.sql b/db-migrations/20260505_01_7kh1j.sql new file mode 100644 index 0000000..1e731e4 --- /dev/null +++ b/db-migrations/20260505_01_7kh1j.sql @@ -0,0 +1,16 @@ +-- +-- depends: 20250827_01_Dt6Ow + +alter table iati_datasets + alter column most_recent_head_attempt_error_occurred set default false; + +alter table iati_datasets + alter column most_recent_get_attempt_error_occurred set default false; + +update iati_datasets + set most_recent_head_attempt_error_occurred = false + where most_recent_head_attempt_error_occurred is null; + +update iati_datasets + set most_recent_get_attempt_error_occurred = false + where most_recent_get_attempt_error_occurred is null; From 7584f2d3477361d3db7108e206a14f82cd60f514 Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Wed, 6 May 2026 09:43:00 +0100 Subject: [PATCH 5/9] fix: create new datasets with error flags = false This commit changes the dataset creation code so that a new dataset object has its error occurred flags set to false. Resolves #136. 
--- src/bulk_data_service/dataset.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bulk_data_service/dataset.py b/src/bulk_data_service/dataset.py index d28370b..d4ac228 100644 --- a/src/bulk_data_service/dataset.py +++ b/src/bulk_data_service/dataset.py @@ -43,9 +43,11 @@ def create_empty_dataset() -> dict[str, Any]: empty_ds = { k: None for k in DATASET_REGISTRATION_FIELDS + DATASET_NON_REGISTRATION_FIELDS - } # type: dict[str, str | None] + } # type: dict[str, str | bool | None] empty_ds["most_recent_get_attempt_error_details"] = make_http_attempt_error_details() + empty_ds["most_recent_get_attempt_error_occurred"] = False empty_ds["most_recent_head_attempt_error_details"] = make_http_attempt_error_details() + empty_ds["most_recent_head_attempt_error_occurred"] = False return empty_ds From 70e3c74aacc951edaea9c2ddd9b257ecf84e7fe0 Mon Sep 17 00:00:00 2001 From: Simon K <6615834+simon-20@users.noreply.github.com> Date: Wed, 6 May 2026 09:50:16 +0100 Subject: [PATCH 6/9] feat: update IATI Design System to 4.9.0 --- web/404.html | 2 +- web/index-template.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/web/404.html b/web/404.html index 494ec48..d1f2bba 100644 --- a/web/404.html +++ b/web/404.html @@ -4,7 +4,7 @@