From 5f1b531a860c9891e1f605bc0b1ebf7be0b59626 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 6 May 2026 10:05:30 +0100 Subject: [PATCH 1/6] Add regex check for file version --- checksit/data/specs/groups/ncas-amof-1.0.0/amof-file-name.yml | 1 + checksit/data/specs/groups/ncas-amof-1.1.0/amof-file-name.yml | 1 + checksit/data/specs/groups/ncas-amof-2.0.0/amof-file-name.yml | 1 + checksit/data/specs/groups/ncas-amof-2.1.0/amof-file-name.yml | 1 + checksit/data/specs/groups/ncas-amof-2.2.0/amof-file-name.yml | 1 + 5 files changed, 5 insertions(+) diff --git a/checksit/data/specs/groups/ncas-amof-1.0.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-1.0.0/amof-file-name.yml index 5b49889..48b279c 100644 --- a/checksit/data/specs/groups/ncas-amof-1.0.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-1.0.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/1.0.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-general-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform diff --git a/checksit/data/specs/groups/ncas-amof-1.1.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-1.1.0/amof-file-name.yml index 53915f1..e186a9d 100644 --- a/checksit/data/specs/groups/ncas-amof-1.1.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-1.1.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/1.1.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-general-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform diff --git a/checksit/data/specs/groups/ncas-amof-2.0.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-2.0.0/amof-file-name.yml index c392120..793003f 100644 --- a/checksit/data/specs/groups/ncas-amof-2.0.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-2.0.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/2.0.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-general-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform diff --git a/checksit/data/specs/groups/ncas-amof-2.1.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-2.1.0/amof-file-name.yml index 223ae69..146b4ec 100644 --- a/checksit/data/specs/groups/ncas-amof-2.1.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-2.1.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/2.1.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-general-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform diff --git a/checksit/data/specs/groups/ncas-amof-2.2.0/amof-file-name.yml b/checksit/data/specs/groups/ncas-amof-2.2.0/amof-file-name.yml index 4bf2c74..62b45d5 100644 --- a/checksit/data/specs/groups/ncas-amof-2.2.0/amof-file-name.yml +++ b/checksit/data/specs/groups/ncas-amof-2.2.0/amof-file-name.yml @@ -5,4 +5,5 @@ file-name-format: instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ data_product: __vocabs__:AMF_CVs/2.2.0/AMF_product:product rule_checks: + file_version: regex-rule:ncas-radar-file-version platform: rule-func:ceda-platform||rule-func-warning:ncas-platform From ddc5b7e73039ac313d35977306c98e20c7524686 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 6 May 2026 10:05:54 +0100 Subject: [PATCH 2/6] Add regex for ncas file versions --- checksit/rules/rules.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/checksit/rules/rules.py b/checksit/rules/rules.py index ed403d7..dc9148f 100644 --- a/checksit/rules/rules.py +++ b/checksit/rules/rules.py @@ -118,6 +118,14 @@ def __init__(self): "regex-rule": r"[^@\s]+@ncas.ac.uk", "example": "sam.jones@ncas.ac.uk", }, + "ncas-general-file-version": { + "regex-rule": r"v[0-9]+(\.[0-9]+)", + "example": "v1.0", + }, + "ncas-radar-file-version": { + "regex-rule": r"v[0-9]+(\.[0-9]+){2,}", + "example": "v1.0.0", + }, } def _map_type_rule(self, type_rule: str) -> type: From c9bc4bef5728918449e1b41bd6ff27fdc6791b7a Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 6 May 2026 10:06:31 +0100 Subject: [PATCH 3/6] Add file name checks for ncas-radar --- checksit/check.py | 1 + .../data/specs/groups/ncas-radar-1.0.0/file-name.yml | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 checksit/data/specs/groups/ncas-radar-1.0.0/file-name.yml diff --git a/checksit/check.py b/checksit/check.py index faf65eb..cda9954 100644 --- a/checksit/check.py +++ b/checksit/check.py @@ -481,6 +481,7 @@ def _get_ncas_specs( version_number = f"{version_number}.0" template = "off" spec_names = [ + "file-name", "coordinate-variables", "dimensions", "global-attrs", diff --git a/checksit/data/specs/groups/ncas-radar-1.0.0/file-name.yml b/checksit/data/specs/groups/ncas-radar-1.0.0/file-name.yml new file mode 100644 index 0000000..d9a479b --- /dev/null +++ b/checksit/data/specs/groups/ncas-radar-1.0.0/file-name.yml @@ -0,0 +1,9 @@ +file-name-format: + func: checksit.generic.check_file_name + params: + vocab_checks: + instrument: __URL__raw.githubusercontent.com/ncasuk/ncas-data-instrument-vocabs/__latest__/AMF_CVs/AMF_ncas_instrument.json:ncas_instrument:__all__ + rule_checks: + data_product: rule-func:match-one-of:birdbath|vol|rhi|ppi|radar-velocity + platform: rule-func:ceda-platform||rule-func-warning:ncas-platform + file_version: regex-rule:ncas-radar-file-version From 4557e1ae82a441469809035b1e11ee388680c3e6 Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 6 May 2026 10:07:03 +0100 Subject: [PATCH 4/6] Change data product and file version checks in ncas file names --- checksit/generic.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/checksit/generic.py b/checksit/generic.py index 3753558..acfe095 100644 --- a/checksit/generic.py +++ b/checksit/generic.py @@ -737,16 +737,39 @@ def check_file_name( errors.append( f"[file name]: Invalid file name format - unknown data product '{file_name_parts[3]}'" ) + elif "data_product" in rule_checks.keys(): + dp_rules_check = rules.check( + rule_checks["data_product"], + file_name_parts[3], + label="[file name]: Invalid file name format -", + ) + if dp_rules_check != ([], []): + rule_errors, rule_warnings = dp_rules_check + if rule_errors != []: + errors.extend(rule_errors) + if rule_warnings != []: + warnings.extend(rule_warnings) else: msg = "No data product vocab defined in specs" raise KeyError(msg) # check version number format version_component = file_name_parts[-1].split(".nc")[0] - if not re.match(r"^v\d.\d$", version_component): - errors.append( - f"[file name]: Invalid file name format - incorrect file version number '{version_component}'" + if "file_version" in rule_checks.keys(): + file_version_check = rules.check( + rule_checks["file_version"], + version_component, + label="[file name]: Invalid file name format -", ) + if file_version_check != ([], []): + rule_errors, rule_warnings = file_version_check + if rule_errors != []: + errors.extend(rule_errors) + if rule_warnings != []: + warnings.extend(rule_warnings) + else: + msg = "No file version rule defined in specs" + raise KeyError(msg) # check number of options - max length of splitted file name if len(file_name_parts) > 8: From 70a9833f9264a31f60e56e7c17b1fab0d00cd6bb Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 6 May 2026 10:15:34 +0100 Subject: [PATCH 5/6] Add new regex rules to docs --- docs/source/dev/where_does_checksit_do_it.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/dev/where_does_checksit_do_it.rst b/docs/source/dev/where_does_checksit_do_it.rst index f014590..d9071f7 100644 --- a/docs/source/dev/where_does_checksit_do_it.rst +++ b/docs/source/dev/where_does_checksit_do_it.rst @@ -144,6 +144,10 @@ checks, managed by the ``Rules`` class in ``checksit/rules/rules.py``. There are - ``r"-?\d+(\.\d+)?\sm"`` * - "ncas-email" - ``r"[^@\s]+@ncas.ac.uk"`` + * - "ncas-general-file-version" + - ``r"v[0-9]+(\.[0-9]+)"`` + * - "ncas-radar-file-version" + - ``r"v[0-9]+(\.[0-9]+){2,}"`` where ``NOT_APPLICABLE_RULES`` cover phrases such as "Not Available", "Not applicable", "N/A" and From c274c971d89b1516913db09396ec455c4a02e7ab Mon Sep 17 00:00:00 2001 From: Joshua Hampton Date: Wed, 6 May 2026 10:32:12 +0100 Subject: [PATCH 6/6] Add file-version to test spec --- tests/test_generic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index efe0ebf..623ea3a 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -411,7 +411,8 @@ def test_check_file_name(): "data_product": "__vocabs__:tests/test_products:test_products" } rule_checks = { - "platform": "rule-func:match-one-of:plat1|plat2" + "platform": "rule-func:match-one-of:plat1|plat2", + "file_version": r"regex:^v[0-9]+(\.[0-9]+)$", } file_name = "inst3_plat1_20220101_prod1_v1.0.nc" errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks) @@ -445,7 +446,7 @@ def test_check_file_name(): # Test that the function correctly identifies invalid version number format file_name = "inst1_plat1_20220101_prod1_v10.nc" errors, warnings = cg.check_file_name(file_name, vocab_checks, rule_checks) - assert errors == ["[file name]: Invalid file name format - incorrect file version number 'v10'"] + assert errors == ["[file name]: Invalid file name format - Value 'v10' does not match regular expression: '^v[0-9]+(\\.[0-9]+)$'."] assert warnings == [] # Test that the function correctly identifies too many options in file name