From 8c23309c50b2a086f53615476797057116a98547 Mon Sep 17 00:00:00 2001 From: Gerit Wagner Date: Fri, 3 Apr 2026 09:18:11 +0200 Subject: [PATCH] add flag indicating mismatch between documentation and functionality --- docs/generate_indices.py | 2 + docs/source/dev_docs/linter_development.rst | 4 ++ search_query/constants.py | 56 ++++++++++++++++++++- test/wos/test_wos_parser_v_1.py | 2 +- 4 files changed, 61 insertions(+), 3 deletions(-) diff --git a/docs/generate_indices.py b/docs/generate_indices.py index 9d55e737..259e38a1 100644 --- a/docs/generate_indices.py +++ b/docs/generate_indices.py @@ -56,6 +56,8 @@ def generate_rst_file(error: QueryErrorCode) -> None: "", f"**Message**: ``{error.message}``", "", + f"**Documentation / interface conformance**: ``{error.docs_interface_flag}``", + "", error.docs.strip() if error.docs.strip() else "**Description**: " + error.message, diff --git a/docs/source/dev_docs/linter_development.rst b/docs/source/dev_docs/linter_development.rst index ac69c24f..5f3e4fc1 100644 --- a/docs/source/dev_docs/linter_development.rst +++ b/docs/source/dev_docs/linter_development.rst @@ -19,6 +19,10 @@ Each linter must override the `validate_tokens()` method and the `validate_query Best practices -------------- - **Use standardized linter messages** defined in `constants.QueryErrorCode`. + Each message tuple includes a documentation/interface conformance flag: + ``DOCS_INTERFACE_OK`` when implementation behavior conforms to documented rules, and + ``DOCS_INTERFACE_MISMATCH`` when diagnostics enforce a documented rule that does not + fully match observed database behavior. - **Add details** in messages for guidance (e.g., invalid format, missing logic). - Ensure **valid token sequences** using the `VALID_TOKEN_SEQUENCES` dictionary. - Consider using **utility methods** provided by `linter_base.py`: diff --git a/search_query/constants.py b/search_query/constants.py index 3bd861af..adf7f101 100644 --- a/search_query/constants.py +++ b/search_query/constants.py @@ -54,6 +54,8 @@ class ListTokenTypes(Enum): GENERAL_ERROR_POSITION = -1 +DOCS_INTERFACE_OK = "docs-interface-ok" +DOCS_INTERFACE_MISMATCH = "docs-interface-mismatch" @dataclass @@ -208,6 +210,7 @@ class QueryErrorCode(Enum): "EBSCO_0001", "wildcard-unsupported", "Unsupported wildcard in search string.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -229,6 +232,7 @@ class QueryErrorCode(Enum): "EBSCO_0002", "invalid-character", "Search term contains invalid character", + DOCS_INTERFACE_OK, "", ) @@ -245,6 +249,7 @@ class QueryErrorCode(Enum): "FIELD_0001", "field-unsupported", "Search field is not supported for this database", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -265,6 +270,7 @@ class QueryErrorCode(Enum): "FIELD_0002", "field-missing", "Search field is missing", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -285,6 +291,7 @@ class QueryErrorCode(Enum): "FIELD_0003", "field-extracted", "Recommend explicitly specifying the search field in the string", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -309,6 +316,7 @@ class QueryErrorCode(Enum): "FIELD_0004", "field-implicit", "Search field is implicitly specified", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -337,6 +345,7 @@ class QueryErrorCode(Enum): "LINT_2001", "deprecated-syntax", "deprecated-syntax", + DOCS_INTERFACE_OK, """This message indicates that the query uses deprecated syntax. **Typical fix**: Update the query to use the latest syntax by running @@ -354,12 +363,14 @@ class QueryErrorCode(Enum): "PARSE_0001", "tokenizing-failed", "Fatal error during tokenization", + DOCS_INTERFACE_OK, """**Typical fix**: Check the query syntax and ensure it is correctly formatted.""", ) UNBALANCED_PARENTHESES = ( "PARSE_0002", "unbalanced-parentheses", "Parentheses are unbalanced in the query", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -380,6 +391,7 @@ class QueryErrorCode(Enum): "PARSE_0003", "unbalanced-quotes", "Quotes are unbalanced in the query", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -403,6 +415,7 @@ class QueryErrorCode(Enum): # Note: provide details like # ([token_type] followed by [token_type] is not allowed) "The sequence of tokens is invalid." "", + DOCS_INTERFACE_OK, """**Problematic query**: .. code-block:: texts @@ -423,6 +436,7 @@ class QueryErrorCode(Enum): "PARSE_0006", "invalid-syntax", "Query contains invalid syntax", + DOCS_INTERFACE_OK, """**Problematic query**: .. code-block:: text @@ -449,6 +463,7 @@ class QueryErrorCode(Enum): "PARSE_0007", "query-in-quotes", "The whole Search string is in quotes.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -469,6 +484,7 @@ class QueryErrorCode(Enum): "PARSE_0008", "unsupported-prefix", "Unsupported prefix in search query", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -489,6 +505,7 @@ class QueryErrorCode(Enum): "PARSE_0009", "unsupported-suffix", "Unsupported suffix in search query", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -510,6 +527,7 @@ class QueryErrorCode(Enum): "PARSE_0010", "unsupported-prefix-platform-identifier", "Query starts with platform identifier", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -531,6 +549,7 @@ class QueryErrorCode(Enum): "PARSE_1001", "list-query-missing-root-node", "List format query without root node (typically containing operators)", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -554,6 +573,7 @@ class QueryErrorCode(Enum): "PARSE_1002", "list-query-invalid-reference", "Invalid list reference in list query", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -587,6 +607,7 @@ class QueryErrorCode(Enum): "PUBMED_0001", "nested-query-with-field", "A Nested query cannot have a search field.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -608,6 +629,7 @@ class QueryErrorCode(Enum): "PUBMED_0002", "character-replacement", "Character replacement", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -629,6 +651,7 @@ class QueryErrorCode(Enum): "PUBMED_0003", "invalid-wildcard-use", "Invalid use of the wildcard operator *", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -654,6 +677,7 @@ class QueryErrorCode(Enum): "QUALITY_0001", "query-structure-unnecessarily-complex", "Query structure is more complex than necessary", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -684,6 +708,7 @@ class QueryErrorCode(Enum): "QUALITY_0002", "date-filter-in-subquery", "Date filter in subquery", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -707,6 +732,7 @@ class QueryErrorCode(Enum): "QUALITY_0003", "journal-filter-in-subquery", "Journal (or publication name) filter in subquery", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -728,6 +754,7 @@ class QueryErrorCode(Enum): "QUALITY_0004", "unnecessary-parentheses", "Unnecessary parentheses in queries", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -749,6 +776,7 @@ class QueryErrorCode(Enum): "QUALITY_0005", "redundant-term", "Redundant term in the query", + DOCS_INTERFACE_OK, """ **Problematic query (AND)**: @@ -793,6 +821,7 @@ class QueryErrorCode(Enum): "QUALITY_0006", "potential-wildcard-use", "Potential wildcard use", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -816,6 +845,7 @@ class QueryErrorCode(Enum): "STRUCT_0001", "implicit-precedence", "Operator changed at the same level (explicit parentheses are recommended)", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -838,6 +868,7 @@ class QueryErrorCode(Enum): "STRUCT_0002", "operator-capitalization", "Operators should be capitalized", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -859,6 +890,7 @@ class QueryErrorCode(Enum): "STRUCT_0003", "boolean-operator-readability", "Boolean operator readability", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -877,6 +909,7 @@ class QueryErrorCode(Enum): "STRUCT_0004", "invalid-proximity-use", "Invalid use of the proximity operator", + DOCS_INTERFACE_OK, """ Proximity operators must have a non-negative integer as the distance. @@ -902,6 +935,7 @@ class QueryErrorCode(Enum): "TERM_0001", "non-standard-quotes", "Non-standard quotes", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -922,6 +956,7 @@ class QueryErrorCode(Enum): "TERM_0002", "year-format-invalid", "Invalid year format.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -942,6 +977,7 @@ class QueryErrorCode(Enum): "TERM_0003", "doi-format-invalid", "Invalid DOI format.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -962,6 +998,7 @@ class QueryErrorCode(Enum): "TERM_0004", "isbn-format-invalid", "Invalid ISBN format.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -998,6 +1035,7 @@ class QueryErrorCode(Enum): "WOS_0001", "too-many-terms", "Too many search terms in the query", + DOCS_INTERFACE_OK, """ **Explanation:** The query contains too many search terms, which may lead to performance issues or exceed platform limits. @@ -1022,7 +1060,8 @@ class QueryErrorCode(Enum): NEAR_DISTANCE_TOO_LARGE = ( "WOS_0002", "near-distance-too-large", - "NEAR distance is too large (max: 15).", + "NEAR distance is too large (max: 15). Note: This is based on WOS documentation (TODO:URL). The web interface accepts larger NEAR distances.", + DOCS_INTERFACE_MISMATCH, """ **Problematic query**: @@ -1044,6 +1083,7 @@ class QueryErrorCode(Enum): "WOS_0003", "year-without-terms", "A search for publication years must include at least another search term.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1065,6 +1105,7 @@ class QueryErrorCode(Enum): "WOS_0004", "implicit-near-value", "The value of NEAR operator is implicit", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1086,6 +1127,7 @@ class QueryErrorCode(Enum): "WOS_0005", "year-span-violation", "Year span must be five or less.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1107,6 +1149,7 @@ class QueryErrorCode(Enum): "WOS_0006", "wildcard-in-year", "Wildcard characters (*, ?, $) not supported in year search.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1127,6 +1170,7 @@ class QueryErrorCode(Enum): "WOS_0007", "wildcard-left-short-length", "Left-hand wildcard must be followed by at least three characters.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1147,6 +1191,7 @@ class QueryErrorCode(Enum): "WOS_0008", "wildcard-right-short-length", "Right-hand wildcard must preceded by at least three characters.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1169,6 +1214,7 @@ class QueryErrorCode(Enum): "WOS_0009", "wildcard-after-special-char", "Wildcard cannot be preceded by special characters.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1189,6 +1235,7 @@ class QueryErrorCode(Enum): "WOS_0010", "wildcard-standalone", "Wildcard cannot be standalone.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1210,6 +1257,7 @@ class QueryErrorCode(Enum): "WOS_0011", "wildcard-unsupported", "Unsupported wildcard in search string.", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1230,6 +1278,7 @@ class QueryErrorCode(Enum): "WOS_0012", "invalid-character", "Search term contains invalid character", + DOCS_INTERFACE_OK, """ **Problematic query**: @@ -1249,8 +1298,11 @@ class QueryErrorCode(Enum): ) # pylint: disable=too-many-arguments - def __init__(self, code: str, label: str, message: str, docs: str) -> None: + def __init__( + self, code: str, label: str, message: str, docs_interface_flag: str, docs: str + ) -> None: self.code = code self.label = label self.message = message + self.docs_interface_flag = docs_interface_flag self.docs = docs diff --git a/test/wos/test_wos_parser_v_1.py b/test/wos/test_wos_parser_v_1.py index 5689e325..f4a926f0 100644 --- a/test/wos/test_wos_parser_v_1.py +++ b/test/wos/test_wos_parser_v_1.py @@ -349,7 +349,7 @@ def test_tokenization(query_str: str, expected_tokens: list) -> None: { "code": "WOS_0002", "label": "near-distance-too-large", - "message": "NEAR distance is too large (max: 15).", + "message": "NEAR distance is too large (max: 15). Note: This is based on WOS documentation (TODO:URL). The web interface accepts larger NEAR distances.", "is_fatal": True, "position": [(9, 16)], "details": "NEAR distance 20 is larger than the maximum allowed value of 15.",