From f421face52e3b4d3e214c9af245317799af470a3 Mon Sep 17 00:00:00 2001 From: "Marko K. S." Date: Sun, 28 Jun 2026 02:07:26 +0200 Subject: [PATCH] Fix: Improve YINI parser error reporting and strictness Enhances the parser by validating plain keys, correctly decoding backticked keys, and enforcing proper placement of `@yini` directives and shebangs. Provides more precise syntax error messages with line and column details. --- AGENTS.md | 2 + src/yini_parser/api/load.py | 68 ++++++++++++++++- src/yini_parser/core/yini_builder_visitor.py | 40 +++++++++- tests/test_comments.py | 34 +++++++++ tests/test_keys.py | 80 ++++++++++++++++++++ tests/test_parser_mode.py | 25 ++++++ 6 files changed, 243 insertions(+), 6 deletions(-) create mode 100644 tests/test_keys.py diff --git a/AGENTS.md b/AGENTS.md index 49d713b..0b1f129 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,6 +4,8 @@ > Read this before making any changes to the codebase. > If any instruction in this file is unclear, ambiguous, or conflicts with the repository state, stop and ask the human maintainer before proceeding. +See also: [Shared AI agent instructions for the YINI project family](../AGENTS.md) + ## Project Overview - **Name:** yini-parser-python diff --git a/src/yini_parser/api/load.py b/src/yini_parser/api/load.py index 5ee67fd..17ca398 100644 --- a/src/yini_parser/api/load.py +++ b/src/yini_parser/api/load.py @@ -4,7 +4,8 @@ from pathlib import Path from typing import Any -from antlr4 import CommonTokenStream, InputStream +from antlr4 import CommonTokenStream, InputStream, Token +from antlr4.error.ErrorListener import ErrorListener from yini_parser.api.errors import YiniParseError @@ -39,15 +40,28 @@ def load(path: str | Path, strict: bool = False) -> dict[str, Any]: def _parse_input_stream(input_stream: InputStream, strict: bool) -> dict[str, Any]: lexer = YiniLexer(input_stream) + lexer_errors = _SyntaxErrorCollector() + lexer.removeErrorListeners() + lexer.addErrorListener(lexer_errors) + stream = CommonTokenStream(lexer) + _normalize_shebang_comment_tokens(stream) + parser = YiniParser(stream) + parser_errors = _SyntaxErrorCollector() + parser.removeErrorListeners() + parser.addErrorListener(parser_errors) tree = parser.yini() - if parser.getNumberOfSyntaxErrors() > 0: - # raise ValueError(f"Failed to parse YINI input: {parser.getNumberOfSyntaxErrors()} syntax error(s).") + syntax_errors = lexer_errors.errors + parser_errors.errors + if syntax_errors: + line, column, message = syntax_errors[0] raise YiniParseError( - f"Failed to parse YINI input: {parser.getNumberOfSyntaxErrors()} syntax error(s)." + f"Failed to parse YINI input: {len(syntax_errors)} syntax error(s). " + f"First error: {message}", + line=line, + column=column, ) visitor = YiniBuilderVisitor(strict=strict) @@ -66,3 +80,49 @@ def _ensure_final_newline(text: str) -> str: return text + "\n" return text + + +class _SyntaxErrorCollector(ErrorListener): + def __init__(self) -> None: + super().__init__() + self.errors: list[tuple[int, int, str]] = [] + + def syntaxError( # noqa: N802 + self, + recognizer: Any, + offendingSymbol: Any, + line: int, + column: int, + msg: str, + e: Exception | None, + ) -> None: + self.errors.append((line, column + 1, msg)) + + +def _normalize_shebang_comment_tokens(stream: CommonTokenStream) -> None: + """ + Keep a leading shebang available to the prolog rule, but treat later + shebang-looking lines as ordinary ignored line content. + """ + + stream.fill() + + seen_meaningful_token = False + + for token in stream.tokens: + if token.type == Token.EOF: + continue + + if token.type == YiniLexer.NL: + continue + + if token.type == YiniLexer.SHEBANG: + if seen_meaningful_token: + token.type = YiniLexer.NL + else: + seen_meaningful_token = True + continue + + seen_meaningful_token = True + + stream.seek(0) diff --git a/src/yini_parser/core/yini_builder_visitor.py b/src/yini_parser/core/yini_builder_visitor.py index b9b98d3..f992f9d 100644 --- a/src/yini_parser/core/yini_builder_visitor.py +++ b/src/yini_parser/core/yini_builder_visitor.py @@ -5,6 +5,7 @@ # src/yini_parser/core/yini_builder_visitor.py from __future__ import annotations +import re import warnings from typing import Any @@ -13,12 +14,16 @@ from ..grammar.generated.YiniParser import YiniParser from ..grammar.generated.YiniParserVisitor import YiniParserVisitor from ..utils.antlr import ctx_location +from ..utils.text import strip_backticks from .section_headers import parse_section_head from .value_decoders import decode_string_token, parse_number_literal from .validator import YiniValidator +_SIMPLE_KEY_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") + + class YiniBuilderVisitor(YiniParserVisitor): """ Builds a Python dictionary from the parsed YINI tree. @@ -46,6 +51,7 @@ def __init__(self, strict: bool = False) -> None: self._section_names: list[str] = [] self._ignored_section_level: int | None = None self._top_level_section_count = 0 + self._seen_content = False self._validator = YiniValidator(strict=strict) # self._root_member_count = 0 @@ -83,6 +89,7 @@ def visit_yini(self, ctx: YiniParser.YiniContext) -> dict[str, Any]: def visit_stmt(self, ctx: YiniParser.StmtContext) -> Any: section_token = ctx.SECTION_HEAD() if section_token is not None: + self._seen_content = True symbol = section_token.getSymbol() line = symbol.line column = symbol.column + 1 @@ -131,6 +138,7 @@ def visit_stmt(self, ctx: YiniParser.StmtContext) -> Any: assignment_ctx = ctx.assignment() if assignment_ctx is not None: + self._seen_content = True return self.visit(assignment_ctx) bad_member_ctx = ctx.bad_member() @@ -175,6 +183,15 @@ def visit_directive(self, ctx: YiniParser.DirectiveContext) -> None: return None def visit_yini_directive(self, ctx: YiniParser.Yini_directiveContext) -> None: + line, column = ctx_location(ctx) + + if self._seen_content: + raise YiniParseError( + "@yini directives must appear before sections or members.", + line=line, + column=column, + ) + mode_ctx = ctx.yini_mode_declaration() # Plain @yini is valid and declares no mode. @@ -291,7 +308,7 @@ def visit_assignment(self, ctx: YiniParser.AssignmentContext) -> None: return None def visit_member(self, ctx: YiniParser.MemberContext) -> tuple[str, Any]: - key = ctx.KEY().getText() + key = self._decode_key_token(ctx.KEY(), description="key") value_ctx = ctx.value() if value_ctx is None: @@ -535,7 +552,7 @@ def visit_object_member( - The canonical inline object member separator remains `:`. """ - key = ctx.KEY().getText() + key = self._decode_key_token(ctx.KEY(), description="inline object key") value = self.visit(ctx.value()) separator_ctx = ctx.object_member_separator() @@ -590,6 +607,25 @@ def _current_container(self) -> dict[str, Any]: return self._section_stack[-1] return self._root + def _decode_key_token(self, token: Any, *, description: str) -> str: + raw_key = token.getText() + key = strip_backticks(raw_key) + + if raw_key.startswith("`"): + return key + + if not _SIMPLE_KEY_RE.fullmatch(raw_key): + symbol = token.getSymbol() + raise YiniParseError( + f"Invalid {description} {raw_key!r}. " + "Use letters, digits, and underscores, and do not start with a digit. " + "Use backticks for keys that need spaces or punctuation.", + line=symbol.line, + column=symbol.column + 1, + ) + + return key + def _enter_section_with_parsed( self, level: int, diff --git a/tests/test_comments.py b/tests/test_comments.py index baaeb0b..92d6170 100644 --- a/tests/test_comments.py +++ b/tests/test_comments.py @@ -1,5 +1,8 @@ from __future__ import annotations +import pytest + +from yini_parser.api.errors import YiniParseError from yini_parser.api.load import loads @@ -236,3 +239,34 @@ def test_parses_full_line_semicolon_comment() -> None: "name": "Demo App", }, } + + +def test_shebang_after_yini_directive_is_treated_as_comment() -> None: + text = """ +@yini +#!/usr/bin/env yini + +^ App +name = "Demo App" +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "name": "Demo App", + }, + } + + +def test_rejects_unterminated_block_comment() -> None: + text = """ +@yini + +^ App +name = "Demo App" +/* This block comment never closes. +""".lstrip() + + with pytest.raises(YiniParseError): + loads(text) diff --git a/tests/test_keys.py b/tests/test_keys.py new file mode 100644 index 0000000..15d165a --- /dev/null +++ b/tests/test_keys.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import pytest + +from yini_parser.api.errors import YiniParseError +from yini_parser.api.load import loads + + +def test_backticked_member_keys_decode_to_inner_text() -> None: + text = """ +^ Keys +`Description of Project` = "val" +`Amanda's Project` = "owned" +`Owner Team` = "Core" +""".lstrip() + + result = loads(text) + + assert result == { + "Keys": { + "Description of Project": "val", + "Amanda's Project": "owned", + "Owner Team": "Core", + }, + } + + +def test_empty_backticked_member_key_is_allowed() -> None: + text = """ +^ Keys +`` = "val" +name = "empty backticked key" +""".lstrip() + + result = loads(text) + + assert result == { + "Keys": { + "": "val", + "name": "empty backticked key", + }, + } + + +def test_backticked_inline_object_keys_decode_to_inner_text() -> None: + text = """ +^ App +labels = { `Display Name`: "Demo", `Owner Team`: "Core" } +""".lstrip() + + result = loads(text) + + assert result == { + "App": { + "labels": { + "Display Name": "Demo", + "Owner Team": "Core", + }, + }, + } + + +def test_rejects_plain_key_starting_with_digit() -> None: + text = """ +^ Keys +1key = "val" +""".lstrip() + + with pytest.raises(YiniParseError, match="Invalid key"): + loads(text) + + +def test_rejects_plain_key_with_dot() -> None: + text = """ +^ Keys +my.key = "val" +""".lstrip() + + with pytest.raises(YiniParseError, match="Invalid key"): + loads(text) diff --git a/tests/test_parser_mode.py b/tests/test_parser_mode.py index d3d0193..544cf1a 100644 --- a/tests/test_parser_mode.py +++ b/tests/test_parser_mode.py @@ -155,3 +155,28 @@ def test_yini_mode_declaration_rejects_unknown_mode() -> None: with pytest.raises(YiniParseError): loads(text) + + +def test_yini_directive_after_member_is_rejected() -> None: + text = """ +^ App +name = "Demo App" + +@yini +""".lstrip() + + with pytest.raises(YiniParseError, match="@yini directives must appear"): + loads(text) + + +def test_yini_directive_after_section_is_rejected() -> None: + text = """ +^ App + +@yini + +name = "Demo App" +""".lstrip() + + with pytest.raises(YiniParseError, match="@yini directives must appear"): + loads(text)