Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
> Read this before making any changes to the codebase.
> If any instruction in this file is unclear, ambiguous, or conflicts with the repository state, stop and ask the human maintainer before proceeding.

See also: [Shared AI agent instructions for the YINI project family](../AGENTS.md)

## Project Overview

- **Name:** yini-parser-python
Expand Down
68 changes: 64 additions & 4 deletions src/yini_parser/api/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from pathlib import Path
from typing import Any

from antlr4 import CommonTokenStream, InputStream
from antlr4 import CommonTokenStream, InputStream, Token
from antlr4.error.ErrorListener import ErrorListener

from yini_parser.api.errors import YiniParseError

Expand Down Expand Up @@ -39,15 +40,28 @@ def load(path: str | Path, strict: bool = False) -> dict[str, Any]:

def _parse_input_stream(input_stream: InputStream, strict: bool) -> dict[str, Any]:
lexer = YiniLexer(input_stream)
lexer_errors = _SyntaxErrorCollector()
lexer.removeErrorListeners()
lexer.addErrorListener(lexer_errors)

stream = CommonTokenStream(lexer)
_normalize_shebang_comment_tokens(stream)

parser = YiniParser(stream)
parser_errors = _SyntaxErrorCollector()
parser.removeErrorListeners()
parser.addErrorListener(parser_errors)

tree = parser.yini()

if parser.getNumberOfSyntaxErrors() > 0:
# raise ValueError(f"Failed to parse YINI input: {parser.getNumberOfSyntaxErrors()} syntax error(s).")
syntax_errors = lexer_errors.errors + parser_errors.errors
if syntax_errors:
line, column, message = syntax_errors[0]
raise YiniParseError(
f"Failed to parse YINI input: {parser.getNumberOfSyntaxErrors()} syntax error(s)."
f"Failed to parse YINI input: {len(syntax_errors)} syntax error(s). "
f"First error: {message}",
line=line,
column=column,
)

visitor = YiniBuilderVisitor(strict=strict)
Expand All @@ -66,3 +80,49 @@ def _ensure_final_newline(text: str) -> str:
return text + "\n"

return text


class _SyntaxErrorCollector(ErrorListener):
    """
    Error listener that records syntax errors instead of reporting them.

    Every error passed to ``syntaxError`` is kept in ``errors`` as a
    ``(line, column, message)`` tuple, in the order it was reported. The
    stored column is the reported column plus one.
    """

    def __init__(self) -> None:
        super().__init__()
        # One (line, column, message) entry per reported syntax error.
        self.errors: list[tuple[int, int, str]] = []

    def syntaxError(  # noqa: N802
        self,
        recognizer: Any,
        offendingSymbol: Any,
        line: int,
        column: int,
        msg: str,
        e: Exception | None,
    ) -> None:
        """
        Store one reported syntax error.

        Only ``line``, ``column`` and ``msg`` are used; the recognizer, the
        offending symbol and the exception are ignored.
        """
        # ANTLR hands over a zero-based column; store it one-based.
        shifted_column = column + 1
        entry = (line, shifted_column, msg)
        self.errors.append(entry)


def _normalize_shebang_comment_tokens(stream: CommonTokenStream) -> None:
    """
    Retype non-leading shebang tokens as newline tokens.

    The stream is fully buffered, then scanned once. EOF and NL tokens are
    skipped. A SHEBANG token that is the first remaining token keeps its
    type; a SHEBANG token seen after any other non-skipped token (including
    an earlier shebang) has its type changed to NL. The stream is rewound to
    index 0 afterwards so parsing starts from the beginning.
    """

    stream.fill()

    # True until the first token that is neither EOF nor NL has been seen.
    still_leading = True

    for tok in stream.tokens:
        kind = tok.type

        if kind in (Token.EOF, YiniLexer.NL):
            continue

        if kind == YiniLexer.SHEBANG and not still_leading:
            tok.type = YiniLexer.NL

        # Any token reaching this point ends the leading region.
        still_leading = False

    stream.seek(0)
40 changes: 38 additions & 2 deletions src/yini_parser/core/yini_builder_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# src/yini_parser/core/yini_builder_visitor.py
from __future__ import annotations

import re
import warnings
from typing import Any

Expand All @@ -13,12 +14,16 @@
from ..grammar.generated.YiniParser import YiniParser
from ..grammar.generated.YiniParserVisitor import YiniParserVisitor
from ..utils.antlr import ctx_location
from ..utils.text import strip_backticks

from .section_headers import parse_section_head
from .value_decoders import decode_string_token, parse_number_literal
from .validator import YiniValidator


_SIMPLE_KEY_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")


class YiniBuilderVisitor(YiniParserVisitor):
"""
Builds a Python dictionary from the parsed YINI tree.
Expand Down Expand Up @@ -46,6 +51,7 @@ def __init__(self, strict: bool = False) -> None:
self._section_names: list[str] = []
self._ignored_section_level: int | None = None
self._top_level_section_count = 0
self._seen_content = False
self._validator = YiniValidator(strict=strict)
# self._root_member_count = 0

Expand Down Expand Up @@ -83,6 +89,7 @@ def visit_yini(self, ctx: YiniParser.YiniContext) -> dict[str, Any]:
def visit_stmt(self, ctx: YiniParser.StmtContext) -> Any:
section_token = ctx.SECTION_HEAD()
if section_token is not None:
self._seen_content = True
symbol = section_token.getSymbol()
line = symbol.line
column = symbol.column + 1
Expand Down Expand Up @@ -131,6 +138,7 @@ def visit_stmt(self, ctx: YiniParser.StmtContext) -> Any:

assignment_ctx = ctx.assignment()
if assignment_ctx is not None:
self._seen_content = True
return self.visit(assignment_ctx)

bad_member_ctx = ctx.bad_member()
Expand Down Expand Up @@ -175,6 +183,15 @@ def visit_directive(self, ctx: YiniParser.DirectiveContext) -> None:
return None

def visit_yini_directive(self, ctx: YiniParser.Yini_directiveContext) -> None:
line, column = ctx_location(ctx)

if self._seen_content:
raise YiniParseError(
"@yini directives must appear before sections or members.",
line=line,
column=column,
)

mode_ctx = ctx.yini_mode_declaration()

# Plain @yini is valid and declares no mode.
Expand Down Expand Up @@ -291,7 +308,7 @@ def visit_assignment(self, ctx: YiniParser.AssignmentContext) -> None:
return None

def visit_member(self, ctx: YiniParser.MemberContext) -> tuple[str, Any]:
key = ctx.KEY().getText()
key = self._decode_key_token(ctx.KEY(), description="key")
value_ctx = ctx.value()

if value_ctx is None:
Expand Down Expand Up @@ -535,7 +552,7 @@ def visit_object_member(
- The canonical inline object member separator remains `:`.
"""

key = ctx.KEY().getText()
key = self._decode_key_token(ctx.KEY(), description="inline object key")
value = self.visit(ctx.value())

separator_ctx = ctx.object_member_separator()
Expand Down Expand Up @@ -590,6 +607,25 @@ def _current_container(self) -> dict[str, Any]:
return self._section_stack[-1]
return self._root

def _decode_key_token(self, token: Any, *, description: str) -> str:
    """
    Return the key text of ``token`` with backticks stripped.

    A key whose raw text starts with a backtick is accepted as-is. Any
    other key must match ``_SIMPLE_KEY_RE`` (a letter or underscore followed
    by letters, digits or underscores).

    Raises:
        YiniParseError: if a non-backticked key does not match the simple
            key pattern. The error carries the token's line and its column
            plus one; ``description`` names the kind of key in the message.
    """
    text = token.getText()
    decoded = strip_backticks(text)

    if text.startswith("`") or _SIMPLE_KEY_RE.fullmatch(text):
        return decoded

    where = token.getSymbol()
    raise YiniParseError(
        f"Invalid {description} {text!r}. "
        "Use letters, digits, and underscores, and do not start with a digit. "
        "Use backticks for keys that need spaces or punctuation.",
        line=where.line,
        column=where.column + 1,
    )

def _enter_section_with_parsed(
self,
level: int,
Expand Down
34 changes: 34 additions & 0 deletions tests/test_comments.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from __future__ import annotations

import pytest

from yini_parser.api.errors import YiniParseError
from yini_parser.api.load import loads


Expand Down Expand Up @@ -236,3 +239,34 @@ def test_parses_full_line_semicolon_comment() -> None:
"name": "Demo App",
},
}


def test_shebang_after_yini_directive_is_treated_as_comment() -> None:
    """A shebang line placed after ``@yini`` leaves the parsed result unchanged."""
    source = """
@yini
#!/usr/bin/env yini

^ App
name = "Demo App"
""".lstrip()

    expected = {"App": {"name": "Demo App"}}

    assert loads(source) == expected


def test_rejects_unterminated_block_comment() -> None:
    """A ``/*`` block comment that is never closed raises ``YiniParseError``."""
    source = """
@yini

^ App
name = "Demo App"
/* This block comment never closes.
""".lstrip()

    with pytest.raises(YiniParseError):
        loads(source)
80 changes: 80 additions & 0 deletions tests/test_keys.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from __future__ import annotations

import pytest

from yini_parser.api.errors import YiniParseError
from yini_parser.api.load import loads


def test_backticked_member_keys_decode_to_inner_text() -> None:
    """Backticked member keys appear in the result without their backticks."""
    source = """
^ Keys
`Description of Project` = "val"
`Amanda's Project` = "owned"
`Owner Team` = "Core"
""".lstrip()

    expected_keys = {
        "Description of Project": "val",
        "Amanda's Project": "owned",
        "Owner Team": "Core",
    }

    assert loads(source) == {"Keys": expected_keys}


def test_empty_backticked_member_key_is_allowed() -> None:
    """An empty pair of backticks is accepted and yields the empty-string key."""
    source = """
^ Keys
`` = "val"
name = "empty backticked key"
""".lstrip()

    expected_keys = {
        "": "val",
        "name": "empty backticked key",
    }

    assert loads(source) == {"Keys": expected_keys}


def test_backticked_inline_object_keys_decode_to_inner_text() -> None:
    """Backticked keys inside an inline object lose their backticks too."""
    source = """
^ App
labels = { `Display Name`: "Demo", `Owner Team`: "Core" }
""".lstrip()

    expected_labels = {
        "Display Name": "Demo",
        "Owner Team": "Core",
    }

    assert loads(source) == {"App": {"labels": expected_labels}}


def test_rejects_plain_key_starting_with_digit() -> None:
    """An unquoted key beginning with a digit fails with an "Invalid key" error."""
    source = """
^ Keys
1key = "val"
""".lstrip()

    with pytest.raises(YiniParseError, match="Invalid key"):
        loads(source)


def test_rejects_plain_key_with_dot() -> None:
    """An unquoted key containing a dot fails with an "Invalid key" error."""
    source = """
^ Keys
my.key = "val"
""".lstrip()

    with pytest.raises(YiniParseError, match="Invalid key"):
        loads(source)
25 changes: 25 additions & 0 deletions tests/test_parser_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,28 @@ def test_yini_mode_declaration_rejects_unknown_mode() -> None:

with pytest.raises(YiniParseError):
loads(text)


def test_yini_directive_after_member_is_rejected() -> None:
    """An ``@yini`` directive placed after a member raises ``YiniParseError``."""
    source = """
^ App
name = "Demo App"

@yini
""".lstrip()

    with pytest.raises(YiniParseError, match="@yini directives must appear"):
        loads(source)


def test_yini_directive_after_section_is_rejected() -> None:
    """An ``@yini`` directive placed after a section header raises ``YiniParseError``."""
    source = """
^ App

@yini

name = "Demo App"
""".lstrip()

    with pytest.raises(YiniParseError, match="@yini directives must appear"):
        loads(source)
Loading