diff --git a/.gitignore b/.gitignore index 4fac5f9..3ef5d09 100644 --- a/.gitignore +++ b/.gitignore @@ -111,6 +111,7 @@ ENV/ .codex .claude .agents +PLAN.md # tmp pytest-* diff --git a/.releaserc.json b/.releaserc.json index aa93cc6..e84d9bb 100644 --- a/.releaserc.json +++ b/.releaserc.json @@ -146,6 +146,62 @@ } ], "countMatches": true + }, + { + "files": ["packages/aix/pyproject.toml"], + "from": "version = \".*\" # semantic-release", + "to": "version = \"${nextRelease.version}\" # semantic-release", + "results": [ + { + "file": "packages/aix/pyproject.toml", + "hasChanged": true, + "numMatches": 1, + "numReplacements": 1 + } + ], + "countMatches": true + }, + { + "files": ["packages/aix/pyproject.toml"], + "from": " \"astx == .*\", # semantic-release", + "to": " \"astx == ${nextRelease.version}\", # semantic-release", + "results": [ + { + "file": "packages/aix/pyproject.toml", + "hasChanged": true, + "numMatches": 1, + "numReplacements": 1 + } + ], + "countMatches": true + }, + { + "files": ["packages/aix/pyproject.toml"], + "from": " \"pyirx == .*\", # semantic-release", + "to": " \"pyirx == ${nextRelease.version}\", # semantic-release", + "results": [ + { + "file": "packages/aix/pyproject.toml", + "hasChanged": true, + "numMatches": 1, + "numReplacements": 1 + } + ], + "countMatches": true + }, + { + "files": ["packages/aix/src/aix/__init__.py"], + "from": "return \".*\" # semantic-release", + "to": "return \"${nextRelease.version}\" # semantic-release", + "results": [ + { + "file": "packages/aix/src/aix/__init__.py", + "hasChanged": true, + "numMatches": 1, + "numReplacements": 1 + } + ], + "countMatches": true } ] } @@ -174,7 +230,9 @@ "packages/irx/dist/*.whl", "packages/irx/dist/*.tar.gz", "packages/arx/dist/*.whl", - "packages/arx/dist/*.tar.gz" + "packages/arx/dist/*.tar.gz", + "packages/aix/dist/*.whl", + "packages/aix/dist/*.tar.gz" ] } ], @@ -190,7 +248,9 @@ "packages/irx/pyproject.toml", "packages/irx/src/irx/__init__.py", 
"packages/arx/pyproject.toml", - "packages/arx/src/arx/__init__.py" + "packages/arx/src/arx/__init__.py", + "packages/aix/pyproject.toml", + "packages/aix/src/aix/__init__.py" ], "message": "chore(release): ${nextRelease.version}" } diff --git a/packages/aix/README.md b/packages/aix/README.md new file mode 100644 index 0000000..e176766 --- /dev/null +++ b/packages/aix/README.md @@ -0,0 +1,43 @@ +# AIX + +AIX is an experimental AI-oriented symbolic programming language frontend in the +Arx ecosystem. + +- PyPI distribution: `airx` +- Python import: `aix` +- CLI command: `aix` +- Source extension: `.aix` + +## Usage + +```bash +pip install airx +aix --help +aix --show-tokens examples/fib.aix +aix --show-ast examples/fib.aix +``` + +## MVP syntax + +```aix +∴ fib ⟦ n:ℕ ⟧ → ℕ + ⊢ n ≤ 1 ⇒ n + ⊢ fib⟦n - 1⟧ + fib⟦n - 2⟧ +∎ +``` + +Core forms: + +- `∴` defines a function or constant. +- `⟦...⟧` is used for parameters and calls. +- `→` marks the return type. +- `⊢ expr` returns from a function. +- `⊢ cond ⇒ expr` emits an if-return branch. +- `⌁ name:T ≔ expr` creates a local binding. +- `⟣ expr` emits through the existing `print` builtin. +- `∎` ends a block; `{...}` and `;` support compact inline blocks. +- `κ⟦...⟧` metadata blocks are parsed and ignored in the MVP. +- Comments start with `⍝`. + +Reserved APL-inspired operators such as `⍴`, `⍳`, `¨`, `↑`, `↓`, `⍋`, and `⍒` +are tokenized but intentionally rejected until backend support exists. diff --git a/packages/aix/docs/apl-inspired-operators.md b/packages/aix/docs/apl-inspired-operators.md new file mode 100644 index 0000000..e70a6af --- /dev/null +++ b/packages/aix/docs/apl-inspired-operators.md @@ -0,0 +1,5 @@ +# Reserved APL-inspired operators + +AIX reserves symbols such as `⍴`, `⍳`, `¨`, `∘`, `↑`, `↓`, `⍋`, `⍒`, `∊`, and +`∪`. The lexer recognizes these symbols, but the parser raises a clear +unsupported-feature error in the MVP. 
diff --git a/packages/aix/docs/examples.md b/packages/aix/docs/examples.md new file mode 100644 index 0000000..e0e4670 --- /dev/null +++ b/packages/aix/docs/examples.md @@ -0,0 +1,4 @@ +# AIX examples + +See `packages/aix/examples` for `hello.aix`, `fib.aix`, `compact_fib.aix`, +`bindings.aix`, and `metadata.aix`. diff --git a/packages/aix/docs/grammar.md b/packages/aix/docs/grammar.md new file mode 100644 index 0000000..f6832c3 --- /dev/null +++ b/packages/aix/docs/grammar.md @@ -0,0 +1,11 @@ +# AIX MVP grammar + +```ebnf +program ::= item* EOF ; +item ::= metadata_block? definition ; +definition ::= "∴" identifier "⟦" parameter_list? "⟧" "→" type block ; +block ::= statement* "∎" | "{" statement_list? "}" ; +statement ::= "⊢" expression | "⊢" expression "⇒" expression + | "⌁" identifier (":" type)? "≔" expression + | "⟣" expression | expression ; +``` diff --git a/packages/aix/docs/syntax.md b/packages/aix/docs/syntax.md new file mode 100644 index 0000000..bfa667e --- /dev/null +++ b/packages/aix/docs/syntax.md @@ -0,0 +1,11 @@ +# AIX syntax + +AIX uses symbolic, indentation-insensitive blocks. Pretty layout is encouraged, +but `∎` and `{...}` are the source of truth for block boundaries. 
+ +```aix +∴ main ⟦⟧ → ∅ + ⌁ x:ℕ ≔ 41 + ⟣ x + 1 +∎ +``` diff --git a/packages/aix/examples/bindings.aix b/packages/aix/examples/bindings.aix new file mode 100644 index 0000000..888babc --- /dev/null +++ b/packages/aix/examples/bindings.aix @@ -0,0 +1,4 @@ +∴ main ⟦⟧ → ∅ + ⌁ answer:ℕ ≔ 42 + ⟣ answer +∎ diff --git a/packages/aix/examples/compact_fib.aix b/packages/aix/examples/compact_fib.aix new file mode 100644 index 0000000..0923960 --- /dev/null +++ b/packages/aix/examples/compact_fib.aix @@ -0,0 +1 @@ +∴fib⟦n:ℕ⟧→ℕ{⊢n≤1⇒n;⊢fib⟦n-1⟧+fib⟦n-2⟧} diff --git a/packages/aix/examples/fib.aix b/packages/aix/examples/fib.aix new file mode 100644 index 0000000..2b6dd9e --- /dev/null +++ b/packages/aix/examples/fib.aix @@ -0,0 +1,15 @@ +κ⟦ + ι: fib.recursive.v1, + π: true, + τ: ℕ → ℕ, + χ: recursion +⟧ + +∴ fib ⟦ n:ℕ ⟧ → ℕ + ⊢ n ≤ 1 ⇒ n + ⊢ fib⟦n - 1⟧ + fib⟦n - 2⟧ +∎ + +∴ main ⟦⟧ → ∅ + ⟣ fib⟦10⟧ +∎ diff --git a/packages/aix/examples/hello.aix b/packages/aix/examples/hello.aix new file mode 100644 index 0000000..5e54519 --- /dev/null +++ b/packages/aix/examples/hello.aix @@ -0,0 +1,3 @@ +∴ main ⟦⟧ → ∅ + ⟣ "hello from aix" +∎ diff --git a/packages/aix/examples/metadata.aix b/packages/aix/examples/metadata.aix new file mode 100644 index 0000000..385c50c --- /dev/null +++ b/packages/aix/examples/metadata.aix @@ -0,0 +1,8 @@ +κ⟦ + ι: hello.v1, + χ: example +⟧ + +∴ main ⟦⟧ → ∅ + ⟣ "metadata parsed" +∎ diff --git a/packages/aix/pyproject.toml b/packages/aix/pyproject.toml new file mode 100644 index 0000000..f31375e --- /dev/null +++ b/packages/aix/pyproject.toml @@ -0,0 +1,122 @@ +[project] +name = "airx" +version = "0.1.0" # semantic-release +description = "AIX is an AI-oriented symbolic programming language frontend" +readme = "README.md" +authors = [ + {name = "Ivan Ogasawara", email = "ivan.ogasawara@gmail.com"} +] +license = "Apache-2.0" +requires-python = ">=3.10,<4" +dependencies = [ + "pyyaml >=4", + "astx == 1.23.1", # semantic-release + "pyirx == 1.23.1", # 
semantic-release + "jsonschema (>=4.0.0)", + "packaging >=23", + "types-pyyaml (>=6.0.12.20250516)", + "tomli >=2.0.0 ; python_version < \"3.11\"" +] + +[project.scripts] +"aix" = "aix.__main__:app" + +[build-system] +requires = ["poetry-core>=2"] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +include = [ + "src/aix", + "src/aix/builtins/**/*.aix", + "src/aix/builtins/**/*.x", + "src/aix/schema/*.json", + "src/aix/py.typed", + "src/aix/lexer/syntax.json", + "src/aix/stdlib/**/*.aix", + "src/aix/stdlib/**/*.x", +] +exclude = [ + ".git/*", + ".env*", +] +packages = [ + {include = "aix", from="src"}, +] + +[tool.pytest.ini_options] +testpaths = [ + "tests/python", +] +filterwarnings = [ + "error::RuntimeWarning", + "error::typeguard.TypeHintWarning", +] + +[tool.bandit] +exclude_dirs = ["tests"] +targets = "./" + +[tool.vulture] +exclude = ["tests"] +ignore_decorators = ["abc.abstractmethod"] +ignore_names = [] +make_whitelist = true +min_confidence = 80 +paths = ["./"] +sort_by_size = true +verbose = false + +[tool.ruff] +target-version = "py310" +line-length = 79 +force-exclude = true +src = ["./"] +exclude = [ + 'docs', +] +fix = true + +[tool.ruff.lint] +ignore = [ + "F811", + "PLR0911", + "PLR0912", + "PLR0913", + "RUF012", + "PLR0915", + "PLR2004", + "PLC0415", +] +select = [ + "E", + "F", + "YTT", + "PL", + "RUF", + "I001", +] +extend-select = [ + "UP006", + "UP007", +] + +[tool.ruff.lint.pydocstyle] +convention = "numpy" + +[tool.ruff.lint.isort] +lines-between-types = 1 + +[tool.ruff.format] +quote-style = "double" + +[tool.mypy] +python_version = "3.10" +check_untyped_defs = true +strict = true +ignore_missing_imports = true +warn_unused_ignores = true +warn_redundant_casts = true +warn_unused_configs = true +show_error_codes = true +exclude = ["scripts/"] diff --git a/packages/aix/src/aix/__init__.py b/packages/aix/src/aix/__init__.py new file mode 100644 index 0000000..773bdcb --- /dev/null +++ b/packages/aix/src/aix/__init__.py @@ -0,0 +1,26 
@@ +""" +title: AIX package metadata. +""" + +from importlib import metadata as importlib_metadata + +_DISTRIBUTION_NAME = "airx" + + +def get_version() -> str: + """ + title: Return the installed package version. + returns: + type: str + """ + try: + return importlib_metadata.version(_DISTRIBUTION_NAME) + except importlib_metadata.PackageNotFoundError: # pragma: no cover + return "0.1.0" # semantic-release + + +version: str = get_version() + +__author__: str = "Ivan Ogasawara" +__email__: str = "ivan.ogasawara@gmail.com" +__version__: str = version diff --git a/packages/aix/src/aix/__main__.py b/packages/aix/src/aix/__main__.py new file mode 100644 index 0000000..2057344 --- /dev/null +++ b/packages/aix/src/aix/__main__.py @@ -0,0 +1,8 @@ +""" +title: Entrypoint module, in case you use `python -m aix`. +""" + +from aix.cli import app + +if __name__ == "__main__": + app() diff --git a/packages/aix/src/aix/builtins.py b/packages/aix/src/aix/builtins.py new file mode 100644 index 0000000..a97c7b5 --- /dev/null +++ b/packages/aix/src/aix/builtins.py @@ -0,0 +1,419 @@ +""" +title: Compiler builtin helpers and bundled builtin-module loader. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from importlib.resources import files +from typing import Any, TypeAlias, cast + +import astx +import astx as irx_astx + +Traversable: TypeAlias = Any + +BUILTIN_NAMESPACE = "builtins" +BUILTIN_SOURCE_EXTENSION = ".x" +_AIX_RESOURCE_PACKAGE = "aix" +_BUILTIN_RESOURCE_DIR = "builtins" + +BUILTIN_CAST = "cast" +BUILTIN_DATAFRAME = "dataframe" +BUILTIN_ISINSTANCE = "isinstance" +BUILTIN_PRINT = "print" +BUILTIN_RANGE = "range" +BUILTIN_TYPE = "type" +_GENERATORS_MODULE = f"{BUILTIN_NAMESPACE}.generators" + + +@dataclass(frozen=True) +class BuiltinModuleAsset: + """ + title: One bundled builtin module asset. 
+ attributes: + logical_name: + type: str + specifier: + type: str + origin: + type: str + source: + type: str + is_package: + type: bool + """ + + logical_name: str + specifier: str + origin: str + source: str + is_package: bool + + +@dataclass(frozen=True) +class AmbientBuiltinBinding: + """ + title: One compiler-injected builtin binding. + attributes: + name: + type: str + module: + type: str + """ + + name: str + module: str + + +_AMBIENT_BUILTIN_BINDINGS = ( + AmbientBuiltinBinding(name=BUILTIN_RANGE, module=_GENERATORS_MODULE), +) + +__all__ = [ + "BUILTIN_CAST", + "BUILTIN_DATAFRAME", + "BUILTIN_ISINSTANCE", + "BUILTIN_NAMESPACE", + "BUILTIN_PRINT", + "BUILTIN_RANGE", + "BUILTIN_SOURCE_EXTENSION", + "BUILTIN_TYPE", + "AmbientBuiltinBinding", + "BuiltinModuleAsset", + "build_cast", + "build_isinstance", + "build_print", + "build_type_of", + "get_ambient_builtin_imports", + "get_builtin_source", + "is_builtin", + "is_builtin_module_specifier", + "list_builtin_modules", + "load_builtin_module", + "resolve_builtin_resource", +] + + +def is_builtin(name: str) -> bool: + """ + title: Check whether a function name is a parser-level built-in. + parameters: + name: + type: str + returns: + type: bool + """ + return name in { + BUILTIN_CAST, + BUILTIN_DATAFRAME, + BUILTIN_ISINSTANCE, + BUILTIN_PRINT, + BUILTIN_TYPE, + } + + +def build_cast( + value: astx.DataType, target_type: astx.DataType +) -> irx_astx.Cast: + """ + title: Build an IRx Cast node. + parameters: + value: + type: astx.DataType + target_type: + type: astx.DataType + returns: + type: irx_astx.Cast + """ + return irx_astx.Cast(value=value, target_type=target_type) + + +def build_isinstance( + value: astx.Expr, + target_type: astx.DataType, +) -> irx_astx.IsInstanceExpr: + """ + title: Build an IRx IsInstanceExpr node. 
+ parameters: + value: + type: astx.Expr + target_type: + type: astx.DataType + returns: + type: irx_astx.IsInstanceExpr + """ + return irx_astx.IsInstanceExpr(value=value, target_type=target_type) + + +def build_print(message: astx.Expr) -> irx_astx.PrintExpr: + """ + title: Build an IRx PrintExpr node. + parameters: + message: + type: astx.Expr + returns: + type: irx_astx.PrintExpr + """ + return irx_astx.PrintExpr(message=message) + + +def build_type_of(value: astx.Expr) -> irx_astx.TypeOfExpr: + """ + title: Build an IRx TypeOfExpr node. + parameters: + value: + type: astx.Expr + returns: + type: irx_astx.TypeOfExpr + """ + return irx_astx.TypeOfExpr(value=value) + + +def is_builtin_module_specifier(specifier: str) -> bool: + """ + title: Return whether one specifier targets the bundled builtins. + parameters: + specifier: + type: str + returns: + type: bool + """ + return specifier == BUILTIN_NAMESPACE or specifier.startswith( + f"{BUILTIN_NAMESPACE}." + ) + + +def list_builtin_modules() -> tuple[str, ...]: + """ + title: List bundled builtin module names. + returns: + type: tuple[str, Ellipsis] + """ + module_names: list[str] = [] + seen: set[str] = set() + + for module_name in _iter_builtin_modules(_builtin_root()): + if module_name in seen: + raise LookupError( + f"ambiguous builtin module '{module_name}' in package data" + ) + seen.add(module_name) + module_names.append(module_name) + + return tuple(module_names) + + +def resolve_builtin_resource(module_name: str) -> Traversable: + """ + title: Resolve one builtin logical module name to a packaged resource. + parameters: + module_name: + type: str + returns: + type: Traversable + """ + resource, _relative_path, _is_package = _resolve_builtin_module_entry( + module_name + ) + return resource + + +def get_builtin_source(module_name: str) -> str: + """ + title: Return the bundled source text for one builtin module. 
+ parameters: + module_name: + type: str + returns: + type: str + """ + return cast( + str, + resolve_builtin_resource(module_name).read_text(encoding="utf-8"), + ) + + +def load_builtin_module(specifier: str) -> BuiltinModuleAsset: + """ + title: Load one builtin module from packaged resources. + parameters: + specifier: + type: str + returns: + type: BuiltinModuleAsset + """ + if specifier == BUILTIN_NAMESPACE: + resource = _join_resource(_builtin_root(), "__init__.x") + if not resource.is_file(): + raise LookupError(specifier) + relative_path = f"{_BUILTIN_RESOURCE_DIR}/__init__.x" + logical_name = "" + is_package = True + else: + if not is_builtin_module_specifier(specifier): + raise LookupError(specifier) + logical_name = specifier.removeprefix(f"{BUILTIN_NAMESPACE}.") + resource, relative_path, is_package = _resolve_builtin_module_entry( + logical_name + ) + + return BuiltinModuleAsset( + logical_name=logical_name, + specifier=specifier, + origin=f"{_AIX_RESOURCE_PACKAGE}:{relative_path}", + source=cast(str, resource.read_text(encoding="utf-8")), + is_package=is_package, + ) + + +def get_ambient_builtin_imports( + module_key: str, +) -> tuple[irx_astx.ImportFromStmt, ...]: + """ + title: Build the implicit builtin imports for one module. + parameters: + module_key: + type: str + returns: + type: tuple[irx_astx.ImportFromStmt, Ellipsis] + """ + grouped_aliases: dict[str, list[irx_astx.AliasExpr]] = {} + + for binding in _AMBIENT_BUILTIN_BINDINGS: + if module_key == binding.module: + continue + aliases = grouped_aliases.setdefault(binding.module, []) + aliases.append(irx_astx.AliasExpr(binding.name)) + + imports: list[irx_astx.ImportFromStmt] = [] + for module_name, aliases in grouped_aliases.items(): + imports.append(irx_astx.ImportFromStmt(aliases, module=module_name)) + + return tuple(imports) + + +def _builtin_root() -> Traversable: + """ + title: Return the resource root for bundled builtin assets. 
+ returns: + type: Traversable + """ + return files(_AIX_RESOURCE_PACKAGE).joinpath(_BUILTIN_RESOURCE_DIR) + + +def _iter_builtin_modules( + directory: Traversable, + prefix: tuple[str, ...] = (), +) -> tuple[str, ...]: + """ + title: Recursively collect bundled builtin module names. + parameters: + directory: + type: Traversable + prefix: + type: tuple[str, Ellipsis] + returns: + type: tuple[str, Ellipsis] + """ + module_names: list[str] = [] + + for child in sorted(directory.iterdir(), key=lambda entry: entry.name): + if child.name.startswith(".") or child.name == "__pycache__": + continue + + child_prefix = (*prefix, child.name) + if child.is_dir(): + package_name = ".".join(child_prefix) + init_resource = child.joinpath("__init__.x") + if init_resource.is_file(): + module_names.append(package_name) + module_names.extend(_iter_builtin_modules(child, child_prefix)) + continue + + if child.name == "__init__.x": + continue + if not child.name.endswith(BUILTIN_SOURCE_EXTENSION): + continue + + module_names.append( + ".".join( + (*prefix, child.name.removesuffix(BUILTIN_SOURCE_EXTENSION)) + ) + ) + + return tuple(module_names) + + +def _resolve_builtin_module_entry( + module_name: str, +) -> tuple[Traversable, str, bool]: + """ + title: Resolve one builtin logical module entry. 
+ parameters: + module_name: + type: str + returns: + type: tuple[Traversable, str, bool] + """ + parts = _split_module_name(module_name) + file_parts = (*parts[:-1], f"{parts[-1]}{BUILTIN_SOURCE_EXTENSION}") + init_parts = (*parts, "__init__.x") + + root = _builtin_root() + file_resource = _join_resource(root, *file_parts) + init_resource = _join_resource(root, *init_parts) + file_relative = "/".join((_BUILTIN_RESOURCE_DIR, *file_parts)) + init_relative = "/".join((_BUILTIN_RESOURCE_DIR, *init_parts)) + + has_file = file_resource.is_file() + has_init = init_resource.is_file() + + if has_file and has_init: + raise LookupError( + "ambiguous builtin module " + f"'{module_name}': both '{file_relative}' and " + f"'{init_relative}' exist" + ) + if has_init: + return init_resource, init_relative, True + if has_file: + return file_resource, file_relative, False + raise LookupError(module_name) + + +def _join_resource(root: Traversable, *parts: str) -> Traversable: + """ + title: Join one package resource path from traversable parts. + parameters: + root: + type: Traversable + parts: + type: str + variadic: positional + returns: + type: Traversable + """ + resource = root + for part in parts: + resource = resource.joinpath(part) + return resource + + +def _split_module_name(module_name: str) -> tuple[str, ...]: + """ + title: Validate and split one dotted builtin module name. + parameters: + module_name: + type: str + returns: + type: tuple[str, Ellipsis] + """ + if ( + not module_name + or module_name.startswith(".") + or module_name.endswith(".") + or ".." in module_name + ): + raise LookupError(module_name) + return tuple(module_name.split(".")) diff --git a/packages/aix/src/aix/builtins/__init__.x b/packages/aix/src/aix/builtins/__init__.x new file mode 100644 index 0000000..7b2488a --- /dev/null +++ b/packages/aix/src/aix/builtins/__init__.x @@ -0,0 +1,4 @@ +``` +title: Bundled builtin package +summary: Root namespace for compiler-provided Arx builtin modules. 
+``` diff --git a/packages/aix/src/aix/builtins/generators.x b/packages/aix/src/aix/builtins/generators.x new file mode 100644 index 0000000..f2101d6 --- /dev/null +++ b/packages/aix/src/aix/builtins/generators.x @@ -0,0 +1,27 @@ +``` +title: Bundled generators builtin module +summary: >- + Compiler-provided generator-adjacent helpers shipped inside the Arx package. +``` + +# TODO: Expand this module when iterable generators and `yield` land. +fn range(start: i32, stop: i32, step: i32 = 1) -> list[i32]: + ``` + title: range + summary: >- + Returns the integer values from `start` up to but not including `stop`, + advancing by `step`. Positive steps count up, negative steps count down, + and zero step is rejected. + ``` + assert step != 0, "range() step must not be 0" + var values: list[i32] + var current: i32 = start + if step > 0: + while current < stop: + values.append(current) + current = current + step + else: + while current > stop: + values.append(current) + current = current + step + return values diff --git a/packages/aix/src/aix/cli.py b/packages/aix/src/aix/cli.py new file mode 100644 index 0000000..6e8104a --- /dev/null +++ b/packages/aix/src/aix/cli.py @@ -0,0 +1,196 @@ +""" +title: AIX command-line interface. +""" + +from __future__ import annotations + +import argparse +import sys + +from pathlib import Path +from typing import Sequence + +from aix import __version__ +from aix.main import AixMain + +KNOWN_SUBCOMMANDS: tuple[str, ...] = ("test",) + + +class CustomHelpFormatter(argparse.RawTextHelpFormatter): + """ + title: Formatter for preserving CLI help layout. + """ + + +def get_args() -> argparse.ArgumentParser: + """ + title: Build the AIX CLI argument parser. 
+ returns: + type: argparse.ArgumentParser + """ + parser = argparse.ArgumentParser( + prog="aix", + description="AIX compiler frontend for .aix source files.", + epilog=( + "If you have any problem, open an issue at: " + "https://github.com/arxlang/arx" + ), + add_help=True, + formatter_class=CustomHelpFormatter, + ) + parser.add_argument( + "input_files", + nargs="*", + type=str, + help="The input .aix file(s)", + ) + parser.add_argument( + "--version", + action="store_true", + help="Show the installed AIX package version.", + ) + parser.add_argument("--output-file", type=str, help="The output file") + parser.add_argument( + "--lib", + dest="is_lib", + action="store_true", + help="Build source code as a library.", + ) + parser.add_argument( + "--show-ast", + action="store_true", + help="Show the AST for the input source code.", + ) + parser.add_argument( + "--show-tokens", + action="store_true", + help="Show tokens for the input source code.", + ) + parser.add_argument( + "--show-llvm-ir", + action="store_true", + help="Show LLVM IR for the input source code.", + ) + parser.add_argument( + "--shell", + action="store_true", + help="Open AIX in a shell prompt.", + ) + parser.add_argument( + "--run", + action="store_true", + help="Build and run the compiled binary.", + ) + parser.add_argument( + "--link-mode", + type=str, + choices=("auto", "pie", "no-pie"), + default="auto", + help="Set executable link mode: auto, pie, or no-pie.", + ) + return parser + + +def get_test_args() -> argparse.ArgumentParser: + """ + title: Build parser for `aix test`. 
+ returns: + type: argparse.ArgumentParser + """ + parser = argparse.ArgumentParser( + prog="aix test", + description="Discover and run AIX tests.", + formatter_class=CustomHelpFormatter, + ) + parser.add_argument("paths", nargs="*", type=str, help="Test paths") + parser.add_argument("--list", dest="list_only", action="store_true") + parser.add_argument("-k", dest="name_filter", default="", type=str) + parser.add_argument( + "-x", + "--fail-fast", + dest="fail_fast", + action="store_true", + ) + parser.add_argument("--exclude", dest="exclude", action="append") + parser.add_argument("--file-pattern", dest="file_pattern", default=None) + parser.add_argument( + "--function-pattern", + dest="function_pattern", + default=None, + ) + parser.add_argument("--keep-artifacts", action="store_true") + parser.add_argument( + "--link-mode", + type=str, + choices=("auto", "pie", "no-pie"), + default="auto", + ) + return parser + + +def show_version() -> None: + """ + title: Print package version. + """ + print(__version__) + + +def _looks_like_subcommand_attempt(token: str) -> bool: + if not token or token.startswith("-"): + return False + if token == "run" or token in KNOWN_SUBCOMMANDS: + return False + if "/" in token or "\\" in token: + return False + if "." in token or Path(token).exists(): + return False + return True + + +def app(argv: Sequence[str] | None = None) -> None: + """ + title: Run the AIX CLI. 
+ parameters: + argv: + type: Sequence[str] | None + """ + raw_args = list(sys.argv[1:] if argv is None else argv) + + if raw_args and raw_args[0] == "test": + args = get_test_args().parse_args(raw_args[1:]) + exit_code = AixMain().run_tests(**dict(args._get_kwargs())) + if exit_code != 0: + raise SystemExit(exit_code) + return None + + if raw_args and _looks_like_subcommand_attempt(raw_args[0]): + known = ", ".join(KNOWN_SUBCOMMANDS) + print( + f"aix: unknown command '{raw_args[0]}' " + f"(known subcommands: {known})", + file=sys.stderr, + ) + raise SystemExit(2) + + args_parser = get_args() + args = args_parser.parse_args(raw_args) + + if args.input_files and args.input_files[0] == "run": + args.run = True + args.input_files = args.input_files[1:] + + if args.version: + return show_version() + + if not args.shell and args.input_files: + missing = [ + entry for entry in args.input_files if not Path(entry).is_file() + ] + if missing: + print( + f"aix: input file not found: '{missing[0]}'", + file=sys.stderr, + ) + raise SystemExit(2) + + return AixMain().run(**dict(args._get_kwargs())) diff --git a/packages/aix/src/aix/codegen.py b/packages/aix/src/aix/codegen.py new file mode 100644 index 0000000..974dc67 --- /dev/null +++ b/packages/aix/src/aix/codegen.py @@ -0,0 +1,165 @@ +""" +title: AIX LLVM-IR integration helpers. +""" + +from __future__ import annotations + +import os +import tempfile + +from pathlib import Path +from typing import Literal + +import astx + +from irx.analysis.module_interfaces import ImportResolver, ParsedModule +from irx.builder import Builder as LLVMBuilder +from irx.builder import Visitor as LLVMVisitor +from irx.builder.runtime.linking import link_executable +from llvmlite import binding as llvm + + +class AixVisitor(LLVMVisitor): + """ + title: AIX-specific backend visitor customizations. + """ + + pass + + +class AixBuilder(LLVMBuilder): + """ + title: AIX backend builder with AIX overrides. 
+ attributes: + translator: + type: AixVisitor + """ + + LINK_MODES = {"auto", "pie", "no-pie"} + + def __init__(self) -> None: + """ + title: Initialize AixBuilder. + """ + super().__init__() + self.translator: AixVisitor = self._new_translator() + + def _new_translator(self) -> AixVisitor: + """ + title: Create the AIX visitor. + returns: + type: AixVisitor + """ + return AixVisitor( + active_runtime_features=set(self.runtime_feature_names) + ) + + def _build_from_ir_result( + self, + result: str, + output_file: str, + link: bool, + link_mode: Literal["auto", "pie", "no-pie"], + ) -> None: + """ + title: Materialize LLVM IR into either an object file or executable. + parameters: + result: + type: str + output_file: + type: str + link: + type: bool + link_mode: + type: Literal[auto, pie, no-pie] + """ + result_mod = llvm.parse_assembly(result) + result_object = self.translator.target_machine.emit_object(result_mod) + + self.output_file = output_file + + if not link: + with open(self.output_file, "wb") as file_handler: + file_handler.write(result_object) + return + + if link_mode not in self.LINK_MODES: + raise ValueError( + "Invalid link mode. Expected one of: auto, pie, no-pie." + ) + + extra_linker_flags: tuple[str, ...] 
= () + if link_mode == "pie": + extra_linker_flags = ("-pie",) + elif link_mode == "no-pie": + extra_linker_flags = ("-no-pie",) + + with tempfile.TemporaryDirectory() as temp_dir: + self.tmp_path = temp_dir + file_path_o = Path(temp_dir) / "aix_module.o" + with open(file_path_o, "wb") as file_handler: + file_handler.write(result_object) + + link_executable( + primary_object=file_path_o, + output_file=Path(self.output_file), + artifacts=self.translator.runtime_features.native_artifacts(), + linker_flags=( + *self.translator.runtime_features.linker_flags(), + *extra_linker_flags, + ), + ) + + os.chmod(self.output_file, 0o755) + + def build( + self, + node: astx.AST, + output_file: str, + link: bool = True, + link_mode: Literal["auto", "pie", "no-pie"] = "auto", + ) -> None: + """ + title: >- + Transpile the ASTx to LLVM-IR and build it to an executable file. + parameters: + node: + type: astx.AST + output_file: + type: str + link: + type: bool + link_mode: + type: Literal[auto, pie, no-pie] + """ + result = self.translate(node) + self._build_from_ir_result(result, output_file, link, link_mode) + + def build_modules( + self, + root: ParsedModule, + resolver: ImportResolver, + output_file: str, + link: bool = True, + link_mode: Literal["auto", "pie", "no-pie"] = "auto", + ) -> None: + """ + title: Build a reachable graph of parsed modules. + parameters: + root: + type: ParsedModule + resolver: + type: ImportResolver + output_file: + type: str + link: + type: bool + link_mode: + type: Literal[auto, pie, no-pie] + """ + result = self.translate_modules(root, resolver) + self._build_from_ir_result(result, output_file, link, link_mode) + + +ArxVisitor = AixVisitor +ArxBuilder = AixBuilder diff --git a/packages/aix/src/aix/dataframe.py b/packages/aix/src/aix/dataframe.py new file mode 100644 index 0000000..d2337e4 --- /dev/null +++ b/packages/aix/src/aix/dataframe.py @@ -0,0 +1,248 @@ +""" +title: DataFrame surface helpers for Arx. 
+summary: >- + Adapt Arx surface dataframe syntax to IRx DataFrame nodes while keeping user- + facing schema rules local to Arx. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import cast + +import astx + +from irx.analysis.resolved_nodes import SemanticInfo +from irx.builtins.collections.dataframe import ( + DATAFRAME_SCHEMA_EXTRA, + DataFrameSchema, + dataframe_column_type_is_supported, + schema_from_type, +) + + +@dataclass(frozen=True) +class DataFrameBinding: + """ + title: Static DataFrame binding metadata. + attributes: + schema: + type: DataFrameSchema + """ + + schema: DataFrameSchema + + +def is_dataframe_type(data_type: astx.DataType | None) -> bool: + """ + title: Return whether one type is a DataFrame type. + parameters: + data_type: + type: astx.DataType | None + returns: + type: bool + """ + return isinstance(data_type, astx.DataFrameType) + + +def is_series_type(data_type: astx.DataType | None) -> bool: + """ + title: Return whether one type is a Series type. + parameters: + data_type: + type: astx.DataType | None + returns: + type: bool + """ + return isinstance(data_type, astx.SeriesType) + + +def dataframe_type( + columns: tuple[astx.DataFrameColumn, ...], +) -> astx.DataFrameType: + """ + title: Build one static-schema DataFrame surface type. 
+ parameters: + columns: + type: tuple[astx.DataFrameColumn, Ellipsis] + returns: + type: astx.DataFrameType + """ + if not columns: + raise ValueError("dataframe types require at least one column") + seen: set[str] = set() + for column in columns: + if column.name in seen: + raise ValueError(f"duplicate dataframe column '{column.name}'") + seen.add(column.name) + if column.nullable: + raise ValueError( + "nullable dataframe columns are not supported yet" + ) + if not dataframe_column_type_is_supported(column.type_): + raise ValueError( + "dataframe columns currently support only fixed-width " + "numeric and bool types" + ) + return astx.DataFrameType(columns) + + +def runtime_dataframe_type() -> astx.DataFrameType: + """ + title: Build one runtime-schema DataFrame surface type. + returns: + type: astx.DataFrameType + """ + return astx.DataFrameType() + + +def series_type(element_type: astx.DataType) -> astx.SeriesType: + """ + title: Build one Series surface type. + parameters: + element_type: + type: astx.DataType + returns: + type: astx.SeriesType + """ + if not dataframe_column_type_is_supported(element_type): + raise ValueError( + "series element types currently support only fixed-width " + "numeric and bool types" + ) + return astx.SeriesType(element_type) + + +def binding_from_type( + data_type: astx.DataType | None, +) -> DataFrameBinding | None: + """ + title: Build one static DataFrame binding from one declared type. + parameters: + data_type: + type: astx.DataType | None + returns: + type: DataFrameBinding | None + """ + if not isinstance(data_type, astx.DataFrameType): + return None + schema = schema_from_type(data_type) + if schema is None: + return None + return DataFrameBinding(schema) + + +def attach_binding(node: astx.AST, binding: DataFrameBinding) -> None: + """ + title: Attach static DataFrame metadata to one AST node. 
+ parameters: + node: + type: astx.AST + binding: + type: DataFrameBinding + """ + info = cast(SemanticInfo | None, getattr(node, "semantic", None)) + if info is None or not isinstance(info, SemanticInfo): + info = SemanticInfo() + setattr(node, "semantic", info) + info.extras[DATAFRAME_SCHEMA_EXTRA] = binding.schema + + +def coerce_expression( + expr: astx.Expr, + target_type: astx.DataType, + *, + context: str, +) -> astx.Expr: + """ + title: Coerce one parsed expression into one declared DataFrame type. + parameters: + expr: + type: astx.Expr + target_type: + type: astx.DataType + context: + type: str + returns: + type: astx.Expr + """ + del context + if not isinstance(target_type, astx.DataFrameType): + return expr + if not isinstance(expr, astx.DataFrameLiteral): + return expr + binding = binding_from_type(target_type) + if binding is None: + raise ValueError( + "dataframe literals require a static dataframe schema" + ) + coerced = astx.DataFrameLiteral( + _columns_in_schema_order(expr, binding.schema), + type_=target_type, + ) + attach_binding(coerced, binding) + return coerced + + +def column_type( + binding: DataFrameBinding, + column_name: str, +) -> astx.DataType | None: + """ + title: Return the type of one DataFrame column. + parameters: + binding: + type: DataFrameBinding + column_name: + type: str + returns: + type: astx.DataType | None + """ + column = binding.schema.column(column_name) + return None if column is None else column.type_ + + +def _columns_in_schema_order( + literal: astx.DataFrameLiteral, + schema: DataFrameSchema, +) -> tuple[astx.DataFrameLiteralColumn, ...]: + """ + title: Return literal columns ordered by schema. 
+ parameters: + literal: + type: astx.DataFrameLiteral + schema: + type: DataFrameSchema + returns: + type: tuple[astx.DataFrameLiteralColumn, Ellipsis] + """ + literal_columns = {column.name: column for column in literal.columns} + ordered: list[astx.DataFrameLiteralColumn] = [] + for schema_column in schema.columns: + column = literal_columns.get(schema_column.name) + if column is None: + raise ValueError( + f"dataframe literal is missing column '{schema_column.name}'" + ) + ordered.append(column) + schema_column_names = {column.name for column in schema.columns} + extra = sorted(set(literal_columns) - schema_column_names) + if extra: + raise ValueError( + "dataframe literal has undeclared columns: " + ", ".join(extra) + ) + return tuple(ordered) + + +__all__ = [ + "DataFrameBinding", + "attach_binding", + "binding_from_type", + "coerce_expression", + "column_type", + "dataframe_type", + "is_dataframe_type", + "is_series_type", + "runtime_dataframe_type", + "series_type", +] diff --git a/packages/aix/src/aix/docstrings.py b/packages/aix/src/aix/docstrings.py new file mode 100644 index 0000000..a7b2bb4 --- /dev/null +++ b/packages/aix/src/aix/docstrings.py @@ -0,0 +1,61 @@ +""" +title: Validate Arx docstrings against the Douki YAML schema. +""" + +from __future__ import annotations + +import json +import textwrap + +from functools import lru_cache +from importlib.resources import files +from typing import Any, cast + +import yaml + +from jsonschema import ValidationError, validate + + +@lru_cache(maxsize=1) +def _schema() -> dict[str, Any]: + """ + title: Load and cache the Douki JSON schema used by Arx. + returns: + type: dict[str, Any] + """ + with ( + files("aix.schema").joinpath("douki.json").open(encoding="utf-8") as fh + ): + return cast(dict[str, Any], json.load(fh)) + + +def validate_docstring(raw: str) -> dict[str, Any]: + """ + title: Validate docstring content as Douki YAML. 
+ parameters: + raw: + type: str + description: Raw text found inside the docstring block. + returns: + type: dict[str, Any] + """ + normalized = textwrap.dedent(raw).strip() + if not normalized: + raise ValueError("Docstring block cannot be empty.") + + try: + data = yaml.safe_load(normalized) + except yaml.YAMLError as err: + raise ValueError("Docstring content must be valid YAML.") from err + + if not isinstance(data, dict): + raise ValueError("Docstring YAML must define an object mapping.") + + try: + validate(instance=data, schema=_schema()) + except ValidationError as err: + raise ValueError( + f"Docstring YAML does not follow douki schema: {err.message}" + ) from err + + return cast(dict[str, Any], data) diff --git a/packages/aix/src/aix/exceptions.py b/packages/aix/src/aix/exceptions.py new file mode 100644 index 0000000..32c8681 --- /dev/null +++ b/packages/aix/src/aix/exceptions.py @@ -0,0 +1,33 @@ +""" +title: AIX frontend exceptions. +""" + + +class ParserException(Exception): + """ + title: Handle parser-specific errors. + """ + + def __init__(self, message: str) -> None: + """ + title: Initialize parser exception. + parameters: + message: + type: str + """ + super().__init__(message) + + +class CodeGenException(Exception): + """ + title: Handle code generation errors. + """ + + def __init__(self, message: str) -> None: + """ + title: Initialize code generation exception. + parameters: + message: + type: str + """ + super().__init__(message) diff --git a/packages/aix/src/aix/io.py b/packages/aix/src/aix/io.py new file mode 100644 index 0000000..ed83f60 --- /dev/null +++ b/packages/aix/src/aix/io.py @@ -0,0 +1,171 @@ +""" +title: AIX compiler input buffer helpers. +""" + +from __future__ import annotations + +import os +import sys +import tempfile + + +class AixBuffer: + """ + title: Shared source buffer used by lexer and CLI flows. 
+ attributes: + buffer: + type: str + position: + type: int + """ + + buffer: str + position: int + + def __init__(self) -> None: + """ + title: Initialize the buffer. + """ + self.clean() + + def clean(self) -> None: + """ + title: Reset buffer content and cursor. + """ + self.buffer = "" + self.position = 0 + + def write(self, text: str) -> None: + """ + title: Replace buffer content. + parameters: + text: + type: str + """ + self.buffer = text + self.position = 0 + + def read(self) -> str: + """ + title: Return the next buffered character or EOF marker. + returns: + type: str + """ + if self.position >= len(self.buffer): + return "" + char = self.buffer[self.position] + self.position += 1 + return char + + +class AixIO: + """ + title: AIX input loading facade. + attributes: + INPUT_FROM_STDIN: + type: bool + INPUT_FILE: + type: str + EOF: + type: int + buffer: + type: AixBuffer + """ + + INPUT_FROM_STDIN: bool = False + INPUT_FILE: str = "" + EOF: int = sys.maxunicode + 1 + buffer: AixBuffer = AixBuffer() + + @classmethod + def get_char(cls) -> str: + """ + title: Return one character from stdin or the shared buffer. + returns: + type: str + """ + if cls.INPUT_FROM_STDIN: + return sys.stdin.read(1) + return cls.buffer.read() + + @classmethod + def file_to_buffer(cls, filename: str) -> None: + """ + title: Load one source file into the shared buffer. + parameters: + filename: + type: str + """ + with open(filename, encoding="utf-8") as aix_file: + cls.buffer.clean() + cls.buffer.write(aix_file.read()) + + @classmethod + def string_to_buffer(cls, value: str) -> None: + """ + title: Load one source string into the shared buffer. + parameters: + value: + type: str + """ + cls.buffer.clean() + cls.buffer.write(value) + + @classmethod + def load_input_to_buffer(cls) -> None: + """ + title: Load configured file or stdin into the shared buffer. 
+ """ + if cls.INPUT_FILE: + input_file_path = os.path.abspath(cls.INPUT_FILE) + cls.file_to_buffer(input_file_path) + return + + file_content = sys.stdin.read().strip() + if file_content: + cls.string_to_buffer(file_content) + + +class AixFile: + """ + title: Temporary file helpers used by backend flows. + """ + + @staticmethod + def create_tmp_file(content: str) -> str: + """ + title: Create a temporary C++ file with given content. + parameters: + content: + type: str + returns: + type: str + """ + with tempfile.NamedTemporaryFile(delete=False) as tmpfile: + tmpfile.write(content.encode()) + + filename = tmpfile.name + filename_ext = filename + ".cpp" + os.rename(filename, filename_ext) + return filename_ext + + @staticmethod + def delete_file(filename: str) -> int: + """ + title: Delete one file if present. + parameters: + filename: + type: str + returns: + type: int + """ + try: + os.remove(filename) + return 0 + except OSError: + return -1 + + +ArxBuffer = AixBuffer +ArxIO = AixIO +ArxFile = AixFile diff --git a/packages/aix/src/aix/lexer/__init__.py b/packages/aix/src/aix/lexer/__init__.py new file mode 100644 index 0000000..18c55d6 --- /dev/null +++ b/packages/aix/src/aix/lexer/__init__.py @@ -0,0 +1,13 @@ +""" +title: AIX lexer subpackage. +""" + +from aix.lexer.core import Lexer, LexerError, Token, TokenKind, TokenList + +__all__ = [ + "Lexer", + "LexerError", + "Token", + "TokenKind", + "TokenList", +] diff --git a/packages/aix/src/aix/lexer/core.py b/packages/aix/src/aix/lexer/core.py new file mode 100644 index 0000000..da32d12 --- /dev/null +++ b/packages/aix/src/aix/lexer/core.py @@ -0,0 +1,580 @@ +# ruff: noqa: RUF001 +""" +title: AIX Unicode lexer. 
+""" + +from __future__ import annotations + +import copy +import unicodedata + +from dataclasses import dataclass, field +from enum import Enum, auto +from typing import Any, Final + +from astx import SourceLocation + +from aix.io import AixIO + +EOF_VALUE: Final[str] = "" + + +class TokenKind(Enum): + """ + title: Token kinds emitted by the AIX lexer. + """ + + eof = auto() + identifier = auto() + + integer = auto() + float = auto() + string = auto() + boolean = auto() + unit = auto() + + define = auto() + metadata = auto() + semantic_lbracket = auto() + semantic_rbracket = auto() + index_lbracket = auto() + index_rbracket = auto() + tuple_lbracket = auto() + tuple_rbracket = auto() + + arrow = auto() + turnstile = auto() + implies = auto() + bind = auto() + assign = auto() + emit = auto() + end = auto() + lambda_ = auto() + + colon = auto() + comma = auto() + semicolon = auto() + dot = auto() + range = auto() + ellipsis = auto() + + lparen = auto() + rparen = auto() + lbrace = auto() + rbrace = auto() + + plus = auto() + minus = auto() + star = auto() + multiply = auto() + slash = auto() + percent = auto() + power = auto() + + equal = auto() + not_equal = auto() + less = auto() + greater = auto() + less_equal = auto() + greater_equal = auto() + equivalent = auto() + congruent = auto() + + and_ = auto() + or_ = auto() + not_ = auto() + + primitive_type = auto() + symbolic_operator = auto() + not_initialized = auto() + + +@dataclass +class Token: + """ + title: Store one token kind, value, and source location. + attributes: + kind: + type: TokenKind + value: + type: Any + location: + type: SourceLocation + """ + + kind: TokenKind + value: Any + location: SourceLocation = field( + default_factory=lambda: SourceLocation(1, 1) + ) + + def __post_init__(self) -> None: + """ + title: Copy location values away from mutable defaults. + """ + self.location = copy.deepcopy(self.location) + + def __hash__(self) -> int: + """ + title: Return a hash for this token. 
+ returns: + type: int + """ + return hash((self.kind, self.value)) + + def get_name(self) -> str: + """ + title: Return one user-facing token name. + returns: + type: str + """ + return self.kind.name.removesuffix("_") + + def get_display_value(self) -> str: + """ + title: Return a compact display value for token printing. + returns: + type: str + """ + if self.kind in { + TokenKind.identifier, + TokenKind.integer, + TokenKind.float, + TokenKind.boolean, + TokenKind.primitive_type, + TokenKind.unit, + TokenKind.symbolic_operator, + }: + return f"({self.value})" + if self.kind == TokenKind.string: + return "(...)" + if self.value not in (None, "") and self.kind != TokenKind.eof: + return f"({self.value})" + return "" + + def __eq__(self, other: object) -> bool: + """ + title: Compare tokens by kind and value. + parameters: + other: + type: object + returns: + type: bool + """ + if not isinstance(other, Token): + return False + return (self.kind, self.value) == (other.kind, other.value) + + def __str__(self) -> str: + """ + title: Return one human-readable token representation. + returns: + type: str + """ + return f"{self.get_name()}{self.get_display_value()}" + + +class TokenList: + """ + title: Simple token stream consumed by the parser. + attributes: + tokens: + type: list[Token] + position: + type: int + cur_tok: + type: Token + """ + + tokens: list[Token] + position: int + cur_tok: Token + + def __init__(self, tokens: list[Token]) -> None: + """ + title: Initialize the token stream. + parameters: + tokens: + type: list[Token] + """ + self.tokens = tokens + self.position = 0 + self.cur_tok = Token(TokenKind.not_initialized, "") + + def __iter__(self) -> TokenList: + """ + title: Reset iteration and return this stream. + returns: + type: TokenList + """ + self.position = 0 + return self + + def __next__(self) -> Token: + """ + title: Return the next token for iteration. 
+ returns: + type: Token + """ + if self.position == len(self.tokens): + raise StopIteration + return self.get_token() + + def get_token(self) -> Token: + """ + title: Return the next token and advance. + returns: + type: Token + """ + token = self.tokens[self.position] + self.position += 1 + return token + + def get_next_token(self) -> Token: + """ + title: Advance parser cursor and return the current token. + returns: + type: Token + """ + self.cur_tok = self.get_token() + return self.cur_tok + + +class LexerError(Exception): + """ + title: AIX-specific lexer error with source location. + attributes: + location: + type: SourceLocation + """ + + location: SourceLocation + + def __init__(self, message: str, location: SourceLocation): + """ + title: Initialize a lexer error. + parameters: + message: + type: str + location: + type: SourceLocation + """ + super().__init__( + "AIX lexer error at line " + f"{location.line}, column {location.col}: {message}" + ) + self.location = location + + +PRIMITIVE_TYPES: Final[frozenset[str]] = frozenset( + { + "ℕ", + "ℤ", + "ℝ", + "ℂ", + "𝔹", + "i8", + "i16", + "i32", + "i64", + "u8", + "u16", + "u32", + "u64", + "f32", + "f64", + } +) + +_RESERVED_OPERATORS: Final[frozenset[str]] = frozenset( + {"⍴", "⍳", "¨", "∘", "↑", "↓", "⍋", "⍒", "∊", "∪", "∑", "∫", "∂"} +) + +_SYMBOL_TOKENS: Final[dict[str, TokenKind]] = { + "...": TokenKind.ellipsis, + "..": TokenKind.range, + "->": TokenKind.arrow, + "<=": TokenKind.less_equal, + ">=": TokenKind.greater_equal, + "!=": TokenKind.not_equal, + "==": TokenKind.equal, + "∴": TokenKind.define, + "κ": TokenKind.metadata, + "⟦": TokenKind.semantic_lbracket, + "⟧": TokenKind.semantic_rbracket, + "⟬": TokenKind.index_lbracket, + "⟭": TokenKind.index_rbracket, + "⟨": TokenKind.tuple_lbracket, + "⟩": TokenKind.tuple_rbracket, + "→": TokenKind.arrow, + "⊢": TokenKind.turnstile, + "⇒": TokenKind.implies, + "⌁": TokenKind.bind, + "≔": TokenKind.assign, + "⟣": TokenKind.emit, + "∎": TokenKind.end, + "λ": 
TokenKind.lambda_, + ":": TokenKind.colon, + ",": TokenKind.comma, + ";": TokenKind.semicolon, + ".": TokenKind.dot, + "(": TokenKind.lparen, + ")": TokenKind.rparen, + "{": TokenKind.lbrace, + "}": TokenKind.rbrace, + "+": TokenKind.plus, + "-": TokenKind.minus, + "*": TokenKind.star, + "×": TokenKind.multiply, + "/": TokenKind.slash, + "%": TokenKind.percent, + "^": TokenKind.power, + "=": TokenKind.equal, + "≠": TokenKind.not_equal, + "<": TokenKind.less, + ">": TokenKind.greater, + "≤": TokenKind.less_equal, + "≥": TokenKind.greater_equal, + "≡": TokenKind.equivalent, + "≅": TokenKind.congruent, + "∧": TokenKind.and_, + "∨": TokenKind.or_, + "¬": TokenKind.not_, +} + + +class Lexer: + """ + title: Tokenize AIX source text. + attributes: + source: + type: str | None + """ + + source: str | None + + def __init__(self, source: str | None = None) -> None: + """ + title: Initialize the lexer. + parameters: + source: + type: str | None + """ + self.source = source + + def clean(self) -> None: + """ + title: Keep API compatibility with the Arx-derived lexer. + """ + + def lex(self) -> TokenList: + """ + title: Tokenize configured source and return a token stream. + returns: + type: TokenList + """ + source = ( + self.source if self.source is not None else AixIO.buffer.buffer + ) + return self._tokenize_source(unicodedata.normalize("NFC", source)) + + def tokenize(self) -> TokenList: + """ + title: Alias for lex used by tests and callers. 
+ returns: + type: TokenList + """ + return self.lex() + + def _tokenize_source(self, source: str) -> TokenList: + tokens: list[Token] = [] + index = 0 + line = 1 + col = 1 + + def location() -> SourceLocation: + return SourceLocation(line, col) + + def advance() -> str: + nonlocal index, line, col + char = source[index] + index += 1 + if char == "\n": + line += 1 + col = 1 + else: + col += 1 + return char + + while index < len(source): + char = source[index] + + if char.isspace(): + advance() + continue + + if char == "⍝": + while index < len(source) and source[index] not in "\r\n": + advance() + continue + + token_location = location() + + if char in {'"', "'"}: + token, consumed = self._read_string( + source, index, token_location + ) + tokens.append(token) + for _ in range(consumed): + advance() + continue + + if char.isdigit() or ( + char == "." + and index + 1 < len(source) + and source[index + 1].isdigit() + ): + number_value, kind, consumed = self._read_number(source, index) + tokens.append(Token(kind, number_value, token_location)) + for _ in range(consumed): + advance() + continue + + matched = self._match_symbol(source, index) + if matched is not None: + symbol, kind = matched + symbol_value: Any = symbol + if symbol == "⊤": + kind = TokenKind.boolean + symbol_value = True + elif symbol == "⊥": + kind = TokenKind.boolean + symbol_value = False + elif symbol == "∅": + kind = TokenKind.unit + symbol_value = None + tokens.append(Token(kind, symbol_value, token_location)) + for _ in symbol: + advance() + continue + + if self._is_identifier_start(char): + identifier = char + advance() + while index < len(source) and self._is_identifier_part( + source[index] + ): + identifier += source[index] + advance() + + if identifier in {"true", "false"}: + tokens.append( + Token( + TokenKind.boolean, + identifier == "true", + token_location, + ) + ) + continue + if identifier in PRIMITIVE_TYPES: + tokens.append( + Token( + TokenKind.primitive_type, + identifier, + 
token_location, + ) + ) + continue + tokens.append( + Token(TokenKind.identifier, identifier, token_location) + ) + continue + + if char in _RESERVED_OPERATORS: + tokens.append( + Token(TokenKind.symbolic_operator, char, token_location) + ) + advance() + continue + + raise LexerError(f"unknown symbol {char!r}", token_location) + + tokens.append(Token(TokenKind.eof, "", SourceLocation(line, col))) + return TokenList(tokens) + + def _match_symbol( + self, source: str, index: int + ) -> tuple[str, TokenKind] | None: + for symbol, kind in sorted( + _SYMBOL_TOKENS.items(), key=lambda item: len(item[0]), reverse=True + ): + if source.startswith(symbol, index): + return symbol, kind + if source.startswith("⊤", index): + return "⊤", TokenKind.boolean + if source.startswith("⊥", index): + return "⊥", TokenKind.boolean + if source.startswith("∅", index): + return "∅", TokenKind.unit + return None + + def _read_number( + self, source: str, start: int + ) -> tuple[int | float, TokenKind, int]: + index = start + dots = 0 + while index < len(source): + char = source[index] + if char == ".": + dots += 1 + if dots > 1: + raise LexerError( + "invalid number format: multiple decimal points", + SourceLocation(1, start + 1), + ) + index += 1 + continue + if not char.isdigit(): + break + index += 1 + + raw = source[start:index] + if dots: + return float(raw), TokenKind.float, len(raw) + return int(raw), TokenKind.integer, len(raw) + + def _read_string( + self, source: str, start: int, loc: SourceLocation + ) -> tuple[Token, int]: + quote = source[start] + index = start + 1 + content = "" + while index < len(source) and source[index] not in {quote, "\n", "\r"}: + char = source[index] + if char == "\\": + index += 1 + if index >= len(source): + raise LexerError("unterminated string literal", loc) + escapes = { + "n": "\n", + "t": "\t", + "r": "\r", + "\\": "\\", + "'": "'", + '"': '"', + } + content += escapes.get(source[index], source[index]) + index += 1 + continue + content += char + 
index += 1 + + if index >= len(source) or source[index] != quote: + raise LexerError("unterminated string literal", loc) + return Token(TokenKind.string, content, loc), index - start + 1 + + def _is_identifier_start(self, char: str) -> bool: + return char == "_" or char.isalpha() + + def _is_identifier_part(self, char: str) -> bool: + return char == "_" or char.isalpha() or char.isdigit() diff --git a/packages/aix/src/aix/lexer/syntax.json b/packages/aix/src/aix/lexer/syntax.json new file mode 100644 index 0000000..23a8aa4 --- /dev/null +++ b/packages/aix/src/aix/lexer/syntax.json @@ -0,0 +1,58 @@ +{ + "name": "aix", + "extensions": [".aix"], + "comments": { + "line": "⍝" + }, + "symbols": { + "define": "∴", + "metadata": "κ", + "semantic_lbracket": "⟦", + "semantic_rbracket": "⟧", + "return": "⊢", + "conditional_return": "⇒", + "bind": "⌁", + "assign": "≔", + "emit": "⟣", + "end": "∎", + "arrow": "→" + }, + "types": [ + "ℕ", + "ℤ", + "ℝ", + "ℂ", + "𝔹", + "∅", + "i8", + "i16", + "i32", + "i64", + "u8", + "u16", + "u32", + "u64", + "f32", + "f64" + ], + "literals": { + "true": "⊤", + "false": "⊥", + "unit": "∅" + }, + "reserved_operators": [ + "⍴", + "⍳", + "¨", + "∘", + "↑", + "↓", + "⍋", + "⍒", + "∊", + "∪", + "∑", + "∫", + "∂" + ] +} diff --git a/packages/aix/src/aix/lexer/syntax.py b/packages/aix/src/aix/lexer/syntax.py new file mode 100644 index 0000000..f24c631 --- /dev/null +++ b/packages/aix/src/aix/lexer/syntax.py @@ -0,0 +1,65 @@ +""" +title: Helpers for the bundled AIX lexer syntax manifest. +""" + +from __future__ import annotations + +import json + +from dataclasses import dataclass +from importlib import resources +from typing import Any + + +@dataclass(frozen=True) +class SyntaxManifest: + """ + title: Loaded syntax manifest facade. + attributes: + data: + type: dict[str, Any] + """ + + data: dict[str, Any] + + @property + def line_comment_delimiters(self) -> tuple[str, ...]: + """ + title: Return configured line comment delimiters. 
+ returns: + type: tuple[str, Ellipsis] + """ + comments = self.data.get("comments", {}) + line = comments.get("line", "⍝") + if isinstance(line, str): + return (line,) + return tuple(line) + + @property + def reserved_operators(self) -> set[str]: + """ + title: Return reserved AIX operator symbols. + returns: + type: set[str] + """ + return set(self.data.get("reserved_operators", [])) + + @property + def types(self) -> set[str]: + """ + title: Return primitive type spellings. + returns: + type: set[str] + """ + return set(self.data.get("types", [])) + + +def load_syntax_manifest() -> SyntaxManifest: + """ + title: Load the lexer syntax manifest bundled with AIX. + returns: + type: SyntaxManifest + """ + manifest = resources.files("aix.lexer").joinpath("syntax.json") + data = json.loads(manifest.read_text(encoding="utf-8")) + return SyntaxManifest(data) diff --git a/packages/aix/src/aix/logs.py b/packages/aix/src/aix/logs.py new file mode 100644 index 0000000..8886ead --- /dev/null +++ b/packages/aix/src/aix/logs.py @@ -0,0 +1,19 @@ +""" +title: The logs functions and classes handle all the system messages. +""" + +import sys + + +def LogError(message: str) -> None: + """ + title: LogError - A helper function for error handling. + parameters: + message: + type: str + description: The error message. + """ + print(f"Error: {message}\n", file=sys.stderr) + + +LogErrorV = LogError diff --git a/packages/aix/src/aix/main.py b/packages/aix/src/aix/main.py new file mode 100644 index 0000000..fc524f7 --- /dev/null +++ b/packages/aix/src/aix/main.py @@ -0,0 +1,270 @@ +""" +title: AIX main compiler frontend orchestration. 
+""" + +from __future__ import annotations + +import subprocess + +from fnmatch import fnmatch +from pathlib import Path +from typing import Any, Literal, cast + +import astx + +from aix.io import AixIO +from aix.lexer import Lexer +from aix.parser import Parser + +SOURCE_EXTENSION = ".aix" + + +def get_module_name_from_file_path(filepath: str) -> str: + """ + title: Return module name from one AIX source path. + parameters: + filepath: + type: str + returns: + type: str + """ + return Path(filepath).with_suffix("").name + + +class AixMain: + """ + title: Main AIX frontend facade used by the CLI. + attributes: + input_files: + type: list[str] + output_file: + type: str + is_lib: + type: bool + link_mode: + type: Literal[auto, pie, no-pie] + """ + + input_files: list[str] + output_file: str + is_lib: bool + link_mode: Literal["auto", "pie", "no-pie"] + + def __init__(self) -> None: + """ + title: Initialize AIX main state. + """ + self.input_files = [] + self.output_file = "" + self.is_lib = False + self.link_mode = "auto" + + def _get_astx(self) -> astx.AST: + """ + title: Parse configured input files into ASTx nodes. 
+ returns: + type: astx.AST + """ + parser = Parser() + modules: list[astx.Module] = [] + for input_file in self.input_files: + AixIO.file_to_buffer(input_file) + module_name = get_module_name_from_file_path(input_file) + modules.append(parser.parse(Lexer().lex(), module_name)) + + if len(modules) == 1: + return modules[0] + + tree_ast = astx.Block() + tree_ast.nodes.extend(modules) + return tree_ast + + def _resolve_output_file(self) -> str: + if self.output_file: + return self.output_file + if not self.input_files: + return "a.out" + return Path(self.input_files[0]).stem or "a.out" + + def _has_main_entry(self, node: astx.AST) -> bool: + modules: list[astx.Module] = [] + if isinstance(node, astx.Module): + modules = [node] + elif isinstance(node, astx.Block): + modules = [ + item for item in node.nodes if isinstance(item, astx.Module) + ] + + for module in modules: + for module_node in module.nodes: + if ( + isinstance(module_node, astx.FunctionDef) + and module_node.prototype.name == "main" + ): + return True + return False + + def run(self, **kwargs: Any) -> None: + """ + title: Run one AIX compiler/frontend action. + parameters: + kwargs: + type: Any + variadic: keyword + """ + self.input_files = list(kwargs.get("input_files", [])) + output_file = kwargs.get("output_file") + self.output_file = output_file.strip() if output_file else "" + self.is_lib = bool(kwargs.get("is_lib", False)) + link_mode = str(kwargs.get("link_mode", "auto")).strip().lower() + if link_mode not in {"auto", "pie", "no-pie"}: + raise ValueError( + "Invalid link mode. Expected auto, pie, or no-pie." 
+ ) + self.link_mode = cast(Literal["auto", "pie", "no-pie"], link_mode) + + if kwargs.get("show_ast"): + return self.show_ast() + if kwargs.get("show_tokens"): + return self.show_tokens() + if kwargs.get("show_llvm_ir"): + return self.show_llvm_ir() + if kwargs.get("shell"): + return self.run_shell() + if not self.input_files: + return None + + emits_executable = self.compile() + if kwargs.get("run"): + if not emits_executable: + raise ValueError("`--run` requires an AIX main function.") + self.run_binary() + return None + + def run_tests(self, **kwargs: Any) -> int: + """ + title: Discover and parse AIX test source files. + parameters: + kwargs: + type: Any + variadic: keyword + returns: + type: int + """ + paths = tuple(kwargs.get("paths") or ("tests/aix",)) + file_pattern = str(kwargs.get("file_pattern") or "test_*.aix") + excludes = tuple(kwargs.get("exclude") or ()) + list_only = bool(kwargs.get("list_only", False)) + + files = self._discover_test_files(paths, file_pattern, excludes) + if list_only: + for path in files: + print(path) + return 0 + + parser = Parser() + for path in files: + try: + AixIO.file_to_buffer(str(path)) + parser.parse(Lexer().lex(), path.stem) + except Exception as err: + print(f"FAILED {path}: {err}") + return 1 + print(f"PASSED {path}") + return 0 + + def _discover_test_files( + self, + paths: tuple[str, ...], + file_pattern: str, + excludes: tuple[str, ...], + ) -> list[Path]: + files: list[Path] = [] + for entry in paths: + path = Path(entry) + if path.is_file() and self._is_included_test(path, excludes): + files.append(path) + continue + if not path.is_dir(): + continue + for candidate in sorted(path.rglob(file_pattern)): + if candidate.is_file() and self._is_included_test( + candidate, + excludes, + ): + files.append(candidate) + return files + + def _is_included_test(self, path: Path, excludes: tuple[str, ...]) -> bool: + text = path.as_posix() + return not any(fnmatch(text, pattern) for pattern in excludes) + + def 
show_ast(self) -> None: + """ + title: Print AST for configured input files. + """ + tree_ast = self._get_astx() + if hasattr(tree_ast, "to_json"): + print(tree_ast.to_json()) + return + print(repr(tree_ast)) + + def show_tokens(self) -> None: + """ + title: Print token stream for configured input files. + """ + for input_file in self.input_files: + AixIO.file_to_buffer(input_file) + for token in Lexer().lex(): + print(token) + + def show_llvm_ir(self) -> None: + """ + title: Translate configured input to LLVM IR when backend supports it. + """ + from aix.codegen import AixBuilder + + print(AixBuilder().translate(self._get_astx())) + + def run_shell(self) -> None: + """ + title: Open shell mode. + """ + raise NotImplementedError("AIX shell is not implemented yet.") + + def run_binary(self) -> None: + """ + title: Run generated binary. + """ + binary_path = Path(self.output_file) + if not binary_path.is_absolute(): + binary_path = Path.cwd() / binary_path + result = subprocess.run([str(binary_path)], check=False) + if result.returncode != 0: + raise SystemExit(result.returncode) + + def compile(self, show_llvm_ir: bool = False) -> bool: + """ + title: Compile configured input with the existing IRx backend. + parameters: + show_llvm_ir: + type: bool + returns: + type: bool + """ + _ = show_llvm_ir + from aix.codegen import AixBuilder + + tree_ast = self._get_astx() + self.output_file = self._resolve_output_file() + emits_executable = not self.is_lib and self._has_main_entry(tree_ast) + AixBuilder().build( + tree_ast, + output_file=self.output_file, + link=emits_executable, + link_mode=self.link_mode, + ) + return emits_executable + + +ArxMain = AixMain diff --git a/packages/aix/src/aix/package_index.py b/packages/aix/src/aix/package_index.py new file mode 100644 index 0000000..a574c02 --- /dev/null +++ b/packages/aix/src/aix/package_index.py @@ -0,0 +1,482 @@ +""" +title: Discover installed Arx source packages. 
+summary: >- + Build a scoped index of Arx packages installed as Python distributions. +""" + +from __future__ import annotations + +import re +import sys + +from collections.abc import Iterable +from dataclasses import dataclass, field +from importlib import metadata as importlib_metadata +from pathlib import Path +from typing import Any + +from packaging.requirements import InvalidRequirement, Requirement + +from aix import builtins as arx_builtins +from aix import settings as arx_settings + +if sys.version_info >= (3, 11): + import tomllib +else: # pragma: no cover + import tomli as tomllib + +_ARX_MODULE_NAME_PATTERN = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") +_DEPENDENCY_NAME_PATTERN = re.compile( + r"^\s*(?P[A-Za-z0-9][A-Za-z0-9._-]*)" +) +_DISTRIBUTION_NORMALIZE_PATTERN = re.compile(r"[-_.]+") +_RESERVED_MODULE_NAMES = frozenset( + { + "stdlib", + arx_builtins.BUILTIN_NAMESPACE, + } +) +_SOURCE_SUFFIXES = frozenset({".x"}) + + +@dataclass(frozen=True) +class InstalledArxPackage: + """ + title: One installed Arx source package. + attributes: + module_name: + type: str + source_root: + type: Path + distribution_name: + type: str + """ + + module_name: str + source_root: Path + distribution_name: str + + +@dataclass(frozen=True) +class InstalledArxPackageIndex: + """ + title: Indexed installed Arx source packages. + attributes: + packages: + type: dict[str, InstalledArxPackage] + missing_distributions: + type: frozenset[str] + conflicts: + type: dict[str, tuple[InstalledArxPackage, Ellipsis]] + """ + + packages: dict[str, InstalledArxPackage] = field(default_factory=dict) + missing_distributions: frozenset[str] = frozenset() + conflicts: dict[str, tuple[InstalledArxPackage, ...]] = field( + default_factory=dict + ) + + def missing_distribution_for_module(self, module_name: str) -> str | None: + """ + title: Return a missing distribution matching one import head. 
def extract_dependency_name(dependency: str) -> str | None:
    """
    title: Extract the distribution name from one dependency string.
    summary: >-
      Prefers the name parsed by ``packaging``. When the string is not a
      valid requirement, falls back to the leading name-like token matched
      by ``_DEPENDENCY_NAME_PATTERN``.
    parameters:
      dependency:
        type: str
        description: Raw dependency string.
    returns:
      type: str | None
      description: The distribution name, or None when none can be found.
    """
    try:
        parsed = Requirement(dependency)
    except InvalidRequirement:
        # Not a well-formed requirement: use the lenient regex instead.
        fallback = _DEPENDENCY_NAME_PATTERN.match(dependency)
        return fallback.group("name") if fallback is not None else None
    return parsed.name
def discover_installed_arx_packages_from_dependencies(
    dependencies: Iterable[str],
) -> InstalledArxPackageIndex:
    """
    title: Discover installed Arx packages from dependency strings.
    summary: >-
      Walks the dependency graph breadth-first, starting from the given
      dependency strings and following the active requirements of every
      installed distribution. Each distribution is visited at most once,
      keyed by its normalized name.
    parameters:
      dependencies:
        type: Iterable[str]
        description: Dependency strings whose distributions seed the walk.
    returns:
      type: InstalledArxPackageIndex
      description: >-
        Discovered packages, names of distributions that are not installed,
        and module names claimed by more than one package.
    """
    # Local import keeps this change self-contained. A deque pops from the
    # front in O(1), whereas ``list.pop(0)`` shifts every remaining item.
    from collections import deque

    package_entries: dict[str, InstalledArxPackage] = {}
    conflict_entries: dict[str, tuple[InstalledArxPackage, ...]] = {}
    missing_distributions: set[str] = set()
    pending: deque[str] = deque(
        dependency_name
        for dependency in dependencies
        if (dependency_name := extract_dependency_name(dependency)) is not None
    )
    visited: set[str] = set()

    while pending:
        dependency_name = pending.popleft()
        normalized_name = normalize_distribution_name(dependency_name)
        if normalized_name in visited:
            continue
        visited.add(normalized_name)

        try:
            distribution = importlib_metadata.distribution(dependency_name)
        except importlib_metadata.PackageNotFoundError:
            # Recorded under the name as requested, not the normalized one.
            missing_distributions.add(dependency_name)
            continue

        for package in _arx_packages_from_distribution(distribution):
            _add_package(package_entries, conflict_entries, package)

        # Queue transitive requirements; inactive or unparsable ones yield
        # None from ``active_requirement_name`` and are skipped.
        for requirement in distribution.requires or ():
            requirement_name = active_requirement_name(requirement)
            if requirement_name is None:
                continue
            if normalize_distribution_name(requirement_name) in visited:
                continue
            pending.append(requirement_name)

    return InstalledArxPackageIndex(
        packages=package_entries,
        missing_distributions=frozenset(missing_distributions),
        conflicts=conflict_entries,
    )
_add_package( + packages: dict[str, InstalledArxPackage], + conflicts: dict[str, tuple[InstalledArxPackage, ...]], + package: InstalledArxPackage, +) -> None: + """ + title: Add one package to the mutable package index. + parameters: + packages: + type: dict[str, InstalledArxPackage] + conflicts: + type: dict[str, tuple[InstalledArxPackage, Ellipsis]] + package: + type: InstalledArxPackage + """ + conflict = conflicts.get(package.module_name) + if conflict is not None: + conflicts[package.module_name] = (*conflict, package) + return + + existing = packages.get(package.module_name) + if existing is None: + packages[package.module_name] = package + return + + del packages[package.module_name] + conflicts[package.module_name] = (existing, package) + + +def _arx_packages_from_distribution( + distribution: importlib_metadata.Distribution, +) -> tuple[InstalledArxPackage, ...]: + """ + title: Extract Arx package roots from one installed distribution. + parameters: + distribution: + type: importlib_metadata.Distribution + returns: + type: tuple[InstalledArxPackage, Ellipsis] + """ + files = distribution.files + if files is None: + return () + + packages: list[InstalledArxPackage] = [] + for distribution_file in files: + if distribution_file.name != arx_settings.DEFAULT_CONFIG_FILENAME: + continue + + manifest_path = Path( + str(distribution.locate_file(distribution_file)) + ).resolve() + if not manifest_path.is_file(): + continue + + package = _package_from_manifest( + _distribution_name(distribution), + manifest_path, + ) + if package is None: + continue + + packages.append(package) + + return tuple(packages) + + +def _distribution_name( + distribution: importlib_metadata.Distribution, +) -> str: + """ + title: Return the canonical display name for one distribution. 
+ parameters: + distribution: + type: importlib_metadata.Distribution + returns: + type: str + """ + try: + return str(distribution.metadata["Name"]) + except KeyError: + return str(distribution) + + +def _has_arx_sources(source_root: Path) -> bool: + """ + title: Return whether one package root contains Arx source files. + parameters: + source_root: + type: Path + returns: + type: bool + """ + for source_path in source_root.rglob("*"): + if source_path.suffix in _SOURCE_SUFFIXES and source_path.is_file(): + return True + return False + + +def _package_from_manifest( + distribution_name: str, + manifest_path: Path, +) -> InstalledArxPackage | None: + """ + title: Build one installed Arx package entry from a manifest. + parameters: + distribution_name: + type: str + manifest_path: + type: Path + returns: + type: InstalledArxPackage | None + """ + data = _load_manifest_data(manifest_path) + if data is None: + return None + + module_name = _manifest_package_name(data) + source_root = _manifest_source_root( + data, + manifest_path.parent, + module_name, + ) + if source_root is None: + return None + + if module_name is None: + module_name = source_root.name + + if ( + _ARX_MODULE_NAME_PATTERN.fullmatch(module_name) is None + or module_name in _RESERVED_MODULE_NAMES + ): + return None + return InstalledArxPackage( + module_name=module_name, + source_root=source_root, + distribution_name=distribution_name, + ) + + +def _load_manifest_data(manifest_path: Path) -> dict[str, Any] | None: + """ + title: Load a packaged manifest without full project validation. + parameters: + manifest_path: + type: Path + returns: + type: dict[str, Any] | None + """ + try: + data = tomllib.loads(manifest_path.read_text(encoding="utf-8")) + except (OSError, tomllib.TOMLDecodeError): + return None + + if not isinstance(data, dict): + return None + return data + + +def _manifest_package_name(data: dict[str, Any]) -> str | None: + """ + title: Extract an optional ``[build].package`` value. 
+ parameters: + data: + type: dict[str, Any] + returns: + type: str | None + """ + build = data.get("build") + if not isinstance(build, dict): + return None + + package_name = build.get("package") + if not isinstance(package_name, str): + return None + return package_name + + +def _manifest_src_dir(data: dict[str, Any]) -> str | None: + """ + title: Extract an optional explicit ``[build].src_dir`` value. + parameters: + data: + type: dict[str, Any] + returns: + type: str | None + """ + build = data.get("build") + if not isinstance(build, dict): + return None + + src_dir = build.get("src_dir") + if not isinstance(src_dir, str): + return None + return src_dir + + +def _manifest_source_root( + data: dict[str, Any], + manifest_parent: Path, + module_name: str | None, +) -> Path | None: + """ + title: Resolve the installed source root for one manifest. + parameters: + data: + type: dict[str, Any] + manifest_parent: + type: Path + module_name: + type: str | None + returns: + type: Path | None + """ + candidates = _source_root_candidates( + manifest_parent, + _manifest_src_dir(data), + module_name, + ) + for candidate in candidates: + source_root = candidate.resolve() + if _has_arx_sources(source_root): + return source_root + return None + + +def _source_root_candidates( + manifest_parent: Path, + src_dir: str | None, + module_name: str | None, +) -> tuple[Path, ...]: + """ + title: Build candidate installed source roots in precedence order. 
+ parameters: + manifest_parent: + type: Path + src_dir: + type: str | None + module_name: + type: str | None + returns: + type: tuple[Path, Ellipsis] + """ + if src_dir is not None: + source_root = manifest_parent / src_dir + if module_name is not None: + return (source_root / module_name,) + return (source_root,) + + candidates: list[Path] = [] + if module_name is not None: + candidates.extend( + ( + manifest_parent / "src" / module_name, + manifest_parent / module_name, + ) + ) + candidates.append(manifest_parent) + return tuple(candidates) diff --git a/packages/aix/src/aix/parser/__init__.py b/packages/aix/src/aix/parser/__init__.py new file mode 100644 index 0000000..3b57903 --- /dev/null +++ b/packages/aix/src/aix/parser/__init__.py @@ -0,0 +1,7 @@ +""" +title: AIX parser package. +""" + +from aix.parser.core import Parser + +__all__ = ["Parser"] diff --git a/packages/aix/src/aix/parser/core.py b/packages/aix/src/aix/parser/core.py new file mode 100644 index 0000000..f2fd88f --- /dev/null +++ b/packages/aix/src/aix/parser/core.py @@ -0,0 +1,576 @@ +# ruff: noqa: RUF001 +""" +title: AIX recursive-descent parser. 
# Binary operator table consulted by ``Parser.parse_expression``.
# Each value is ``(precedence, operator code, right_associative)``:
#   - a larger precedence binds tighter (``or`` is loosest, ``power`` is
#     tightest);
#   - the operator code is the string passed to ``astx.BinaryOp``;
#   - only ``power`` is right-associative; every other entry is
#     left-associative.
# Several token kinds share one operator code: ``equal``, ``equivalent`` and
# ``congruent`` all emit "==", and ``star`` and ``multiply`` both emit "*".
_BINARY_OPERATORS: Final[dict[TokenKind, tuple[int, str, bool]]] = {
    TokenKind.or_: (10, "or", False),
    TokenKind.and_: (20, "and", False),
    TokenKind.equal: (30, "==", False),
    TokenKind.not_equal: (30, "!=", False),
    TokenKind.less: (30, "<", False),
    TokenKind.greater: (30, ">", False),
    TokenKind.less_equal: (30, "<=", False),
    TokenKind.greater_equal: (30, ">=", False),
    TokenKind.equivalent: (30, "==", False),
    TokenKind.congruent: (30, "==", False),
    TokenKind.plus: (40, "+", False),
    TokenKind.minus: (40, "-", False),
    TokenKind.star: (50, "*", False),
    TokenKind.multiply: (50, "*", False),
    TokenKind.slash: (50, "/", False),
    TokenKind.percent: (50, "%", False),
    TokenKind.power: (60, "^", True),
}
def _parse_metadata_block(self) -> None:
    """
    title: Skip one ``κ⟦...⟧`` metadata block.
    summary: >-
      Consumes the marker and the opening bracket, then discards tokens
      until the matching closing bracket, tracking nested ``⟦...⟧`` pairs.
      Nothing is added to the AST.
    """
    self._consume(TokenKind.metadata, "κ")
    self._consume(TokenKind.semantic_lbracket, "⟦")
    open_brackets = 1
    while open_brackets > 0:
        kind = self.tokens.cur_tok.kind
        if kind == TokenKind.eof:
            self._raise_here("unterminated metadata block, expected '⟧'")
        if kind == TokenKind.semantic_lbracket:
            open_brackets += 1
        elif kind == TokenKind.semantic_rbracket:
            open_brackets -= 1
        # Every token is consumed, including the bracket that closes the
        # block.
        self.tokens.get_next_token()
def _parse_parameter_block(self) -> astx.Arguments:
    """
    title: Parse a ``⟦name:Type, ...⟧`` parameter block.
    summary: >-
      Accepts an empty block and a trailing comma before the closing
      bracket.
    returns:
      type: astx.Arguments
      description: Declared parameters in source order.
    """
    self._consume(TokenKind.semantic_lbracket, "⟦")
    parameters = astx.Arguments()
    while not self._at(TokenKind.semantic_rbracket):
        name_token = self._expect(TokenKind.identifier, "parameter name")
        self._consume(TokenKind.colon, ":")
        declared_type = self._parse_type_expression()
        parameters.append(
            astx.Argument(
                cast(str, name_token.value),
                declared_type,
                loc=name_token.location,
            )
        )
        if not self._at(TokenKind.comma):
            break
        # Step over the comma; the loop condition handles a trailing one.
        self.tokens.get_next_token()
    self._consume(TokenKind.semantic_rbracket, "⟧")
    return parameters
def _parse_block_until_end(self) -> astx.Block:
    """
    title: Parse statements up to the block terminator '∎'.
    summary: >-
      Collects statements, skipping ``;`` separators between them, and
      consumes the closing '∎'.
    returns:
      type: astx.Block
      description: Parsed statements in source order.
    raises:
      ParserException: >-
        The input ends before '∎', a reserved operator starts a statement,
        or the current token cannot start a statement.
    """
    block = astx.Block()
    while True:
        self._skip_separators()
        if self._at(TokenKind.end):
            break
        # Checked after skipping separators so that input ending in ';' is
        # also reported as a missing terminator.
        if self._at(TokenKind.eof):
            self._raise_here("missing block terminator '∎'")
        current = self.tokens.cur_tok
        # Reserved operators are not statement starters. Without this check
        # they would get the generic message below, and the dedicated
        # unsupported-operator diagnostic would never be reached here.
        if current.kind == TokenKind.symbolic_operator:
            self._raise_reserved_operator(current)
        if current.kind not in _STATEMENT_STARTERS:
            self._raise_here("expected statement or block terminator '∎'")
        block.append(self._parse_statement())
    self._consume(TokenKind.end, "∎")
    return block
def _parse_branch_statement(self) -> astx.AST:
    """
    title: Parse a ``⊢`` statement.
    summary: >-
      ``⊢ expr`` becomes a function return. ``⊢ cond ⇒ expr`` becomes an
      if-statement whose then-branch returns ``expr`` and whose else-branch
      is empty. Both nodes carry the location of the ``⊢`` token.
    returns:
      type: astx.AST
      description: An ``astx.FunctionReturn`` or an ``astx.IfStmt``.
    """
    marker = self._consume(TokenKind.turnstile, "⊢")
    where = marker.location
    head = cast(astx.Expr, self.parse_expression())

    if self._at(TokenKind.implies):
        # Conditional form: ``head`` is the condition, not the result.
        self.tokens.get_next_token()
        guarded = cast(astx.Expr, self.parse_expression())
        taken = astx.Block()
        taken.append(
            astx.FunctionReturn(cast(astx.DataType, guarded), where)
        )
        return astx.IfStmt(head, taken, astx.Block(), loc=where)

    return astx.FunctionReturn(cast(astx.DataType, head), where)
def _parse_unary(self) -> astx.AST:
    """
    title: Parse a prefix-operator expression.
    summary: >-
      Handles unary minus and logical not, which may be stacked. Reserved
      symbolic operators are rejected. Anything else is parsed as a postfix
      expression.
    returns:
      type: astx.AST
    """
    operator_token = self.tokens.cur_tok
    kind = operator_token.kind
    if kind == TokenKind.symbolic_operator:
        self._raise_reserved_operator(operator_token)

    prefix_codes = {TokenKind.minus: "-", TokenKind.not_: "not"}
    op_code = prefix_codes.get(kind)
    if op_code is None:
        return self._parse_postfix()

    self.tokens.get_next_token()
    # Recursing here, not into the postfix parser, allows chains like
    # ``- - x``.
    operand = cast(astx.DataType, self._parse_unary())
    return astx.UnaryOp(op_code, operand, loc=operator_token.location)
call_loc = self.tokens.cur_tok.location + self._consume(TokenKind.semantic_lbracket, "⟦") + args: list[astx.DataType] = [] + if not self._at(TokenKind.semantic_rbracket): + while True: + args.append(cast(astx.DataType, self.parse_expression())) + if self._at(TokenKind.comma): + self.tokens.get_next_token() + if self._at(TokenKind.semantic_rbracket): + break + continue + break + self._consume(TokenKind.semantic_rbracket, "⟧") + return astx.FunctionCall(callee.name, args, loc=call_loc) + + def _parse_primary(self) -> astx.AST: + token = self.tokens.cur_tok + if token.kind in _EXPRESSION_TERMINATORS: + self._raise_here("expected expression") + if token.kind == TokenKind.integer: + self.tokens.get_next_token() + return astx.LiteralInt64( + cast(int, token.value), + loc=token.location, + ) + if token.kind == TokenKind.float: + self.tokens.get_next_token() + return astx.LiteralFloat64( + cast(float, token.value), + loc=token.location, + ) + if token.kind == TokenKind.string: + self.tokens.get_next_token() + return astx.LiteralString( + cast(str, token.value), + loc=token.location, + ) + if token.kind == TokenKind.boolean: + self.tokens.get_next_token() + return astx.LiteralBoolean( + cast(bool, token.value), + loc=token.location, + ) + if token.kind == TokenKind.unit: + self.tokens.get_next_token() + return astx.LiteralNone(loc=token.location) + if token.kind == TokenKind.identifier: + self.tokens.get_next_token() + return astx.Identifier(cast(str, token.value), loc=token.location) + if token.kind == TokenKind.lparen: + self.tokens.get_next_token() + expr = self.parse_expression() + self._consume(TokenKind.rparen, ")") + return expr + if token.kind == TokenKind.lambda_: + self._raise_here( + "lambda expressions are parsed later in the AIX roadmap" + ) + if token.kind == TokenKind.primitive_type: + self._raise_here("type names are not valid expressions") + if token.kind == TokenKind.symbolic_operator: + self._raise_reserved_operator(token) + self._raise_here( + 
f"unexpected token '{token.get_name()}' in expression" + ) + + def _infer_type(self, value: astx.Expr) -> astx.DataType: + if isinstance(value, astx.LiteralBoolean): + return astx.Boolean() + if isinstance(value, astx.LiteralFloat64): + return astx.Float64() + if isinstance(value, astx.LiteralInt64): + return astx.Int64() + if isinstance(value, astx.LiteralString): + return astx.String() + if isinstance(value, astx.LiteralNone): + return astx.NoneType() + return AnyType() + + def _skip_separators(self) -> None: + while self.tokens.cur_tok.kind == TokenKind.semicolon: + self.tokens.get_next_token() + + def _at(self, kind: TokenKind) -> bool: + return self.tokens.cur_tok.kind == kind + + def _peek(self, offset: int = 0) -> Token: + index = self.tokens.position + offset + if index >= len(self.tokens.tokens): + return Token(TokenKind.eof, "") + return self.tokens.tokens[index] + + def _consume(self, kind: TokenKind, display: str) -> Token: + token = self.tokens.cur_tok + if token.kind != kind: + self._raise_here(f"expected '{display}', got '{token.get_name()}'") + self.tokens.get_next_token() + return token + + def _expect(self, kind: TokenKind, description: str) -> Token: + token = self.tokens.cur_tok + if token.kind != kind: + self._raise_here(f"expected {description}") + self.tokens.get_next_token() + return token + + def _raise_reserved_operator(self, token: Token) -> NoReturn: + raise ParserException( + self._message_at( + token, + "unsupported reserved operator " + f"'{token.value}'. 
This symbol is reserved for a future " + "AIX version.", + ) + ) + + def _raise_here(self, message: str) -> NoReturn: + raise ParserException(self._message_at(self.tokens.cur_tok, message)) + + def _message_at(self, token: Token, message: str) -> str: + return ( + "AIX parser error at line " + f"{token.location.line}, column {token.location.col}: {message}" + ) + + +__all__ = ["Parser"] diff --git a/packages/aix/src/aix/py.typed b/packages/aix/src/aix/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/packages/aix/src/aix/schema/__init__.py b/packages/aix/src/aix/schema/__init__.py new file mode 100644 index 0000000..5a403c9 --- /dev/null +++ b/packages/aix/src/aix/schema/__init__.py @@ -0,0 +1,3 @@ +""" +title: JSON Schemas bundled with the Arx package. +""" diff --git a/packages/aix/src/aix/schema/arxproject.json b/packages/aix/src/aix/schema/arxproject.json new file mode 100644 index 0000000..33c0910 --- /dev/null +++ b/packages/aix/src/aix/schema/arxproject.json @@ -0,0 +1,195 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://arxlang.org/schemas/arxproject.schema.json", + "title": ".arxproject.toml", + "type": "object", + "additionalProperties": false, + "definitions": { + "author": { + "type": "object", + "additionalProperties": false, + "required": ["name"], + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "email": { + "type": "string", + "minLength": 1 + } + } + }, + "dependency": { + "type": "string", + "minLength": 1 + }, + "dependency_group_name": { + "type": "string", + "minLength": 1, + "pattern": "^[A-Za-z0-9][A-Za-z0-9._-]*$" + }, + "dependency_group_include": { + "type": "object", + "additionalProperties": false, + "required": ["include-group"], + "properties": { + "include-group": { + "$ref": "#/definitions/dependency_group_name" + } + } + }, + "dependency_group_entry": { + "oneOf": [ + { + "$ref": "#/definitions/dependency" + }, + { + "$ref": 
"#/definitions/dependency_group_include" + } + ] + }, + "dependency_group": { + "type": "array", + "items": { + "$ref": "#/definitions/dependency_group_entry" + } + } + }, + "required": ["project"], + "properties": { + "project": { + "type": "object", + "additionalProperties": false, + "required": ["name", "version"], + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "version": { + "type": "string", + "minLength": 1 + }, + "requires-arx": { + "type": "string", + "minLength": 1 + }, + "edition": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string" + }, + "license": { + "type": "string", + "minLength": 1 + }, + "authors": { + "type": "array", + "items": { + "$ref": "#/definitions/author" + } + }, + "dependencies": { + "type": "array", + "items": { + "$ref": "#/definitions/dependency" + } + } + } + }, + "dependency-groups": { + "type": "object", + "propertyNames": { + "$ref": "#/definitions/dependency_group_name" + }, + "additionalProperties": { + "$ref": "#/definitions/dependency_group" + } + }, + "environment": { + "type": "object", + "additionalProperties": false, + "required": ["kind"], + "properties": { + "kind": { + "type": "string", + "enum": ["venv", "conda", "system"] + }, + "name": { + "type": "string", + "minLength": 1 + }, + "path": { + "type": "string", + "minLength": 1 + } + } + }, + "build": { + "type": "object", + "additionalProperties": false, + "properties": { + "src_dir": { + "type": "string", + "minLength": 1 + }, + "package": { + "type": "string", + "minLength": 1 + }, + "out_dir": { + "type": "string", + "minLength": 1 + }, + "mode": { + "type": "string", + "enum": ["lib", "app"] + } + } + }, + "build-system": { + "type": "object", + "additionalProperties": false, + "properties": { + "dependencies": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + } + } + }, + "tests": { + "type": "object", + "additionalProperties": false, + "properties": { + "paths": { + "type": 
"array", + "items": { + "type": "string", + "minLength": 1 + } + }, + "exclude": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + }, + "file_pattern": { + "type": "string", + "minLength": 1 + }, + "function_pattern": { + "type": "string", + "minLength": 1 + } + } + } + } +} diff --git a/packages/aix/src/aix/schema/douki.json b/packages/aix/src/aix/schema/douki.json new file mode 100644 index 0000000..6f04e3c --- /dev/null +++ b/packages/aix/src/aix/schema/douki.json @@ -0,0 +1,189 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/douki/schema.json", + "title": "douki YAML docstring", + "type": "object", + "additionalProperties": false, + + "definitions": { + "param_entry": { + "anyOf": [ + { "type": "string" }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "type": "string" }, + "optional": { "type": ["boolean", "null"] }, + "description": { "type": "string" }, + "default": {}, + "variadic": { + "type": "string", + "enum": ["positional", "keyword"] + } + } + } + ] + }, + "typed_entry": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "type": "string" }, + "description": { "type": "string" } + } + }, + "attr_entry": { + "anyOf": [ + { "type": "string" }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "type": "string" }, + "optional": { "type": ["boolean", "null"] }, + "description": { "type": "string" }, + "default": {}, + "visibility": { + "type": "string", + "enum": ["public", "private", "protected", "internal"] + }, + "mutability": { + "type": "string", + "enum": ["mutable", "immutable", "constant"] + }, + "scope": { + "type": "string", + "enum": ["instance", "static", "class"] + } + } + } + ] + }, + "example_entry": { + "anyOf": [ + { "type": "string" }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "code": { "type": "string" }, + 
"description": { "type": "string" } + } + } + ] + } + }, + + "properties": { + "title": { "type": "string" }, + "summary": { "type": ["string", "null"] }, + "deprecated": { "type": "string" }, + + "visibility": { + "type": "string", + "enum": ["public", "private", "protected", "internal"] + }, + "mutability": { + "type": "string", + "enum": ["mutable", "immutable", "constant"] + }, + "scope": { + "type": "string", + "enum": ["instance", "static", "class"] + }, + + "parameters": { + "type": "object", + "additionalProperties": { "$ref": "#/definitions/param_entry" } + }, + + "returns": { + "anyOf": [ + { "type": "string" }, + { "type": ["string", "null"] }, + { "$ref": "#/definitions/typed_entry" } + ] + }, + "yields": { + "anyOf": [{ "type": "string" }, { "$ref": "#/definitions/typed_entry" }] + }, + "receives": { + "anyOf": [{ "type": "string" }, { "$ref": "#/definitions/typed_entry" }] + }, + + "raises": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { "type": "string" } + }, + { + "type": "array", + "items": { "$ref": "#/definitions/typed_entry" } + } + ] + }, + + "warnings": { + "anyOf": [ + { + "type": "object", + "additionalProperties": { "type": "string" } + }, + { + "type": "array", + "items": { "$ref": "#/definitions/typed_entry" } + } + ] + }, + + "see_also": { + "anyOf": [ + { "type": "string" }, + { + "type": "array", + "items": { "type": "string" } + } + ] + }, + + "notes": { "type": ["string", "null"] }, + "references": { + "anyOf": [ + { "type": ["string", "null"] }, + { + "type": "array", + "items": { "type": "string" } + } + ] + }, + + "examples": { + "anyOf": [ + { "type": "string" }, + { + "type": "array", + "items": { "$ref": "#/definitions/example_entry" } + } + ] + }, + + "attributes": { + "type": "object", + "additionalProperties": { "$ref": "#/definitions/attr_entry" } + }, + + "methods": { + "anyOf": [ + { "type": "string" }, + { + "type": "array", + "items": { "type": "string" } + } + ] + } + }, + + "required": ["title"] 
+} diff --git a/packages/aix/src/aix/settings.py b/packages/aix/src/aix/settings.py new file mode 100644 index 0000000..534911e --- /dev/null +++ b/packages/aix/src/aix/settings.py @@ -0,0 +1,1383 @@ +""" +title: Parse and validate ``.arxproject.toml`` project settings. +""" + +from __future__ import annotations + +import json +import re +import sys + +from dataclasses import dataclass, field +from functools import lru_cache +from importlib.resources import files +from pathlib import Path +from typing import Any, cast + +from jsonschema import ValidationError, validate +from packaging.requirements import InvalidRequirement, Requirement +from packaging.specifiers import InvalidSpecifier, SpecifierSet +from packaging.utils import canonicalize_name + +if sys.version_info >= (3, 11): + import tomllib +else: + import tomli as tomllib + +DEFAULT_CONFIG_FILENAME = ".arxproject.toml" +_DEPENDENCY_GROUP_NAME_PATTERN = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$") +_DEPENDENCY_GROUP_NORMALIZE_PATTERN = re.compile(r"[-_.]+") +_DEFAULT_SRC_DIR = "src" +_ARXLANG_DISTRIBUTION_NAME = "arxlang" + + +def _default_build_system_dependency(requires_arx: str | None) -> str: + """ + title: Build the default Arx compiler dependency requirement. + parameters: + requires_arx: + type: str | None + returns: + type: str + """ + if requires_arx is None: + return _ARXLANG_DISTRIBUTION_NAME + return f"{_ARXLANG_DISTRIBUTION_NAME}{requires_arx.strip()}" + + +def _requirement_name(value: str) -> str | None: + """ + title: Parse and return one requirement distribution name. + parameters: + value: + type: str + returns: + type: str | None + """ + try: + return Requirement(value).name + except InvalidRequirement: + return None + + +def _has_arxlang_dependency(dependencies: tuple[str, ...]) -> bool: + """ + title: Return whether dependencies already include ``arxlang``. 
+ parameters: + dependencies: + type: tuple[str, Ellipsis] + returns: + type: bool + """ + arxlang_name = canonicalize_name(_ARXLANG_DISTRIBUTION_NAME) + for dependency in dependencies: + name = _requirement_name(dependency) + if name is None: + continue + if canonicalize_name(name) == arxlang_name: + return True + return False + + +def _normalize_build_system_dependencies( + dependencies: tuple[str, ...], + requires_arx: str | None, +) -> tuple[str, ...]: + """ + title: Ensure build-system dependencies include the Arx compiler package. + parameters: + dependencies: + type: tuple[str, Ellipsis] + requires_arx: + type: str | None + returns: + type: tuple[str, Ellipsis] + """ + if _has_arxlang_dependency(dependencies): + return dependencies + return (_default_build_system_dependency(requires_arx), *dependencies) + + +class ArxProjectError(Exception): + """ + title: Raised when ``.arxproject.toml`` cannot be loaded or validated. + """ + + +@dataclass(frozen=True) +class Author: + """ + title: One author entry declared in ``[project].authors``. + attributes: + name: + type: str + email: + type: str | None + """ + + name: str + email: str | None = None + + +@dataclass(frozen=True) +class Project: + """ + title: Parsed project section of .arxproject.toml. + attributes: + name: + type: str + version: + type: str + edition: + type: str | None + description: + type: str | None + license: + type: str | None + authors: + type: tuple[Author, Ellipsis] + dependencies: + type: tuple[str, Ellipsis] + requires_arx: + type: str | None + """ + + name: str + version: str + edition: str | None = None + description: str | None = None + license: str | None = None + authors: tuple[Author, ...] = () + dependencies: tuple[str, ...] = () + requires_arx: str | None = None + + +@dataclass(frozen=True) +class DependencyGroupInclude: + """ + title: Include one named dependency group inside another group. 
+ attributes: + include_group: + type: str + """ + + include_group: str + + +DependencyGroupEntry = str | DependencyGroupInclude + + +@dataclass(frozen=True) +class Environment: + """ + title: Parsed environment section of .arxproject.toml. + attributes: + kind: + type: str | None + name: + type: str | None + path: + type: str | None + """ + + kind: str | None = None + name: str | None = None + path: str | None = None + + +@dataclass(frozen=True) +class Build: + """ + title: Parsed build section of .arxproject.toml. + attributes: + src_dir: + type: str | None + package: + type: str | None + out_dir: + type: str | None + mode: + type: str | None + """ + + src_dir: str | None = None + package: str | None = None + out_dir: str | None = None + mode: str | None = None + + +@dataclass(frozen=True) +class BuildSystem: + """ + title: Parsed build-system section of .arxproject.toml. + attributes: + dependencies: + type: tuple[str, Ellipsis] + """ + + dependencies: tuple[str, ...] = () + + +@dataclass(frozen=True) +class ArxpmDependencyGroup: + """ + title: Legacy placeholder for removed ``[arxpm.*]`` sections. + attributes: + dependencies: + type: tuple[str, Ellipsis] + """ + + dependencies: tuple[str, ...] = () + + +@dataclass(frozen=True) +class Arxpm: + """ + title: Legacy placeholder for the removed ``[arxpm]`` section. + attributes: + dependencies: + type: ArxpmDependencyGroup | None + dependencies_dev: + type: ArxpmDependencyGroup | None + extras: + type: dict[str, Any] + """ + + dependencies: ArxpmDependencyGroup | None = None + dependencies_dev: ArxpmDependencyGroup | None = None + extras: dict[str, Any] = field(default_factory=dict) + + +@dataclass(frozen=True) +class Tests: + """ + title: Parsed tests section of .arxproject.toml. + attributes: + paths: + type: tuple[str, Ellipsis] | None + exclude: + type: tuple[str, Ellipsis] | None + file_pattern: + type: str | None + function_pattern: + type: str | None + """ + + paths: tuple[str, ...] 
| None = None + exclude: tuple[str, ...] | None = None + file_pattern: str | None = None + function_pattern: str | None = None + + +@dataclass(frozen=True) +class ArxProject: + """ + title: Full parsed ``.arxproject.toml`` document. + attributes: + project: + type: Project + environment: + type: Environment | None + build: + type: Build | None + build_system: + type: BuildSystem + dependency_groups: + type: dict[str, tuple[DependencyGroupEntry, Ellipsis]] + arxpm: + type: Arxpm | None + tests: + type: Tests | None + source_path: + type: Path | None + """ + + project: Project + environment: Environment | None = None + build: Build | None = None + build_system: BuildSystem = field(default_factory=BuildSystem) + dependency_groups: dict[str, tuple[DependencyGroupEntry, ...]] = field( + default_factory=dict + ) + arxpm: Arxpm | None = None + tests: Tests | None = None + source_path: Path | None = None + + def __post_init__(self) -> None: + """ + title: Normalize effective build-system defaults. + """ + dependencies = _normalize_build_system_dependencies( + self.build_system.dependencies, + self.project.requires_arx, + ) + object.__setattr__( + self, + "build_system", + BuildSystem(dependencies=dependencies), + ) + + +@lru_cache(maxsize=1) +def _schema() -> dict[str, Any]: + """ + title: Load and cache the ``.arxproject.toml`` JSON schema. + returns: + type: dict[str, Any] + """ + with ( + files("aix.schema") + .joinpath("arxproject.json") + .open(encoding="utf-8") as fh + ): + return cast(dict[str, Any], json.load(fh)) + + +def _build_author(data: dict[str, Any]) -> Author: + """ + title: Build one Author dataclass from its validated mapping. + parameters: + data: + type: dict[str, Any] + returns: + type: Author + """ + return Author(name=data["name"], email=data.get("email")) + + +def _build_project(data: dict[str, Any]) -> Project: + """ + title: Build the Project dataclass from its validated mapping. 
+ parameters: + data: + type: dict[str, Any] + returns: + type: Project + """ + authors = tuple(_build_author(entry) for entry in data.get("authors", [])) + return Project( + name=data["name"], + version=data["version"], + requires_arx=data.get("requires-arx"), + edition=data.get("edition"), + description=data.get("description"), + license=data.get("license"), + authors=authors, + dependencies=tuple(data.get("dependencies", ())), + ) + + +def _build_dependency_group_entry( + data: str | dict[str, Any], +) -> DependencyGroupEntry: + """ + title: Build one dependency-group entry from validated manifest data. + parameters: + data: + type: str | dict[str, Any] + returns: + type: DependencyGroupEntry + """ + if isinstance(data, str): + return data + return DependencyGroupInclude(include_group=data["include-group"]) + + +def _build_dependency_groups( + data: dict[str, Any] | None, +) -> dict[str, tuple[DependencyGroupEntry, ...]]: + """ + title: Build dependency groups from their validated mapping. + parameters: + data: + type: dict[str, Any] | None + returns: + type: dict[str, tuple[DependencyGroupEntry, Ellipsis]] + """ + if data is None: + return {} + + return { + name: tuple(_build_dependency_group_entry(entry) for entry in entries) + for name, entries in data.items() + } + + +def _build_tests(data: dict[str, Any] | None) -> Tests | None: + """ + title: Build the Tests dataclass from its validated mapping. 
+ parameters: + data: + type: dict[str, Any] | None + returns: + type: Tests | None + """ + if data is None: + return None + raw_paths = data.get("paths") + raw_exclude = data.get("exclude") + return Tests( + paths=tuple(raw_paths) if raw_paths is not None else None, + exclude=tuple(raw_exclude) if raw_exclude is not None else None, + file_pattern=data.get("file_pattern"), + function_pattern=data.get("function_pattern"), + ) + + +def _build_build_system( + data: dict[str, Any] | None, + project: Project, +) -> BuildSystem: + """ + title: Build the BuildSystem dataclass with effective defaults. + parameters: + data: + type: dict[str, Any] | None + project: + type: Project + returns: + type: BuildSystem + """ + if data is None: + return BuildSystem( + dependencies=( + _default_build_system_dependency(project.requires_arx), + ) + ) + + dependencies = tuple(data.get("dependencies", ())) + return BuildSystem( + dependencies=_normalize_build_system_dependencies( + dependencies, + project.requires_arx, + ) + ) + + +def _resolved_src_dir(build: Build | None) -> str: + """ + title: Resolve the effective source directory for one project. + parameters: + build: + type: Build | None + returns: + type: str + """ + if build is None or build.src_dir is None: + return _DEFAULT_SRC_DIR + return build.src_dir + + +def resolve_source_root(project: ArxProject) -> Path: + """ + title: Resolve the effective source root from manifest defaults. + parameters: + project: + type: ArxProject + returns: + type: Path + """ + if project.source_path is None: + raise ArxProjectError( + "cannot resolve the project source root without a manifest path" + ) + return ( + project.source_path.parent / _resolved_src_dir(project.build) + ).resolve() + + +def _reject_arxpm_sections(data: dict[str, Any]) -> None: + """ + title: Reject removed ``[arxpm]`` manifest sections with a clear error. 
+ parameters: + data: + type: dict[str, Any] + """ + if "arxpm" not in data: + return + raise ArxProjectError( + ".arxproject.toml does not support [arxpm] sections. " + "Declare dependencies in [project] using " + 'dependencies = ["name", "name>=1.0,<2", "name @ ../path"].' + ) + + +def _reject_toolchain_sections(data: dict[str, Any]) -> None: + """ + title: Reject removed ``[toolchain]`` manifest sections. + parameters: + data: + type: dict[str, Any] + """ + if "toolchain" not in data: + return + raise ArxProjectError( + ".arxproject.toml does not support [toolchain] sections. " + "Declare compiler/build requirements in [build-system] using " + 'dependencies = ["arxlang..."].' + ) + + +def _validate_dependency(value: str, location: str) -> None: + """ + title: Validate one dependency entry from ``.arxproject.toml``. + parameters: + value: + type: str + location: + type: str + """ + try: + Requirement(value) + except InvalidRequirement as err: + raise ArxProjectError( + f".arxproject.toml {location} must be a valid dependency " + 'requirement like "http", "sciarx>=0.0.3,<1", or ' + '"mylib @ ../mylib".' + ) from err + + +def _validate_project(data: dict[str, Any]) -> None: + """ + title: Validate project-only settings rules after schema validation. + parameters: + data: + type: dict[str, Any] + """ + requires_arx = data.get("requires-arx") + if requires_arx is not None: + _validate_requires_arx(requires_arx) + + for index, value in enumerate(data.get("dependencies", ())): + _validate_dependency(value, f"project.dependencies[{index}]") + + +def _validate_build_system_dependency(value: str, location: str) -> None: + """ + title: Validate one installable build-system dependency requirement. + parameters: + value: + type: str + location: + type: str + """ + try: + Requirement(value) + except InvalidRequirement as err: + raise ArxProjectError( + f".arxproject.toml {location} must be a valid dependency " + 'requirement like "arxlang>=1.0,<2".' 
+ ) from err + + +def _validate_build_system( + data: dict[str, Any] | None, + project: dict[str, Any], +) -> None: + """ + title: Validate build-system-only settings after schema validation. + parameters: + data: + type: dict[str, Any] | None + project: + type: dict[str, Any] + """ + raw_dependencies: tuple[str, ...] = () + if data is not None: + raw_dependencies = tuple(cast(list[str], data.get("dependencies", ()))) + + for index, value in enumerate(raw_dependencies): + _validate_build_system_dependency( + value, + f"build-system.dependencies[{index}]", + ) + + if _has_arxlang_dependency(raw_dependencies): + return + + _validate_build_system_dependency( + _default_build_system_dependency( + cast(str | None, project.get("requires-arx")) + ), + "build-system.dependencies default arxlang dependency", + ) + + +def _validate_requires_arx(value: str) -> None: + """ + title: Validate a ``project.requires-arx`` version specifier. + parameters: + value: + type: str + """ + if not value.strip(): + raise ArxProjectError( + ".arxproject.toml project.requires-arx must not be empty." + ) + + try: + SpecifierSet(value) + except InvalidSpecifier as err: + raise ArxProjectError( + ".arxproject.toml project.requires-arx must be a valid " + 'version specifier like ">=1.0,<2".' + ) from err + + +def _validate_dependency_group_name(name: str, location: str) -> None: + """ + title: Validate one dependency-group name. + parameters: + name: + type: str + location: + type: str + """ + if _DEPENDENCY_GROUP_NAME_PATTERN.fullmatch(name) is not None: + return + raise ArxProjectError( + f".arxproject.toml {location} must use only letters, numbers, " + '".", "_" or "-", and start with a letter or number.' + ) + + +def _normalize_dependency_group_name(name: str) -> str: + """ + title: Normalize one dependency-group name for semantic comparison. 
+ parameters: + name: + type: str + returns: + type: str + """ + return _DEPENDENCY_GROUP_NORMALIZE_PATTERN.sub("-", name).lower() + + +def _dependency_group_name_mapping( + dependency_groups: dict[str, list[Any]], +) -> dict[str, str]: + """ + title: Map normalized dependency-group names to their declared names. + parameters: + dependency_groups: + type: dict[str, list[Any]] + returns: + type: dict[str, str] + """ + normalized_names: dict[str, str] = {} + for name in dependency_groups: + normalized_name = _normalize_dependency_group_name(name) + existing_name = normalized_names.get(normalized_name) + if existing_name is not None: + raise ArxProjectError( + ".arxproject.toml [dependency-groups] names " + f'"{existing_name}" and "{name}" normalize to the ' + f'same name "{normalized_name}".' + ) + normalized_names[normalized_name] = name + return normalized_names + + +def _dependency_group_includes(entries: list[Any]) -> tuple[str, ...]: + """ + title: Collect included group names from raw dependency-group entries. + parameters: + entries: + type: list[Any] + returns: + type: tuple[str, Ellipsis] + """ + includes: list[str] = [] + for entry in entries: + if not isinstance(entry, dict): + continue + include_group = entry.get("include-group") + if isinstance(include_group, str): + includes.append(include_group) + return tuple(includes) + + +def _detect_dependency_group_cycles( + dependency_groups: dict[str, list[Any]], + normalized_names: dict[str, str], +) -> None: + """ + title: Reject dependency-group include cycles. + parameters: + dependency_groups: + type: dict[str, list[Any]] + normalized_names: + type: dict[str, str] + """ + visited: set[str] = set() + + def visit(name: str, ancestry: list[str]) -> None: + """ + title: Visit one dependency group while checking for include cycles. 
+ parameters: + name: + type: str + ancestry: + type: list[str] + """ + if name in ancestry: + cycle_start = ancestry.index(name) + cycle = [*ancestry[cycle_start:], name] + cycle_text = " -> ".join(cycle) + raise ArxProjectError( + ".arxproject.toml dependency-groups includes must not " + f"form cycles ({cycle_text})." + ) + + if name in visited: + return + + ancestry.append(name) + for included_name in _dependency_group_includes( + dependency_groups[name] + ): + resolved_name = normalized_names[ + _normalize_dependency_group_name(included_name) + ] + visit(resolved_name, ancestry) + ancestry.pop() + visited.add(name) + + for name in dependency_groups: + visit(name, []) + + +def _validate_dependency_groups(data: dict[str, Any]) -> None: + """ + title: Validate dependency-group rules after schema validation. + parameters: + data: + type: dict[str, Any] + """ + raw_dependency_groups = data.get("dependency-groups") + if raw_dependency_groups is None: + return + + dependency_groups = cast(dict[str, list[Any]], raw_dependency_groups) + + for name in dependency_groups: + _validate_dependency_group_name( + name, + f'[dependency-groups] key "{name}"', + ) + + normalized_names = _dependency_group_name_mapping(dependency_groups) + + for name, entries in dependency_groups.items(): + for index, entry in enumerate(entries): + if isinstance(entry, str): + _validate_dependency( + entry, + f"dependency-groups.{name}[{index}]", + ) + continue + + if not isinstance(entry, dict): + raise ArxProjectError( + ".arxproject.toml dependency-groups." + f"{name}[{index}] must be a dependency string or " + '{ include-group = "name" }.' + ) + + keys = set(entry) + if keys != {"include-group"}: + raise ArxProjectError( + ".arxproject.toml dependency-groups." + f"{name}[{index}] must be exactly " + '{ include-group = "name" }.' + ) + + include_group = entry.get("include-group") + if not isinstance(include_group, str): + raise ArxProjectError( + ".arxproject.toml dependency-groups." 
+ f"{name}[{index}].include-group must be a string." + ) + + _validate_dependency_group_name( + include_group, + (f"dependency-groups.{name}[{index}].include-group"), + ) + + normalized_include_group = _normalize_dependency_group_name( + include_group + ) + if normalized_include_group not in normalized_names: + raise ArxProjectError( + ".arxproject.toml dependency-groups." + f"{name}[{index}] includes unknown group " + f'"{include_group}".' + ) + + _detect_dependency_group_cycles(dependency_groups, normalized_names) + + +def _reject_legacy_environment_kind(data: dict[str, Any] | None) -> None: + """ + title: Reject removed environment kinds with a migration hint. + parameters: + data: + type: dict[str, Any] | None + """ + if data is None: + return + + kind = data.get("kind") + if kind not in {"managed-venv", "existing-venv"}: + return + + raise ArxProjectError( + f'[environment] kind="{kind}" is no longer supported. ' + 'Use kind="venv" instead.' + ) + + +def _validate_environment(data: dict[str, Any] | None) -> None: + """ + title: Validate environment-only settings rules after schema validation. + parameters: + data: + type: dict[str, Any] | None + """ + if data is None: + return + + kind = data["kind"] + if kind == "venv": + if "name" not in data: + return + raise ArxProjectError( + '[environment] kind="venv" does not support "name".' + ) + + if kind == "system": + unsupported = [ + field_name for field_name in ("name", "path") if field_name in data + ] + if not unsupported: + return + fields = ", ".join(f'"{field_name}"' for field_name in unsupported) + raise ArxProjectError( + f'[environment] kind="system" does not support {fields}.' + ) + + if kind == "conda": + if "name" in data or "path" in data: + return + raise ArxProjectError( + '[environment] kind="conda" requires at least one of ' + '"name" or "path".' + ) + + +def _validate_data(data: dict[str, Any]) -> None: + """ + title: Validate parsed ``.arxproject.toml`` data before building models. 
+ parameters: + data: + type: dict[str, Any] + """ + _reject_arxpm_sections(data) + _reject_toolchain_sections(data) + _reject_legacy_environment_kind(data.get("environment")) + + try: + validate(instance=data, schema=_schema()) + except ValidationError as err: + raise ArxProjectError( + f".arxproject.toml failed schema validation: {err.message}" + ) from err + + _validate_project(data["project"]) + _validate_build_system(data.get("build-system"), data["project"]) + _validate_dependency_groups(data) + _validate_environment(data.get("environment")) + + +def _build_arx_project( + data: dict[str, Any], + source_path: Path | None, +) -> ArxProject: + """ + title: Build the ArxProject dataclass from its validated mapping. + parameters: + data: + type: dict[str, Any] + source_path: + type: Path | None + returns: + type: ArxProject + """ + environment_data = data.get("environment") + build_data = data.get("build") + project = _build_project(data["project"]) + return ArxProject( + project=project, + environment=( + Environment(**environment_data) + if environment_data is not None + else None + ), + build=Build(**build_data) if build_data is not None else None, + build_system=_build_build_system(data.get("build-system"), project), + dependency_groups=_build_dependency_groups( + data.get("dependency-groups") + ), + tests=_build_tests(data.get("tests")), + source_path=source_path, + ) + + +def _author_to_mapping(author: Author) -> dict[str, str]: + """ + title: Convert one Author dataclass into a TOML-safe mapping. + parameters: + author: + type: Author + returns: + type: dict[str, str] + """ + data = {"name": author.name} + if author.email is not None: + data["email"] = author.email + return data + + +def _settings_to_data(settings: ArxProject) -> dict[str, Any]: + """ + title: Convert settings dataclasses into raw manifest data. 
+ parameters: + settings: + type: ArxProject + returns: + type: dict[str, Any] + """ + if settings.arxpm is not None: + raise ArxProjectError( + "ArxProject.arxpm is no longer supported. " + "Declare dependencies in project.dependencies instead." + ) + + project: dict[str, Any] = { + "name": settings.project.name, + "version": settings.project.version, + } + if settings.project.requires_arx is not None: + project["requires-arx"] = settings.project.requires_arx + if settings.project.edition is not None: + project["edition"] = settings.project.edition + if settings.project.description is not None: + project["description"] = settings.project.description + if settings.project.license is not None: + project["license"] = settings.project.license + if settings.project.dependencies: + project["dependencies"] = list(settings.project.dependencies) + if settings.project.authors: + project["authors"] = [ + _author_to_mapping(author) for author in settings.project.authors + ] + + data: dict[str, Any] = {"project": project} + + default_build_system_dependencies = _normalize_build_system_dependencies( + (), + settings.project.requires_arx, + ) + if settings.build_system.dependencies != default_build_system_dependencies: + data["build-system"] = { + "dependencies": list(settings.build_system.dependencies) + } + + if settings.dependency_groups: + dependency_groups: dict[str, list[str | dict[str, str]]] = {} + for group_name, entries in settings.dependency_groups.items(): + if not isinstance(group_name, str): + raise ArxProjectError( + "ArxProject.dependency_groups keys must be strings." + ) + + dependency_groups[group_name] = [] + for index, entry in enumerate(entries): + if isinstance(entry, str): + dependency_groups[group_name].append(entry) + continue + + if isinstance(entry, DependencyGroupInclude): + dependency_groups[group_name].append( + {"include-group": entry.include_group} + ) + continue + + raise ArxProjectError( + "ArxProject.dependency_groups." 
+ f"{group_name}[{index}] must be a string or " + "DependencyGroupInclude." + ) + + data["dependency-groups"] = dependency_groups + + if settings.environment is not None: + environment: dict[str, Any] = {} + if settings.environment.kind is not None: + environment["kind"] = settings.environment.kind + if settings.environment.name is not None: + environment["name"] = settings.environment.name + if settings.environment.path is not None: + environment["path"] = settings.environment.path + data["environment"] = environment + + if settings.build is not None: + build: dict[str, Any] = {} + if settings.build.src_dir is not None: + build["src_dir"] = settings.build.src_dir + if settings.build.package is not None: + build["package"] = settings.build.package + if settings.build.out_dir is not None: + build["out_dir"] = settings.build.out_dir + if settings.build.mode is not None: + build["mode"] = settings.build.mode + data["build"] = build + + if settings.tests is not None: + tests: dict[str, Any] = {} + if settings.tests.paths is not None: + tests["paths"] = list(settings.tests.paths) + if settings.tests.exclude is not None: + tests["exclude"] = list(settings.tests.exclude) + if settings.tests.file_pattern is not None: + tests["file_pattern"] = settings.tests.file_pattern + if settings.tests.function_pattern is not None: + tests["function_pattern"] = settings.tests.function_pattern + data["tests"] = tests + + return data + + +def _format_toml_string(value: str) -> str: + """ + title: Quote one TOML basic string with JSON-compatible escapes. + parameters: + value: + type: str + returns: + type: str + """ + return json.dumps(value, ensure_ascii=False) + + +def _append_string_array( + lines: list[str], + key: str, + values: tuple[str, ...] | list[str], +) -> None: + """ + title: Append a canonical multiline string array to TOML output. 
+ parameters: + lines: + type: list[str] + key: + type: str + values: + type: tuple[str, Ellipsis] | list[str] + """ + lines.append(f"{key} = [") + for value in values: + lines.append(f" {_format_toml_string(value)},") + lines.append("]") + + +def _append_dependency_group_array( + lines: list[str], + key: str, + values: tuple[DependencyGroupEntry, ...], +) -> None: + """ + title: Append one dependency-group entry array to TOML output. + parameters: + lines: + type: list[str] + key: + type: str + values: + type: tuple[DependencyGroupEntry, Ellipsis] + """ + lines.append(f"{_format_toml_string(key)} = [") + for value in values: + if isinstance(value, str): + lines.append(f" {_format_toml_string(value)},") + continue + + lines.append( + " { include-group = " + f"{_format_toml_string(value.include_group)} }}," + ) + lines.append("]") + + +def _append_project(lines: list[str], project: Project) -> None: + """ + title: Append the canonical ``[project]`` section. + parameters: + lines: + type: list[str] + project: + type: Project + """ + lines.append("[project]") + lines.append(f"name = {_format_toml_string(project.name)}") + lines.append(f"version = {_format_toml_string(project.version)}") + if project.requires_arx is not None: + lines.append( + f"requires-arx = {_format_toml_string(project.requires_arx)}" + ) + if project.edition is not None: + lines.append(f"edition = {_format_toml_string(project.edition)}") + if project.description is not None: + lines.append( + f"description = {_format_toml_string(project.description)}" + ) + if project.license is not None: + lines.append(f"license = {_format_toml_string(project.license)}") + if project.dependencies: + _append_string_array(lines, "dependencies", project.dependencies) + if project.authors: + lines.append("authors = [") + for author in project.authors: + entries = [f"name = {_format_toml_string(author.name)}"] + if author.email is not None: + entries.append(f"email = {_format_toml_string(author.email)}") + 
lines.append(f" {{ {', '.join(entries)} }},") + lines.append("]") + + +def _append_dependency_groups( + lines: list[str], + dependency_groups: dict[str, tuple[DependencyGroupEntry, ...]], +) -> None: + """ + title: Append the canonical ``[dependency-groups]`` section. + parameters: + lines: + type: list[str] + dependency_groups: + type: dict[str, tuple[DependencyGroupEntry, Ellipsis]] + """ + if not dependency_groups: + return + + lines.extend(("", "[dependency-groups]")) + for name, entries in dependency_groups.items(): + _append_dependency_group_array(lines, name, entries) + + +def _append_environment( + lines: list[str], + environment: Environment | None, +) -> None: + """ + title: Append the canonical ``[environment]`` section when present. + parameters: + lines: + type: list[str] + environment: + type: Environment | None + """ + if environment is None: + return + lines.extend(("", "[environment]")) + if environment.kind is not None: + lines.append(f"kind = {_format_toml_string(environment.kind)}") + if environment.name is not None: + lines.append(f"name = {_format_toml_string(environment.name)}") + if environment.path is not None: + lines.append(f"path = {_format_toml_string(environment.path)}") + + +def _append_build(lines: list[str], build: Build | None) -> None: + """ + title: Append the canonical ``[build]`` section when present. 
+ parameters: + lines: + type: list[str] + build: + type: Build | None + """ + if build is None: + return + lines.extend(("", "[build]")) + if build.src_dir is not None: + lines.append(f"src_dir = {_format_toml_string(build.src_dir)}") + if build.package is not None: + lines.append(f"package = {_format_toml_string(build.package)}") + if build.out_dir is not None: + lines.append(f"out_dir = {_format_toml_string(build.out_dir)}") + if build.mode is not None: + lines.append(f"mode = {_format_toml_string(build.mode)}") + + +def _append_build_system( + lines: list[str], + build_system: BuildSystem, + project: Project, +) -> None: + """ + title: Append the canonical ``[build-system]`` section when needed. + parameters: + lines: + type: list[str] + build_system: + type: BuildSystem + project: + type: Project + """ + default_dependencies = _normalize_build_system_dependencies( + (), + project.requires_arx, + ) + if build_system.dependencies == default_dependencies: + return + lines.extend(("", "[build-system]")) + _append_string_array(lines, "dependencies", build_system.dependencies) + + +def _append_tests(lines: list[str], tests: Tests | None) -> None: + """ + title: Append the canonical ``[tests]`` section when present. + parameters: + lines: + type: list[str] + tests: + type: Tests | None + """ + if tests is None: + return + lines.extend(("", "[tests]")) + if tests.paths is not None: + _append_string_array(lines, "paths", tests.paths) + if tests.exclude is not None: + _append_string_array(lines, "exclude", tests.exclude) + if tests.file_pattern is not None: + lines.append( + f"file_pattern = {_format_toml_string(tests.file_pattern)}" + ) + if tests.function_pattern is not None: + lines.append( + f"function_pattern = {_format_toml_string(tests.function_pattern)}" + ) + + +def dump_settings(settings: ArxProject) -> str: + """ + title: Serialize settings into canonical ``.arxproject.toml`` text. 
+ parameters: + settings: + type: ArxProject + returns: + type: str + """ + _validate_data(_settings_to_data(settings)) + + lines: list[str] = [] + _append_project(lines, settings.project) + _append_build_system(lines, settings.build_system, settings.project) + _append_dependency_groups(lines, settings.dependency_groups) + _append_environment(lines, settings.environment) + _append_build(lines, settings.build) + _append_tests(lines, settings.tests) + return "\n".join(lines) + "\n" + + +def write_settings( + settings: ArxProject, + path: str | Path = DEFAULT_CONFIG_FILENAME, +) -> Path: + """ + title: Write canonical ``.arxproject.toml`` text to disk. + parameters: + settings: + type: ArxProject + path: + type: str | Path + returns: + type: Path + """ + target = Path(path) + target.write_text(dump_settings(settings), encoding="utf-8") + return target + + +def find_config_file(start: Path | None = None) -> Path | None: + """ + title: Walk upward from ``start`` to locate ``.arxproject.toml``. + parameters: + start: + type: Path | None + returns: + type: Path | None + """ + current = (start or Path.cwd()).resolve() + while True: + candidate = current / DEFAULT_CONFIG_FILENAME + if candidate.is_file(): + return candidate + if current.parent == current: + return None + current = current.parent + + +def load_settings_from_text( + content: str, + source_path: Path | None = None, +) -> ArxProject: + """ + title: Parse and validate one ``.arxproject.toml`` string. + parameters: + content: + type: str + source_path: + type: Path | None + returns: + type: ArxProject + """ + try: + data = tomllib.loads(content) + except tomllib.TOMLDecodeError as err: + raise ArxProjectError( + f"Invalid TOML in .arxproject.toml: {err}" + ) from err + + _validate_data(data) + return _build_arx_project(data, source_path) + + +def load_settings(path: str | Path | None = None) -> ArxProject: + """ + title: Load and validate ``.arxproject.toml`` into a typed dataclass. 
+ parameters: + path: + type: str | Path | None + returns: + type: ArxProject + """ + if path is None: + discovered = find_config_file() + if discovered is None: + raise ArxProjectError( + f"could not find {DEFAULT_CONFIG_FILENAME} in the current " + "directory or any parent" + ) + resolved = discovered + else: + resolved = Path(path) + if not resolved.is_file(): + raise ArxProjectError( + f"{DEFAULT_CONFIG_FILENAME} not found at {resolved}" + ) + + content = resolved.read_text(encoding="utf-8") + return load_settings_from_text(content, source_path=resolved) diff --git a/packages/aix/src/aix/stdlib/__init__.x b/packages/aix/src/aix/stdlib/__init__.x new file mode 100644 index 0000000..5a7a443 --- /dev/null +++ b/packages/aix/src/aix/stdlib/__init__.x @@ -0,0 +1,4 @@ +``` +title: Bundled stdlib package +summary: Root namespace for first-party Arx standard-library modules. +``` diff --git a/packages/aix/src/aix/stdlib/math.x b/packages/aix/src/aix/stdlib/math.x new file mode 100644 index 0000000..71fca65 --- /dev/null +++ b/packages/aix/src/aix/stdlib/math.x @@ -0,0 +1,54 @@ +``` +title: Bundled stdlib math module +summary: Pure-Arx integer helpers shipped with the compiler package. +``` + +fn abs(value: i32) -> i32: + ``` + title: abs + summary: Returns the absolute value of one signed integer. + ``` + if value < 0: + return 0 - value + else: + return value + +fn min(lhs: i32, rhs: i32) -> i32: + ``` + title: min + summary: Returns the smaller of two signed integers. + ``` + if lhs < rhs: + return lhs + else: + return rhs + +fn max(lhs: i32, rhs: i32) -> i32: + ``` + title: max + summary: Returns the larger of two signed integers. + ``` + if lhs > rhs: + return lhs + else: + return rhs + +fn clamp(value: i32, lower: i32, upper: i32) -> i32: + ``` + title: clamp + summary: Restricts one signed integer to the inclusive lower and upper bound. 
+ ``` + if value < lower: + return lower + else: + if value > upper: + return upper + else: + return value + +fn square(value: i32) -> i32: + ``` + title: square + summary: Returns the square of one signed integer value. + ``` + return value * value diff --git a/packages/aix/src/aix/tensor.py b/packages/aix/src/aix/tensor.py new file mode 100644 index 0000000..e8645ad --- /dev/null +++ b/packages/aix/src/aix/tensor.py @@ -0,0 +1,610 @@ +""" +title: Tensor surface helpers for Arx. +summary: >- + Adapt Arx surface tensor syntax to IRx Tensor nodes while keeping user-facing + shape and indexing rules local to Arx. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from math import prod +from typing import cast + +import astx + +from irx.analysis.resolved_nodes import SemanticInfo +from irx.buffer import BufferMutability, BufferOwnership, buffer_view_flags +from irx.builtins.collections.tensor import ( + TENSOR_ELEMENT_TYPE_EXTRA, + TENSOR_FLAGS_EXTRA, + TENSOR_LAYOUT_EXTRA, + TensorLayout, + tensor_buffer_dtype, + tensor_default_strides, + tensor_element_size_bytes, + tensor_is_c_contiguous, + tensor_is_f_contiguous, +) + +TENSOR_SHAPE_ATTR = "_arx_tensor_shape" +TENSOR_SURFACE_ATTR = "_arx_tensor_surface" + + +@dataclass(frozen=True) +class TensorBinding: + """ + title: Static tensor binding metadata. + attributes: + element_type: + type: astx.DataType + layout: + type: TensorLayout + flags: + type: int + """ + + element_type: astx.DataType + layout: TensorLayout + flags: int + + +def _shape_of(data_type: astx.DataType) -> tuple[int, ...] | None: + """ + title: Return the declared tensor shape stored on one type. 
+ parameters: + data_type: + type: astx.DataType + returns: + type: tuple[int, Ellipsis] | None + """ + if isinstance(data_type, astx.TensorType) and data_type.shape is not None: + return data_type.shape + + shape = getattr(data_type, TENSOR_SHAPE_ATTR, None) + if isinstance(shape, tuple) and all(isinstance(dim, int) for dim in shape): + return cast(tuple[int, ...], shape) + return None + + +def _mark_tensor_type( + data_type: astx.TensorType, + shape: tuple[int, ...] | None, +) -> astx.TensorType: + """ + title: Mark one IRx TensorType as originating from Arx syntax. + parameters: + data_type: + type: astx.TensorType + shape: + type: tuple[int, Ellipsis] | None + returns: + type: astx.TensorType + """ + setattr(data_type, TENSOR_SURFACE_ATTR, True) + data_type.shape = shape + if shape is not None: + setattr(data_type, TENSOR_SHAPE_ATTR, shape) + return data_type + + +def is_tensor_type(data_type: astx.DataType | None) -> bool: + """ + title: Return whether one type is an Arx tensor surface type. + parameters: + data_type: + type: astx.DataType | None + returns: + type: bool + """ + return ( + isinstance(data_type, astx.TensorType) + and getattr(data_type, TENSOR_SURFACE_ATTR, False) is True + ) + + +def tensor_shape(data_type: astx.DataType | None) -> tuple[int, ...] | None: + """ + title: Return the declared tensor shape when available. + parameters: + data_type: + type: astx.DataType | None + returns: + type: tuple[int, Ellipsis] | None + """ + if data_type is None: + return None + return _shape_of(data_type) + + +def tensor_type( + element_type: astx.DataType, + shape: tuple[int, ...], +) -> astx.TensorType: + """ + title: Build one static-shape tensor surface type. 
+ parameters: + element_type: + type: astx.DataType + shape: + type: tuple[int, Ellipsis] + returns: + type: astx.TensorType + """ + _element_size_bytes(element_type) + if not shape: + raise ValueError("tensor shapes must include at least one dimension") + if any(dim < 0 for dim in shape): + raise ValueError("tensor dimensions must be non-negative") + return _mark_tensor_type( + astx.TensorType(element_type, shape=shape), + shape, + ) + + +def runtime_tensor_type(element_type: astx.DataType) -> astx.TensorType: + """ + title: Build one runtime-shaped tensor surface type. + parameters: + element_type: + type: astx.DataType + returns: + type: astx.TensorType + """ + _element_size_bytes(element_type) + return _mark_tensor_type(astx.TensorType(element_type), None) + + +def binding_from_type( + data_type: astx.DataType | None, +) -> TensorBinding | None: + """ + title: Build one static tensor binding from one declared type. + parameters: + data_type: + type: astx.DataType | None + returns: + type: TensorBinding | None + """ + if not is_tensor_type(data_type): + return None + + shape = tensor_shape(data_type) + element_type = cast(astx.TensorType, data_type).element_type + if shape is None or element_type is None: + return None + + item_size = _element_size_bytes(element_type) + layout = TensorLayout( + shape=shape, + strides=tensor_default_strides(shape, item_size), + offset_bytes=0, + ) + flags = buffer_view_flags( + BufferOwnership.EXTERNAL_OWNER, + BufferMutability.READONLY, + c_contiguous=tensor_is_c_contiguous(layout, item_size), + f_contiguous=tensor_is_f_contiguous(layout, item_size), + ) + return TensorBinding( + element_type=element_type, + layout=layout, + flags=flags, + ) + + +def attach_binding(node: astx.AST, binding: TensorBinding) -> None: + """ + title: Attach static tensor metadata to one AST node. 
+ parameters: + node: + type: astx.AST + binding: + type: TensorBinding + """ + info = cast(SemanticInfo | None, getattr(node, "semantic", None)) + if info is None or not isinstance(info, SemanticInfo): + info = SemanticInfo() + setattr(node, "semantic", info) + info.extras[TENSOR_LAYOUT_EXTRA] = binding.layout + info.extras[TENSOR_ELEMENT_TYPE_EXTRA] = binding.element_type + info.extras[TENSOR_FLAGS_EXTRA] = binding.flags + + +def coerce_expression( + expr: astx.Expr, + target_type: astx.DataType, + *, + context: str, +) -> astx.Expr: + """ + title: Coerce one parsed expression into one declared tensor type. + parameters: + expr: + type: astx.Expr + target_type: + type: astx.DataType + context: + type: str + returns: + type: astx.Expr + """ + if not is_tensor_type(target_type): + return expr + if isinstance(expr, astx.TensorLiteral): + return expr + if not isinstance(expr, astx.Literal): + return expr + return build_literal_from_literal(expr, target_type, context=context) + + +def default_value(target_type: astx.DataType) -> astx.TensorLiteral: + """ + title: Build one default tensor literal for one declared type. + parameters: + target_type: + type: astx.DataType + returns: + type: astx.TensorLiteral + """ + binding = binding_from_type(target_type) + if binding is None: + raise ValueError("default tensor value requires a static tensor shape") + + count = prod(binding.layout.shape) + scalar = _zero_literal(binding.element_type) + values = tuple(_clone_scalar_literal(scalar) for _ in range(count)) + return _literal_from_values(binding, values) + + +def build_literal_from_literal( + expr: astx.Literal, + target_type: astx.DataType, + *, + context: str, +) -> astx.TensorLiteral: + """ + title: Build one tensor literal from one nested literal value. 
+ parameters: + expr: + type: astx.Literal + target_type: + type: astx.DataType + context: + type: str + returns: + type: astx.TensorLiteral + """ + binding = binding_from_type(target_type) + if binding is None: + if not is_tensor_type(target_type): + raise ValueError("tensor literal target must be a tensor type") + raise ValueError( + "tensor literal target requires a static tensor shape" + ) + + shape, values = _flatten_literal(expr) + if shape != binding.layout.shape: + raise ValueError( + f"{context} has shape {_format_shape(shape)} but the declared " + f"tensor shape is {_format_shape(binding.layout.shape)}" + ) + + for value in values: + _validate_scalar_literal(value, binding.element_type, context=context) + + return _literal_from_values(binding, values) + + +def infer_literal(expr: astx.Literal) -> astx.TensorLiteral: + """ + title: Infer one tensor literal directly from one literal value. + parameters: + expr: + type: astx.Literal + returns: + type: astx.TensorLiteral + """ + shape, values = _flatten_literal(expr) + if not values: + raise ValueError( + "cannot infer a tensor element type from an empty literal" + ) + + element_type = _infer_element_type(values[0]) + for value in values: + _validate_scalar_literal( + value, + element_type, + context="tensor literal", + ) + + binding = cast( + TensorBinding, + binding_from_type(tensor_type(element_type, shape)), + ) + return _literal_from_values(binding, values) + + +def literal_values( + node: astx.TensorLiteral, +) -> tuple[astx.AST, ...]: + """ + title: Return one flattened scalar payload from a tensor literal. + parameters: + node: + type: astx.TensorLiteral + returns: + type: tuple[astx.AST, Ellipsis] + """ + return tuple(node.values) + + +def _literal_from_values( + binding: TensorBinding, + values: tuple[astx.Literal, ...], +) -> astx.TensorLiteral: + """ + title: Build one TensorLiteral and attach static metadata. 
+ parameters: + binding: + type: TensorBinding + values: + type: tuple[astx.Literal, Ellipsis] + returns: + type: astx.TensorLiteral + """ + literal = astx.TensorLiteral( + values, + element_type=binding.element_type, + shape=binding.layout.shape, + strides=binding.layout.strides, + offset_bytes=binding.layout.offset_bytes, + ) + _mark_tensor_type(literal.type_, binding.layout.shape) + attach_binding(literal, binding) + return literal + + +def _element_size_bytes(element_type: astx.DataType) -> int: + """ + title: Return the byte width of one supported tensor scalar type. + parameters: + element_type: + type: astx.DataType + returns: + type: int + """ + if not isinstance( + element_type, + ( + astx.Int8, + astx.Int16, + astx.Int32, + astx.Int64, + astx.Float32, + astx.Float64, + ), + ): + raise ValueError( + "tensor element types currently support only i8, i16, i32, " + "i64, f32, and f64" + ) + + size = tensor_element_size_bytes(element_type) + if size is None: + raise ValueError("unsupported tensor element type") + return size + + +def _dtype_handle(element_type: astx.DataType) -> int: + """ + title: Return one IRx dtype token for one tensor scalar type. + parameters: + element_type: + type: astx.DataType + returns: + type: int + """ + _element_size_bytes(element_type) + handle = tensor_buffer_dtype(element_type) + if handle is None or handle.address is None: + raise ValueError("unsupported tensor element type") + return handle.address + + +def _flatten_literal( + expr: astx.Literal, +) -> tuple[tuple[int, ...], tuple[astx.Literal, ...]]: + """ + title: Flatten one nested tensor literal. 
+ parameters: + expr: + type: astx.Literal + returns: + type: tuple[tuple[int, Ellipsis], tuple[astx.Literal, Ellipsis]] + """ + if not isinstance(expr, (astx.LiteralList, astx.LiteralTuple)): + return (), (expr,) + + if not expr.elements: + return (0,), () + + child_shapes: list[tuple[int, ...]] = [] + child_values: list[astx.Literal] = [] + for element in expr.elements: + if not isinstance(element, astx.Literal): + raise ValueError("tensor literals support only literal elements") + child_shape, flat_values = _flatten_literal(element) + child_shapes.append(child_shape) + child_values.extend(flat_values) + + first_shape = child_shapes[0] + if any(shape != first_shape for shape in child_shapes[1:]): + raise ValueError( + "tensor literals must use a regular rectangular shape" + ) + + return (len(expr.elements), *first_shape), tuple(child_values) + + +def _infer_element_type(value: astx.Literal) -> astx.DataType: + """ + title: Infer one tensor scalar type from one literal. + parameters: + value: + type: astx.Literal + returns: + type: astx.DataType + """ + if isinstance(value, (astx.LiteralInt8, astx.LiteralUTF8Char)): + return astx.Int8() + if isinstance(value, astx.LiteralInt16): + return astx.Int16() + if isinstance(value, astx.LiteralInt32): + return astx.Int32() + if isinstance(value, astx.LiteralInt64): + return astx.Int64() + if isinstance(value, astx.LiteralFloat32): + return astx.Float32() + if isinstance(value, astx.LiteralFloat64): + return astx.Float64() + raise ValueError( + "tensor literals currently support only char, integer, and " + "floating-point scalars" + ) + + +def _validate_scalar_literal( + value: astx.Literal, + element_type: astx.DataType, + *, + context: str, +) -> None: + """ + title: Validate one scalar literal against one tensor element type. 
+ parameters: + value: + type: astx.Literal + element_type: + type: astx.DataType + context: + type: str + """ + if isinstance( + element_type, + (astx.Int8, astx.Int16, astx.Int32, astx.Int64), + ): + if isinstance( + value, + ( + astx.LiteralUTF8Char, + astx.LiteralInt8, + astx.LiteralInt16, + astx.LiteralInt32, + astx.LiteralInt64, + ), + ): + return + raise ValueError( + f"{context} expects integer elements compatible with " + f"{type(element_type).__name__}" + ) + + if isinstance(element_type, (astx.Float32, astx.Float64)): + if isinstance( + value, + ( + astx.LiteralInt8, + astx.LiteralInt16, + astx.LiteralInt32, + astx.LiteralInt64, + astx.LiteralFloat32, + astx.LiteralFloat64, + ), + ): + return + raise ValueError( + f"{context} expects numeric elements compatible with " + f"{type(element_type).__name__}" + ) + + raise ValueError("unsupported tensor element type") + + +def _zero_literal(element_type: astx.DataType) -> astx.Literal: + """ + title: Return one zero-value scalar literal for one tensor type. + parameters: + element_type: + type: astx.DataType + returns: + type: astx.Literal + """ + if isinstance(element_type, astx.Int8): + return astx.LiteralInt8(0) + if isinstance(element_type, astx.Int16): + return astx.LiteralInt16(0) + if isinstance(element_type, astx.Int32): + return astx.LiteralInt32(0) + if isinstance(element_type, astx.Int64): + return astx.LiteralInt64(0) + if isinstance(element_type, astx.Float32): + return astx.LiteralFloat32(0.0) + if isinstance(element_type, astx.Float64): + return astx.LiteralFloat64(0.0) + raise ValueError("unsupported tensor element type") + + +def _clone_scalar_literal(value: astx.Literal) -> astx.Literal: + """ + title: Clone one scalar literal so default values do not reuse one node. 
+ parameters: + value: + type: astx.Literal + returns: + type: astx.Literal + """ + if isinstance(value, astx.LiteralInt8): + return astx.LiteralInt8(value.value) + if isinstance(value, astx.LiteralInt16): + return astx.LiteralInt16(value.value) + if isinstance(value, astx.LiteralInt32): + return astx.LiteralInt32(value.value) + if isinstance(value, astx.LiteralInt64): + return astx.LiteralInt64(value.value) + if isinstance(value, astx.LiteralFloat32): + return astx.LiteralFloat32(value.value) + if isinstance(value, astx.LiteralFloat64): + return astx.LiteralFloat64(value.value) + raise ValueError("unsupported tensor element type") + + +def _format_shape(shape: tuple[int, ...]) -> str: + """ + title: Render one tensor shape for user-facing diagnostics. + parameters: + shape: + type: tuple[int, Ellipsis] + returns: + type: str + """ + if not shape: + return "()" + return "(" + ", ".join(str(dim) for dim in shape) + ")" + + +__all__ = [ + "TensorBinding", + "attach_binding", + "binding_from_type", + "build_literal_from_literal", + "coerce_expression", + "default_value", + "infer_literal", + "is_tensor_type", + "literal_values", + "runtime_tensor_type", + "tensor_shape", + "tensor_type", +] diff --git a/packages/aix/src/aix/testing.py b/packages/aix/src/aix/testing.py new file mode 100644 index 0000000..771af43 --- /dev/null +++ b/packages/aix/src/aix/testing.py @@ -0,0 +1,50 @@ +""" +title: Minimal AIX compiled-test placeholders. +""" + +from __future__ import annotations + +from dataclasses import dataclass + +DEFAULT_TEST_PATHS: tuple[str, ...] = ("tests/aix",) +DEFAULT_TEST_FILE_PATTERN = "test_*.aix" +DEFAULT_TEST_FUNCTION_PATTERN = "test_*" + + +@dataclass(frozen=True) +class AixTestSummary: + """ + title: Summary returned by the placeholder test runner. + attributes: + exit_code: + type: int + """ + + exit_code: int + + +class AixTestRunner: + """ + title: Placeholder for future compiled AIX test execution. 
+ """ + + def __init__(self, **kwargs: object) -> None: + """ + title: Store runner options. + parameters: + kwargs: + type: object + variadic: keyword + """ + self.kwargs = kwargs + + def run(self) -> AixTestSummary: + """ + title: Return an unimplemented-test summary. + returns: + type: AixTestSummary + """ + return AixTestSummary(exit_code=2) + + +ArxTestRunner = AixTestRunner diff --git a/packages/aix/tests/aix/test_hello.aix b/packages/aix/tests/aix/test_hello.aix new file mode 100644 index 0000000..cecc4a5 --- /dev/null +++ b/packages/aix/tests/aix/test_hello.aix @@ -0,0 +1,3 @@ +∴ main ⟦⟧ → ∅ + ⟣ "test hello" +∎ diff --git a/packages/aix/tests/python/conftest.py b/packages/aix/tests/python/conftest.py new file mode 100644 index 0000000..e224aef --- /dev/null +++ b/packages/aix/tests/python/conftest.py @@ -0,0 +1,19 @@ +""" +title: AIX test import path setup. +""" + +from __future__ import annotations + +import sys + +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[4] +for path in ( + ROOT / "packages" / "aix" / "src", + ROOT / "packages" / "astx" / "src", + ROOT / "packages" / "irx" / "src", +): + text = str(path) + if text not in sys.path: + sys.path.insert(0, text) diff --git a/packages/aix/tests/python/test_aix_cli.py b/packages/aix/tests/python/test_aix_cli.py new file mode 100644 index 0000000..6882d77 --- /dev/null +++ b/packages/aix/tests/python/test_aix_cli.py @@ -0,0 +1,36 @@ +""" +title: AIX CLI tests. 
+""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from aix.cli import app + +EXAMPLES = Path(__file__).resolve().parents[2] / "examples" + + +def test_cli_help(capsys: pytest.CaptureFixture[str]) -> None: + with pytest.raises(SystemExit) as excinfo: + app(["--help"]) + assert excinfo.value.code == 0 + assert "AIX compiler frontend" in capsys.readouterr().out + + +def test_cli_show_tokens(capsys: pytest.CaptureFixture[str]) -> None: + app(["--show-tokens", str(EXAMPLES / "hello.aix")]) + assert "define" in capsys.readouterr().out + + +def test_cli_show_ast(capsys: pytest.CaptureFixture[str]) -> None: + app(["--show-ast", str(EXAMPLES / "hello.aix")]) + assert "main" in capsys.readouterr().out + + +def test_cli_test_list(capsys: pytest.CaptureFixture[str]) -> None: + tests_dir = Path(__file__).resolve().parents[1] / "aix" + app(["test", "--list", str(tests_dir)]) + assert "test_hello.aix" in capsys.readouterr().out diff --git a/packages/aix/tests/python/test_aix_lexer.py b/packages/aix/tests/python/test_aix_lexer.py new file mode 100644 index 0000000..3007495 --- /dev/null +++ b/packages/aix/tests/python/test_aix_lexer.py @@ -0,0 +1,73 @@ +# ruff: noqa: RUF001 +""" +title: AIX lexer tests. +""" + +from __future__ import annotations + +import pytest + +from aix.lexer import Lexer, LexerError, TokenKind + + +def kinds(source: str) -> list[TokenKind]: + """ + title: Return token kinds excluding EOF. 
+ parameters: + source: + type: str + returns: + type: list[TokenKind] + """ + return [token.kind for token in Lexer(source).tokenize().tokens[:-1]] + + +def test_lex_hello() -> None: + source = '∴ main ⟦⟧ → ∅\n ⟣ "hello"\n∎' + assert kinds(source) == [ + TokenKind.define, + TokenKind.identifier, + TokenKind.semantic_lbracket, + TokenKind.semantic_rbracket, + TokenKind.arrow, + TokenKind.unit, + TokenKind.emit, + TokenKind.string, + TokenKind.end, + ] + + +def test_lex_compact_fibonacci() -> None: + source = "∴fib⟦n:ℕ⟧→ℕ{⊢n≤1⇒n;⊢fib⟦n-1⟧+fib⟦n-2⟧}" + assert TokenKind.define in kinds(source) + assert TokenKind.turnstile in kinds(source) + assert TokenKind.implies in kinds(source) + assert TokenKind.less_equal in kinds(source) + + +def test_lex_unicode_types_and_booleans() -> None: + tokens = Lexer("ℕ ℤ ℝ ℂ 𝔹 ∅ ⊤ ⊥ true false").tokenize().tokens + assert [token.kind for token in tokens[:-1]] == [ + TokenKind.primitive_type, + TokenKind.primitive_type, + TokenKind.primitive_type, + TokenKind.primitive_type, + TokenKind.primitive_type, + TokenKind.unit, + TokenKind.boolean, + TokenKind.boolean, + TokenKind.boolean, + TokenKind.boolean, + ] + + +def test_lex_comments_and_unicode_identifiers() -> None: + tokens = Lexer("∴ μέain ⟦⟧ → ∅ ⍝ ignored\n∎").tokenize().tokens + values = [token.value for token in tokens] + assert "μέain" in values + assert "ignored" not in values + + +def test_reject_unknown_glyph() -> None: + with pytest.raises(LexerError, match="unknown symbol"): + Lexer("☃").tokenize() diff --git a/packages/aix/tests/python/test_aix_parser.py b/packages/aix/tests/python/test_aix_parser.py new file mode 100644 index 0000000..c99b6b9 --- /dev/null +++ b/packages/aix/tests/python/test_aix_parser.py @@ -0,0 +1,90 @@ +# ruff: noqa: RUF001 +""" +title: AIX parser tests. 
+""" + +from __future__ import annotations + +import astx +import pytest + +from aix.exceptions import ParserException +from aix.lexer import Lexer +from aix.parser import Parser + + +def parse(source: str) -> astx.Module: + """ + title: Parse source into an AIX AST module. + parameters: + source: + type: str + returns: + type: astx.Module + """ + return Parser().parse(Lexer(source).tokenize()) + + +def test_parse_hello_program() -> None: + module = parse('∴ main ⟦⟧ → ∅\n ⟣ "hello"\n∎') + assert len(module.nodes) == 1 + function = module.nodes[0] + assert isinstance(function, astx.FunctionDef) + assert function.prototype.name == "main" + assert len(function.body.nodes) == 1 + + +def test_parse_pretty_fibonacci() -> None: + module = parse( + "∴ fib ⟦ n:ℕ ⟧ → ℕ\n ⊢ n ≤ 1 ⇒ n\n ⊢ fib⟦n - 1⟧ + fib⟦n - 2⟧\n∎" + ) + function = module.nodes[0] + assert isinstance(function, astx.FunctionDef) + assert function.prototype.name == "fib" + assert len(function.prototype.args.nodes) == 1 + assert len(function.body.nodes) == 2 + assert isinstance(function.body.nodes[0], astx.IfStmt) + assert isinstance(function.body.nodes[1], astx.FunctionReturn) + + +def test_parse_compact_fibonacci() -> None: + module = parse("∴fib⟦n:ℕ⟧→ℕ{⊢n≤1⇒n;⊢fib⟦n-1⟧+fib⟦n-2⟧}") + function = module.nodes[0] + assert isinstance(function, astx.FunctionDef) + assert len(function.body.nodes) == 2 + + +def test_parse_metadata_and_binding() -> None: + module = parse( + "κ⟦ι: hello.v1, χ: example⟧\n" + "∴ main ⟦⟧ → ∅\n" + " ⌁ answer:ℕ ≔ 42\n" + " ⟣ answer\n" + "∎" + ) + function = module.nodes[0] + assert isinstance(function, astx.FunctionDef) + assert isinstance(function.body.nodes[0], astx.VariableDeclaration) + + +def test_parse_constant_definition() -> None: + module = parse("∴ answer:ℕ ≔ 42 ∎") + assert isinstance(module.nodes[0], astx.VariableDeclaration) + + +def test_missing_end_error() -> None: + with pytest.raises(ParserException, match="missing block terminator"): + parse("∴ main ⟦⟧ → ∅ ⟣ 1") + + +def 
test_missing_parameter_type_error() -> None: + with pytest.raises(ParserException, match="expected ':'"): + parse("∴ id ⟦ value ⟧ → ℕ ⊢ value ∎") + + +def test_unsupported_reserved_operator_error() -> None: + with pytest.raises( + ParserException, + match="unsupported reserved operator '⍴'", + ): + parse("∴ main ⟦⟧ → ∅ ⟣ ⍴ 1 ∎") diff --git a/scripts/build.sh b/scripts/build.sh index bd633ad..384489d 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -5,6 +5,7 @@ packages=( packages/astx packages/irx packages/arx + packages/aix ) for package_dir in "${packages[@]}"; do diff --git a/scripts/publish.sh b/scripts/publish.sh index 1f5ee83..156df3b 100755 --- a/scripts/publish.sh +++ b/scripts/publish.sh @@ -5,6 +5,7 @@ packages=( packages/astx packages/irx packages/arx + packages/aix ) for package_dir in "${packages[@]}"; do