diff --git a/README.md b/README.md index 503cd82..8d92d52 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ The ags module facilitates the serialization and deserialization of arbitrary Python objects to a supported backend format, currently JSON, YAML, and the -home grown UCSL. It does so by recursively lowering the objects into the domain +home grown UCSL. It does so by recursively converting the objects to the domain of the backend -- primitive types such as integers, strings and lists -- before passing them on for serialization. Conversely, after deserialization the primitive types are converted back from primitive types to the actual objects. @@ -65,170 +65,99 @@ the relevant submodules (`ags.json` and `ags.yaml`) that take a file-like argument instead. These modules also provide `dumps` and `loads` functions for serialization to/from a string. -## Operation +## Serialization backends -AGS serialization consists of three steps: +AGS currently supports three serialization backends: JSON, YAML and UCSL. Here +we specify how these backends handle type annotations `str`, `int`, `float`, +`complex`, `bool`, `datetime.date`, `datetime.time`, `datetime.datetime`, +`list[T]`, `tuple[T, ...]`, `tuple[T1, T2, T3]`, `dict[K, V]`, +`typing.Optional[T]` or `T|None`, and `typing.Union[T1, T2, T3]` or `T1|T2|T3`: -### Step 1: Lower Python objects to AGS primitives. - -Python objects with a supported type annotation get recursively converted -("lowered") to one of a finite set of AGS primitives. The list of AGS -primitives is: boolean, integer, float, complex, string, bytes, list, -dictionary, date, time, datetime, a primitive for `Optional` annotations and a -primitive for `Union` annotations. - -These are the Python type annotations that are currently supported, and the AGS -primitives that they are converted into. Note that these choices are common to -all the backends that follow. - -- `int`, `float`, `bool`, `complex`, `str`, `bytes`, `date`, `time`, `datetime` - - These types are passed through as-is. - -- `typing.Literal` - - Literal instances of the above primitive types are also passed through unchanged. - -- `typing.Optional` or `T|None` - - Optional data is returned as the optional primitive. - -- `typing.Union` or `T1|T2|T3` - - Unions of multiple types are lowered to a union primitive holding the name - of the type and the value the lowered object. - - Here we run into an issue if any of the options has no `__name__` attribute, - or its value is not descriptive enough ("int", "str") for the purposes of a - data format. This can be solved by annotating the type, e.g, - `typing.Annotated[int, "amount"]`. Note that while this convention is AGS - specific (something we aim to avoid), annotations of this type can be added - freely and do not interfere with static type checkers. - - Note that unions are the main reason that type annotations must be provided - to the `dump` function, as without it the mapping would be strictly - injective from object space. - -- `list`, `tuple` - - Lists and tuples are lowered to lists of their lowered items. Item type - annotations (e.g. `list[int]`) are required. Both uniform tuples (`tuple[int, - ...]`) and fixed length tuples (`tuple[int, str]`) are supported. - -- `dict` - - Dictionaries are lowered to dictionaries with their keys and values - lowered. Annotations for both key and value are mandatory. - -- `dataclasses.dataclass` - - Data classes are lowered to dictionaries based on their attribute - annotations. - -- `enum.Enum` - - Enum objects are identified by their name and lowered to a string. - -- `inspect.Signature` - - A function signature will not generally be used as a type annotation, but it - is supported for dumping and loading of an `inspect.BoundArguments` instance. - This is a convenient way of lowering all arguments of a function to and - from a dictionary. - -- objects that reduce to a single constructor argument (Python >= 3.11) - - Lastly, objects that reduce to their own class and a single argument are - identified by that argument. - - ```python - >>> from typing import Self - >>> - >>> class MyClass: - ... def __init__(self, s: str): - ... self.s = s - ... def __reduce__(self) -> tuple[type[Self], tuple[str]]: - ... return MyClass, (self.s,) - >>> - >>> ags.json.dumps(MyClass("foo"), MyClass) - '"foo"\n' - - ``` - - The particular return annotation for the `__reduce__` method is required for - AGS to recognize this structure. - -### Step 2: Lower AGS primitives to the domain of the backend - -Different backends support different data types. For instance, YAML supports -date objects whereas JSON does not. Depending on the selected backend the -object resulting from step 1 therefore needs to be lowered further in a -preprocessing step before it can be passed on to the serialisation routine. - -Here we list the conversion choices that AGS makes for each of the supported -backends. - -#### JSON +### JSON The JavaScript Object Notation file format defines the primitives number, string, boolean, array of primitives, a mapping from a string to a primitive, and null. -- integer, float, boolean, string, list, dictionary +- `str`, `int`, `float`, `bool` - These types are passed through as-is. + Passed through. -- complex +- `complex` Complex numbers without an imaginary part are converted to a float, the rest to a string in Python notation (e.g. 1+2j). -- bytes +- `bytes` Bytes are converted to a base85 encoded string. Alternatively, if the bytes sequence matches an encoded unicode string, then this string prefixed with the encoding and a colon (like utf8:) is also valid (the colon is excluded from the base85 character set). -- optional +- `datetime.date`, `datetime.time`, `datetime.datetime` - Optional values are returned as either the value or null. + Date objects are converted to a string in ISO 8601 format. -- union +- `list[T]`, `tuple[T, ...]`, `tuple[T1, T2, T3]` - Unions of multiple types are converted to a single item dictionary, where the - key is the name of the type and the value the object. + Lists and tuples are converted to lists of their converted items. Item type + annotations (e.g. `list[int]`) are required. Both uniform tuples (`tuple[int, + ...]`) and fixed length tuples (`tuple[int, str]`) are supported. -- date, time, datetime +- `dict[K, V]` - Date objects are converted to a string in ISO 8601 format. + Dictionaries are converted to dictionaries with their keys and values + converted. Annotations for both key and value are mandatory. + +- `typing.Optional[T]` or `T|None` + + Optional values are converted according to the above rules for `T` or set to + null if left undefined. + +- `typing.Union[T1, T2, T3]` or `T1|T2|T3` -#### YAML + Unions of multiple types are converted to a single item dictionary, where the + key is the name of the type and the value the object. + +### YAML The YAML Ain’t Markup Language is a superset of JSON as of version 1.2, which -means a JSON file is valid YAML, but not any YAML file is valid JSON. YAML -notably adds the date and binary primitives, which AGS supports by passing -them unchanged. +means a JSON file is valid YAML, but every any YAML file is valid JSON. YAML +notably adds the date and binary primitives, which AGS supports by passing them +unchanged. Other Python objects are converted identically to the JSON backend: -- integer, float, boolean, string, list, dictionary, bytes, date, time, datetime +- `str`, `int`, `float`, `bool`, `bytes`, `datetime.date`, `datetime.time`, `datetime.datetime` - These types are passed through as-is. + Passed through. -- complex +- `complex` Complex numbers without an imaginary part are converted to a float, the rest to a string in Python notation (e.g. 1+2j). -- optional +- `list[T]`, `tuple[T, ...]`, `tuple[T1, T2, T3]` + + Lists and tuples are converted to lists of their converted items. Item type + annotations (e.g. `list[int]`) are required. Both uniform tuples (`tuple[int, + ...]`) and fixed length tuples (`tuple[int, str]`) are supported. + +- `dict[K, V]` - Optional values are returned as either the value or null. + Dictionaries are converted to dictionaries with their keys and values + converted. Annotations for both key and value are mandatory. + +- `typing.Optional` or `T|None` -- union + Optional values are converted according to the above rules or set to null if + left undefined. + +- `typing.Union` or `T1|T2|T3` Unions of multiple types are converted to a single item dictionary, where the key is the name of the type and the value the object. -#### UCSL +### UCSL The Ultra Compact Serialisation Language is a custom language specifically designed for the AGS concept. This is an evolution of the @@ -242,20 +171,35 @@ float, a complex number, or even a single item list of any of the above. There are no special characters to be escaped, but nested structures may be enclosed in square brackets to distinguish inner and outer separation characters. -- integer, float, complex, string +- `str` + + Passed through. + +- `int`, `float`, `complex` These types are converted according to the Python string representation. -- boolean +- `bool` Boolean values are converted to the strings "true" or "false", lowercase, even though the capitalized versions and also "yes" and "no" are all supported in deserialization. -- list +- `bytes` + + Bytes are converted to a base85 encoded string. Alternatively, if the bytes + sequence matches an encoded unicode string, then this string prefixed with + the encoding and a colon (like utf8:) is also valid (the colon is excluded + from the base85 character set). + +- `datetime.date`, `datetime.time`, `datetime.datetime` + + Date objects are converted to a string in ISO 8601 format. + +- `list[T]`, `tuple[T, ...]`, `tuple[T1, T2, T3]` - List items are comma joined. Any item that contains a comma in lowered form - is enclosed in square brackets. + List items are comma joined. Any item that contains a comma in UCSL form is + enclosed in square brackets. ```python >>> ags.ucsl.dumps([["foo"], ["bar", "baz"]], list[list[str]]) @@ -272,7 +216,7 @@ in square brackets to distinguish inner and outer separation characters. ``` -- dictionary +- `dict[K, V]` Dictionary items are comma joined, and the key and value equals joined. Any key that contains a comma or equals is enclosed in square brackets; so is any @@ -284,19 +228,12 @@ in square brackets to distinguish inner and outer separation characters. ``` -- bytes - - Bytes are converted to a base85 encoded string. Alternatively, if the bytes - sequence matches an encoded unicode string, then this string prefixed with - the encoding and a colon (like utf8:) is also valid (the colon is excluded - from the base85 character set). - -- optional +- `typing.Optional` or `T|None` An undefined optional value is represented by a single dash (-). Defined - optional values are enclosed in brackets only if they lower to a dash. + optional values are enclosed in brackets only if they convert to a dash. -- union +- `typing.Union` or `T1|T2|T3` Unions of multiple types are converted to the name of the type followed by the object enclosed in square brackets. E.g., from the introduction: @@ -307,17 +244,70 @@ in square brackets to distinguish inner and outer separation characters. ``` -- date, time, datetime +## Other object types - Date objects are converted to a string in ISO 8601 format. +In addition to the primitive object types mentioned above, the following +objects are supported by first converting them to a primitive. As such, these +rules are common to all backends: -### Step 3: Serialization +- `dataclasses.dataclass` + + Data classes are converted to dictionaries based on their attribute + annotations. + +- `enum.Enum` + + Enum objects are identified by their name and converted to a string. -This is simply a matter of passing on the lowered object to the relevant -serialization routine. Note that the JSON routines are part of Python's -included batteries, whereas YAML requires installation of the external PyYAML -module. The UCSL backend doesn't require serialization as its preprocessor -already lowers everything down to a string. +- `inspect.Signature` -Finally, deserialization consists of running to inverse operations of the three -steps in opposite order. + A function signature will not generally be used as a type annotation, but it + is supported for dumping and loading of an `inspect.BoundArguments` instance. + This is a convenient way of converting all arguments of a function to and + from a dictionary. + +- objects that define `__into_ags__` and `__from_ags__` + + Any object can make itself suitable for AGS by defining two special methods + that transform the object into and out of an alternative form which AGS does + know how to handle. Here is an example of a custom class that is serilialized + as a string: + + ```python + >>> from typing import Self + >>> + >>> class MyClass: + ... def __init__(self, s: str): + ... self.s = s + ... def __into_ags__(self) -> str: + ... return self.s + ... @classmethod + ... def __from_ags__(cls, s: str): + ... return cls(self.s) + >>> + >>> ags.json.dumps(MyClass("foo"), MyClass) + '"foo"\n' + + ``` + +- objects that reduce to a single constructor argument (Python >= 3.11) + + Lastly, objects that reduce to their own class and a single argument are + identified by that argument. This is very similar to the `__into_ags__` + method except that this also affects the way the object is pickled. It also + requires a particular annotation for AGS to recognize the pattern, which is + only available as of Python 3.11: + + ```python + >>> from typing import Self + >>> + >>> class MyClass: + ... def __init__(self, s: str): + ... self.s = s + ... def __reduce__(self) -> tuple[type[Self], tuple[str]]: + ... return MyClass, (self.s,) + >>> + >>> ags.json.dumps(MyClass("foo"), MyClass) + '"foo"\n' + + ``` diff --git a/ags/__init__.py b/ags/__init__.py index 2917f54..e75a2e5 100644 --- a/ags/__init__.py +++ b/ags/__init__.py @@ -4,7 +4,7 @@ def _get_backend_for(path): if path.endswith(".json"): from . import json as backend - elif path.endswith(".yml"): + elif path.endswith(".yml") or path.endswith(".yaml"): from . import yaml as backend else: raise ValueError(f"unrecognized file format for path {path!r}") diff --git a/ags/_mapping.py b/ags/_mapping.py index dd066f9..43cc8be 100644 --- a/ags/_mapping.py +++ b/ags/_mapping.py @@ -175,10 +175,8 @@ def mapping_for(T) -> Mapping: (annotation,) = typing.get_args(args) return Reduce(T, mapping_for(annotation)) - if hasattr(T, "__ags_lower__") and hasattr(T, "__ags_unlower__"): - # WARNING: this is an undocumented API that serves a transitionary - # purpose. It may be removed or changed at any point in future. - annotation = inspect.signature(T.__ags_lower__).return_annotation + if hasattr(T, "__into_ags__") and hasattr(T, "__from_ags__"): + annotation = inspect.signature(T.__into_ags__).return_annotation return AGSReduce(T, mapping_for(annotation)) raise ValueError(f"cannot find a mapping for type {T!r}") @@ -446,7 +444,7 @@ class AGSReduce: def lower(self, obj, inject): assert_isinstance(obj, self.T) - return self.mapping.lower(obj.__ags_lower__(), inject) + return self.mapping.lower(obj.__into_ags__(), inject) def unlower(self, obj, surject): - return self.T.__ags_unlower__(self.mapping.unlower(obj, surject)) + return self.T.__from_ags__(self.mapping.unlower(obj, surject)) diff --git a/test.py b/test.py index bd8071a..d14d392 100644 --- a/test.py +++ b/test.py @@ -175,11 +175,11 @@ class A: def __init__(self, x: int): self.x = x - def __ags_lower__(self) -> int: + def __into_ags__(self) -> int: return self.x @classmethod - def __ags_unlower__(cls, obj: int): + def __from_ags__(cls, obj: int): return cls(obj) def __eq__(self, other):