From a208ec6d9628fb0ac0b07b6e46348160cdd9e2f0 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 29 Apr 2026 15:15:54 +0100 Subject: [PATCH 1/8] WIP --- mypyc/doc/index.rst | 1 + mypyc/doc/librt.rst | 2 ++ mypyc/doc/librt_strings.rst | 13 +++++++++++++ 3 files changed, 16 insertions(+) create mode 100644 mypyc/doc/librt_strings.rst diff --git a/mypyc/doc/index.rst b/mypyc/doc/index.rst index 85e7b2808b210..fe683c4188f20 100644 --- a/mypyc/doc/index.rst +++ b/mypyc/doc/index.rst @@ -33,6 +33,7 @@ generate fast code. librt librt_base64 + librt_strings librt_time .. toctree:: diff --git a/mypyc/doc/librt.rst b/mypyc/doc/librt.rst index e418b5356ff61..4f70d9586dced 100644 --- a/mypyc/doc/librt.rst +++ b/mypyc/doc/librt.rst @@ -26,6 +26,8 @@ Follow submodule links in the table to a detailed description of each submodule. - Description * - :doc:`librt.base64 <librt_base64>` - Fast Base64 encoding and decoding + * - :doc:`librt.strings <librt_strings>` + - String and bytes utilities * - :doc:`librt.time <librt_time>` - Time utilities diff --git a/mypyc/doc/librt_strings.rst b/mypyc/doc/librt_strings.rst new file mode 100644 index 0000000000000..9ecae3dae4f25 --- /dev/null +++ b/mypyc/doc/librt_strings.rst @@ -0,0 +1,13 @@ +.. _librt-strings: + +librt.strings +============= + +The ``librt.strings`` module is part of the ``librt`` package on PyPI, and it includes +string and bytes utilities. + +Classes +------- + +Functions +--------- From caedcefc9c7e1be8ddd3868feb677eff747de85e Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 29 Apr 2026 15:20:21 +0100 Subject: [PATCH 2/8] Add documentation placeholders --- mypyc/doc/librt_strings.rst | 142 ++++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/mypyc/doc/librt_strings.rst b/mypyc/doc/librt_strings.rst index 9ecae3dae4f25..5e63edf984724 100644 --- a/mypyc/doc/librt_strings.rst +++ b/mypyc/doc/librt_strings.rst @@ -9,5 +9,147 @@ string and bytes utilities. Classes ------- +BytesWriter +^^^^^^^^^^^ + +.. 
class:: BytesWriter + + TODO + + .. method:: append(x: int, /) -> None + + TODO + + .. method:: write(b: bytes | bytearray, /) -> None + + TODO + + .. method:: getvalue() -> bytes + + TODO + + .. method:: truncate(size: i64, /) -> None + + TODO + + .. describe:: len(writer) + + TODO + + .. describe:: writer[i] + + TODO + + .. describe:: writer[i] = x + + TODO + +StringWriter +^^^^^^^^^^^^ + +.. class:: StringWriter + + TODO + + .. method:: append(x: int, /) -> None + + TODO + + .. method:: write(s: str, /) -> None + + TODO + + .. method:: getvalue() -> str + + TODO + + .. describe:: len(writer) + + TODO + + .. describe:: writer[i] + + TODO + Functions --------- + +.. function:: write_i16_le(b: BytesWriter, n: i16, /) -> None + + TODO + +.. function:: write_i16_be(b: BytesWriter, n: i16, /) -> None + + TODO + +.. function:: read_i16_le(b: bytes, index: i64, /) -> i16 + + TODO + +.. function:: read_i16_be(b: bytes, index: i64, /) -> i16 + + TODO + +.. function:: write_i32_le(b: BytesWriter, n: i32, /) -> None + + TODO + +.. function:: write_i32_be(b: BytesWriter, n: i32, /) -> None + + TODO + +.. function:: read_i32_le(b: bytes, index: i64, /) -> i32 + + TODO + +.. function:: read_i32_be(b: bytes, index: i64, /) -> i32 + + TODO + +.. function:: write_i64_le(b: BytesWriter, n: i64, /) -> None + + TODO + +.. function:: write_i64_be(b: BytesWriter, n: i64, /) -> None + + TODO + +.. function:: read_i64_le(b: bytes, index: i64, /) -> i64 + + TODO + +.. function:: read_i64_be(b: bytes, index: i64, /) -> i64 + + TODO + +.. function:: write_f32_le(b: BytesWriter, n: float, /) -> None + + TODO + +.. function:: write_f32_be(b: BytesWriter, n: float, /) -> None + + TODO + +.. function:: read_f32_le(b: bytes, index: i64, /) -> float + + TODO + +.. function:: read_f32_be(b: bytes, index: i64, /) -> float + + TODO + +.. function:: write_f64_le(b: BytesWriter, n: float, /) -> None + + TODO + +.. function:: write_f64_be(b: BytesWriter, n: float, /) -> None + + TODO + +.. 
function:: read_f64_le(b: bytes, index: i64, /) -> float + + TODO + +.. function:: read_f64_be(b: bytes, index: i64, /) -> float + + TODO From 659327526ea5e80fd03cafe2f8d5a4ad067c895f Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 29 Apr 2026 15:58:57 +0100 Subject: [PATCH 3/8] Various improvements --- mypyc/doc/conf.py | 8 +++- mypyc/doc/librt_strings.rst | 83 +++++++++++++++++++++++++++---------- 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/mypyc/doc/conf.py b/mypyc/doc/conf.py index fdd98c12a221d..90bca76a202fe 100644 --- a/mypyc/doc/conf.py +++ b/mypyc/doc/conf.py @@ -36,7 +36,13 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions: list[str] = [] +extensions = [ + "sphinx.ext.intersphinx", +] + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), +} # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/mypyc/doc/librt_strings.rst b/mypyc/doc/librt_strings.rst index 5e63edf984724..0653774e4a0e0 100644 --- a/mypyc/doc/librt_strings.rst +++ b/mypyc/doc/librt_strings.rst @@ -4,7 +4,7 @@ librt.strings ============= The ``librt.strings`` module is part of the ``librt`` package on PyPI, and it includes -string and bytes utilities. +low-level string and bytes utilities. Classes ------- @@ -14,81 +14,118 @@ BytesWriter .. class:: BytesWriter - TODO + This class can be used to efficiently construct a bytes object from individual byte values + and from bytes or bytearray objects. It also provides some operations for accessing + and modifying items, but it doesn't support the full sequence interface. + + This can be used as a faster replacement for :py:class:`io.BytesIO` or :py:class:`bytearray` in + compiled code. This is also usually faster than constructing a list of bytes objects and using + the :meth:`bytes.join` method to concatenate them. .. 
method:: append(x: int, /) -> None - TODO + Append a byte to contents. .. method:: write(b: bytes | bytearray, /) -> None - TODO + Append a bytes or bytearray object to contents. .. method:: getvalue() -> bytes - TODO + Return the contents as a bytes object. .. method:: truncate(size: i64, /) -> None - TODO + Truncate the length of the contents to the given size. - .. describe:: len(writer) + .. describe:: len(writer) → i64 - TODO + Return the length of the contents. - .. describe:: writer[i] + .. describe:: writer[i] → u8 - TODO + Return the byte at a specific index. The index can be negative. .. describe:: writer[i] = x - TODO + Set a byte at a specific index. The index can be negative. StringWriter ^^^^^^^^^^^^ .. class:: StringWriter - TODO + This class can be used to efficiently construct a string object from individual Unicode code + point integer values and from string objects. It also provides some operations for accessing + items, but it doesn't support the full sequence interface. + + ``StringWriter`` can be used as a faster replacement for :py:class:`io.StringIO` in + compiled code. This is also usually faster than constructing a list of str objects and using + the :meth:`str.join` method to concatenate them. + + If you construct a string from individual characters or code points, using integer values + can be much faster than using 1-length strings. You can rely on expressions like ``ord("x")`` + being treated as compile-time integer constants in compiled code. Also ``ord(s[i])`` is + guaranteed to be a very quick operation in compiled code, if ``s`` has type :py:class:`str`. .. method:: append(x: int, /) -> None - TODO + Append a Unicode code point (often representing a character) to the contents. .. method:: write(s: str, /) -> None - TODO + Append a string to contents. .. method:: getvalue() -> str - TODO + Return the contents as a string. - .. describe:: len(writer) + .. 
describe:: len(writer) → i64 - TODO + Return the length of the contents (number of code points). - .. describe:: writer[i] + .. describe:: writer[i] → i32 - TODO + Return the Unicode code point at a specific index as an integer. The index can be negative. Functions --------- +The ``write_*`` and ``read_*`` functions allow interpreting bytes as packed binary +data. They can be used as (much) more efficient but lower-level alternatives to the +stdlib :mod:`struct` module in compiled code. + +There are no functions for reading or writing individual bytes. ``BytesWriter.append`` can +be used to insert a byte value, and ``b[n]`` can be used to read a byte value. Both +are fast operations in compiled code. + +This example writes two binary values and reads them afterwards:: + + def example() -> None: + b = BytesWriter() + write_i32_le(b, 123) + write_f64_le(b, 4.5) + data = b.getvalue() + + x = read_i32_le(data, 0) + y = read_f64_le(data, 4) + ... + .. function:: write_i16_le(b: BytesWriter, n: i16, /) -> None - TODO + Append a 16-bit integer as a little-endian binary value. .. function:: write_i16_be(b: BytesWriter, n: i16, /) -> None - TODO + Append a 16-bit integer as a big-endian binary value. .. function:: read_i16_le(b: bytes, index: i64, /) -> i16 - TODO + Read a 16-bit integer value starting at the given index as a little-endian binary value. .. function:: read_i16_be(b: bytes, index: i64, /) -> i16 - TODO + Read a 16-bit integer value starting at the given index as a big-endian binary value. .. 
function:: write_i32_le(b: BytesWriter, n: i32, /) -> None From c8de2d301075a329530eecac896e0966b106ad68 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 29 Apr 2026 16:36:54 +0100 Subject: [PATCH 4/8] Add missing descriptions --- mypyc/doc/librt_strings.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/mypyc/doc/librt_strings.rst b/mypyc/doc/librt_strings.rst index 0653774e4a0e0..bc08056dc9a27 100644 --- a/mypyc/doc/librt_strings.rst +++ b/mypyc/doc/librt_strings.rst @@ -129,64 +129,64 @@ This example writes two binary values and reads them afterwards:: .. function:: write_i32_le(b: BytesWriter, n: i32, /) -> None - TODO + Append a 32-bit integer as a little-endian binary value. .. function:: write_i32_be(b: BytesWriter, n: i32, /) -> None - TODO + Append a 32-bit integer as a big-endian binary value. .. function:: read_i32_le(b: bytes, index: i64, /) -> i32 - TODO + Read a 32-bit integer value starting at the given index as a little-endian binary value. .. function:: read_i32_be(b: bytes, index: i64, /) -> i32 - TODO + Read a 32-bit integer value starting at the given index as a big-endian binary value. .. function:: write_i64_le(b: BytesWriter, n: i64, /) -> None - TODO + Append a 64-bit integer as a little-endian binary value. .. function:: write_i64_be(b: BytesWriter, n: i64, /) -> None - TODO + Append a 64-bit integer as a big-endian binary value. .. function:: read_i64_le(b: bytes, index: i64, /) -> i64 - TODO + Read a 64-bit integer value starting at the given index as a little-endian binary value. .. function:: read_i64_be(b: bytes, index: i64, /) -> i64 - TODO + Read a 64-bit integer value starting at the given index as a big-endian binary value. .. function:: write_f32_le(b: BytesWriter, n: float, /) -> None - TODO + Append a 32-bit floating-point value as a little-endian binary value. .. 
function:: write_f32_be(b: BytesWriter, n: float, /) -> None - TODO + Append a 32-bit floating-point value as a big-endian binary value. .. function:: read_f32_le(b: bytes, index: i64, /) -> float - TODO + Read a 32-bit floating-point value starting at the given index as a little-endian binary value. .. function:: read_f32_be(b: bytes, index: i64, /) -> float - TODO + Read a 32-bit floating-point value starting at the given index as a big-endian binary value. .. function:: write_f64_le(b: BytesWriter, n: float, /) -> None - TODO + Append a 64-bit floating-point value as a little-endian binary value. .. function:: write_f64_be(b: BytesWriter, n: float, /) -> None - TODO + Append a 64-bit floating-point value as a big-endian binary value. .. function:: read_f64_le(b: bytes, index: i64, /) -> float - TODO + Read a 64-bit floating-point value starting at the given index as a little-endian binary value. .. function:: read_f64_be(b: bytes, index: i64, /) -> float - TODO + Read a 64-bit floating-point value starting at the given index as a big-endian binary value. From 5c7b33f9e16e73fc5878934065377411353fc63c Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 29 Apr 2026 16:43:45 +0100 Subject: [PATCH 5/8] Minor updates --- mypyc/doc/librt.rst | 2 +- mypyc/doc/librt_strings.rst | 30 ++++++++++++++++++++---------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/mypyc/doc/librt.rst b/mypyc/doc/librt.rst index 4f70d9586dced..f18cc93c80294 100644 --- a/mypyc/doc/librt.rst +++ b/mypyc/doc/librt.rst @@ -3,7 +3,7 @@ Librt overview ============== -The `librt `_ package defines fast +The `librt `__ package defines fast primitive operations that are optimized for code compiled using mypyc. It has carefully selected efficient alternatives for certain Python standard library features. 
diff --git a/mypyc/doc/librt_strings.rst b/mypyc/doc/librt_strings.rst index bc08056dc9a27..ae1750d8aa1fa 100644 --- a/mypyc/doc/librt_strings.rst +++ b/mypyc/doc/librt_strings.rst @@ -121,11 +121,13 @@ This example writes two binary values and reads them afterwards:: .. function:: read_i16_le(b: bytes, index: i64, /) -> i16 - Read a 16-bit integer value starting at the given index as a little-endian binary value. + Read a 16-bit integer value starting at the given index as a little-endian binary value + (2 bytes). .. function:: read_i16_be(b: bytes, index: i64, /) -> i16 - Read a 16-bit integer value starting at the given index as a big-endian binary value. + Read a 16-bit integer value starting at the given index as a big-endian binary value + (2 bytes). .. function:: write_i32_le(b: BytesWriter, n: i32, /) -> None @@ -137,11 +139,13 @@ This example writes two binary values and reads them afterwards:: .. function:: read_i32_le(b: bytes, index: i64, /) -> i32 - Read a 32-bit integer value starting at the given index as a little-endian binary value. + Read a 32-bit integer value starting at the given index as a little-endian binary value + (4 bytes). .. function:: read_i32_be(b: bytes, index: i64, /) -> i32 - Read a 32-bit integer value starting at the given index as a big-endian binary value. + Read a 32-bit integer value starting at the given index as a big-endian binary value + (4 bytes). .. function:: write_i64_le(b: BytesWriter, n: i64, /) -> None @@ -153,11 +157,13 @@ This example writes two binary values and reads them afterwards:: .. function:: read_i64_le(b: bytes, index: i64, /) -> i64 - Read a 64-bit integer value starting at the given index as a little-endian binary value. + Read a 64-bit integer value starting at the given index as a little-endian binary value + (8 bytes). .. function:: read_i64_be(b: bytes, index: i64, /) -> i64 - Read a 64-bit integer value starting at the given index as a big-endian binary value. 
+ Read a 64-bit integer value starting at the given index as a big-endian binary value + (8 bytes). .. function:: write_f32_le(b: BytesWriter, n: float, /) -> None @@ -169,11 +175,13 @@ This example writes two binary values and reads them afterwards:: .. function:: read_f32_le(b: bytes, index: i64, /) -> float - Read a 32-bit floating-point value starting at the given index as a little-endian binary value. + Read a 32-bit floating-point value starting at the given index as a little-endian binary value + (4 bytes). .. function:: read_f32_be(b: bytes, index: i64, /) -> float - Read a 32-bit floating-point value starting at the given index as a big-endian binary value. + Read a 32-bit floating-point value starting at the given index as a big-endian binary value + (4 bytes). .. function:: write_f64_le(b: BytesWriter, n: float, /) -> None @@ -185,8 +193,10 @@ This example writes two binary values and reads them afterwards:: .. function:: read_f64_le(b: bytes, index: i64, /) -> float - Read a 64-bit floating-point value starting at the given index as a little-endian binary value. + Read a 64-bit floating-point value starting at the given index as a little-endian binary value + (8 bytes). .. function:: read_f64_be(b: bytes, index: i64, /) -> float - Read a 64-bit floating-point value starting at the given index as a big-endian binary value. + Read a 64-bit floating-point value starting at the given index as a big-endian binary value + (8 bytes). 
From d6713d66c469d136475de1820f33da01f219e314 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 29 Apr 2026 16:47:33 +0100 Subject: [PATCH 6/8] Also document ord('x') in str_operations.rst --- mypyc/doc/str_operations.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mypyc/doc/str_operations.rst b/mypyc/doc/str_operations.rst index 443e90a71bc7a..ad12fef94d3aa 100644 --- a/mypyc/doc/str_operations.rst +++ b/mypyc/doc/str_operations.rst @@ -89,3 +89,6 @@ Functions * ``len(s: str)`` * ``ord(s: str)`` + + * Calls with a literal argument are treated as compile-time integer constants (e.g. + ``ord("A")`` is equivalent to 65). From 2bf1ce2646f52380856617be39dbac61ae6081d1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Apr 2026 15:53:23 +0000 Subject: [PATCH 7/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/doc/conf.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mypyc/doc/conf.py b/mypyc/doc/conf.py index 90bca76a202fe..e6b2bfaf77b76 100644 --- a/mypyc/doc/conf.py +++ b/mypyc/doc/conf.py @@ -36,13 +36,9 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [ - "sphinx.ext.intersphinx", -] +extensions = ["sphinx.ext.intersphinx"] -intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), -} +intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] From 2afacbaaa02d190ece1373b3bd1dbd796470c5c1 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 29 Apr 2026 16:57:59 +0100 Subject: [PATCH 8/8] Add cross reference in str_operations.rst --- mypyc/doc/librt_strings.rst | 2 ++ mypyc/doc/str_operations.rst | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/mypyc/doc/librt_strings.rst b/mypyc/doc/librt_strings.rst index ae1750d8aa1fa..1213f39623d61 100644 --- a/mypyc/doc/librt_strings.rst +++ b/mypyc/doc/librt_strings.rst @@ -50,6 +50,8 @@ BytesWriter Set a byte at a specific index. The index can be negative. +.. _librt-string-writer: + StringWriter ^^^^^^^^^^^^ diff --git a/mypyc/doc/str_operations.rst b/mypyc/doc/str_operations.rst index ad12fef94d3aa..8f2daba02b72d 100644 --- a/mypyc/doc/str_operations.rst +++ b/mypyc/doc/str_operations.rst @@ -15,6 +15,11 @@ Construction * ``repr(x: int)`` * ``repr(x: object)`` +.. note:: + + :ref:`librt.strings.StringWriter <librt-string-writer>` can be used to efficiently + construct strings in compiled code. + Operators ---------