From d342861aac73d7526816691d8c1bee532dc41362 Mon Sep 17 00:00:00 2001 From: Jerry Date: Fri, 27 Feb 2026 19:06:26 -0800 Subject: [PATCH] Fix decode_array double-reading length bytes for arrays with 24+ items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The custom decode_array override in serialization.py called _decode_length to check for indefinite-length arrays, then delegated to the original decode_array which called _decode_length again. For arrays with fewer than 24 items, the length is encoded directly in the subtype (no stream bytes consumed), so the double call was harmless. For 24+ items, CBOR uses multi-byte length encoding (e.g. 98 18 for 24 items) and _decode_length reads from the stream — the second call consumed actual array content as a length byte, corrupting the decode. Replace the _decode_length call with a simple subtype == 31 check, which is sufficient to detect indefinite-length arrays without consuming any bytes from the stream. This bug only affected cbor2pure, not the cbor2 C extension. --- pycardano/serialization.py | 6 +++--- test/pycardano/test_serialization.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/pycardano/serialization.py b/pycardano/serialization.py index bc6e1cec..141342b9 100644 --- a/pycardano/serialization.py +++ b/pycardano/serialization.py @@ -196,9 +196,9 @@ def wrapper(cls, value: Primitive): def decode_array(self, subtype: int) -> Sequence[Any]: # Major tag 4 - length = self._decode_length(subtype, allow_indefinite=True) - - if length is None: + if subtype == 31: + # Indefinite length array — delegate to the original decoder, then wrap + # the result in IndefiniteFrozenList to preserve indefinite encoding. 
ret = IndefiniteFrozenList(list(self.decode_array(subtype=subtype))) ret.freeze() return ret diff --git a/test/pycardano/test_serialization.py b/test/pycardano/test_serialization.py index 256caf62..a4352f5c 100644 --- a/test/pycardano/test_serialization.py +++ b/test/pycardano/test_serialization.py @@ -1134,6 +1134,38 @@ class MyTest(ArrayCBORSerializable): assert isinstance(MyTest.from_cbor(a.to_cbor()).a, IndefiniteList) +def test_decode_array_with_24_or_more_items(): + """Test that definite-length arrays with 24+ items decode correctly. + + Regression test for a bug where the custom decode_array override called + _decode_length (consuming stream bytes), then delegated to the original + decode_array which called _decode_length again. For arrays with < 24 items + the length is encoded in the subtype itself (no extra bytes), so the double + call was harmless. For 24+ items, CBOR uses multi-byte length encoding + (e.g. 98 18 for 24 items) and the second _decode_length call consumed + actual array content, corrupting the stream. + """ + + @dataclass + class LargeDatum(PlutusData): + CONSTR_ID = 1 + data: List[bytes] + + hello = b"Hello world!" + + # Exactly 24 items — smallest count whose length needs an extra byte (98 18) + datum24 = LargeDatum(data=[hello] * 24) + restored24 = LargeDatum.from_cbor(datum24.to_cbor()) + assert len(restored24.data) == 24 + assert all(x == hello for x in restored24.data) + + # 25 items — above the threshold + datum25 = LargeDatum(data=[hello] * 25) + restored25 = LargeDatum.from_cbor(datum25.to_cbor()) + assert len(restored25.data) == 25 + assert all(x == hello for x in restored25.data) + + def test_liqwid_tx(): with open("test/resources/cbors/liqwid.json") as f: cbor_hex = json.load(f).get("cborHex")