From d342861aac73d7526816691d8c1bee532dc41362 Mon Sep 17 00:00:00 2001
From: Jerry <jerrycgh@gmail.com>
Date: Fri, 27 Feb 2026 19:06:26 -0800
Subject: [PATCH] Fix decode_array double-reading length bytes for arrays with
 24+ items
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The custom decode_array override in serialization.py called _decode_length
to check for indefinite-length arrays, then delegated to the original
decode_array which called _decode_length again. For arrays with fewer than
24 items, the length is encoded directly in the subtype (no stream bytes
consumed), so the double call was harmless. For 24+ items, CBOR stores the
length in extra bytes after the initial byte (e.g. 0x98 0x18 for 24 items),
which _decode_length reads from the stream, so the second call consumed the
start of the array content as if it were the length, corrupting the decode.

Replace the _decode_length call with a simple subtype == 31 check, which
is sufficient to detect indefinite-length arrays without consuming any
bytes from the stream.

This bug only affected cbor2pure, not the cbor2 C extension.
---
 pycardano/serialization.py           |  6 +++---
 test/pycardano/test_serialization.py | 32 ++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/pycardano/serialization.py b/pycardano/serialization.py
index bc6e1cec..141342b9 100644
--- a/pycardano/serialization.py
+++ b/pycardano/serialization.py
@@ -196,9 +196,9 @@ def wrapper(cls, value: Primitive):
 
 def decode_array(self, subtype: int) -> Sequence[Any]:
     # Major tag 4
-    length = self._decode_length(subtype, allow_indefinite=True)
-
-    if length is None:
+    if subtype == 31:
+        # Indefinite length array — delegate to the original decoder, then wrap
+        # the result in IndefiniteFrozenList to preserve indefinite encoding.
         ret = IndefiniteFrozenList(list(self.decode_array(subtype=subtype)))
         ret.freeze()
         return ret
diff --git a/test/pycardano/test_serialization.py b/test/pycardano/test_serialization.py
index 256caf62..a4352f5c 100644
--- a/test/pycardano/test_serialization.py
+++ b/test/pycardano/test_serialization.py
@@ -1134,6 +1134,38 @@ class MyTest(ArrayCBORSerializable):
     assert isinstance(MyTest.from_cbor(a.to_cbor()).a, IndefiniteList)
 
 
+def test_decode_array_with_24_or_more_items():
+    """Test that definite-length arrays with 24+ items decode correctly.
+
+    Regression test for a bug where the custom decode_array override called
+    _decode_length (consuming stream bytes), then delegated to the original
+    decode_array which called _decode_length again. For arrays with < 24 items
+    the length is encoded in the subtype itself (no extra bytes), so the double
+    call was harmless. For 24+ items, CBOR uses multi-byte length encoding
+    (e.g. 98 18 for 24 items) and the second _decode_length call consumed
+    actual array content, corrupting the stream.
+    """
+
+    @dataclass
+    class LargeDatum(PlutusData):
+        CONSTR_ID = 1
+        data: List[bytes]
+
+    hello = b"Hello world!"
+
+    # Exactly 24 items — the threshold where CBOR adds a separate length byte
+    datum24 = LargeDatum(data=[hello] * 24)
+    restored24 = LargeDatum.from_cbor(datum24.to_cbor())
+    assert len(restored24.data) == 24
+    assert all(x == hello for x in restored24.data)
+
+    # 25 items — above the threshold
+    datum25 = LargeDatum(data=[hello] * 25)
+    restored25 = LargeDatum.from_cbor(datum25.to_cbor())
+    assert len(restored25.data) == 25
+    assert all(x == hello for x in restored25.data)
+
+
 def test_liqwid_tx():
     with open("test/resources/cbors/liqwid.json") as f:
         cbor_hex = json.load(f).get("cborHex")