From 161494e02377e6cc0373fb3334524fb341632830 Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Tue, 21 Apr 2026 23:52:35 +0200 Subject: [PATCH 1/3] Fix OverflowError in _get_slice on 32-bit platforms (#145) On 32-bit platforms, len(range(first_elt, last_elt+sign, step)) overflows Py_ssize_t when the bitmap spans values near 2**31-1 (e.g. BitMap([0, 2**31-1])). Replace with abs(last_elt - first_elt) + 1, which is pure Python int arithmetic and cannot overflow. Apply the same fix to AbstractBitMap64._get_slice, where the same overflow would occur on any platform for bitmaps spanning [0, 2**63-1]. Also restore pytest test.py in the wheel build CI (dropped in fd5bd437). --- .github/workflows/buildwheels.yml | 5 +-- pyroaring/abstract_bitmap.pxi | 54 +++++++++++++++++-------------- test.py | 15 +++++++++ 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/.github/workflows/buildwheels.yml b/.github/workflows/buildwheels.yml index 72f56a9..79f5941 100644 --- a/.github/workflows/buildwheels.yml +++ b/.github/workflows/buildwheels.yml @@ -53,12 +53,13 @@ jobs: uses: pypa/cibuildwheel@v3.4.0 env: CIBW_TEST_REQUIRES: hypothesis pytest - CIBW_TEST_COMMAND: "python {project}/cydoctest.py -v" # full test command: py.test {project}/test.py -v + CIBW_TEST_COMMAND: "pytest {project}/test.py -v && python {project}/cydoctest.py -v" CIBW_SKIP: "cp38-*" CIBW_ARCHS_LINUX: ${{ runner.arch == 'X64' && 'auto' || 'auto armv7l' }} CIBW_ARCHS_MACOS: ${{ runner.arch == 'X64' && 'auto' || 'auto universal2' }} CIBW_ARCHS_WINDOWS: "auto ARM64" - CIBW_TEST_SKIP: "*-win_arm64" + # armv7l runs under QEMU (too slow for the full test suite); win_arm64 is cross-compiled + CIBW_TEST_SKIP: "*-win_arm64 *-manylinux_armv7l *-musllinux_armv7l" CIBW_BUILD_FRONTEND: "build" MACOSX_DEPLOYMENT_TARGET: "14.0" # fix build for macos-15-intel diff --git a/pyroaring/abstract_bitmap.pxi b/pyroaring/abstract_bitmap.pxi index f7079c0..e2aef3e 100644 --- a/pyroaring/abstract_bitmap.pxi +++ b/pyroaring/abstract_bitmap.pxi @@ -701,18 +701,21 @@ cdef class AbstractBitMap: assert len(r) > 0 first_elt = self._get_elt(start) last_elt = self._get_elt(stop-sign) - values = range(first_elt, last_elt+sign, step) - if abs(step) == 1 and len(values) <= len(self) / 100: # contiguous and small chunk of the bitmap - return self & self.__class__(values, copy_on_write=self.copy_on_write) - else: # generic case - if step < 0: - start = r[-1] - stop = r[0] + 1 - step = -step - else: - start = r[0] - stop = r[-1] + 1 - return self._generic_get_slice(start, stop, step) + if abs(step) == 1: + # Compute range size without len() to avoid OverflowError on 32-bit platforms + # when first_elt and last_elt span a range exceeding Py_ssize_t. + size = abs(last_elt - first_elt) + 1 + if size <= len(self) / 100: # contiguous and small chunk of the bitmap + return self & self.__class__(range(first_elt, last_elt+sign, step), copy_on_write=self.copy_on_write) + # generic case + if step < 0: + start = r[-1] + stop = r[0] + 1 + step = -step + else: + start = r[0] + stop = r[-1] + 1 + return self._generic_get_slice(start, stop, step) cdef _generic_get_slice(self, uint32_t start, uint32_t stop, uint32_t step): """Assume that start, stop and step > 0 and that the result will not be empty.""" @@ -1163,18 +1166,21 @@ cdef class AbstractBitMap64: assert len(r) > 0 first_elt = self._get_elt(start) last_elt = self._get_elt(stop-sign) - values = range(first_elt, last_elt+sign, step) - if abs(step) == 1 and len(values) <= len(self) / 100: # contiguous and small chunk of the bitmap - return self & self.__class__(values) - else: # generic case - if step < 0: - start = r[-1] - stop = r[0] + 1 - step = -step - else: - start = r[0] - stop = r[-1] + 1 - return self._generic_get_slice(start, stop, step) + if abs(step) == 1: + # Compute range size without len() to avoid OverflowError on platforms + # where Py_ssize_t cannot hold the distance between first_elt and last_elt. + size = abs(last_elt - first_elt) + 1 + if size <= len(self) / 100: # contiguous and small chunk of the bitmap + return self & self.__class__(range(first_elt, last_elt+sign, step)) + # generic case + if step < 0: + start = r[-1] + stop = r[0] + 1 + step = -step + else: + start = r[0] + stop = r[-1] + 1 + return self._generic_get_slice(start, stop, step) cdef _generic_get_slice(self, uint64_t start, uint64_t stop, uint64_t step): """Assume that start, stop and step > 0 and that the result will not be empty.""" diff --git a/test.py b/test.py index be51da7..202b6ea 100755 --- a/test.py +++ b/test.py @@ -1821,6 +1821,21 @@ def test_small_list(self, cls: type[EitherBitMap], collection: list[int]) -> Non string_repr = string_repr.replace("BitMap64", "BitMap") # we redefined BitMap64 to BitMap at the top of this file assert bm == eval(string_repr) + def test_large_range_repr_32bit(self) -> None: + # Regression test for https://github.com/Ezibenroc/PyRoaringBitMap/issues/145 + # len(range(first_elt, last_elt+1)) overflows Py_ssize_t on 32-bit platforms. + ns = {"BitMap": pyroaring.BitMap} + bm = pyroaring.BitMap([0, 2**31 - 1]) + assert bm == eval(repr(bm), ns) + bm = pyroaring.BitMap([0, 2**32 - 1]) + assert bm == eval(repr(bm), ns) + + def test_large_range_repr_64bit(self) -> None: + # Same overflow, latent on 64-bit: a BitMap64 spanning [0, 2**63-1] also + # overflows Py_ssize_t even on 64-bit platforms. + bm = pyroaring.BitMap64([0, 2**63 - 1]) + assert bm == eval(repr(bm), {"BitMap64": pyroaring.BitMap64}) + @settings(suppress_health_check=HealthCheck) @given(bitmap_cls, large_list_of_uin32) def test_large_list(self, cls: type[EitherBitMap], collection: list[int]) -> None: From b002b4f8aafe560cf0fe5eb74bef3792f99a44ad Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Wed, 22 Apr 2026 00:26:15 +0200 Subject: [PATCH 2/3] Do not skip tests in buildwheels --- .github/workflows/buildwheels.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/buildwheels.yml b/.github/workflows/buildwheels.yml index 79f5941..8f49cc0 100644 --- a/.github/workflows/buildwheels.yml +++ b/.github/workflows/buildwheels.yml @@ -58,8 +58,6 @@ jobs: CIBW_ARCHS_LINUX: ${{ runner.arch == 'X64' && 'auto' || 'auto armv7l' }} CIBW_ARCHS_MACOS: ${{ runner.arch == 'X64' && 'auto' || 'auto universal2' }} CIBW_ARCHS_WINDOWS: "auto ARM64" - # armv7l runs under QEMU (too slow for the full test suite); win_arm64 is cross-compiled - CIBW_TEST_SKIP: "*-win_arm64 *-manylinux_armv7l *-musllinux_armv7l" CIBW_BUILD_FRONTEND: "build" MACOSX_DEPLOYMENT_TARGET: "14.0" # fix build for macos-15-intel From d27bb0584f70f05a9005c4e20c26f5674f3e3938 Mon Sep 17 00:00:00 2001 From: Tom Cornebize Date: Wed, 22 Apr 2026 22:31:21 +0200 Subject: [PATCH 3/3] Better test --- test.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/test.py b/test.py index 202b6ea..8e13e6f 100755 --- a/test.py +++ b/test.py @@ -424,6 +424,12 @@ def test_slice_select_none( assume(step != 0) self.check_slice(cls, values, start, stop, step, cow) + def test_get_slice_large_span(self) -> None: + # Regression for https://github.com/Ezibenroc/PyRoaringBitMap/issues/145 + bm = BitMap([0, 2**31 - 1 if is_32_bits else 2**63 - 1]) + assert bm[:] == bm + assert bm[::-1] == bm + @given(bitmap_cls, hyp_collection, st.booleans()) def test_simple_rank( self, @@ -1821,21 +1827,6 @@ def test_small_list(self, cls: type[EitherBitMap], collection: list[int]) -> Non string_repr = string_repr.replace("BitMap64", "BitMap") # we redefined BitMap64 to BitMap at the top of this file assert bm == eval(string_repr) - def test_large_range_repr_32bit(self) -> None: - # Regression test for https://github.com/Ezibenroc/PyRoaringBitMap/issues/145 - # len(range(first_elt, last_elt+1)) overflows Py_ssize_t on 32-bit platforms. - ns = {"BitMap": pyroaring.BitMap} - bm = pyroaring.BitMap([0, 2**31 - 1]) - assert bm == eval(repr(bm), ns) - bm = pyroaring.BitMap([0, 2**32 - 1]) - assert bm == eval(repr(bm), ns) - - def test_large_range_repr_64bit(self) -> None: - # Same overflow, latent on 64-bit: a BitMap64 spanning [0, 2**63-1] also - # overflows Py_ssize_t even on 64-bit platforms. - bm = pyroaring.BitMap64([0, 2**63 - 1]) - assert bm == eval(repr(bm), {"BitMap64": pyroaring.BitMap64}) - @settings(suppress_health_check=HealthCheck) @given(bitmap_cls, large_list_of_uin32) def test_large_list(self, cls: type[EitherBitMap], collection: list[int]) -> None: