Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 44 additions & 13 deletions xrspatial/polygonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,11 +474,34 @@ def _to_geopandas(
column_name: str,
):
import geopandas as gpd
import shapely
from shapely.geometry import Polygon

# Convert list of point arrays to shapely Polygons.
polygons = list(map(
lambda points: Polygon(points[0], points[1:]), polygon_points))
if hasattr(shapely, 'polygons'):
# Shapely 2.0+: batch-construct hole-free polygons via
# linearrings -> polygons pipeline (both are C-level batch ops).
no_holes = [i for i, pts in enumerate(polygon_points)
if len(pts) == 1]

if len(no_holes) == len(polygon_points):
# All hole-free: batch create LinearRings then Polygons.
rings = [shapely.linearrings(pts[0])
for pts in polygon_points]
polygons = list(shapely.polygons(rings))
else:
polygons = [None] * len(polygon_points)
if no_holes:
rings = [shapely.linearrings(polygon_points[i][0])
for i in no_holes]
batch = shapely.polygons(rings)
for idx, poly in zip(no_holes, batch):
polygons[idx] = poly
for i, pts in enumerate(polygon_points):
if polygons[i] is None:
polygons[i] = Polygon(pts[0], pts[1:])
else:
# Shapely < 2.0 fallback.
polygons = [Polygon(pts[0], pts[1:]) for pts in polygon_points]

df = gpd.GeoDataFrame({column_name: column, "geometry": polygons})
return df
Expand Down Expand Up @@ -823,32 +846,40 @@ def _trace_rings(edge_set):
return rings


@ngjit
def _simplify_ring(ring):
"""Remove collinear (redundant) vertices from a closed ring."""
n = len(ring) - 1 # exclude closing point
if n <= 3:
return ring
keep = []
# Single pass into pre-allocated output.
out = np.empty((n + 1, 2), dtype=np.float64)
count = 0
for i in range(n):
prev = ring[(i - 1) % n]
curr = ring[i]
nxt = ring[(i + 1) % n]
if not ((prev[0] == curr[0] == nxt[0]) or
(prev[1] == curr[1] == nxt[1])):
keep.append(curr)
if len(keep) < n:
keep.append(keep[0])
return np.array(keep, dtype=np.float64)
pi = (i - 1) % n
ni = (i + 1) % n
if not ((ring[pi, 0] == ring[i, 0] == ring[ni, 0]) or
(ring[pi, 1] == ring[i, 1] == ring[ni, 1])):
out[count, 0] = ring[i, 0]
out[count, 1] = ring[i, 1]
count += 1
if count < n:
out[count, 0] = out[0, 0]
out[count, 1] = out[0, 1]
count += 1
return out[:count].copy()
return ring


@ngjit
def _signed_ring_area(ring):
"""Shoelace formula for signed area of a closed ring."""
x = ring[:, 0]
y = ring[:, 1]
return 0.5 * (np.dot(x[:-1], y[1:]) - np.dot(x[1:], y[:-1]))


@ngjit
def _point_in_ring(px, py, ring):
"""Ray-casting point-in-polygon test."""
n = len(ring) - 1
Expand Down
81 changes: 81 additions & 0 deletions xrspatial/tests/test_polygonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,84 @@ def test_polygonize_dask_cupy_matches_numpy(chunks):
assert val in areas_dcp, f"Value {val} missing from dask+cupy result"
assert_allclose(areas_dcp[val], areas_np[val],
err_msg=f"Area mismatch for value {val}")


# --- Performance-related regression tests (#1008) ---

def test_polygonize_1008_jit_merge_helpers():
"""JIT-compiled _simplify_ring, _signed_ring_area, _point_in_ring."""
from ..polygonize import _point_in_ring, _signed_ring_area, _simplify_ring

# Unit square: CCW exterior.
square = np.array([
[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]], dtype=np.float64)

assert_allclose(_signed_ring_area(square), 1.0)

# Point inside.
assert _point_in_ring(0.5, 0.5, square) is True
# Point outside.
assert _point_in_ring(2.0, 2.0, square) is False

# Square with collinear midpoints on each edge.
with_collinear = np.array([
[0, 0], [0.5, 0], [1, 0], [1, 0.5], [1, 1],
[0.5, 1], [0, 1], [0, 0.5], [0, 0]], dtype=np.float64)
simplified = _simplify_ring(with_collinear)
# Should remove the midpoints, leaving 4 corners + closing point.
assert simplified.shape == (5, 2)
assert_allclose(_signed_ring_area(simplified), 1.0)

# Ring with no collinear points should be returned unchanged.
triangle = np.array([
[0, 0], [2, 0], [1, 2], [0, 0]], dtype=np.float64)
assert _simplify_ring(triangle) is triangle


@dask_array_available
def test_polygonize_1008_dask_merge_many_boundary_polygons():
"""Dask merge with many boundary-crossing polygons of the same value.

Checkerboard pattern in small chunks forces many boundary polygons
through the merge path, exercising the JIT-compiled helpers.
"""
# 8x8 checkerboard, chunks of 4x4.
data = np.zeros((8, 8), dtype=np.int32)
data[::2, ::2] = 1
data[1::2, 1::2] = 1

raster_np = xr.DataArray(data)
vals_np, polys_np = polygonize(raster_np, connectivity=4)
areas_np = _area_by_value(vals_np, polys_np)

raster_da = xr.DataArray(da.from_array(data, chunks=(4, 4)))
vals_da, polys_da = polygonize(raster_da, connectivity=4)
areas_da = _area_by_value(vals_da, polys_da)

for val in areas_np:
assert val in areas_da
assert_allclose(areas_da[val], areas_np[val],
err_msg=f"Area mismatch for value {val}")


@pytest.mark.skipif(gpd is None, reason="geopandas not installed")
def test_polygonize_1008_geopandas_batch_with_holes():
"""Batch shapely construction: mix of hole-free and holed polygons."""
# Outer ring of 0s with inner block of 1s containing a 2 (hole in 1).
data = np.zeros((6, 6), dtype=np.int32)
data[1:5, 1:5] = 1
data[2:4, 2:4] = 2

raster = xr.DataArray(data)
df = polygonize(raster, return_type="geopandas", connectivity=4)

assert isinstance(df, gpd.GeoDataFrame)
assert len(df) == 3 # values 0, 1, 2

# Value 1 polygon should have a hole (the 2-region).
row_1 = df[df.DN == 1].iloc[0]
geom = row_1.geometry
assert len(list(geom.interiors)) == 1

# Total area should equal raster size.
assert_allclose(df.geometry.area.sum(), 36.0)
Loading