diff --git a/xrspatial/polygonize.py b/xrspatial/polygonize.py index 40ca8c6a..3286d542 100644 --- a/xrspatial/polygonize.py +++ b/xrspatial/polygonize.py @@ -474,11 +474,34 @@ def _to_geopandas( column_name: str, ): import geopandas as gpd + import shapely from shapely.geometry import Polygon - # Convert list of point arrays to shapely Polygons. - polygons = list(map( - lambda points: Polygon(points[0], points[1:]), polygon_points)) + if hasattr(shapely, 'polygons'): + # Shapely 2.0+: batch-construct hole-free polygons via + # linearrings -> polygons pipeline (both are C-level batch ops). + no_holes = [i for i, pts in enumerate(polygon_points) + if len(pts) == 1] + + if len(no_holes) == len(polygon_points): + # All hole-free: batch create LinearRings then Polygons. + rings = [shapely.linearrings(pts[0]) + for pts in polygon_points] + polygons = list(shapely.polygons(rings)) + else: + polygons = [None] * len(polygon_points) + if no_holes: + rings = [shapely.linearrings(polygon_points[i][0]) + for i in no_holes] + batch = shapely.polygons(rings) + for idx, poly in zip(no_holes, batch): + polygons[idx] = poly + for i, pts in enumerate(polygon_points): + if polygons[i] is None: + polygons[i] = Polygon(pts[0], pts[1:]) + else: + # Shapely < 2.0 fallback. + polygons = [Polygon(pts[0], pts[1:]) for pts in polygon_points] df = gpd.GeoDataFrame({column_name: column, "geometry": polygons}) return df @@ -823,25 +846,32 @@ def _trace_rings(edge_set): return rings +@ngjit def _simplify_ring(ring): """Remove collinear (redundant) vertices from a closed ring.""" n = len(ring) - 1 # exclude closing point if n <= 3: return ring - keep = [] + # Single pass into pre-allocated output. + out = np.empty((n + 1, 2), dtype=np.float64) + count = 0 for i in range(n): - prev = ring[(i - 1) % n] - curr = ring[i] - nxt = ring[(i + 1) % n] - if not ((prev[0] == curr[0] == nxt[0]) or - (prev[1] == curr[1] == nxt[1])): - keep.append(curr) - if len(keep) < n: - keep.append(keep[0]) - return np.array(keep, dtype=np.float64) + pi = (i - 1) % n + ni = (i + 1) % n + if not ((ring[pi, 0] == ring[i, 0] == ring[ni, 0]) or + (ring[pi, 1] == ring[i, 1] == ring[ni, 1])): + out[count, 0] = ring[i, 0] + out[count, 1] = ring[i, 1] + count += 1 + if count < n: + out[count, 0] = out[0, 0] + out[count, 1] = out[0, 1] + count += 1 + return out[:count].copy() return ring +@ngjit def _signed_ring_area(ring): """Shoelace formula for signed area of a closed ring.""" x = ring[:, 0] @@ -849,6 +879,7 @@ def _signed_ring_area(ring): return 0.5 * (np.dot(x[:-1], y[1:]) - np.dot(x[1:], y[:-1])) +@ngjit def _point_in_ring(px, py, ring): """Ray-casting point-in-polygon test.""" n = len(ring) - 1 diff --git a/xrspatial/tests/test_polygonize.py b/xrspatial/tests/test_polygonize.py index 0d581257..bf27a784 100644 --- a/xrspatial/tests/test_polygonize.py +++ b/xrspatial/tests/test_polygonize.py @@ -457,3 +457,84 @@ def test_polygonize_dask_cupy_matches_numpy(chunks): assert val in areas_dcp, f"Value {val} missing from dask+cupy result" assert_allclose(areas_dcp[val], areas_np[val], err_msg=f"Area mismatch for value {val}") + + +# --- Performance-related regression tests (#1008) --- + +def test_polygonize_1008_jit_merge_helpers(): + """JIT-compiled _simplify_ring, _signed_ring_area, _point_in_ring.""" + from ..polygonize import _point_in_ring, _signed_ring_area, _simplify_ring + + # Unit square: CCW exterior. + square = np.array([ + [0, 0], [1, 0], [1, 1], [0, 1], [0, 0]], dtype=np.float64) + + assert_allclose(_signed_ring_area(square), 1.0) + + # Point inside. + assert _point_in_ring(0.5, 0.5, square) is True + # Point outside. + assert _point_in_ring(2.0, 2.0, square) is False + + # Square with collinear midpoints on each edge. + with_collinear = np.array([ + [0, 0], [0.5, 0], [1, 0], [1, 0.5], [1, 1], + [0.5, 1], [0, 1], [0, 0.5], [0, 0]], dtype=np.float64) + simplified = _simplify_ring(with_collinear) + # Should remove the midpoints, leaving 4 corners + closing point. + assert simplified.shape == (5, 2) + assert_allclose(_signed_ring_area(simplified), 1.0) + + # Ring with no collinear points should be returned unchanged. + triangle = np.array([ + [0, 0], [2, 0], [1, 2], [0, 0]], dtype=np.float64) + assert _simplify_ring(triangle) is triangle + + +@dask_array_available +def test_polygonize_1008_dask_merge_many_boundary_polygons(): + """Dask merge with many boundary-crossing polygons of the same value. + + Checkerboard pattern in small chunks forces many boundary polygons + through the merge path, exercising the JIT-compiled helpers. + """ + # 8x8 checkerboard, chunks of 4x4. + data = np.zeros((8, 8), dtype=np.int32) + data[::2, ::2] = 1 + data[1::2, 1::2] = 1 + + raster_np = xr.DataArray(data) + vals_np, polys_np = polygonize(raster_np, connectivity=4) + areas_np = _area_by_value(vals_np, polys_np) + + raster_da = xr.DataArray(da.from_array(data, chunks=(4, 4))) + vals_da, polys_da = polygonize(raster_da, connectivity=4) + areas_da = _area_by_value(vals_da, polys_da) + + for val in areas_np: + assert val in areas_da + assert_allclose(areas_da[val], areas_np[val], + err_msg=f"Area mismatch for value {val}") + + +@pytest.mark.skipif(gpd is None, reason="geopandas not installed") +def test_polygonize_1008_geopandas_batch_with_holes(): + """Batch shapely construction: mix of hole-free and holed polygons.""" + # Outer ring of 0s with inner block of 1s containing a 2 (hole in 1). + data = np.zeros((6, 6), dtype=np.int32) + data[1:5, 1:5] = 1 + data[2:4, 2:4] = 2 + + raster = xr.DataArray(data) + df = polygonize(raster, return_type="geopandas", connectivity=4) + + assert isinstance(df, gpd.GeoDataFrame) + assert len(df) == 3 # values 0, 1, 2 + + # Value 1 polygon should have a hole (the 2-region). + row_1 = df[df.DN == 1].iloc[0] + geom = row_1.geometry + assert len(list(geom.interiors)) == 1 + + # Total area should equal raster size. + assert_allclose(df.geometry.area.sum(), 36.0)