diff --git a/benchmarking/cli.py b/benchmarking/cli.py index c73ad8d..6628e33 100644 --- a/benchmarking/cli.py +++ b/benchmarking/cli.py @@ -104,12 +104,16 @@ savi, temporal_mean, temporal_std, - texture_entropy, + haralick_features, ) from eo_processor._core import trend_analysis from eo_processor import zonal_stats except ImportError as exc: # pragma: no cover - print("Failed to import eo_processor. Have you installed/built it?", exc, file=sys.stderr) + print( + "Failed to import eo_processor. Have you installed/built it?", + exc, + file=sys.stderr, + ) sys.exit(1) @@ -134,7 +138,10 @@ class BenchmarkResult: baseline_max_s: Optional[float] = None speedup_vs_numpy: Optional[float] = None baseline_throughput_elems: Optional[float] = None - baseline_kind: Optional[str] = None # e.g. 'broadcast', 'streaming', 'naive', 'prefix' + baseline_kind: Optional[str] = ( + None # e.g. 'broadcast', 'streaming', 'naive', 'prefix' + ) + # -------------------------------------------------------------------------------------- # Argument Parsing @@ -143,25 +150,55 @@ def parse_args(argv: Sequence[str]) -> argparse.Namespace: parser = argparse.ArgumentParser( description="Benchmark eo-processor Rust-accelerated functions." 
) - parser.add_argument("--compare-numpy", action="store_true", - help="Time a NumPy baseline where feasible.") - parser.add_argument("--functions", nargs="+", - help="Explicit list of functions to benchmark (overrides --group).") - parser.add_argument("--group", choices=["spectral", "temporal", "distances", "processes", "zonal", "morphology", "texture", "all"], - default="spectral", help="Predefined function group.") - parser.add_argument("--zones-count", type=int, default=100, help="Number of unique zones for zonal stats.") + parser.add_argument( + "--compare-numpy", + action="store_true", + help="Time a NumPy baseline where feasible.", + ) + parser.add_argument( + "--functions", + nargs="+", + help="Explicit list of functions to benchmark (overrides --group).", + ) + parser.add_argument( + "--group", + choices=[ + "spectral", + "temporal", + "distances", + "processes", + "zonal", + "morphology", + "texture", + "all", + ], + default="spectral", + help="Predefined function group.", + ) + parser.add_argument( + "--zones-count", + type=int, + default=100, + help="Number of unique zones for zonal stats.", + ) parser.add_argument("--height", type=int, default=2048) parser.add_argument("--width", type=int, default=2048) parser.add_argument("--time", type=int, default=12) parser.add_argument("--points-a", type=int, default=2000) parser.add_argument("--points-b", type=int, default=2000) - parser.add_argument("--texture-window", type=int, default=3, help="Window size for texture entropy.") + parser.add_argument( + "--texture-window", type=int, default=3, help="Window size for texture entropy." 
+ ) parser.add_argument("--point-dim", type=int, default=4) parser.add_argument("--minkowski-p", type=float, default=3.0) parser.add_argument("--ma-window", type=int, default=5) parser.add_argument("--ma-stride", type=int, default=4) - parser.add_argument("--ma-baseline", choices=["naive", "prefix"], default="naive", - help="Baseline style for moving averages: naive (O(T*W)) or prefix (O(T)).") + parser.add_argument( + "--ma-baseline", + choices=["naive", "prefix"], + default="naive", + help="Baseline style for moving averages: naive (O(T*W)) or prefix (O(T)).", + ) parser.add_argument("--loops", type=int, default=10) parser.add_argument("--warmups", type=int, default=5) parser.add_argument("--seed", type=int, default=42) @@ -169,12 +206,17 @@ def parse_args(argv: Sequence[str]) -> argparse.Namespace: parser.add_argument("--quiet", action="store_true") parser.add_argument("--md-out", type=str) parser.add_argument("--rst-out", type=str) - parser.add_argument("--size-sweep", nargs="+", - help="List of sizes: HxW or T=val:HxW for sweeps.") - parser.add_argument("--distance-baseline", choices=["broadcast", "streaming", "both"], - default="broadcast") - parser.add_argument("--stress", action="store_true", - help="Use larger stress-test sizes.") + parser.add_argument( + "--size-sweep", nargs="+", help="List of sizes: HxW or T=val:HxW for sweeps." + ) + parser.add_argument( + "--distance-baseline", + choices=["broadcast", "streaming", "both"], + default="broadcast", + ) + parser.add_argument( + "--stress", action="store_true", help="Use larger stress-test sizes." 
+ ) return parser.parse_args(argv) @@ -228,12 +270,17 @@ def compute_elements(func_name: str, shape_info: dict[str, int], args) -> Option }: h, w = shape_info["height"], shape_info["width"] return h * w - if func_name in {"temporal_mean", "temporal_std", "median", - "moving_average_temporal", "moving_average_temporal_stride", - "pixelwise_transform"}: + if func_name in { + "temporal_mean", + "temporal_std", + "median", + "moving_average_temporal", + "moving_average_temporal_stride", + "pixelwise_transform", + }: t, h, w = shape_info["time"], shape_info["height"], shape_info["width"] return t * h * w - if func_name == "texture_entropy": + if func_name == "haralick_features": h, w = shape_info["height"], shape_info["width"] return h * w if func_name == "trend_analysis": @@ -245,7 +292,11 @@ def compute_elements(func_name: str, shape_info: dict[str, int], args) -> Option "chebyshev_distance", "minkowski_distance", }: - n, m, d = shape_info["points_a"], shape_info["points_b"], shape_info["point_dim"] + n, m, d = ( + shape_info["points_a"], + shape_info["points_b"], + shape_info["point_dim"], + ) return n * m * d return None @@ -253,7 +304,9 @@ def compute_elements(func_name: str, shape_info: dict[str, int], args) -> Option # -------------------------------------------------------------------------------------- # Synthetic Data Factories # -------------------------------------------------------------------------------------- -def make_spectral_inputs(height: int, width: int, seed: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: +def make_spectral_inputs( + height: int, width: int, seed: int +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: rng = np.random.default_rng(seed) nir = rng.uniform(0.2, 0.9, size=(height, width)).astype(np.float64) red = rng.uniform(0.05, 0.4, size=(height, width)).astype(np.float64) @@ -261,7 +314,9 @@ def make_spectral_inputs(height: int, width: int, seed: int) -> Tuple[np.ndarray return nir, red, blue -def make_temporal_stack(time_dim: 
def make_temporal_stack(
    time_dim: int, height: int, width: int, seed: int
) -> np.ndarray:
    """Build a (time, height, width) float64 cube of uniform [0, 1) samples."""
    rng = np.random.default_rng(seed)
    return rng.uniform(0.0, 1.0, size=(time_dim, height, width)).astype(np.float64)


def make_trend_series(length: int, seed: int) -> List[float]:
    """Build a noisy rise-then-fall series with a structural break mid-way.

    Mirrors the fixture used by benchmark_trend.py: a linear ramp up to 10
    followed by a ramp back down to 0, plus Gaussian noise (sigma = 0.5).
    Deterministic for a given ``seed``.
    """
    rng = np.random.default_rng(seed)
    # The two ramp halves must total exactly `length` samples, otherwise
    # adding the noise vector raises a broadcast ValueError. The second half
    # absorbs the remainder so odd lengths work too (previously
    # length//2 + length//2 produced length-1 samples and crashed for odd
    # lengths); even lengths are unchanged.
    y = np.concatenate(
        [
            np.linspace(0, 10, length // 2),
            np.linspace(10, 0, length - length // 2),
        ]
    ) + rng.normal(0, 0.5, length)
    return y.tolist()


def make_distance_points(
    n: int, m: int, dim: int, seed: int
) -> Tuple[np.ndarray, np.ndarray]:
    """Generate two standard-normal point clouds, shapes (n, dim) and (m, dim)."""
    rng = np.random.default_rng(seed)
    a = rng.normal(0.0, 1.0, size=(n, dim)).astype(np.float64)
    b = rng.normal(0.0, 1.0, size=(m, dim)).astype(np.float64)
    # NOTE(review): the final `return a, b` falls in a diff-hunk gap of the
    # mangled source; it is implied by callers unpacking two arrays — confirm
    # against the full file.
    return a, b
+ call = lambda: ndwi( + nir, red + ) # using nir as second arg as green is first logically elif func_name == "evi": call = lambda: evi(nir, red, blue) elif func_name == "savi": @@ -351,47 +411,73 @@ def run_single_benchmark( elif func_name == "gci": call = lambda: gci(nir, red) elif func_name == "delta_ndvi": - pre_nir, pre_red, _ = make_spectral_inputs(shape_info["height"], shape_info["width"], seed) - post_nir, post_red, _ = make_spectral_inputs(shape_info["height"], shape_info["width"], seed + 1) + pre_nir, pre_red, _ = make_spectral_inputs( + shape_info["height"], shape_info["width"], seed + ) + post_nir, post_red, _ = make_spectral_inputs( + shape_info["height"], shape_info["width"], seed + 1 + ) call = lambda: delta_ndvi(pre_nir, pre_red, post_nir, post_red) elif func_name == "delta_nbr": - pre_nir, _, pre_swir2 = make_spectral_inputs(shape_info["height"], shape_info["width"], seed) - post_nir, _, post_swir2 = make_spectral_inputs(shape_info["height"], shape_info["width"], seed + 1) + pre_nir, _, pre_swir2 = make_spectral_inputs( + shape_info["height"], shape_info["width"], seed + ) + post_nir, _, post_swir2 = make_spectral_inputs( + shape_info["height"], shape_info["width"], seed + 1 + ) call = lambda: delta_nbr(pre_nir, pre_swir2, post_nir, post_swir2) else: # normalized_difference call = lambda: normalized_difference(nir, red) shape_desc = f"{shape_info['height']}x{shape_info['width']}" elif func_name in {"temporal_mean", "temporal_std", "median"}: - cube = make_temporal_stack(shape_info["time"], shape_info["height"], shape_info["width"], seed) + cube = make_temporal_stack( + shape_info["time"], shape_info["height"], shape_info["width"], seed + ) if func_name == "temporal_mean": call = lambda: temporal_mean(cube) elif func_name == "temporal_std": call = lambda: temporal_std(cube) else: call = lambda: median(cube) - shape_desc = f"{shape_info['time']}x{shape_info['height']}x{shape_info['width']}" + shape_desc = ( + 
f"{shape_info['time']}x{shape_info['height']}x{shape_info['width']}" + ) elif func_name == "trend_analysis": series = make_trend_series(shape_info["time"], seed) # threshold=5.0 from benchmark_trend.py call = lambda: trend_analysis(series, threshold=5.0) shape_desc = f"T={shape_info['time']}" - elif func_name in {"moving_average_temporal", "moving_average_temporal_stride", "pixelwise_transform"}: - cube = make_temporal_stack(shape_info["time"], shape_info["height"], shape_info["width"], seed) + elif func_name in { + "moving_average_temporal", + "moving_average_temporal_stride", + "pixelwise_transform", + }: + cube = make_temporal_stack( + shape_info["time"], shape_info["height"], shape_info["width"], seed + ) if func_name == "moving_average_temporal": - call = lambda: moving_average_temporal(cube, window=ma_window, skip_na=True, mode="same") + call = lambda: moving_average_temporal( + cube, window=ma_window, skip_na=True, mode="same" + ) elif func_name == "moving_average_temporal_stride": - call = lambda: moving_average_temporal_stride(cube, window=ma_window, stride=ma_stride, skip_na=True, mode="same") + call = lambda: moving_average_temporal_stride( + cube, window=ma_window, stride=ma_stride, skip_na=True, mode="same" + ) else: # pixelwise_transform - call = lambda: pixelwise_transform(cube, scale=1.2, offset=-0.1, clamp_min=0.0, clamp_max=1.0) + call = lambda: pixelwise_transform( + cube, scale=1.2, offset=-0.1, clamp_min=0.0, clamp_max=1.0 + ) extra = "" if func_name.startswith("moving_average"): extra = f"(win={ma_window}" if func_name == "moving_average_temporal_stride": extra += f", stride={ma_stride}" extra += ")" - shape_desc = f"{shape_info['time']}x{shape_info['height']}x{shape_info['width']}{extra}" + shape_desc = ( + f"{shape_info['time']}x{shape_info['height']}x{shape_info['width']}{extra}" + ) elif func_name in { "euclidean_distance", @@ -400,7 +486,10 @@ def run_single_benchmark( "minkowski_distance", }: pts_a, pts_b = make_distance_points( - 
shape_info["points_a"], shape_info["points_b"], shape_info["point_dim"], seed + shape_info["points_a"], + shape_info["points_b"], + shape_info["point_dim"], + seed, ) if func_name == "euclidean_distance": call = lambda: euclidean_distance(pts_a, pts_b) @@ -413,20 +502,30 @@ def run_single_benchmark( shape_desc = f"N={shape_info['points_a']}, M={shape_info['points_b']}, D={shape_info['point_dim']}" elif func_name == "zonal_stats": # Generate random values and random zones - values = np.random.uniform(0, 100, size=(shape_info["height"], shape_info["width"])).astype(np.float64) - zones = np.random.randint(0, zones_count, size=(shape_info["height"], shape_info["width"]), dtype=np.int64) + values = np.random.uniform( + 0, 100, size=(shape_info["height"], shape_info["width"]) + ).astype(np.float64) + zones = np.random.randint( + 0, + zones_count, + size=(shape_info["height"], shape_info["width"]), + dtype=np.int64, + ) call = lambda: zonal_stats(values, zones) - shape_desc = f"{shape_info['height']}x{shape_info['width']} (Zones={zones_count})" - + shape_desc = ( + f"{shape_info['height']}x{shape_info['width']} (Zones={zones_count})" + ) + if compare_numpy: supports_baseline = True baseline_kind = "naive_loop" + # Naive NumPy baseline: iterate unique zones def numpy_zonal(): unique_zones = np.unique(zones) res = {} for z in unique_zones: - mask = (zones == z) + mask = zones == z z_vals = values[mask] if z_vals.size > 0: res[z] = { @@ -438,49 +537,64 @@ def numpy_zonal(): "std": np.std(z_vals), } return res + baseline_fn = numpy_zonal - elif func_name in ("binary_dilation", "binary_erosion", "binary_opening", "binary_closing"): + elif func_name in ( + "binary_dilation", + "binary_erosion", + "binary_opening", + "binary_closing", + ): # Data generation: Binary image (0 or 1) # Use uint8 for input as expected by Rust - data = np.random.randint(0, 2, size=(shape_info["height"], shape_info["width"]), dtype=np.uint8) + data = np.random.randint( + 0, 2, 
size=(shape_info["height"], shape_info["width"]), dtype=np.uint8 + ) kernel_size = 3 - + call = lambda: getattr(eo_processor, func_name)(data, kernel_size) - shape_desc = f"{shape_info['height']}x{shape_info['width']} (Kernel={kernel_size})" - + shape_desc = ( + f"{shape_info['height']}x{shape_info['width']} (Kernel={kernel_size})" + ) + # NumPy baseline (using slicing for vectorization, as scipy might be missing) if compare_numpy: supports_baseline = True baseline_kind = "numpy_slicing" + def numpy_morph(): # Naive vectorized implementation using slicing # This is O(K*K * N) where K is kernel size rows, cols = data.shape radius = kernel_size // 2 - + dilated = None eroded = None if "dilation" in func_name or "closing" in func_name: # Dilation logic - padded = np.pad(data, radius, mode='constant', constant_values=0) + padded = np.pad(data, radius, mode="constant", constant_values=0) out = np.zeros_like(data) for kr in range(kernel_size): for kc in range(kernel_size): # Shift and accumulate - out = np.maximum(out, padded[kr:kr+rows, kc:kc+cols]) + out = np.maximum( + out, padded[kr : kr + rows, kc : kc + cols] + ) dilated = out - + if "erosion" in func_name or "opening" in func_name: # Erosion logic # For binary erosion, padding with 1s is typical to avoid border effects # if the image is mostly 1s. If padded with 0s, erosion at border will be 0. # Let's assume standard behavior for binary images where 0 is background. 
- padded = np.pad(data, radius, mode='constant', constant_values=1) + padded = np.pad(data, radius, mode="constant", constant_values=1) out = np.ones_like(data) for kr in range(kernel_size): for kc in range(kernel_size): - out = np.minimum(out, padded[kr:kr+rows, kc:kc+cols]) + out = np.minimum( + out, padded[kr : kr + rows, kc : kc + cols] + ) eroded = out if func_name == "binary_dilation": @@ -490,46 +604,70 @@ def numpy_morph(): elif func_name == "binary_opening": # Erosion then Dilation # Re-run dilation on 'eroded' - padded_d = np.pad(eroded, radius, mode='constant', constant_values=0) + padded_d = np.pad( + eroded, radius, mode="constant", constant_values=0 + ) out_d = np.zeros_like(eroded) for kr in range(kernel_size): for kc in range(kernel_size): - out_d = np.maximum(out_d, padded_d[kr:kr+rows, kc:kc+cols]) + out_d = np.maximum( + out_d, padded_d[kr : kr + rows, kc : kc + cols] + ) return out_d elif func_name == "binary_closing": # Dilation then Erosion # Re-run erosion on 'dilated' - padded_e = np.pad(dilated, radius, mode='constant', constant_values=1) + padded_e = np.pad( + dilated, radius, mode="constant", constant_values=1 + ) out_e = np.ones_like(dilated) for kr in range(kernel_size): for kc in range(kernel_size): - out_e = np.minimum(out_e, padded_e[kr:kr+rows, kc:kc+cols]) + out_e = np.minimum( + out_e, padded_e[kr : kr + rows, kc : kc + cols] + ) return out_e baseline_fn = numpy_morph - elif func_name == "texture_entropy": - # Generate random values - values = np.random.uniform(0, 255, size=(shape_info["height"], shape_info["width"])).astype(np.float64) + elif func_name == "haralick_features": + # Generate quantized integer data + levels = 8 + values = np.random.randint( + 0, levels, size=(shape_info["height"], shape_info["width"]) + ).astype(np.uint8) + import xarray as xr + data = xr.DataArray(values, dims=("y", "x")) window_size = args.texture_window - call = lambda: texture_entropy(values, window_size) - shape_desc = 
f"{shape_info['height']}x{shape_info['width']} (Window={window_size})" + call = lambda: haralick_features(data, window_size=window_size, levels=levels) + shape_desc = f"{shape_info['height']}x{shape_info['width']} (Window={window_size}, Levels={levels})" if compare_numpy: supports_baseline = True - baseline_kind = "scipy_convolve" - # Naive NumPy baseline using scipy.ndimage.generic_filter for entropy + baseline_kind = "skimage_generic" try: + from skimage.feature import graycomatrix, graycoprops from scipy.ndimage import generic_filter - def numpy_entropy(window): - _, counts = np.unique(window, return_counts=True) - probabilities = counts / len(window) - return -np.sum(probabilities * np.log2(probabilities)) + def skimage_haralick_window(window): + # This function is called for each window by generic_filter. + # It computes the GLCM and then the properties. + glcm = graycomatrix( + window, + distances=[1], + angles=[0, np.pi / 4, np.pi / 2, 3 * np.pi / 4], + levels=levels, + symmetric=True, + normed=True, + ) + # We only need one value (e.g., contrast) for the timing benchmark. + # The correctness is checked in the tests. 
+ return graycoprops(glcm, "contrast").mean() - baseline_fn = lambda: generic_filter(values, numpy_entropy, size=window_size) + baseline_fn = lambda: generic_filter( + values, skimage_haralick_window, size=window_size, mode="reflect" + ) except ImportError: - # Fallback if scipy is not installed - baseline_fn = None + baseline_fn = None # Scikit-image or SciPy not installed else: # pragma: no cover raise ValueError(f"Unknown function: {func_name}") @@ -571,10 +709,14 @@ def numpy_entropy(window): baseline_fn = lambda: (nir / red) - 1.0 elif func_name == "delta_ndvi": supports_baseline = True - baseline_fn = lambda: ((pre_nir - pre_red) / (pre_nir + pre_red)) - ((post_nir - post_red) / (post_nir + post_red)) + baseline_fn = lambda: ((pre_nir - pre_red) / (pre_nir + pre_red)) - ( + (post_nir - post_red) / (post_nir + post_red) + ) elif func_name == "delta_nbr": supports_baseline = True - baseline_fn = lambda: ((pre_nir - pre_swir2) / (pre_nir + pre_swir2)) - ((post_nir - post_swir2) / (post_nir + post_swir2)) + baseline_fn = lambda: ((pre_nir - pre_swir2) / (pre_nir + pre_swir2)) - ( + (post_nir - post_swir2) / (post_nir + post_swir2) + ) elif func_name == "normalized_difference": supports_baseline = True baseline_fn = lambda: (nir - red) / (nir + red) @@ -591,6 +733,7 @@ def numpy_entropy(window): supports_baseline = True if ma_baseline_style == "naive": baseline_kind = "naive" + # Naive same-mode baseline (variable edges) O(T*W); skip NaN logic mirrored def _ma_baseline(): arr = cube @@ -609,9 +752,11 @@ def _ma_baseline(): else: out[t] = valid.mean(axis=0) return out + baseline_fn = _ma_baseline else: baseline_kind = "prefix" + # Prefix-sum baseline with NaN handling def _ma_prefix(): arr = cube @@ -628,13 +773,19 @@ def _ma_prefix(): start = max(0, t - half_left) end = min(T - 1, t + half_right) total_sum = csum[end] - (csum[start - 1] if start > 0 else 0) - total_count = ccount[end] - (ccount[start - 1] if start > 0 else 0) + total_count = ccount[end] - ( + 
ccount[start - 1] if start > 0 else 0 + ) with np.errstate(invalid="ignore", divide="ignore"): - out[t] = np.where(total_count > 0, total_sum / total_count, np.nan) + out[t] = np.where( + total_count > 0, total_sum / total_count, np.nan + ) return out + baseline_fn = _ma_prefix elif func_name == "moving_average_temporal_stride": supports_baseline = True + def _ma_stride_baseline(): # Compute naive moving average then stride sample arr = cube @@ -653,6 +804,7 @@ def _ma_stride_baseline(): full.append(valid.mean(axis=0)) full_arr = np.stack(full, axis=0) return full_arr[::ma_stride] + baseline_fn = _ma_stride_baseline elif func_name == "pixelwise_transform": supports_baseline = True @@ -671,6 +823,7 @@ def _ma_stride_baseline(): None, ) ) + # Streaming baseline (no large 3D temporary; pure Python loop, shows algorithmic parity) def streaming_euclid(): out = np.empty((pts_a.shape[0], pts_b.shape[0]), dtype=np.float64) @@ -678,42 +831,67 @@ def streaming_euclid(): diff = pts_a[i] - pts_b out[i] = np.sqrt(np.sum(diff * diff, axis=1)) return out - baseline_fn = broadcast_euclid if distance_baseline == "broadcast" else streaming_euclid + + baseline_fn = ( + broadcast_euclid + if distance_baseline == "broadcast" + else streaming_euclid + ) elif func_name == "manhattan_distance": supports_baseline = True baseline_kind = distance_baseline - broadcast_manhattan = lambda: np.abs(pts_a[:, None, :] - pts_b[None, :, :]).sum(axis=2) + broadcast_manhattan = lambda: np.abs( + pts_a[:, None, :] - pts_b[None, :, :] + ).sum(axis=2) + def streaming_manhattan(): out = np.empty((pts_a.shape[0], pts_b.shape[0]), dtype=np.float64) for i in range(pts_a.shape[0]): diff = np.abs(pts_a[i] - pts_b) out[i] = np.sum(diff, axis=1) return out - baseline_fn = broadcast_manhattan if distance_baseline == "broadcast" else streaming_manhattan + + baseline_fn = ( + broadcast_manhattan + if distance_baseline == "broadcast" + else streaming_manhattan + ) elif func_name == "chebyshev_distance": 
supports_baseline = True baseline_kind = distance_baseline - broadcast_cheby = lambda: np.abs(pts_a[:, None, :] - pts_b[None, :, :]).max(axis=2) + broadcast_cheby = lambda: np.abs(pts_a[:, None, :] - pts_b[None, :, :]).max( + axis=2 + ) + def streaming_cheby(): out = np.empty((pts_a.shape[0], pts_b.shape[0]), dtype=np.float64) for i in range(pts_a.shape[0]): diff = np.abs(pts_a[i] - pts_b) out[i] = np.max(diff, axis=1) return out - baseline_fn = broadcast_cheby if distance_baseline == "broadcast" else streaming_cheby + + baseline_fn = ( + broadcast_cheby if distance_baseline == "broadcast" else streaming_cheby + ) elif func_name == "minkowski_distance": supports_baseline = True baseline_kind = distance_baseline broadcast_minkowski = lambda: ( np.abs(pts_a[:, None, :] - pts_b[None, :, :]) ** args.minkowski_p ).sum(axis=2) ** (1.0 / args.minkowski_p) + def streaming_minkowski(): out = np.empty((pts_a.shape[0], pts_b.shape[0]), dtype=np.float64) for i in range(pts_a.shape[0]): diff = np.abs(pts_a[i] - pts_b) ** args.minkowski_p out[i] = np.sum(diff, axis=1) ** (1.0 / args.minkowski_p) return out - baseline_fn = broadcast_minkowski if distance_baseline == "broadcast" else streaming_minkowski + + baseline_fn = ( + broadcast_minkowski + if distance_baseline == "broadcast" + else streaming_minkowski + ) # Timed loops timings: List[float] = [] @@ -780,9 +958,14 @@ def streaming_minkowski(): # -------------------------------------------------------------------------------------- # Reporting # -------------------------------------------------------------------------------------- -def format_result_row(r: BenchmarkResult, compare_numpy: bool = False, show_elements: bool = True, show_shape: bool = True) -> str: +def format_result_row( + r: BenchmarkResult, + compare_numpy: bool = False, + show_elements: bool = True, + show_shape: bool = True, +) -> str: tput = ( - f"{r.throughput_elems/1e6:.2f}M elems/s" + f"{r.throughput_elems / 1e6:.2f}M elems/s" if r.throughput_elems is not 
None else "-" ) @@ -790,37 +973,37 @@ def format_result_row(r: BenchmarkResult, compare_numpy: bool = False, show_elem mem_str = f"{r.memory_mb:.1f} MB" if r.memory_mb is not None else "-" row = ( f"{r.name:22} " - f"{r.mean_s*1000:9.2f} ms " - f"{r.stdev_s*1000:7.2f} ms " - f"{r.min_s*1000:7.2f} ms " - f"{r.max_s*1000:7.2f} ms " + f"{r.mean_s * 1000:9.2f} ms " + f"{r.stdev_s * 1000:7.2f} ms " + f"{r.min_s * 1000:7.2f} ms " + f"{r.max_s * 1000:7.2f} ms " ) if show_elements: row += f"{elem_str:>12} " - - row += ( - f"{tput:>15} " - f"{mem_str:>10} " - ) - + + row += f"{tput:>15} {mem_str:>10} " + if compare_numpy: if r.baseline_mean_s is not None: - base_mean = f"{r.baseline_mean_s*1000:9.2f} ms" + base_mean = f"{r.baseline_mean_s * 1000:9.2f} ms" base_tput = ( - f"{r.baseline_throughput_elems/1e6:.2f}M elems/s" + f"{r.baseline_throughput_elems / 1e6:.2f}M elems/s" if r.baseline_throughput_elems else "-" ) if r.speedup_vs_numpy >= 1.0: speedup = f"{r.speedup_vs_numpy:.2f}x" else: - speedup = f"-{1.0-r.speedup_vs_numpy:.2f}x" - + speedup = f"-{1.0 - r.speedup_vs_numpy:.2f}x" + # Calculate throughput difference - if r.throughput_elems is not None and r.baseline_throughput_elems is not None: + if ( + r.throughput_elems is not None + and r.baseline_throughput_elems is not None + ): diff = r.throughput_elems - r.baseline_throughput_elems arrow = "↑" if diff >= 0 else "↓" - diff_str = f"{arrow} {abs(diff)/1e6:.2f}M" + diff_str = f"{arrow} {abs(diff) / 1e6:.2f}M" else: diff_str = "-" @@ -830,25 +1013,29 @@ def format_result_row(r: BenchmarkResult, compare_numpy: bool = False, show_elem if show_shape: row += f"{r.shape_description}" - + return row -def print_header(compare_numpy: bool = False, show_elements: bool = True, show_shape: bool = True): +def print_header( + compare_numpy: bool = False, show_elements: bool = True, show_shape: bool = True +): header = ( f"{'Function':22} {'Mean':>9} {'StDev':>7} {'Min':>7} {'Max':>7} " ) if show_elements: header += 
f"{'Elements':>12} " - + header += f"{'Throughput':>15} {'RSS Mem':>10} " if compare_numpy: - header += f"{'NumPy Mean':>12} {'NumPy Tput':>15} {'Speedup':>9} {'Tput Diff':>12} " - + header += ( + f"{'NumPy Mean':>12} {'NumPy Tput':>15} {'Speedup':>9} {'Tput Diff':>12} " + ) + if show_shape: header += "Shape" - + print(header) print("-" * len(header)) return len(header) @@ -917,7 +1104,7 @@ def resolve_functions(group: str, explicit: Optional[List[str]]) -> List[str]: "binary_erosion", "binary_opening", "binary_closing", - "texture_entropy", + "haralick_features", ] if group == "morphology": return [ @@ -927,7 +1114,7 @@ def resolve_functions(group: str, explicit: Optional[List[str]]) -> List[str]: "binary_closing", ] if group == "texture": - return ["texture_entropy"] + return ["haralick_features"] raise ValueError(f"Unknown group: {group}") @@ -1041,42 +1228,57 @@ def main(argv: Optional[Sequence[str]] = None) -> int: print("=" * 34) print(f"Python: {platform.python_version()} Platform: {platform.platform()}") print(f"Loops: {args.loops} Warmups: {args.warmups} Seed: {args.seed}") - + # Log threading configuration thread_vars = [ - "OMP_NUM_THREADS", - "MKL_NUM_THREADS", - "OPENBLAS_NUM_THREADS", - "VECLIB_MAXIMUM_THREADS", - "NUMEXPR_NUM_THREADS" + "OMP_NUM_THREADS", + "MKL_NUM_THREADS", + "OPENBLAS_NUM_THREADS", + "VECLIB_MAXIMUM_THREADS", + "NUMEXPR_NUM_THREADS", + ] + env_settings = [ + f"{var}={os.environ.get(var, 'Not Set')}" for var in thread_vars ] - env_settings = [f"{var}={os.environ.get(var, 'Not Set')}" for var in thread_vars] print(f"Threading: {', '.join(env_settings)}") - + print(f"Group: {args.group} Functions: {', '.join(funcs)}") - + # Check uniformity of elements all_elements = [r.elements for r in results if r.elements is not None] unique_elements = set(all_elements) uniform_elements = len(unique_elements) == 1 elements_val = all_elements[0] if uniform_elements and all_elements else None - + if uniform_elements and elements_val is not None: - 
print(f"Elements: {elements_val:,}") + print(f"Elements: {elements_val:,}") # Check uniformity of shape - all_shapes = [r.shape_description for r in results if r.shape_description is not None] + all_shapes = [ + r.shape_description for r in results if r.shape_description is not None + ] unique_shapes = set(all_shapes) uniform_shapes = len(unique_shapes) == 1 shape_val = all_shapes[0] if uniform_shapes and all_shapes else None if uniform_shapes and shape_val is not None: - print(f"Shape: {shape_val}") + print(f"Shape: {shape_val}") print() - header_len = print_header(args.compare_numpy, show_elements=not uniform_elements, show_shape=not uniform_shapes) + header_len = print_header( + args.compare_numpy, + show_elements=not uniform_elements, + show_shape=not uniform_shapes, + ) for r in results: - print(format_result_row(r, args.compare_numpy, show_elements=not uniform_elements, show_shape=not uniform_shapes)) + print( + format_result_row( + r, + args.compare_numpy, + show_elements=not uniform_elements, + show_shape=not uniform_shapes, + ) + ) print("-" * header_len) print("Throughput reported as processed elements per second (approximation).") print() @@ -1141,21 +1343,39 @@ def main(argv: Optional[Sequence[str]] = None) -> int: lines.append("") lines.append("## Results") lines.append("") - lines.append("| Function | Mean (ms) | StDev (ms) | Min (ms) | Max (ms) | Elements | Rust Throughput (M elems/s) | NumPy Throughput (M elems/s) | Speedup vs NumPy | Shape |") - lines.append("|----------|-----------|------------|----------|----------|----------|------------------------|------------------|-------|") + lines.append( + "| Function | Mean (ms) | StDev (ms) | Min (ms) | Max (ms) | Elements | Rust Throughput (M elems/s) | NumPy Throughput (M elems/s) | Speedup vs NumPy | Shape |" + ) + lines.append( + "|----------|-----------|------------|----------|----------|----------|------------------------|------------------|-------|" + ) for r in results: mean_ms = r.mean_s * 1000 
stdev_ms = r.stdev_s * 1000 min_ms = r.min_s * 1000 max_ms = r.max_s * 1000 elems = f"{r.elements:,}" if r.elements is not None else "-" - tput = f"{(r.throughput_elems/1e6):.2f}" if r.throughput_elems is not None else "-" - speedup = f"{r.speedup_vs_numpy:.2f}x" if r.speedup_vs_numpy is not None else "-" - btput = f"{(r.baseline_throughput_elems/1e6):.2f}" if r.baseline_throughput_elems is not None else "-" - lines.append(f"| {r.name} | {mean_ms:.2f} | {stdev_ms:.2f} | {min_ms:.2f} | {max_ms:.2f} | {elems} | {tput} | {btput} | {speedup} | {r.shape_description} |") + tput = ( + f"{(r.throughput_elems / 1e6):.2f}" + if r.throughput_elems is not None + else "-" + ) + speedup = ( + f"{r.speedup_vs_numpy:.2f}x" if r.speedup_vs_numpy is not None else "-" + ) + btput = ( + f"{(r.baseline_throughput_elems / 1e6):.2f}" + if r.baseline_throughput_elems is not None + else "-" + ) + lines.append( + f"| {r.name} | {mean_ms:.2f} | {stdev_ms:.2f} | {min_ms:.2f} | {max_ms:.2f} | {elems} | {tput} | {btput} | {speedup} | {r.shape_description} |" + ) lines.append("") if args.compare_numpy and r.baseline_kind is not None: - lines.append("> Speedup vs NumPy = (NumPy mean time / Rust mean time); values > 1 indicate Rust is faster.") + lines.append( + "> Speedup vs NumPy = (NumPy mean time / Rust mean time); values > 1 indicate Rust is faster." 
+ ) if r.baseline_kind: lines.append(f"> NumPy baseline kind used: {r.baseline_kind}.") with open(args.md_out, "w", encoding="utf-8") as f_md: @@ -1193,24 +1413,56 @@ def main(argv: Optional[Sequence[str]] = None) -> int: # Determine column widths rows = [] for r in results: - mean_ms = f"{r.mean_s*1000:.2f}" - stdev_ms = f"{r.stdev_s*1000:.2f}" - min_ms = f"{r.min_s*1000:.2f}" - max_ms = f"{r.max_s*1000:.2f}" + mean_ms = f"{r.mean_s * 1000:.2f}" + stdev_ms = f"{r.stdev_s * 1000:.2f}" + min_ms = f"{r.min_s * 1000:.2f}" + max_ms = f"{r.max_s * 1000:.2f}" elems = f"{r.elements:,}" if r.elements is not None else "-" - tput = f"{(r.throughput_elems/1e6):.2f}" if r.throughput_elems is not None else "-" - btput = f"{(r.baseline_throughput_elems/1e6):.2f}" if r.baseline_throughput_elems is not None else "-" - speedup = f"{r.speedup_vs_numpy:.2f}x" if r.speedup_vs_numpy is not None else "-" - rows.append([r.name, mean_ms, stdev_ms, min_ms, max_ms, elems, tput, btput, speedup, r.shape_description]) + tput = ( + f"{(r.throughput_elems / 1e6):.2f}" + if r.throughput_elems is not None + else "-" + ) + btput = ( + f"{(r.baseline_throughput_elems / 1e6):.2f}" + if r.baseline_throughput_elems is not None + else "-" + ) + speedup = ( + f"{r.speedup_vs_numpy:.2f}x" if r.speedup_vs_numpy is not None else "-" + ) + rows.append( + [ + r.name, + mean_ms, + stdev_ms, + min_ms, + max_ms, + elems, + tput, + btput, + speedup, + r.shape_description, + ] + ) # Compute column widths - col_widths = [max(len(h), *(len(row[i]) for row in rows)) for i, h in enumerate(header_cols)] + col_widths = [ + max(len(h), *(len(row[i]) for row in rows)) + for i, h in enumerate(header_cols) + ] def grid_sep(char="="): return "+" + "+".join(char * (w + 2) for w in col_widths) + "+" def grid_row(values): - return "|" + "|".join(f" {v}{' ' * (w - len(v))} " for v, w in zip(values, col_widths)) + "|" + return ( + "|" + + "|".join( + f" {v}{' ' * (w - len(v))} " for v, w in zip(values, col_widths) + ) + + "|" + ) 
# Header rst.append(grid_sep("=")) @@ -1222,7 +1474,9 @@ def grid_row(values): rst.append(grid_sep("=")) rst.append("") if args.compare_numpy: - rst.append("Speedup vs NumPy = (NumPy mean time / Rust mean time); values > 1 indicate Rust is faster.") + rst.append( + "Speedup vs NumPy = (NumPy mean time / Rust mean time); values > 1 indicate Rust is faster." + ) rst.append("") with open(args.rst_out, "w", encoding="utf-8") as f_rst: f_rst.write("\n".join(rst)) diff --git a/docs/source/conf.py b/docs/source/conf.py index d3d7d75..ddbe3e8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -35,6 +35,7 @@ author = "Benjamin Smith" copyright = f"{datetime.now():%Y}, {author}" + # Attempt to get version from package; fall back gracefully. def _read_version(): try: @@ -44,6 +45,7 @@ def _read_version(): # On RTD before build of extension module this can fail. return "0.0.0" + version = _read_version() release = version @@ -156,6 +158,7 @@ def _read_version(): .. |NumPy| replace:: **NumPy** """ + # --------------------------------------------------------------------------- # Custom hook: ensure README & QUICKSTART included without duplication # --------------------------------------------------------------------------- @@ -174,11 +177,15 @@ def _link_external_markdown(): try: text = src_path.read_text(encoding="utf-8") # Avoid writing if unchanged - if not dest_path.exists() or dest_path.read_text(encoding="utf-8") != text: + if ( + not dest_path.exists() + or dest_path.read_text(encoding="utf-8") != text + ): dest_path.write_text(text, encoding="utf-8") except Exception as e: print(f"[conf.py] Warning: could not process {fname}: {e}") + _link_external_markdown() # --------------------------------------------------------------------------- @@ -213,6 +220,7 @@ def _link_external_markdown(): distributed chunking with Rust's local parallel execution. 
""" + def setup(app): # Make the note available as a config value app.add_config_value("rust_arch_note", RUST_ARCH_NOTE, "env") diff --git a/examples/basic_usage.py b/examples/basic_usage.py index a2b2e11..3b5784f 100644 --- a/examples/basic_usage.py +++ b/examples/basic_usage.py @@ -32,7 +32,7 @@ print("Example 2: NDVI with 2D arrays (100x100 image)") print("-" * 40) nir_2d = np.random.rand(100, 100) * 0.8 + 0.2 # NIR values between 0.2 and 1.0 -red_2d = np.random.rand(100, 100) * 0.4 # Red values between 0.0 and 0.4 +red_2d = np.random.rand(100, 100) * 0.4 # Red values between 0.0 and 0.4 ndvi_2d_result = ndvi(nir_2d, red_2d) print(f"NIR shape: {nir_2d.shape}") print(f"Red shape: {red_2d.shape}") @@ -56,11 +56,13 @@ # Example 4: NDWI with 2D arrays print("Example 4: NDWI with 2D arrays (50x50)") print("-" * 40) -green_2d = np.random.rand(50, 50) * 0.5 + 0.1 # Green between 0.1 and 0.6 -nir_2d = np.random.rand(50, 50) * 0.4 + 0.1 # NIR between 0.1 and 0.5 +green_2d = np.random.rand(50, 50) * 0.5 + 0.1 # Green between 0.1 and 0.6 +nir_2d = np.random.rand(50, 50) * 0.4 + 0.1 # NIR between 0.1 and 0.5 ndwi_2d = ndwi(green_2d, nir_2d) print(f"NDWI shape: {ndwi_2d.shape}") -print(f"NDWI stats -> min: {ndwi_2d.min():.4f} max: {ndwi_2d.max():.4f} mean: {ndwi_2d.mean():.4f}") +print( + f"NDWI stats -> min: {ndwi_2d.min():.4f} max: {ndwi_2d.max():.4f} mean: {ndwi_2d.mean():.4f}" +) print() # Example 5: Enhanced Vegetation Index (EVI) 1D @@ -79,12 +81,14 @@ # Example 6: Enhanced Vegetation Index (EVI) 2D print("Example 6: Enhanced Vegetation Index (EVI) 2D (60x60)") print("-" * 40) -nir_evi_2d = np.random.rand(60, 60) * 0.6 + 0.2 # 0.2 - 0.8 -red_evi_2d = np.random.rand(60, 60) * 0.4 + 0.1 # 0.1 - 0.5 -blue_evi_2d = np.random.rand(60, 60) * 0.2 + 0.05 # 0.05 - 0.25 +nir_evi_2d = np.random.rand(60, 60) * 0.6 + 0.2 # 0.2 - 0.8 +red_evi_2d = np.random.rand(60, 60) * 0.4 + 0.1 # 0.1 - 0.5 +blue_evi_2d = np.random.rand(60, 60) * 0.2 + 0.05 # 0.05 - 0.25 evi_2d = evi(nir_evi_2d, 
red_evi_2d, blue_evi_2d) print(f"EVI shape: {evi_2d.shape}") -print(f"EVI stats -> min: {evi_2d.min():.4f} max: {evi_2d.max():.4f} mean: {evi_2d.mean():.4f}") +print( + f"EVI stats -> min: {evi_2d.min():.4f} max: {evi_2d.max():.4f} mean: {evi_2d.mean():.4f}" +) print() # Example 7: Generic normalized difference @@ -102,6 +106,7 @@ print("Example 8: Performance (1000x1000 NDVI)") print("-" * 40) import time + size = 1000 nir_large = np.random.rand(size, size) red_large = np.random.rand(size, size) @@ -114,7 +119,9 @@ ndvi_numpy = (nir_large - red_large) / (nir_large + red_large) t_numpy = time.time() - t0 -print(f"Rust: {t_rust*1000:.2f} ms NumPy: {t_numpy*1000:.2f} ms Speedup: {t_numpy/t_rust:.2f}x Match: {np.allclose(ndvi_rust, ndvi_numpy, rtol=1e-10)}") +print( + f"Rust: {t_rust * 1000:.2f} ms NumPy: {t_numpy * 1000:.2f} ms Speedup: {t_numpy / t_rust:.2f}x Match: {np.allclose(ndvi_rust, ndvi_numpy, rtol=1e-10)}" +) print() # Example 9: Temporal statistics & median composite @@ -127,7 +134,9 @@ composite_img = composite(ts, method="median") print(f"mean shape: {mean_img.shape}, std shape: {std_img.shape}") -print(f"median shape: {median_img.shape}, composite (median) identical: {np.allclose(median_img, composite_img)}") +print( + f"median shape: {median_img.shape}, composite (median) identical: {np.allclose(median_img, composite_img)}" +) print(f"mean range: [{mean_img.min():.4f}, {mean_img.max():.4f}]") print(f"std range: [{std_img.min():.4f}, {std_img.max():.4f}]") print(f"median range: [{median_img.min():.4f}, {median_img.max():.4f}]") diff --git a/examples/map_blocks.py b/examples/map_blocks.py index b4dd85d..b8c24f4 100644 --- a/examples/map_blocks.py +++ b/examples/map_blocks.py @@ -29,12 +29,15 @@ print(" pip install eo-processor[dask] xarray dask") raise + # Helper to report timing and basic stats def report(name: str, result_array): """Print name, time and some basic stats of the computed result.""" if isinstance(result_array, xr.DataArray): arr = 
result_array - data = arr.values if not isinstance(arr.data, da.Array) else arr.compute().values + data = ( + arr.values if not isinstance(arr.data, da.Array) else arr.compute().values + ) elif isinstance(result_array, da.Array): data = result_array.compute() else: @@ -85,8 +88,12 @@ def example_map_blocks_vs_apply_ufunc(): nir_dask = da.from_array(nir_np) red_dask = da.from_array(red_np) - nir_xr = xr.DataArray(nir_dask, dims=("y", "x"), coords={"y": np.arange(size), "x": np.arange(size)}) - red_xr = xr.DataArray(red_dask, dims=("y", "x"), coords={"y": np.arange(size), "x": np.arange(size)}) + nir_xr = xr.DataArray( + nir_dask, dims=("y", "x"), coords={"y": np.arange(size), "x": np.arange(size)} + ) + red_xr = xr.DataArray( + red_dask, dims=("y", "x"), coords={"y": np.arange(size), "x": np.arange(size)} + ) # Use apply_ufunc which can run the underlying Rust UDF in parallel. print("Timing: xarray.apply_ufunc (dask='parallelized') ...") @@ -100,7 +107,7 @@ def example_map_blocks_vs_apply_ufunc(): dask="parallelized", vectorize=False, output_dtypes=[float], - dask_gufunc_kwargs={'allow_rechunk': True} + dask_gufunc_kwargs={"allow_rechunk": True}, ) # Trigger computation and measure ndvi_xr_ufunc_computed = ndvi_xr_ufunc.compute() @@ -112,6 +119,7 @@ def example_map_blocks_vs_apply_ufunc(): # xarray.map_blocks applies a function block-by-block. The function should accept # numpy arrays (blocks) and return a numpy array or xarray DataArray for that block. 
print("Timing: xarray.map_blocks ...") + # define block function that uses the Rust ndvi on numpy blocks def block_ndvi(darr_chunk: xr.DataArray): # ds will have 'nir' and 'red' DataArrays for the current block @@ -132,7 +140,6 @@ def block_ndvi(darr_chunk: xr.DataArray): print("Preparing stacked xarray for map_blocks...") - # Build xarray.DataArray inputs (already defined as nir_xr/red_xr) start = time.time() stacked_xr = xr.concat( @@ -158,6 +165,7 @@ def block_ndvi(darr_chunk: xr.DataArray): # 4) dask.array.map_blocks on the underlying dask arrays print("Timing: dask.array.map_blocks ...") + def dask_block_ndvi(nir_block, red_block): # this will be called with numpy arrays per block res_arr = ndvi(nir_block, red_block) @@ -179,9 +187,20 @@ def dask_block_ndvi(nir_block, red_block): # Quick consistency checks (allow tiny floating differences) print("Sanity checks (all_close to NumPy baseline):") - print("apply_ufunc ~ baseline:", np.allclose(ndvi_xr_ufunc_computed, ndvi_numpy, equal_nan=True, atol=1e-10)) - print("xarray.map_blocks ~ baseline:", np.allclose(ndvi_xr_mapblocks_computed, ndvi_numpy, equal_nan=True, atol=1e-10)) - print("dask.map_blocks ~ baseline:", np.allclose(ndvi_dask_mapblocks_computed, ndvi_numpy, equal_nan=True, atol=1e-10)) + print( + "apply_ufunc ~ baseline:", + np.allclose(ndvi_xr_ufunc_computed, ndvi_numpy, equal_nan=True, atol=1e-10), + ) + print( + "xarray.map_blocks ~ baseline:", + np.allclose(ndvi_xr_mapblocks_computed, ndvi_numpy, equal_nan=True, atol=1e-10), + ) + print( + "dask.map_blocks ~ baseline:", + np.allclose( + ndvi_dask_mapblocks_computed, ndvi_numpy, equal_nan=True, atol=1e-10 + ), + ) print() # Summary timings diff --git a/examples/morphology_example.py b/examples/morphology_example.py index 4a3ffb0..6c44bf0 100644 --- a/examples/morphology_example.py +++ b/examples/morphology_example.py @@ -2,10 +2,12 @@ import eo_processor from eo_processor import binary_dilation, binary_erosion, binary_opening, binary_closing + def 
print_grid(arr): for row in arr: print(" ".join(str(x) for x in row)) + def main(): print("EO Processor - Morphology Example") print("=================================") @@ -15,8 +17,8 @@ def main(): # A 3x3 block in the center with some noise input_arr = np.zeros((7, 7), dtype=np.uint8) input_arr[2:5, 2:5] = 1 - input_arr[0, 0] = 1 # Noise pixel - input_arr[3, 3] = 0 # Hole in the center + input_arr[0, 0] = 1 # Noise pixel + input_arr[3, 3] = 0 # Hole in the center print("\nInput Pattern:") print_grid(input_arr) @@ -51,5 +53,6 @@ def main(): print_grid(closed) print("(Note: Hole at [3,3] is filled, noise at [0,0] remains)") + if __name__ == "__main__": main() diff --git a/examples/processes_examples.py b/examples/processes_examples.py index 5f945be..05a988a 100644 --- a/examples/processes_examples.py +++ b/examples/processes_examples.py @@ -108,7 +108,9 @@ def example_4d(): def example_chain(): cube = np.random.rand(12, 64, 64) ma = moving_average_temporal(cube, window=5, mode="same") - stretched = pixelwise_transform(ma, scale=1.2, offset=-0.1, clamp_min=0.0, clamp_max=1.0) + stretched = pixelwise_transform( + ma, scale=1.2, offset=-0.1, clamp_min=0.0, clamp_max=1.0 + ) print("\nChaining example:") print("Input cube shape:", cube.shape) print("Moving average shape:", ma.shape) @@ -150,7 +152,9 @@ def example_perf(): assert np.allclose(rust_out, naive_out, atol=1e-12) print("\nPerformance (1D series length 200k, window=21):") print(f"Rust moving_average_temporal: {rust_t:.4f}s") - print(f"Naive Python version : {naive_t:.4f}s (speedup ~{naive_t / rust_t:.2f}x)") + print( + f"Naive Python version : {naive_t:.4f}s (speedup ~{naive_t / rust_t:.2f}x)" + ) # --------------------------------------------------------------------------- @@ -194,7 +198,9 @@ def example_dask(): # --------------------------------------------------------------------------- def example_pixelwise(): arr = np.array([[0.05, 0.5, 1.2], [0.8, -0.3, 0.4]]) - scaled = pixelwise_transform(arr, 
scale=1.5, offset=-0.1, clamp_min=0.0, clamp_max=1.0) + scaled = pixelwise_transform( + arr, scale=1.5, offset=-0.1, clamp_min=0.0, clamp_max=1.0 + ) print("\nPixelwise transform:") print("Input:\n", arr) print("Scaled & clamped:\n", scaled) diff --git a/examples/random_forest_example.py b/examples/random_forest_example.py index 657d755..759ca15 100644 --- a/examples/random_forest_example.py +++ b/examples/random_forest_example.py @@ -6,9 +6,10 @@ from eo_processor import random_forest_predict # Add the parent directory to the Python path to allow importing from `tests` -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) from tests.utils import sklearn_to_json + def main(): """Main function to run the random forest example.""" # Generate synthetic data @@ -37,5 +38,6 @@ def main(): accuracy = np.mean(predictions == sklearn_predictions) print(f"Accuracy compared to scikit-learn: {accuracy * 100:.2f}%") + if __name__ == "__main__": main() diff --git a/examples/spatial_distances.py b/examples/spatial_distances.py index 0438d4d..84c3f1f 100644 --- a/examples/spatial_distances.py +++ b/examples/spatial_distances.py @@ -37,6 +37,7 @@ # Import internal core module (distance functions live here) from eo_processor import _core + def demo_point_to_point_distance(): print("Demo 0a: Distance from single point to single point") print("-" * 40) @@ -54,6 +55,7 @@ def demo_point_to_point_distance(): print("Chebyshev distance:\n", dist_cheby) print() + def demo_point_to_array_distance(): print("Demo 0: Distance from single point to array of points") print("-" * 40) @@ -78,6 +80,7 @@ def demo_point_to_array_distance(): print("Chebyshev distances:\n", dist_cheby) print() + def demo_small_points(): print("Demo 1: Small 2D point sets") print("-" * 40) @@ -109,8 +112,10 @@ def demo_small_points(): print("Chebyshev distances:\n", dist_cheby) print("Minkowski(p=2) 
distances:\n", dist_mink_p2) # Consistency checks (allow tiny floating differences) - print("Check: euclidean == minkowski(p=2):", - np.allclose(dist_euclid, dist_mink_p2, atol=1e-12)) + print( + "Check: euclidean == minkowski(p=2):", + np.allclose(dist_euclid, dist_mink_p2, atol=1e-12), + ) print() @@ -128,14 +133,26 @@ def demo_random_points(n: int = 5, m: int = 4, dim: int = 3, seed: int = 42): print(f"Generated points_a (n={n}, dim={dim})") print(f"Generated points_b (m={m}, dim={dim})") - print("Euclidean summary: min={:.4f} max={:.4f} mean={:.4f}".format( - dist_euclid.min(), dist_euclid.max(), dist_euclid.mean())) - print("Manhattan summary: min={:.4f} max={:.4f} mean={:.4f}".format( - dist_manhat.min(), dist_manhat.max(), dist_manhat.mean())) - print("Chebyshev summary: min={:.4f} max={:.4f} mean={:.4f}".format( - dist_cheby.min(), dist_cheby.max(), dist_cheby.mean())) - print("Minkowski(p=3) summary: min={:.4f} max={:.4f} mean={:.4f}".format( - dist_mink_p3.min(), dist_mink_p3.max(), dist_mink_p3.mean())) + print( + "Euclidean summary: min={:.4f} max={:.4f} mean={:.4f}".format( + dist_euclid.min(), dist_euclid.max(), dist_euclid.mean() + ) + ) + print( + "Manhattan summary: min={:.4f} max={:.4f} mean={:.4f}".format( + dist_manhat.min(), dist_manhat.max(), dist_manhat.mean() + ) + ) + print( + "Chebyshev summary: min={:.4f} max={:.4f} mean={:.4f}".format( + dist_cheby.min(), dist_cheby.max(), dist_cheby.mean() + ) + ) + print( + "Minkowski(p=3) summary: min={:.4f} max={:.4f} mean={:.4f}".format( + dist_mink_p3.min(), dist_mink_p3.max(), dist_mink_p3.mean() + ) + ) print() # Inequality relationships: @@ -144,10 +161,8 @@ def demo_random_points(n: int = 5, m: int = 4, dim: int = 3, seed: int = 42): # Chebyshev <= Euclidean <= Manhattan # We demonstrate general range bounds. 
print("Pairwise comparison checks:") - print("All(Euclidean <= Manhattan):", - np.all(dist_euclid <= dist_manhat + 1e-12)) - print("All(Chebyshev <= Euclidean):", - np.all(dist_cheby <= dist_euclid + 1e-12)) + print("All(Euclidean <= Manhattan):", np.all(dist_euclid <= dist_manhat + 1e-12)) + print("All(Chebyshev <= Euclidean):", np.all(dist_cheby <= dist_euclid + 1e-12)) print() @@ -191,13 +206,19 @@ def demo_minkowski_transitions(p_values=(1.0, 2.0, 3.0, 10.0)): cheby_ref = _core.chebyshev_distance(points_a, points_b) print("Sanity relationships:") - print("p=1 (Minkowski) == Manhattan:", - np.allclose(dist_sets[1.0], manhattan_ref, atol=1e-12)) - print("p=2 (Minkowski) == Euclidean:", - np.allclose(dist_sets[2.0], euclid_ref, atol=1e-12)) + print( + "p=1 (Minkowski) == Manhattan:", + np.allclose(dist_sets[1.0], manhattan_ref, atol=1e-12), + ) + print( + "p=2 (Minkowski) == Euclidean:", + np.allclose(dist_sets[2.0], euclid_ref, atol=1e-12), + ) # Large p approximates Chebyshev (not exact for finite p) - print("p=10 approximates Chebyshev (mean abs diff):", - np.abs(dist_sets[10.0] - cheby_ref).mean()) + print( + "p=10 approximates Chebyshev (mean abs diff):", + np.abs(dist_sets[10.0] - cheby_ref).mean(), + ) print() print("Distance mean progression by p:") diff --git a/examples/spectral_indices_extended.py b/examples/spectral_indices_extended.py index ab874fa..f8ad509 100644 --- a/examples/spectral_indices_extended.py +++ b/examples/spectral_indices_extended.py @@ -51,6 +51,7 @@ # Utility helpers # --------------------------------------------------------------------------- + def header(title: str) -> None: print("=" * 72) print(title) @@ -209,12 +210,20 @@ def time_func(fn, loops=3): nbr_rust_mean, nbr_rust_min, nbr_rust_max = time_func(rust_nbr) nbr_np_mean, nbr_np_min, nbr_np_max = time_func(numpy_nbr) - print(f"NDVI Rust mean: {ndvi_rust_mean:.4f}s (min {ndvi_rust_min:.4f} max {ndvi_rust_max:.4f})") - print(f"NDVI NumPy mean: {ndvi_np_mean:.4f}s (min 
{ndvi_np_min:.4f} max {ndvi_np_max:.4f})") + print( + f"NDVI Rust mean: {ndvi_rust_mean:.4f}s (min {ndvi_rust_min:.4f} max {ndvi_rust_max:.4f})" + ) + print( + f"NDVI NumPy mean: {ndvi_np_mean:.4f}s (min {ndvi_np_min:.4f} max {ndvi_np_max:.4f})" + ) print(f"NDVI Speedup (NumPy/Rust): {ndvi_np_mean / ndvi_rust_mean:.2f}x") - print(f"NBR Rust mean: {nbr_rust_mean:.4f}s (min {nbr_rust_min:.4f} max {nbr_rust_max:.4f})") - print(f"NBR NumPy mean: {nbr_np_mean:.4f}s (min {nbr_np_min:.4f} max {nbr_np_max:.4f})") + print( + f"NBR Rust mean: {nbr_rust_mean:.4f}s (min {nbr_rust_min:.4f} max {nbr_rust_max:.4f})" + ) + print( + f"NBR NumPy mean: {nbr_np_mean:.4f}s (min {nbr_np_min:.4f} max {nbr_np_max:.4f})" + ) print(f"NBR Speedup (NumPy/Rust): {nbr_np_mean / nbr_rust_mean:.2f}x") @@ -261,7 +270,9 @@ def multi_band_cube_demo(): # Example masked area (simulate cloud or invalid pixels) mask = rng.random((H, W)) < 0.1 mean_ndvi_masked = np.where(mask, np.nan, mean_ndvi) - print(f"Applied synthetic mask (10% NaNs) to mean NDVI - finite count: {np.isfinite(mean_ndvi_masked).sum()}/{H*W}") + print( + f"Applied synthetic mask (10% NaNs) to mean NDVI - finite count: {np.isfinite(mean_ndvi_masked).sum()}/{H * W}" + ) # --------------------------------------------------------------------------- diff --git a/examples/temporal_operations.py b/examples/temporal_operations.py index cf5bee7..20c5ce1 100644 --- a/examples/temporal_operations.py +++ b/examples/temporal_operations.py @@ -45,9 +45,9 @@ mean_numpy_large = np.mean(large_array, axis=0) time_numpy = time.time() - start_numpy -print(f"Rust implementation: {time_rust*1000:.2f} ms") -print(f"NumPy implementation: {time_numpy*1000:.2f} ms") -print(f"Speedup: {time_numpy/time_rust:.2f}x") +print(f"Rust implementation: {time_rust * 1000:.2f} ms") +print(f"NumPy implementation: {time_numpy * 1000:.2f} ms") +print(f"Speedup: {time_numpy / time_rust:.2f}x") print() # Example 4: Performance comparison for temporal_std @@ -64,9 +64,9 @@ 
std_numpy_large = np.std(large_array, axis=0, ddof=1) time_numpy = time.time() - start_numpy -print(f"Rust implementation: {time_rust*1000:.2f} ms") -print(f"NumPy implementation: {time_numpy*1000:.2f} ms") -print(f"Speedup: {time_numpy/time_rust:.2f}x") +print(f"Rust implementation: {time_rust * 1000:.2f} ms") +print(f"NumPy implementation: {time_numpy * 1000:.2f} ms") +print(f"Speedup: {time_numpy / time_rust:.2f}x") # Example 5: NaN handling & temporal composite (median) print("Example 5: NaN handling & temporal composite") @@ -85,6 +85,8 @@ print(f"skip_na mean NaN count: {np.isnan(mean_skip).sum()}") print(f"strict mean NaN count: {np.isnan(mean_strict).sum()}") print(f"std skip_na NaN count: {np.isnan(std_skip).sum()}") -print(f"median == composite(median): {np.allclose(median_comp, composite_median, equal_nan=True)}") +print( + f"median == composite(median): {np.allclose(median_comp, composite_median, equal_nan=True)}" +) print(f"median range: [{np.nanmin(median_comp):.4f}, {np.nanmax(median_comp):.4f}]") print() diff --git a/examples/temporal_sum_composite_example.py b/examples/temporal_sum_composite_example.py index 934e8d9..56dca96 100644 --- a/examples/temporal_sum_composite_example.py +++ b/examples/temporal_sum_composite_example.py @@ -1,6 +1,7 @@ import numpy as np from eo_processor import temporal_sum, temporal_composite + def run_temporal_examples(): """Demonstrate temporal_sum and temporal_composite.""" print("--- Temporal Sum and Composite Example ---") @@ -42,10 +43,13 @@ def run_temporal_examples(): weighted_median = temporal_composite(test_stack, weights=pixel_weights) assert weighted_median.shape == (1, 1, 1) # The median should be 3 because its weight (10) is > half the total weight (14 / 2 = 7) - assert abs(weighted_median[0, 0, 0] - 3.0) < 1e-9, "Weighted median calculation is incorrect" + assert abs(weighted_median[0, 0, 0] - 3.0) < 1e-9, ( + "Weighted median calculation is incorrect" + ) print("temporal_composite correctly computes the 
weighted median.") print("\n--- Example Complete ---") + if __name__ == "__main__": run_temporal_examples() diff --git a/examples/xarray_dask_usage.py b/examples/xarray_dask_usage.py index e326bec..aac4144 100644 --- a/examples/xarray_dask_usage.py +++ b/examples/xarray_dask_usage.py @@ -61,10 +61,18 @@ def example_xarray_basic(): rng = np.random.default_rng(0) nir = xr.DataArray(rng.uniform(0.3, 0.9, size=(h, w)), dims=["y", "x"], name="NIR") red = xr.DataArray(rng.uniform(0.05, 0.4, size=(h, w)), dims=["y", "x"], name="Red") - green = xr.DataArray(rng.uniform(0.1, 0.6, size=(h, w)), dims=["y", "x"], name="Green") - blue = xr.DataArray(rng.uniform(0.02, 0.25, size=(h, w)), dims=["y", "x"], name="Blue") - swir1 = xr.DataArray(rng.uniform(0.2, 0.5, size=(h, w)), dims=["y", "x"], name="SWIR1") - swir2 = xr.DataArray(rng.uniform(0.15, 0.4, size=(h, w)), dims=["y", "x"], name="SWIR2") + green = xr.DataArray( + rng.uniform(0.1, 0.6, size=(h, w)), dims=["y", "x"], name="Green" + ) + blue = xr.DataArray( + rng.uniform(0.02, 0.25, size=(h, w)), dims=["y", "x"], name="Blue" + ) + swir1 = xr.DataArray( + rng.uniform(0.2, 0.5, size=(h, w)), dims=["y", "x"], name="SWIR1" + ) + swir2 = xr.DataArray( + rng.uniform(0.15, 0.4, size=(h, w)), dims=["y", "x"], name="SWIR2" + ) # Compute indices (eager, small arrays) indices = { @@ -164,7 +172,16 @@ def block_indices(nir_blk, red_blk, green_blk): print("Result stacked shape (channels, y, x):", stacked.shape) comp = stacked.compute() - print("Channels:", comp.shape[0], "NDVI mean:", comp[0].mean(), "SAVI mean:", comp[1].mean(), "GCI mean:", comp[2].mean()) + print( + "Channels:", + comp.shape[0], + "NDVI mean:", + comp[0].mean(), + "SAVI mean:", + comp[1].mean(), + "GCI mean:", + comp[2].mean(), + ) print("map_blocks multi-index computed ✔\n") @@ -234,7 +251,9 @@ def example_multiband_dataset(): # Compute indices, add as new DataArrays ds["NDVI"] = xr.DataArray(ndvi(ds["NIR"].data, ds["Red"].data), dims=["y", "x"]) - ds["SAVI"] = 
xr.DataArray(savi(ds["NIR"].data, ds["Red"].data, L=0.5), dims=["y", "x"]) + ds["SAVI"] = xr.DataArray( + savi(ds["NIR"].data, ds["Red"].data, L=0.5), dims=["y", "x"] + ) ds["NBR"] = xr.DataArray(nbr(ds["NIR"].data, ds["SWIR2"].data), dims=["y", "x"]) ds["NDMI"] = xr.DataArray(ndmi(ds["NIR"].data, ds["SWIR1"].data), dims=["y", "x"]) ds["GCI"] = xr.DataArray(gci(ds["NIR"].data, ds["Green"].data), dims=["y", "x"]) diff --git a/examples/zonal_stats_example.py b/examples/zonal_stats_example.py index 6436d3e..bba1950 100644 --- a/examples/zonal_stats_example.py +++ b/examples/zonal_stats_example.py @@ -2,6 +2,7 @@ import eo_processor from eo_processor import zonal_stats + def main(): print("EO Processor - Zonal Statistics Example") print("=======================================") @@ -10,10 +11,10 @@ def main(): # -------------------- shape = (100, 100) print(f"\nGenerating {shape[0]}x{shape[1]} random data...") - + # Values: Random float data (e.g., NDVI or reflectance) values = np.random.uniform(0.0, 1.0, size=shape) - + # Add some NaNs to demonstrate handling values[10:20, 10:20] = np.nan print("Added NaNs to a 10x10 region.") @@ -21,7 +22,7 @@ def main(): # Zones: Random integer labels (e.g., field IDs, classification classes) # 5 distinct zones (0 to 4) zones = np.random.randint(0, 5, size=shape, dtype=np.int64) - + print(f"Zones: {np.unique(zones)}") # 2. Run Zonal Statistics @@ -33,13 +34,17 @@ def main(): # 3. 
Display Results # ------------------ print("\nResults:") - print(f"{'Zone ID':<10} {'Count':<10} {'Mean':<10} {'Min':<10} {'Max':<10} {'Std':<10}") + print( + f"{'Zone ID':<10} {'Count':<10} {'Mean':<10} {'Min':<10} {'Max':<10} {'Std':<10}" + ) print("-" * 65) # Sort by zone ID for display for zone_id in sorted(stats.keys()): zs = stats[zone_id] - print(f"{zone_id:<10} {zs.count:<10} {zs.mean:<10.4f} {zs.min:<10.4f} {zs.max:<10.4f} {zs.std:<10.4f}") + print( + f"{zone_id:<10} {zs.count:<10} {zs.mean:<10.4f} {zs.min:<10.4f} {zs.max:<10.4f} {zs.std:<10.4f}" + ) # 4. Accessing specific stats # --------------------------- @@ -50,5 +55,6 @@ def main(): print(f" Count: {z1.count}") print(f" Mean: {z1.mean:.4f}") + if __name__ == "__main__": main() diff --git a/pyproject.toml b/pyproject.toml index e21e5a0..dbc4765 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,8 @@ dependencies = [ "maturin>=1.9.6", "numpy>=1.20.0", "structlog>=23.1.0", + "xarray>=2023.0.0", + "dask[array]>=2023.0.0", ] [project.scripts] @@ -40,10 +42,9 @@ dev = [ "tox>=4.25.0", "scipy>=1.10.0", "scikit-learn>=1.0", + "scikit-image>=0.18.0", ] dask = [ - "dask[array]>=2023.0.0", - "xarray>=2023.0.0", # PyArrow dropped support for Python 3.8 in version 18.0.0 "pyarrow>=17.0.0", "scipy>=1.10.0", diff --git a/python/eo_processor/__init__.py b/python/eo_processor/__init__.py index 14cbe89..76e6bad 100644 --- a/python/eo_processor/__init__.py +++ b/python/eo_processor/__init__.py @@ -51,11 +51,13 @@ complex_classification as _complex_classification, random_forest_predict as _random_forest_predict, random_forest_train as _random_forest_train, + haralick_features as _haralick_features, ) -from ._core import texture_entropy as _texture_entropy import logging import structlog import numpy as np +import xarray as xr +from functools import partial # Configure structlog for structured, extensible logging structlog.configure( @@ -131,19 +133,28 @@ "binary_closing", "detect_breakpoints", 
"complex_classification", - "texture_entropy", + "haralick_features", "random_forest_predict", "random_forest_train", ] -def random_forest_train(features, labels, n_estimators=100, min_samples_split=2, max_depth=None, max_features=None): +def random_forest_train( + features, + labels, + n_estimators=100, + min_samples_split=2, + max_depth=None, + max_features=None, +): """ Train a random forest model. """ if max_features is None: max_features = int(np.sqrt(features.shape[1])) - return _random_forest_train(features, labels, n_estimators, min_samples_split, max_depth, max_features) + return _random_forest_train( + features, labels, n_estimators, min_samples_split, max_depth, max_features + ) def random_forest_predict(model_json, features): @@ -167,11 +178,117 @@ def complex_classification(blue, green, red, nir, swir1, swir2, temp): return _complex_classification(blue, green, red, nir, swir1, swir2, temp) -def texture_entropy(input, window_size): - """ - Compute the entropy of a 2D array over a moving window. - """ - return _texture_entropy(input, window_size) +def _apply_haralick(data_block, window_size, levels, boundary, dtype): + """Helper to apply Haralick features and handle dask chunk boundaries.""" + # If the original block is smaller than the window, no features can be calculated. 
+ if data_block.shape[0] < window_size or data_block.shape[1] < window_size: + empty_shape = (data_block.shape[0], data_block.shape[1]) + return ( + np.full(empty_shape, np.nan, dtype=dtype), + np.full(empty_shape, np.nan, dtype=dtype), + np.full(empty_shape, np.nan, dtype=dtype), + np.full(empty_shape, np.nan, dtype=dtype), + ) + + # Pad the block to handle boundaries correctly + padded_block = np.pad(data_block, pad_width=boundary, mode="reflect") + + # Calculate features on the padded block + contrast, dissimilarity, homogeneity, entropy = _haralick_features( + padded_block, window_size, levels + ) + + # Un-pad the results to match the original chunk's dimensions + return ( + contrast[boundary:-boundary, boundary:-boundary], + dissimilarity[boundary:-boundary, boundary:-boundary], + homogeneity[boundary:-boundary, boundary:-boundary], + entropy[boundary:-boundary, boundary:-boundary], + ) + + +def haralick_features( + data: xr.DataArray, + window_size: int = 3, + levels: int = 8, + features: list = None, +) -> xr.DataArray: + """ + Calculate Haralick texture features over a sliding window. + + This function is designed to work with Dask-backed xarray DataArrays, + allowing for parallel, out-of-memory computation. + + :param data: Input 2D xarray.DataArray. Values should be integers, + ideally quantized to the specified number of levels. + :param window_size: The size of the square window for GLCM calculation. + :param levels: Number of gray levels to use for the GLCM. The input data + should be quantized to this range [0, levels-1]. + :param features: List of feature names to compute. Defaults to all four: + ['contrast', 'dissimilarity', 'homogeneity', 'entropy']. + :return: An xarray.DataArray with a new 'feature' dimension containing + the calculated texture metrics. 
+ """ + if features is None: + features = ["contrast", "dissimilarity", "homogeneity", "entropy"] + + if data.ndim != 2: + raise ValueError("Input data must be a 2D xarray.DataArray.") + + # Quantize data to the specified number of levels + if data.max() > levels - 1: + log.warning( + "Data contains values greater than `levels`-1. " + "Quantizing data to the range [0, levels-1]." + ) + data = (data / data.max() * (levels - 1)).astype(np.uint8) + else: + data = data.astype(np.uint8) + + # Dask requires specifying the output template. + template = xr.DataArray( + np.empty((len(features), data.shape[0], data.shape[1]), dtype=np.float64), + dims=("feature",) + data.dims, + coords={"feature": features}, + ) + + # Calculate boundary overlap for dask chunks + boundary = window_size // 2 + + # Use a partial function to pass static arguments to map_blocks + apply_func = partial( + _apply_haralick, + window_size=window_size, + levels=levels, + boundary=boundary, + dtype=template.dtype, + ) + + # `map_blocks` applies the function to each Dask chunk. + # We must specify the output chunks, which we can derive from the input. 
+ # We get a tuple of arrays from our rust function, one for each feature + contrast, dissimilarity, homogeneity, entropy = xr.apply_ufunc( + apply_func, + data, + input_core_dims=[("y", "x")], + dask="parallelized", + output_dtypes=[template.dtype] * 4, + output_core_dims=(("y", "x"), ("y", "x"), ("y", "x"), ("y", "x")), + dask_gufunc_kwargs=dict(allow_rechunk=True), + ) + + # Combine the results into a single DataArray + feature_map = { + "contrast": contrast, + "dissimilarity": dissimilarity, + "homogeneity": homogeneity, + "entropy": entropy, + } + + result = xr.concat([feature_map[f] for f in features], dim="feature") + result["feature"] = features + + return result def normalized_difference(a, b): diff --git a/python/eo_processor/__init__.pyi b/python/eo_processor/__init__.pyi index ee1fa17..41687e4 100644 --- a/python/eo_processor/__init__.pyi +++ b/python/eo_processor/__init__.pyi @@ -39,7 +39,9 @@ log: structlog.stdlib.BoundLogger def normalized_difference(a: NumericArray, b: NumericArray) -> NDArray[np.float64]: ... def ndvi(nir: NumericArray, red: NumericArray) -> NDArray[np.float64]: ... def ndwi(green: NumericArray, nir: NumericArray) -> NDArray[np.float64]: ... -def savi(nir: NumericArray, red: NumericArray, L: float = ...) -> NDArray[np.float64]: ... +def savi( + nir: NumericArray, red: NumericArray, L: float = ... +) -> NDArray[np.float64]: ... def nbr(nir: NumericArray, swir2: NumericArray) -> NDArray[np.float64]: ... def ndmi(nir: NumericArray, swir1: NumericArray) -> NDArray[np.float64]: ... def nbr2(swir1: NumericArray, swir2: NumericArray) -> NDArray[np.float64]: ... @@ -56,12 +58,17 @@ def delta_nbr( post_nir: NumericArray, post_swir2: NumericArray, ) -> NDArray[np.float64]: ... -def enhanced_vegetation_index(nir: NumericArray, red: NumericArray, blue: NumericArray) -> NDArray[np.float64]: ... +def enhanced_vegetation_index( + nir: NumericArray, red: NumericArray, blue: NumericArray +) -> NDArray[np.float64]: ... 
+ evi = enhanced_vegetation_index # Temporal reducers & composites def median(arr: NumericArray, skip_na: bool = ...) -> NDArray[np.float64]: ... -def composite(arr: NumericArray, method: str = ..., **kwargs) -> NDArray[np.float64]: ... +def composite( + arr: NumericArray, method: str = ..., **kwargs +) -> NDArray[np.float64]: ... def temporal_mean(arr: NumericArray, skip_na: bool = ...) -> NDArray[np.float64]: ... def temporal_std(arr: NumericArray, skip_na: bool = ...) -> NDArray[np.float64]: ... @@ -90,10 +97,18 @@ def pixelwise_transform( ) -> NDArray[np.float64]: ... # Distance functions (pairwise) -def euclidean_distance(points_a: NumericArray, points_b: NumericArray) -> NDArray[np.float64]: ... -def manhattan_distance(points_a: NumericArray, points_b: NumericArray) -> NDArray[np.float64]: ... -def chebyshev_distance(points_a: NumericArray, points_b: NumericArray) -> NDArray[np.float64]: ... -def minkowski_distance(points_a: NumericArray, points_b: NumericArray, p: float) -> NDArray[np.float64]: ... +def euclidean_distance( + points_a: NumericArray, points_b: NumericArray +) -> NDArray[np.float64]: ... +def manhattan_distance( + points_a: NumericArray, points_b: NumericArray +) -> NDArray[np.float64]: ... +def chebyshev_distance( + points_a: NumericArray, points_b: NumericArray +) -> NDArray[np.float64]: ... +def minkowski_distance( + points_a: NumericArray, points_b: NumericArray, p: float +) -> NDArray[np.float64]: ... # Masking utilities def mask_vals( @@ -133,9 +148,17 @@ def mask_with_scl( ) -> NDArray[np.float64]: ... # Morphology functions -def binary_dilation(input: NDArray[np.uint8], kernel_size: int = ...) -> NDArray[np.uint8]: ... -def binary_erosion(input: NDArray[np.uint8], kernel_size: int = ...) -> NDArray[np.uint8]: ... -def binary_opening(input: NDArray[np.uint8], kernel_size: int = ...) -> NDArray[np.uint8]: ... -def binary_closing(input: NDArray[np.uint8], kernel_size: int = ...) -> NDArray[np.uint8]: ... 
+def binary_dilation( + input: NDArray[np.uint8], kernel_size: int = ... +) -> NDArray[np.uint8]: ... +def binary_erosion( + input: NDArray[np.uint8], kernel_size: int = ... +) -> NDArray[np.uint8]: ... +def binary_opening( + input: NDArray[np.uint8], kernel_size: int = ... +) -> NDArray[np.uint8]: ... +def binary_closing( + input: NDArray[np.uint8], kernel_size: int = ... +) -> NDArray[np.uint8]: ... # Raises ValueError if p < 1.0 diff --git a/scripts/eo_cli.py b/scripts/eo_cli.py index 34b90c0..4e42689 100644 --- a/scripts/eo_cli.py +++ b/scripts/eo_cli.py @@ -163,16 +163,17 @@ # Argument Parsing # --------------------------------------------------------------------------- + def build_parser() -> argparse.ArgumentParser: p = argparse.ArgumentParser( prog="eo_cli.py", - description="Compute EO spectral indices from .npy band files." + description="Compute EO spectral indices from .npy band files.", ) p.add_argument( "--index", nargs="+", required=True, - help="One or more indices to compute. See script header for supported names." + help="One or more indices to compute. See script header for supported names.", ) # Common band arguments @@ -194,40 +195,48 @@ def build_parser() -> argparse.ArgumentParser: p.add_argument("--post-swir2") # SAVI parameter - p.add_argument("--savi-l", type=float, default=0.5, help="Soil brightness factor L for SAVI (default 0.5).") + p.add_argument( + "--savi-l", + type=float, + default=0.5, + help="Soil brightness factor L for SAVI (default 0.5).", + ) # Mask p.add_argument( "--mask", - help="Optional .npy mask file (same shape). Values of 0 become NaN in inputs before computation." + help="Optional .npy mask file (same shape). 
Values of 0 become NaN in inputs before computation.", ) # Output control p.add_argument("--out", help="Output file path if computing a single index.") - p.add_argument("--out-dir", help="Directory for multiple index outputs (auto-named .npy).") - p.add_argument("--png-preview", help="Optional PNG preview path (only valid for single index).") + p.add_argument( + "--out-dir", + help="Directory for multiple index outputs (auto-named .npy).", + ) + p.add_argument( + "--png-preview", help="Optional PNG preview path (only valid for single index)." + ) p.add_argument( "--dtype", default="float32", choices=["float32", "float64"], - help="Output dtype for saved .npy files (default float32)." + help="Output dtype for saved .npy files (default float32).", ) p.add_argument( "--clamp", nargs=2, type=float, metavar=("MIN", "MAX"), - help="Clamp output before saving (applied prior to dtype conversion)." + help="Clamp output before saving (applied prior to dtype conversion).", ) p.add_argument( "--allow-missing", action="store_true", - help="Skip indices missing required bands instead of failing." + help="Skip indices missing required bands instead of failing.", ) p.add_argument( - "--list", - action="store_true", - help="List supported indices and exit." + "--list", action="store_true", help="List supported indices and exit." ) return p @@ -236,6 +245,7 @@ def build_parser() -> argparse.ArgumentParser: # I/O Helpers # --------------------------------------------------------------------------- + def load_npy(path: str) -> np.ndarray: if not path: raise ValueError("Empty path provided.") @@ -243,13 +253,17 @@ def load_npy(path: str) -> np.ndarray: raise FileNotFoundError(path) arr = np.load(path) if arr.ndim not in (1, 2): - raise ValueError(f"Only 1D or 2D arrays supported. Got shape {arr.shape} for {path}") + raise ValueError( + f"Only 1D or 2D arrays supported. 
Got shape {arr.shape} for {path}" + ) return arr def apply_mask(arr: np.ndarray, mask: np.ndarray) -> np.ndarray: if arr.shape != mask.shape: - raise ValueError(f"Mask shape {mask.shape} does not match array shape {arr.shape}") + raise ValueError( + f"Mask shape {mask.shape} does not match array shape {arr.shape}" + ) return np.where(mask == 0, np.nan, arr) @@ -295,6 +309,7 @@ def save_png(path: str, arr: np.ndarray, clamp: Optional[List[float]] = None): # Index Computation Dispatcher # --------------------------------------------------------------------------- + def compute_index(name: str, bands: Dict[str, np.ndarray], savi_l: float) -> np.ndarray: spec = INDEX_SPECS[name] func = spec["func"] # type: ignore @@ -320,13 +335,11 @@ def compute_index(name: str, bands: Dict[str, np.ndarray], savi_l: float) -> np. return func(bands["nir"], bands["green"]) if name == "delta_ndvi": return func( - bands["pre_nir"], bands["pre_red"], - bands["post_nir"], bands["post_red"] + bands["pre_nir"], bands["pre_red"], bands["post_nir"], bands["post_red"] ) if name == "delta_nbr": return func( - bands["pre_nir"], bands["pre_swir2"], - bands["post_nir"], bands["post_swir2"] + bands["pre_nir"], bands["pre_swir2"], bands["post_nir"], bands["post_swir2"] ) raise ValueError(f"Unhandled index: {name}") @@ -335,6 +348,7 @@ def compute_index(name: str, bands: Dict[str, np.ndarray], savi_l: float) -> np. 
# Main Workflow # --------------------------------------------------------------------------- + def main(argv: Optional[List[str]] = None) -> int: parser = build_parser() args = parser.parse_args(argv) @@ -401,7 +415,10 @@ def main(argv: Optional[List[str]] = None) -> int: arr = apply_mask(arr, mask_arr) loaded_bands[band] = arr except Exception as exc: - print(f"[ERROR] Failed loading band '{band}' from {path}: {exc}", file=sys.stderr) + print( + f"[ERROR] Failed loading band '{band}' from {path}: {exc}", + file=sys.stderr, + ) return 1 results: Dict[str, np.ndarray] = {} diff --git a/scripts/generate_coverage_badge.py b/scripts/generate_coverage_badge.py index f63af0a..20ea927 100644 --- a/scripts/generate_coverage_badge.py +++ b/scripts/generate_coverage_badge.py @@ -53,16 +53,16 @@ def _color_for_coverage(pct_int: int) -> str: Derive color similar to shields.io thresholds. """ if pct_int >= 90: - return "#4c1" # bright green + return "#4c1" # bright green if pct_int >= 80: - return "#97CA00" # greenish + return "#97CA00" # greenish if pct_int >= 70: - return "#a4a61d" # olive / yellow-green + return "#a4a61d" # olive / yellow-green if pct_int >= 60: - return "#dfb317" # yellow + return "#dfb317" # yellow if pct_int >= 50: - return "#fe7d37" # orange - return "#e05d44" # red + return "#fe7d37" # orange + return "#e05d44" # red def _compute_widths(left_text: str, right_text: str) -> Tuple[int, int, int]: @@ -127,10 +127,10 @@ def generate_coverage_badge(xml_path: str, output_path: str) -> None: - {left_label} - {left_label} - {right_value} - {right_value} + {left_label} + {left_label} + {right_value} + {right_value} """ @@ -146,7 +146,9 @@ def generate_coverage_badge(xml_path: str, output_path: str) -> None: if __name__ == "__main__": if len(sys.argv) != 3: - print("Usage: python generate_coverage_badge.py ") + print( + "Usage: python generate_coverage_badge.py " + ) sys.exit(1) xml_file = sys.argv[1] diff --git a/scripts/jules_session_manager.py 
b/scripts/jules_session_manager.py index 065010a..25301ff 100644 --- a/scripts/jules_session_manager.py +++ b/scripts/jules_session_manager.py @@ -9,10 +9,8 @@ # Jules API Base URL BASE_URL = "https://jules.googleapis.com/v1alpha" -HEADERS = { - "X-Goog-Api-Key": JULES_API_KEY, - "Content-Type": "application/json" -} +HEADERS = {"X-Goog-Api-Key": JULES_API_KEY, "Content-Type": "application/json"} + def find_active_session(): """Searches for an existing Active/Paused session for the current branch.""" @@ -36,7 +34,9 @@ def find_active_session(): source = session.get("sourceContext", {}).get("source", "") if REPO_NAME in source: # Check branch match - github_context = session.get("sourceContext", {}).get("githubRepoContext", {}) + github_context = session.get("sourceContext", {}).get( + "githubRepoContext", {} + ) starting_branch = github_context.get("startingBranch") if starting_branch == BRANCH_NAME: @@ -44,11 +44,12 @@ def find_active_session(): # The 'state' is the key—we are only interested in active/paused sessions if state in ["ACTIVE", "PAUSED", "PLANNING"]: print(f"Found existing session: {session['name']} in state {state}") - return session['name'] + return session["name"] print("No active or paused session found for this branch.") return None + def send_fix_message(session_name): """Sends a message to an existing session to resume work.""" send_message_url = f"{BASE_URL}/{session_name}:sendMessage" @@ -72,9 +73,12 @@ def send_fix_message(session_name): # Log error but don't fail the CI step itself pass + if __name__ == "__main__": if not JULES_API_KEY or not REPO_NAME or not BRANCH_NAME: - print("ERROR: Missing required environment variables (API Key, Repo, or Branch). Skipping Jules Fix.") + print( + "ERROR: Missing required environment variables (API Key, Repo, or Branch). Skipping Jules Fix." + ) exit(0) # 1. Check for an existing session @@ -86,4 +90,6 @@ def send_fix_message(session_name): else: # 3. 
If no session exists, this is the time to create a new one, # but based on your request, we skip this step to avoid task limits. - print("Task not started. If needed, a new session would be created here, which would consume a task quota.") + print( + "Task not started. If needed, a new session would be created here, which would consume a task quota." + ) diff --git a/scripts/version.py b/scripts/version.py index e92201f..818eb5d 100644 --- a/scripts/version.py +++ b/scripts/version.py @@ -13,6 +13,7 @@ python scripts/version.py major # Increment major version (0.1.0 -> 1.0.0) python scripts/version.py set 1.2.3 # Set specific version """ + import argparse import re import sys @@ -22,6 +23,7 @@ # --- Utility Functions (Kept as is) --- + def parse_version(version_str: str) -> Tuple[int, int, int]: """Parse semantic version string into major, minor, patch.""" match = re.match(r"^(\d+)\.(\d+)\.(\d+)(?:-.*)?(?:\+.*)?$", version_str) @@ -55,6 +57,7 @@ def get_current_version() -> str: # --- Update Functions (MODIFIED/NEW) --- + def update_pyproject_version(new_version: str) -> None: """Update version in pyproject.toml.""" pyproject_path = Path("pyproject.toml") @@ -63,7 +66,7 @@ def update_pyproject_version(new_version: str) -> None: # Update version field in [project] new_content = re.sub( r'^(version\s*=\s*)"[^"]+"', - fr'\1"{new_version}"', + rf'\1"{new_version}"', content, flags=re.MULTILINE, ) @@ -85,7 +88,7 @@ def update_cargo_version(new_version: str) -> None: # Use re.sub with a non-greedy match to ensure it only hits the one line new_content = re.sub( r'(\[package\][\s\S]*?^version\s*=\s*)"[^"]+"', - fr'\1"{new_version}"', + rf'\1"{new_version}"', content, flags=re.MULTILINE | re.IGNORECASE, ) @@ -101,7 +104,10 @@ def update_init_version(new_version: str) -> None: # The user-facing module is likely 'python/eo_processor/__init__.py' init_path = Path("python/eo_processor/__init__.py") if not init_path.exists(): - print(f"Warning: {init_path} not found. 
Skipping __version__ update.", file=sys.stderr) + print( + f"Warning: {init_path} not found. Skipping __version__ update.", + file=sys.stderr, + ) return content = init_path.read_text() @@ -120,13 +126,14 @@ def update_init_version(new_version: str) -> None: def update_all_versions(new_version: str) -> None: """Update version in all files.""" update_pyproject_version(new_version) - update_cargo_version(new_version) # <-- NEW RUST VERSION UPDATE + update_cargo_version(new_version) # <-- NEW RUST VERSION UPDATE update_init_version(new_version) # Removed the update_server_version function call # --- Main Logic (Kept as is, but relies on new functions) --- + def increment_version(increment_type: str) -> str: """Increment version based on type (major, minor, patch).""" current = get_current_version() diff --git a/src/classification.rs b/src/classification.rs index 5c61421..9fb4dfb 100644 --- a/src/classification.rs +++ b/src/classification.rs @@ -35,12 +35,7 @@ impl DecisionTree { } } - pub fn fit( - &mut self, - features: &[Vec], - labels: &[f64], - n_features_to_consider: usize, - ) { + pub fn fit(&mut self, features: &[Vec], labels: &[f64], n_features_to_consider: usize) { self.root = Some(self.build_tree(features, labels, 0, n_features_to_consider)); } diff --git a/src/lib.rs b/src/lib.rs index 8e35638..c2d9ef3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -99,7 +99,7 @@ fn _core(_py: Python, m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(workflows::complex_classification, m)?)?; // --- Texture --- - m.add_function(wrap_pyfunction!(texture::texture_entropy, m)?)?; + m.add_function(wrap_pyfunction!(texture::haralick_features_py, m)?)?; // --- Classification --- m.add_function(wrap_pyfunction!(classification::random_forest_predict, m)?)?; diff --git a/src/texture.rs b/src/texture.rs index cecc868..d6edaf2 100644 --- a/src/texture.rs +++ b/src/texture.rs @@ -1,48 +1,178 @@ -use ndarray::{Array2, Axis}; +use ndarray::{s, Array2}; use numpy::{IntoPyArray, 
PyArray2, PyReadonlyArray2}; use pyo3::prelude::*; use rayon::prelude::*; -use std::collections::HashMap; +type HaralickPyResult = PyResult<( + Py>, + Py>, + Py>, + Py>, +)>; + +// Calculates the Gray-Level Co-occurrence Matrix (GLCM) for a given window. +fn glcm(window: &Array2, levels: u8, dx: isize, dy: isize) -> Array2 { + let (height, width) = (window.dim().0, window.dim().1); + let mut glcm = Array2::::zeros((levels as usize, levels as usize)); + + for y in 0..height { + for x in 0..width { + let next_y = y as isize + dy; + let next_x = x as isize + dx; + + if next_y >= 0 && next_y < height as isize && next_x >= 0 && next_x < width as isize { + let i = window[[y, x]] as usize; + let j = window[[next_y as usize, next_x as usize]] as usize; + if i < levels as usize && j < levels as usize { + glcm[[i, j]] += 1.0; + } + } + } + } + glcm +} + +// Normalizes the GLCM by dividing by the sum of its elements. +fn normalize_glcm(glcm: &mut Array2) { + let sum = glcm.sum(); + if sum > 0.0 { + *glcm /= sum; + } +} + +// Calculates the 'contrast' Haralick feature. +fn contrast(glcm: &Array2) -> f64 { + let mut contrast = 0.0; + for ((i, j), &p) in glcm.indexed_iter() { + contrast += (i as f64 - j as f64).powi(2) * p; + } + contrast +} + +// Calculates the 'dissimilarity' Haralick feature. +fn dissimilarity(glcm: &Array2) -> f64 { + let mut dissimilarity = 0.0; + for ((i, j), &p) in glcm.indexed_iter() { + dissimilarity += (i as f64 - j as f64).abs() * p; + } + dissimilarity +} + +// Calculates the 'homogeneity' Haralick feature. +fn homogeneity(glcm: &Array2) -> f64 { + let mut homogeneity = 0.0; + for ((i, j), &p) in glcm.indexed_iter() { + homogeneity += p / (1.0 + (i as f64 - j as f64).powi(2)); + } + homogeneity +} + +// Calculates the 'entropy' Haralick feature. 
+fn entropy(glcm: &Array2) -> f64 { + let mut entropy = 0.0; + for &p in glcm.iter() { + if p > 0.0 { + entropy -= p * p.log2(); + } + } + entropy +} + +// Helper function to calculate all Haralick features for a single window. +fn calculate_features_for_window(window: &Array2, levels: u8) -> (f64, f64, f64, f64) { + // Offsets matching scikit-image: [dist, angle] + // 0 deg: (0, 1), 45 deg: (-1, 1), 90 deg: (-1, 0), 135 deg: (-1, -1) + let offsets = [(0, 1), (-1, 1), (-1, 0), (-1, -1)]; + + let mut total_contrast = 0.0; + let mut total_dissimilarity = 0.0; + let mut total_homogeneity = 0.0; + let mut total_entropy = 0.0; + let count = offsets.len() as f64; + + for &(dy, dx) in &offsets { + let mut glcm_matrix = glcm(window, levels, dx, dy); + let t_glcm = glcm_matrix.t(); + glcm_matrix = &glcm_matrix + &t_glcm; // Symmetrize + normalize_glcm(&mut glcm_matrix); + + total_contrast += contrast(&glcm_matrix); + total_dissimilarity += dissimilarity(&glcm_matrix); + total_homogeneity += homogeneity(&glcm_matrix); + total_entropy += entropy(&glcm_matrix); + } + + ( + total_contrast / count, + total_dissimilarity / count, + total_homogeneity / count, + total_entropy / count, + ) +} + +/// Applies a sliding window over a 2D array and calculates Haralick texture features. +/// +/// :param arr: 2D NumPy array of unsigned 8-bit integers. +/// :param window_size: The size of the square window. +/// :param levels: The number of gray levels for quantization. +/// :return: A tuple of 4 2D NumPy arrays: (contrast, dissimilarity, homogeneity, entropy). 
#[pyfunction] -pub fn texture_entropy( +#[pyo3(name = "haralick_features")] +pub fn haralick_features_py( py: Python<'_>, - arr: PyReadonlyArray2, + arr: PyReadonlyArray2, window_size: usize, -) -> PyResult>> { - let array = arr.as_array(); + levels: u8, +) -> HaralickPyResult { + let array = arr.as_array().to_owned(); let (height, width) = (array.shape()[0], array.shape()[1]); - let mut result = Array2::::zeros((height, width)); let half_window = window_size / 2; - result - .axis_iter_mut(Axis(0)) - .into_par_iter() - .enumerate() - .for_each(|(r, mut row)| { - for c in 0..width { - let r_min = r.saturating_sub(half_window); - let r_max = (r + half_window).min(height - 1); - let c_min = c.saturating_sub(half_window); - let c_max = (c + half_window).min(width - 1); - - let window = array.slice(ndarray::s![r_min..=r_max, c_min..=c_max]); - let mut hist = HashMap::new(); - for &val in window.iter() { - *hist.entry(val.to_bits()).or_insert(0) += 1; - } + let (contrast_out, dissimilarity_out, homogeneity_out, entropy_out) = + py.allow_threads(move || { + let mut contrast_out = Array2::::zeros((height, width)); + let mut dissimilarity_out = Array2::::zeros((height, width)); + let mut homogeneity_out = Array2::::zeros((height, width)); + let mut entropy_out = Array2::::zeros((height, width)); - let mut entropy = 0.0; - let n_pixels = window.len() as f64; - for &count in hist.values() { - let p = count as f64 / n_pixels; - if p > 0.0 { - entropy -= p * p.log2(); - } - } - row[c] = entropy; + let pixels: Vec<(usize, usize)> = (0..height) + .flat_map(|r| (0..width).map(move |c| (r, c))) + .collect(); + + let results: Vec<(f64, f64, f64, f64)> = pixels + .par_iter() + .map(|&(r, c)| { + let r_min = r.saturating_sub(half_window); + let r_max = (r + half_window).min(height - 1); + let c_min = c.saturating_sub(half_window); + let c_max = (c + half_window).min(width - 1); + + let window = array.slice(s![r_min..=r_max, c_min..=c_max]).to_owned(); + 
calculate_features_for_window(&window, levels) + }) + .collect(); + + for (i, (con, dis, hom, ent)) in results.into_iter().enumerate() { + let r = i / width; + let c = i % width; + contrast_out[[r, c]] = con; + dissimilarity_out[[r, c]] = dis; + homogeneity_out[[r, c]] = hom; + entropy_out[[r, c]] = ent; } + + ( + contrast_out, + dissimilarity_out, + homogeneity_out, + entropy_out, + ) }); - Ok(result.into_pyarray(py).to_owned()) -} + Ok(( + contrast_out.into_pyarray(py).to_owned(), + dissimilarity_out.into_pyarray(py).to_owned(), + homogeneity_out.into_pyarray(py).to_owned(), + entropy_out.into_pyarray(py).to_owned(), + )) +} \ No newline at end of file diff --git a/tests/test_classification.py b/tests/test_classification.py index 2ee2476..f5c125c 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -6,6 +6,7 @@ from eo_processor import random_forest_predict, random_forest_train from .utils import sklearn_to_json + def test_random_forest_predict(): """Test the random_forest_predict function.""" # Generate synthetic data @@ -33,6 +34,7 @@ def test_random_forest_predict(): assert np.array_equal(predictions, sklearn_predictions) + def test_random_forest_train_and_predict(): """Test the full train-and-predict cycle.""" # Generate synthetic data @@ -45,7 +47,9 @@ def test_random_forest_train_and_predict(): random_state=42, shuffle=True, ) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.3, random_state=42 + ) # Convert labels to float64 y_train = y_train.astype(np.float64) @@ -64,4 +68,6 @@ def test_random_forest_train_and_predict(): # Check accuracy accuracy = accuracy_score(y_test, predictions) - assert accuracy >= 0.75, f"Accuracy of {accuracy:.2f} is below the threshold of 0.75" + assert accuracy >= 0.75, ( + f"Accuracy of {accuracy:.2f} is below the threshold of 0.75" + ) diff --git a/tests/test_cli.py b/tests/test_cli.py 
index 83d1a5b..38ed380 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -418,6 +418,7 @@ def test_missing_file_handling(tmp_path): import logging from eo_processor import log + def test_cli_logging(tmp_path): # Redirect logging to a string buffer log_stream = io.StringIO() diff --git a/tests/test_composite.py b/tests/test_composite.py index a37f563..397cfa9 100644 --- a/tests/test_composite.py +++ b/tests/test_composite.py @@ -2,36 +2,43 @@ import pytest from eo_processor import composite + def test_composite_median_1d(): arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) result = composite(arr, method="median") assert result == 3.0 + def test_composite_median_1d_with_nan_skip(): arr = np.array([1.0, 2.0, np.nan, 4.0, 5.0]) result = composite(arr, method="median", skip_na=True) assert result == 3.0 + def test_composite_mean_1d(): arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) result = composite(arr, method="mean") assert result == 3.0 + def test_composite_mean_1d_with_nan_skip(): arr = np.array([1.0, 2.0, np.nan, 4.0, 5.0]) result = composite(arr, method="mean", skip_na=True) assert result == 3.0 + def test_composite_std_1d(): arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) result = composite(arr, method="std") assert np.isclose(result, 1.58113883) + def test_composite_std_1d_with_nan_skip(): arr = np.array([1.0, 2.0, np.nan, 4.0, 5.0]) result = composite(arr, method="std", skip_na=True) assert np.isclose(result, 1.825741858) + def test_composite_unknown_method(): arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) with pytest.raises(ValueError): diff --git a/tests/test_morphology.py b/tests/test_morphology.py index 4f532c0..016b256 100644 --- a/tests/test_morphology.py +++ b/tests/test_morphology.py @@ -2,82 +2,88 @@ import pytest from eo_processor import binary_dilation, binary_erosion, binary_opening, binary_closing + def test_binary_dilation_basic(): # Single pixel in 3x3 input_arr = np.zeros((5, 5), dtype=np.uint8) input_arr[2, 2] = 1 - + # Dilate with 3x3 kernel -> 3x3 block of 
1s dilated = binary_dilation(input_arr, kernel_size=3) - + expected = np.zeros((5, 5), dtype=np.uint8) expected[1:4, 1:4] = 1 - + np.testing.assert_array_equal(dilated, expected) + def test_binary_erosion_basic(): # 3x3 block of 1s input_arr = np.zeros((5, 5), dtype=np.uint8) input_arr[1:4, 1:4] = 1 - + # Erode with 3x3 kernel -> single pixel at center eroded = binary_erosion(input_arr, kernel_size=3) - + expected = np.zeros((5, 5), dtype=np.uint8) expected[2, 2] = 1 - + np.testing.assert_array_equal(eroded, expected) + def test_binary_opening_noise_removal(): # 3x3 block + noise pixel input_arr = np.zeros((7, 7), dtype=np.uint8) input_arr[2:5, 2:5] = 1 - input_arr[0, 0] = 1 # Noise - + input_arr[0, 0] = 1 # Noise + # Opening should remove the isolated noise pixel but keep the block roughly same # (Erosion removes noise and shrinks block, Dilation restores block) opened = binary_opening(input_arr, kernel_size=3) - + expected = np.zeros((7, 7), dtype=np.uint8) expected[2:5, 2:5] = 1 - + np.testing.assert_array_equal(opened, expected) + def test_binary_closing_hole_filling(): # 3x3 block with hole in center input_arr = np.zeros((5, 5), dtype=np.uint8) input_arr[1:4, 1:4] = 1 - input_arr[2, 2] = 0 # Hole - + input_arr[2, 2] = 0 # Hole + # Closing should fill the hole closed = binary_closing(input_arr, kernel_size=3) - + expected = np.zeros((5, 5), dtype=np.uint8) expected[1:4, 1:4] = 1 - + np.testing.assert_array_equal(closed, expected) + def test_kernel_size_5(): input_arr = np.zeros((7, 7), dtype=np.uint8) input_arr[3, 3] = 1 - + # Dilate with 5x5 kernel -> 5x5 block dilated = binary_dilation(input_arr, kernel_size=5) - + expected = np.zeros((7, 7), dtype=np.uint8) expected[1:6, 1:6] = 1 - + np.testing.assert_array_equal(dilated, expected) + def test_boundary_handling(): # Pixel at edge input_arr = np.zeros((5, 5), dtype=np.uint8) input_arr[0, 0] = 1 - + # Dilate 3x3 -> 2x2 block at corner dilated = binary_dilation(input_arr, kernel_size=3) - + expected = 
np.zeros((5, 5), dtype=np.uint8) expected[0:2, 0:2] = 1 - + np.testing.assert_array_equal(dilated, expected) diff --git a/tests/test_processes.py b/tests/test_processes.py index 6ffa558..c16fdd3 100644 --- a/tests/test_processes.py +++ b/tests/test_processes.py @@ -6,7 +6,9 @@ # ----------------------------- # Helpers # ----------------------------- -def py_moving_average_same(series: np.ndarray, window: int, skip_na: bool) -> np.ndarray: +def py_moving_average_same( + series: np.ndarray, window: int, skip_na: bool +) -> np.ndarray: """ Pure-Python/Numpy reference for 'same' mode moving average (variable window near edges). skip_na semantics: @@ -29,7 +31,9 @@ def py_moving_average_same(series: np.ndarray, window: int, skip_na: bool) -> np return out -def py_moving_average_valid(series: np.ndarray, window: int, skip_na: bool) -> np.ndarray: +def py_moving_average_valid( + series: np.ndarray, window: int, skip_na: bool +) -> np.ndarray: """ 'valid' mode: only full windows (no edge shrink). Length = t - window + 1. 
""" @@ -182,7 +186,9 @@ def test_pixelwise_transform_multi_dim(): assert np.allclose(out3d, arr3d * 0.5 - 0.2, atol=1e-12) arr4d = np.random.rand(2, 3, 4, 5) - out4d = pixelwise_transform(arr4d, scale=1.1, offset=0.0, clamp_min=0.0, clamp_max=1.0) + out4d = pixelwise_transform( + arr4d, scale=1.1, offset=0.0, clamp_min=0.0, clamp_max=1.0 + ) # Since arr4d in [0,1), scaling may push some >1; clamp verifies expected4d = np.clip(arr4d * 1.1, 0.0, 1.0) assert np.allclose(out4d, expected4d, atol=1e-12) @@ -196,7 +202,9 @@ def test_moving_average_then_transform_chain(): cube = np.random.rand(12, 8, 8) ma = moving_average_temporal(cube, window=3, skip_na=True, mode="same") # Scale and clamp - stretched = pixelwise_transform(ma, scale=1.2, offset=-0.1, clamp_min=0.0, clamp_max=1.0) + stretched = pixelwise_transform( + ma, scale=1.2, offset=-0.1, clamp_min=0.0, clamp_max=1.0 + ) assert stretched.shape == ma.shape assert np.all(stretched <= 1.0 + 1e-12) assert np.all(stretched >= -1e-12) # clamp floor diff --git a/tests/test_processes_stride.py b/tests/test_processes_stride.py index c208bd0..6ffffa7 100644 --- a/tests/test_processes_stride.py +++ b/tests/test_processes_stride.py @@ -9,7 +9,10 @@ # Helper reference implementations (pure NumPy/Python) # --------------------------------------------------------------------------- -def _naive_moving_average_same(series: np.ndarray, window: int, skip_na: bool) -> np.ndarray: + +def _naive_moving_average_same( + series: np.ndarray, window: int, skip_na: bool +) -> np.ndarray: """ Naive O(T*W) moving average with 'same' mode edge shrinking. Window centered: half_left = window // 2; half_right = window - half_left - 1. 
@@ -21,7 +24,7 @@ def _naive_moving_average_same(series: np.ndarray, window: int, skip_na: bool) - for t in range(T): start = max(0, t - half_left) end = min(T - 1, t + half_right) - window_vals = series[start:end + 1] + window_vals = series[start : end + 1] if skip_na: valid = window_vals[~np.isnan(window_vals)] out[t] = np.nan if valid.size == 0 else valid.mean() @@ -30,7 +33,9 @@ def _naive_moving_average_same(series: np.ndarray, window: int, skip_na: bool) - return out -def _naive_moving_average_valid(series: np.ndarray, window: int, skip_na: bool) -> np.ndarray: +def _naive_moving_average_valid( + series: np.ndarray, window: int, skip_na: bool +) -> np.ndarray: """ Naive moving average 'valid' mode (no edge shrink). """ @@ -38,7 +43,7 @@ def _naive_moving_average_valid(series: np.ndarray, window: int, skip_na: bool) out_len = T - window + 1 out = np.empty(out_len, dtype=float) for i in range(out_len): - window_vals = series[i:i + window] + window_vals = series[i : i + window] if skip_na: valid = window_vals[~np.isnan(window_vals)] out[i] = np.nan if valid.size == 0 else valid.mean() @@ -51,11 +56,14 @@ def _naive_moving_average_valid(series: np.ndarray, window: int, skip_na: bool) # Tests # --------------------------------------------------------------------------- + def test_moving_average_temporal_stride_basic_1d(): - series = np.array([1., 2., 3., 4., 5., 6.]) + series = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) window = 3 stride = 2 - rust = moving_average_temporal_stride(series, window=window, stride=stride, mode="same") + rust = moving_average_temporal_stride( + series, window=window, stride=stride, mode="same" + ) # Build naive same-mode then stride sample naive_full = _naive_moving_average_same(series, window, skip_na=True) expected = naive_full[::stride] @@ -68,15 +76,19 @@ def test_moving_average_temporal_stride_same_vs_direct_ma(): window = 5 stride = 3 full_rust = moving_average_temporal(series, window=window, mode="same") - stride_rust = 
moving_average_temporal_stride(series, window=window, stride=stride, mode="same") + stride_rust = moving_average_temporal_stride( + series, window=window, stride=stride, mode="same" + ) assert np.allclose(stride_rust, full_rust[::stride], atol=1e-12) def test_moving_average_temporal_stride_valid_mode(): - series = np.array([1., 2., 3., 4., 5., 6., 7.]) + series = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) window = 3 stride = 2 - rust_valid = moving_average_temporal_stride(series, window=window, stride=stride, mode="valid") + rust_valid = moving_average_temporal_stride( + series, window=window, stride=stride, mode="valid" + ) naive_valid = _naive_moving_average_valid(series, window, skip_na=True) expected = naive_valid[::stride] assert rust_valid.shape == expected.shape @@ -84,10 +96,12 @@ def test_moving_average_temporal_stride_valid_mode(): def test_moving_average_temporal_stride_nan_skip(): - series = np.array([1., np.nan, 3., 4., np.nan, 6.]) + series = np.array([1.0, np.nan, 3.0, 4.0, np.nan, 6.0]) window = 3 stride = 2 - rust = moving_average_temporal_stride(series, window=window, stride=stride, skip_na=True, mode="same") + rust = moving_average_temporal_stride( + series, window=window, stride=stride, skip_na=True, mode="same" + ) naive_full = _naive_moving_average_same(series, window, skip_na=True) expected = naive_full[::stride] # NaNs in expected propagate; use nan-aware comparison @@ -96,10 +110,12 @@ def test_moving_average_temporal_stride_nan_skip(): def test_moving_average_temporal_stride_nan_no_skip(): - series = np.array([1., np.nan, 3., 4., 5., 6.]) + series = np.array([1.0, np.nan, 3.0, 4.0, 5.0, 6.0]) window = 3 stride = 1 - rust = moving_average_temporal_stride(series, window=window, stride=stride, skip_na=False, mode="same") + rust = moving_average_temporal_stride( + series, window=window, stride=stride, skip_na=False, mode="same" + ) naive_full = _naive_moving_average_same(series, window, skip_na=False) assert rust.shape == naive_full.shape # 
Windows touching NaN become NaN @@ -135,7 +151,9 @@ def test_moving_average_temporal_stride_window_stride_errors(): def test_moving_average_temporal_stride_equivalence_with_stride_1(): cube = np.random.rand(10, 3, 3) win = 3 - out_stride1 = moving_average_temporal_stride(cube, window=win, stride=1, mode="same") + out_stride1 = moving_average_temporal_stride( + cube, window=win, stride=1, mode="same" + ) out_full = moving_average_temporal(cube, window=win, mode="same") assert np.allclose(out_stride1, out_full, atol=1e-12) @@ -146,7 +164,9 @@ def test_moving_average_temporal_stride_large_random_consistency(): series = rng.random(257) window = 7 stride = 5 - rust = moving_average_temporal_stride(series, window=window, stride=stride, mode="same") + rust = moving_average_temporal_stride( + series, window=window, stride=stride, mode="same" + ) naive_full = _naive_moving_average_same(series, window, skip_na=True) expected = naive_full[::stride] assert rust.shape == expected.shape diff --git a/tests/test_texture.py b/tests/test_texture.py index 37ab030..5ee451c 100644 --- a/tests/test_texture.py +++ b/tests/test_texture.py @@ -1,52 +1,159 @@ import numpy as np -from eo_processor import texture_entropy +import pytest +import xarray as xr +from skimage.feature import graycomatrix, graycoprops +from eo_processor import haralick_features -def test_texture_entropy(): - # Create a sample 2D array - arr = np.array([ - [1, 2, 3, 4, 5], - [6, 7, 8, 9, 10], - [11, 12, 13, 14, 15], - [16, 17, 18, 19, 20], - [21, 22, 23, 24, 25] - ], dtype=np.float64) +# Create a sample 2D array for testing +np.random.seed(42) +SAMPLE_ARRAY = np.random.randint(0, 8, size=(100, 100), dtype=np.uint8) +SAMPLE_XR = xr.DataArray(SAMPLE_ARRAY, dims=("y", "x")) - # Compute the texture entropy with a 3x3 window - entropy = texture_entropy(arr, 3) +# Define parameters for tests +WINDOW_SIZE = 5 +LEVELS = 8 +FEATURES = ["contrast", "dissimilarity", "homogeneity", "entropy"] - # Check that the output is a 2D 
array with the same shape as the input - assert entropy.shape == arr.shape - # Check that the entropy values are within a reasonable range (0 to log2(window_size^2)) - assert np.all(entropy >= 0) - assert np.all(entropy <= np.log2(3**2)) +def _calculate_skimage_props_for_pixel(window, levels): + """ + Helper to calculate GLCM properties for a single window using scikit-image, + matching the eo-processor implementation (symmetric, normed, averaged). + """ + # Distances and angles must match the Rust implementation + distances = [1] + angles = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4] -def test_texture_entropy_small_image(): - # Create a sample 2D array smaller than the window size - arr = np.array([ - [1, 2], - [3, 4] - ], dtype=np.float64) + glcm = graycomatrix( + window, + distances=distances, + angles=angles, + levels=levels, + symmetric=True, + normed=True, + ) - # Compute the texture entropy with a 3x3 window - entropy = texture_entropy(arr, 3) + results = {} + # skimage calls it 'ASM' for homogeneity, we use the common name + results["contrast"] = graycoprops(glcm, "contrast").mean() + results["dissimilarity"] = graycoprops(glcm, "dissimilarity").mean() + results["homogeneity"] = graycoprops(glcm, "homogeneity").mean() - # Check that the output is a 2D array with the same shape as the input - assert entropy.shape == arr.shape + # Calculate entropy manually per-matrix, then average, to match graycoprops behavior + entropies = [] + for d in range(glcm.shape[2]): + for a in range(glcm.shape[3]): + matrix = glcm[:, :, d, a] + non_zeros = matrix[matrix > 0] + entropies.append(-np.sum(non_zeros * np.log2(non_zeros))) + results["entropy"] = np.mean(entropies) - # Check that the entropy values are within a reasonable range (0 to log2(window_size^2)) - assert np.all(entropy >= 0) - assert np.all(entropy <= np.log2(3**2)) + return results -def test_texture_entropy_uniform_image(): - # Create a uniform 2D array - arr = np.ones((10, 10), dtype=np.float64) - # Compute the 
texture entropy with a 3x3 window - entropy = texture_entropy(arr, 3) +@pytest.fixture(scope="module") +def skimage_results(): + """ + Pre-calculate the expected results using scikit-image for the center pixel + of the sample array. This is slow, so we do it once. + """ + half_window = WINDOW_SIZE // 2 + center_y, center_x = 50, 50 - # Check that the output is a 2D array with the same shape as the input - assert entropy.shape == arr.shape + window = SAMPLE_ARRAY[ + center_y - half_window : center_y + half_window + 1, + center_x - half_window : center_x + half_window + 1, + ] - # Check that the entropy is close to zero for a uniform image - assert np.allclose(entropy, 0) + return _calculate_skimage_props_for_pixel(window, LEVELS) + + +def test_haralick_features_correctness(skimage_results): + """ + Compare the output of the Rust implementation with scikit-image for a single pixel. + """ + # Run eo-processor implementation + result_xr = haralick_features( + SAMPLE_XR, + window_size=WINDOW_SIZE, + levels=LEVELS, + features=FEATURES, + ) + + # Extract the values for the center pixel + center_y, center_x = 50, 50 + eo_processor_results = result_xr.sel(y=center_y, x=center_x).values + + # Compare results + for i, feature in enumerate(FEATURES): + expected = skimage_results[feature] + actual = eo_processor_results[i] + # Using a tolerance because of potential minor floating point differences + np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=1e-8) + + +def test_dask_integration(): + """ + Test that the function works correctly with a Dask-backed xarray DataArray. + """ + # Chunk the sample array to trigger Dask execution + dask_xr = SAMPLE_XR.chunk({"y": 50, "x": 50}) + + # Run haralick_features + result_dask = haralick_features( + dask_xr, + window_size=WINDOW_SIZE, + levels=LEVELS, + features=FEATURES, + ) + + # Compute the result + computed_result = result_dask.compute() + + # For simplicity, we compare against a non-dask run. 
+ # The correctness test already validates against scikit-image. + result_numpy = haralick_features( + SAMPLE_XR, + window_size=WINDOW_SIZE, + levels=LEVELS, + features=FEATURES, + ) + + xr.testing.assert_allclose(computed_result, result_numpy) + + +def test_edge_case_small_array(): + """ + Test that the function handles arrays smaller than the window size gracefully. + The Rust implementation should produce NaNs. + """ + small_array = xr.DataArray( + np.random.randint(0, 8, size=(2, 2), dtype=np.uint8), dims=("y", "x") + ) + + result = haralick_features( + small_array, + window_size=3, + levels=8, + ) + + # The output should be all NaNs because no full window can be formed + assert np.all(np.isnan(result.values)) + + +def test_quantization(): + """ + Test that the auto-quantization logic works as expected. + """ + # Create an array with values outside the [0, levels-1] range + high_value_array = xr.DataArray(np.arange(0, 100).reshape(10, 10), dims=("y", "x")) + levels = 16 + + # This should run without error and produce a valid result + result = haralick_features(high_value_array, window_size=3, levels=levels) + + # Check that the output is not all NaNs (which would indicate an error) + assert not np.all(np.isnan(result.values)) + + # Check shape + assert result.shape == (len(FEATURES), 10, 10) diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 7aab71b..2e36837 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -1,5 +1,6 @@ import numpy as np -from eo_processor import detect_breakpoints, complex_classification, texture_entropy +from eo_processor import detect_breakpoints, complex_classification + def test_detect_breakpoints(): """ @@ -12,10 +13,15 @@ def test_detect_breakpoints(): np.random.seed(42) # Reduce noise to make the breakpoint more obvious and the test more stable noise = np.random.normal(0, 0.1, time) - y = np.concatenate([ - np.linspace(0, 10, breakpoint_time), - np.linspace(10, 0, time - breakpoint_time) - ]) + 
noise + y = ( + np.concatenate( + [ + np.linspace(0, 10, breakpoint_time), + np.linspace(10, 0, time - breakpoint_time), + ] + ) + + noise + ) # Create a 3D stack (time, y, x) stack = np.zeros((time, 1, 1)) @@ -37,6 +43,7 @@ def test_detect_breakpoints(): assert magnitude > 0 assert confidence == 1.0 + def test_complex_classification(): """ Test the complex_classification function. @@ -53,14 +60,3 @@ def test_complex_classification(): result = complex_classification(blue, green, red, nir, swir1, swir2, temp) assert result.shape == shape assert result.dtype == np.uint8 - -def test_texture_entropy(): - """ - Test the texture_entropy function. - """ - shape = (20, 20) - data = np.random.rand(*shape) - - result = texture_entropy(data, window_size=3) - assert result.shape == shape - assert result.dtype == np.float64 diff --git a/tests/test_zonal.py b/tests/test_zonal.py index b9bf9b7..98cb008 100644 --- a/tests/test_zonal.py +++ b/tests/test_zonal.py @@ -2,14 +2,15 @@ import pytest from eo_processor import zonal_stats + def test_zonal_stats_basic_1d(): values = np.array([10.0, 20.0, 30.0, 40.0, 50.0]) zones = np.array([1, 1, 2, 2, 3]) - + stats = zonal_stats(values, zones) - + assert len(stats) == 3 - + # Zone 1: [10, 20] z1 = stats[1] assert z1.count == 2 @@ -17,62 +18,60 @@ def test_zonal_stats_basic_1d(): assert z1.mean == 15.0 assert z1.min == 10.0 assert z1.max == 20.0 - assert np.isclose(z1.std, 7.0710678) # std sample of [10, 20] is ~7.07 - + assert np.isclose(z1.std, 7.0710678) # std sample of [10, 20] is ~7.07 + # Zone 2: [30, 40] z2 = stats[2] assert z2.count == 2 assert z2.mean == 35.0 - + # Zone 3: [50] z3 = stats[3] assert z3.count == 1 assert z3.mean == 50.0 assert z3.std == 0.0 + def test_zonal_stats_2d(): - values = np.array([ - [1.0, 2.0], - [3.0, 4.0] - ]) - zones = np.array([ - [1, 1], - [2, 2] - ]) - + values = np.array([[1.0, 2.0], [3.0, 4.0]]) + zones = np.array([[1, 1], [2, 2]]) + stats = zonal_stats(values, zones) - + # Zone 1: [1, 2] assert 
stats[1].sum == 3.0 assert stats[1].mean == 1.5 - + # Zone 2: [3, 4] assert stats[2].sum == 7.0 assert stats[2].mean == 3.5 + def test_zonal_stats_with_nans(): values = np.array([10.0, np.nan, 30.0]) zones = np.array([1, 1, 1]) - + stats = zonal_stats(values, zones) - + z1 = stats[1] - assert z1.count == 2 # NaN ignored + assert z1.count == 2 # NaN ignored assert z1.sum == 40.0 assert z1.mean == 20.0 + def test_zonal_stats_shape_mismatch(): values = np.array([1.0, 2.0]) zones = np.array([1, 2, 3]) - + with pytest.raises(ValueError, match="Shape mismatch"): zonal_stats(values, zones) + def test_zonal_stats_dtype_coercion(): # Int values should be coerced to float values = np.array([1, 2, 3], dtype=np.int32) zones = np.array([1, 1, 2], dtype=np.int32) - + stats = zonal_stats(values, zones) assert stats[1].mean == 1.5 assert stats[2].mean == 3.0 diff --git a/tests/utils.py b/tests/utils.py index b30fab3..ffd832c 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,6 +1,7 @@ import json import numpy as np + def sklearn_to_json(model): """Converts a scikit-learn RandomForestClassifier to a JSON serializable format.""" trees = [] @@ -23,16 +24,22 @@ def build_tree(node_id): } } - trees.append({ - "root": build_tree(0), + trees.append( + { + "root": build_tree(0), + "max_depth": model.max_depth, + "min_samples_split": model.min_samples_split, + } + ) + + return json.dumps( + { + "trees": trees, + "n_estimators": model.n_estimators, "max_depth": model.max_depth, "min_samples_split": model.min_samples_split, - }) - - return json.dumps({ - "trees": trees, - "n_estimators": model.n_estimators, - "max_depth": model.max_depth, - "min_samples_split": model.min_samples_split, - "max_features": model.max_features if isinstance(model.max_features, int) else None, - }) + "max_features": model.max_features + if isinstance(model.max_features, int) + else None, + } + ) diff --git a/tox.ini b/tox.ini index 611faf4..8ca2083 100644 --- a/tox.ini +++ b/tox.ini @@ -14,6 +14,9 @@ deps = 
numpy>=1.20.0 pillow>=9.0.0 scikit-learn>=1.0 + scikit-image>=0.18.0 + xarray>=2023.0.0 + dask>=2023.0.0 setenv = RUST_BACKTRACE=1 PYTHONDONTWRITEBYTECODE=1 @@ -41,6 +44,9 @@ deps = numpy>=1.20.0 pillow>=9.0.0 scikit-learn>=1.0 + scikit-image>=0.18.0 + xarray>=2023.0.0 + dask>=2023.0.0 setenv = RUST_BACKTRACE=1 commands = @@ -102,6 +108,7 @@ deps = maturin>=1.9.6 numpy>=1.20.0 scipy>=1.10.0 + scikit-image>=0.18.0 setenv = RUST_BACKTRACE=1 commands = diff --git a/uv.lock b/uv.lock index ccb0735..763e636 100644 --- a/uv.lock +++ b/uv.lock @@ -310,14 +310,19 @@ wheels = [ [[package]] name = "eo-processor" -version = "0.16.0" +version = "0.18.0" source = { editable = "." } dependencies = [ + { name = "dask", version = "2024.8.0", source = { registry = "https://pypi.org/simple" }, extra = ["array"], marker = "python_full_version < '3.10'" }, + { name = "dask", version = "2025.11.0", source = { registry = "https://pypi.org/simple" }, extra = ["array"], marker = "python_full_version >= '3.10'" }, { name = "maturin" }, { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "structlog" }, + { name = "xarray", version = "2024.7.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "xarray", version = "2025.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "xarray", version = "2025.10.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] [package.optional-dependencies] @@ -326,19 +331,16 @@ cli = [ { name = "pillow", version = "12.0.0", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, ] dask = [ - { name = "dask", version = "2024.8.0", source = { registry = "https://pypi.org/simple" }, extra = ["array"], marker = "python_full_version < '3.10'" }, - { name = "dask", version = "2025.11.0", source = { registry = "https://pypi.org/simple" }, extra = ["array"], marker = "python_full_version >= '3.10'" }, { name = "pyarrow", version = "21.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "pyarrow", version = "22.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "xarray", version = "2024.7.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "xarray", version = "2025.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, - { name = "xarray", version = "2025.10.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] dev = [ { name = "pytest" }, + { name = "scikit-image", version = "0.24.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scikit-image", version = "0.25.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "scikit-learn", version = "1.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "scikit-learn", version = "1.7.2", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -367,7 +369,7 @@ dev = [ [package.metadata] requires-dist = [ - { name = "dask", extras = ["array"], marker = "extra == 'dask'", specifier = ">=2023.0.0" }, + { name = "dask", extras = ["array"], specifier = ">=2023.0.0" }, { name = "furo", marker = "extra == 'docs'", specifier = ">=2024.1.0" }, { name = "linkify-it-py", marker = "extra == 'docs'", specifier = ">=2.0.0" }, { name = "maturin", specifier = ">=1.9.6" }, @@ -376,6 +378,7 @@ requires-dist = [ { name = "pillow", marker = "extra == 'cli'", specifier = ">=10.4.0" }, { name = "pyarrow", marker = "extra == 'dask'", specifier = ">=17.0.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" }, + { name = "scikit-image", marker = "extra == 'dev'", specifier = ">=0.18.0" }, { name = "scikit-learn", marker = "extra == 'dev'", specifier = ">=1.0" }, { name = "scipy", marker = "extra == 'dask'", specifier = ">=1.10.0" }, { name = "scipy", marker = "extra == 'dev'", specifier = ">=1.10.0" }, @@ -383,7 +386,7 @@ requires-dist = [ { name = "sphinx-autodoc-typehints", marker = "extra == 'docs'", specifier = ">=2.0.0" }, { name = "structlog", specifier = ">=23.1.0" }, { name = "tox", marker = "extra == 'dev'", specifier = ">=4.25.0" }, - { name = "xarray", marker = "extra == 'dask'", specifier = ">=2023.0.0" }, + { name = "xarray", specifier = ">=2023.0.0" }, ] provides-extras = ["cli", "dev", "dask", "docs"] @@ -466,6 +469,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "imageio" +version = "2.37.2" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pillow", version = "11.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "pillow", version = "12.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/6f/606be632e37bf8d05b253e8626c2291d74c691ddc7bcdf7d6aaf33b32f6a/imageio-2.37.2.tar.gz", hash = "sha256:0212ef2727ac9caa5ca4b2c75ae89454312f440a756fcfc8ef1993e718f50f8a", size = 389600, upload-time = "2025-11-04T14:29:39.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/fe/301e0936b79bcab4cacc7548bf2853fc28dced0a578bab1f7ef53c9aa75b/imageio-2.37.2-py3-none-any.whl", hash = "sha256:ad9adfb20335d718c03de457358ed69f141021a333c40a53e57273d8a5bd0b9b", size = 317646, upload-time = "2025-11-04T14:29:37.948Z" }, +] + [[package]] name = "imagesize" version = "1.4.1" @@ -534,6 +553,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/e8/685f47e0d754320684db4425a0967f7d3fa70126bffd76110b7009a0090f/joblib-1.5.2-py3-none-any.whl", hash = "sha256:4e1f0bdbb987e6d843c70cf43714cb276623def372df3c22fe5266b2670bc241", size = 308396, upload-time = "2025-08-27T12:15:45.188Z" }, ] +[[package]] +name = "lazy-loader" +version = "0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/6f/6b/c875b30a1ba490860c93da4cabf479e03f584eba06fe5963f6f6644653d8/lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1", size = 15431, upload-time = "2024-04-05T13:03:12.261Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc", size = 12097, upload-time = "2024-04-05T13:03:10.514Z" }, +] + [[package]] name = "linkify-it-py" version = "2.0.3" @@ -770,6 +801,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579, upload-time = "2025-02-12T10:53:02.078Z" }, ] +[[package]] +name = "networkx" +version = "3.2.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/80/a84676339aaae2f1cfdf9f418701dd634aef9cc76f708ef55c36ff39c3ca/networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6", size = 2073928, upload-time = "2023-10-28T08:41:39.364Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/f0/8fbc882ca80cf077f1b246c0e3c3465f7f415439bdea6b899f6b19f61f70/networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2", size = 1647772, upload-time = "2023-10-28T08:41:36.945Z" }, +] + +[[package]] +name = "networkx" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.10.*'", +] +sdist = { url = 
"https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = "2024-10-21T12:39:38.695Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/54/dd730b32ea14ea797530a4479b2ed46a6fb250f682a9cfb997e968bf0261/networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f", size = 1723263, upload-time = "2024-10-21T12:39:36.247Z" }, +] + +[[package]] +name = "networkx" +version = "3.6.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, +] + [[package]] name = "numpy" version = "2.0.2" @@ -1633,6 +1701,95 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/30/bd/4168a751ddbbf43e86544b4de8b5c3b7be8d7167a2a5cb977d274e04f0a1/ruff-0.14.4-py3-none-win_arm64.whl", hash = "sha256:dd09c292479596b0e6fec8cd95c65c3a6dc68e9ad17b8f2382130f87ff6a75bb", size = 12663065, upload-time = "2025-11-06T22:07:42.603Z" }, ] +[[package]] +name = "scikit-image" +version = "0.24.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +dependencies = [ + { name = "imageio", marker = 
"python_full_version < '3.10'" }, + { name = "lazy-loader", marker = "python_full_version < '3.10'" }, + { name = "networkx", version = "3.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "packaging", marker = "python_full_version < '3.10'" }, + { name = "pillow", version = "11.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "scipy", version = "1.13.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "tifffile", version = "2024.8.30", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5d/c5/bcd66bf5aae5587d3b4b69c74bee30889c46c9778e858942ce93a030e1f3/scikit_image-0.24.0.tar.gz", hash = "sha256:5d16efe95da8edbeb363e0c4157b99becbd650a60b77f6e3af5768b66cf007ab", size = 22693928, upload-time = "2024-06-18T19:05:31.49Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/82/d4eaa6e441f28a783762093a3c74bcc4a67f1c65bf011414ad4ea85187d8/scikit_image-0.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb3bc0264b6ab30b43c4179ee6156bc18b4861e78bb329dd8d16537b7bbf827a", size = 14051470, upload-time = "2024-06-18T19:03:37.385Z" }, + { url = "https://files.pythonhosted.org/packages/65/15/1879307aaa2c771aa8ef8f00a171a85033bffc6b2553cfd2657426881452/scikit_image-0.24.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:9c7a52e20cdd760738da38564ba1fed7942b623c0317489af1a598a8dedf088b", size = 13385822, upload-time = "2024-06-18T19:03:43.996Z" }, + { url = "https://files.pythonhosted.org/packages/b6/b8/2d52864714b82122f4a36f47933f61f1cd2a6df34987873837f8064d4fdf/scikit_image-0.24.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash 
= "sha256:93f46e6ce42e5409f4d09ce1b0c7f80dd7e4373bcec635b6348b63e3c886eac8", size = 14216787, upload-time = "2024-06-18T19:03:50.169Z" }, + { url = "https://files.pythonhosted.org/packages/40/2e/8b39cd2c347490dbe10adf21fd50bbddb1dada5bb0512c3a39371285eb62/scikit_image-0.24.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39ee0af13435c57351a3397eb379e72164ff85161923eec0c38849fecf1b4764", size = 14866533, upload-time = "2024-06-18T19:03:56.286Z" }, + { url = "https://files.pythonhosted.org/packages/99/89/3fcd68d034db5d29c974e964d03deec9d0fbf9410ff0a0b95efff70947f6/scikit_image-0.24.0-cp310-cp310-win_amd64.whl", hash = "sha256:7ac7913b028b8aa780ffae85922894a69e33d1c0bf270ea1774f382fe8bf95e7", size = 12864601, upload-time = "2024-06-18T19:04:00.868Z" }, + { url = "https://files.pythonhosted.org/packages/90/e3/564beb0c78bf83018a146dfcdc959c99c10a0d136480b932a350c852adbc/scikit_image-0.24.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:272909e02a59cea3ed4aa03739bb88df2625daa809f633f40b5053cf09241831", size = 14020429, upload-time = "2024-06-18T19:04:07.18Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f6/be8b16d8ab6ebf19057877c2aec905cbd438dd92ca64b8efe9e9af008fa3/scikit_image-0.24.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:190ebde80b4470fe8838764b9b15f232a964f1a20391663e31008d76f0c696f7", size = 13371950, upload-time = "2024-06-18T19:04:13.266Z" }, + { url = "https://files.pythonhosted.org/packages/b8/2e/3a949995f8fc2a65b15a4964373e26c5601cb2ea68f36b115571663e7a38/scikit_image-0.24.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59c98cc695005faf2b79904e4663796c977af22586ddf1b12d6af2fa22842dc2", size = 14197889, upload-time = "2024-06-18T19:04:17.181Z" }, + { url = "https://files.pythonhosted.org/packages/ad/96/138484302b8ec9a69cdf65e8d4ab47a640a3b1a8ea3c437e1da3e1a5a6b8/scikit_image-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:fa27b3a0dbad807b966b8db2d78da734cb812ca4787f7fbb143764800ce2fa9c", size = 14861425, upload-time = "2024-06-18T19:04:27.363Z" }, + { url = "https://files.pythonhosted.org/packages/50/b2/d5e97115733e2dc657e99868ae0237705b79d0c81f6ced21b8f0799a30d1/scikit_image-0.24.0-cp311-cp311-win_amd64.whl", hash = "sha256:dacf591ac0c272a111181afad4b788a27fe70d213cfddd631d151cbc34f8ca2c", size = 12843506, upload-time = "2024-06-18T19:04:35.782Z" }, + { url = "https://files.pythonhosted.org/packages/16/19/45ad3b8b8ab8d275a48a9d1016c4beb1c2801a7a13e384268861d01145c1/scikit_image-0.24.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6fccceb54c9574590abcddc8caf6cefa57c13b5b8b4260ab3ff88ad8f3c252b3", size = 14101823, upload-time = "2024-06-18T19:04:39.576Z" }, + { url = "https://files.pythonhosted.org/packages/6e/75/db10ee1bc7936b411d285809b5fe62224bbb1b324a03dd703582132ce5ee/scikit_image-0.24.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ccc01e4760d655aab7601c1ba7aa4ddd8b46f494ac46ec9c268df6f33ccddf4c", size = 13420758, upload-time = "2024-06-18T19:04:45.645Z" }, + { url = "https://files.pythonhosted.org/packages/87/fd/07a7396962abfe22a285a922a63d18e4d5ec48eb5dbb1c06e96fb8fb6528/scikit_image-0.24.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18836a18d3a7b6aca5376a2d805f0045826bc6c9fc85331659c33b4813e0b563", size = 14256813, upload-time = "2024-06-18T19:04:51.68Z" }, + { url = "https://files.pythonhosted.org/packages/2c/24/4bcd94046b409ac4d63e2f92e46481f95f5006a43e68f6ab2b24f5d70ab4/scikit_image-0.24.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8579bda9c3f78cb3b3ed8b9425213c53a25fa7e994b7ac01f2440b395babf660", size = 15013039, upload-time = "2024-06-18T19:04:56.433Z" }, + { url = "https://files.pythonhosted.org/packages/d9/17/b561823143eb931de0f82fed03ae128ef954a9641309602ea0901c357f95/scikit_image-0.24.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:82ab903afa60b2da1da2e6f0c8c65e7c8868c60a869464c41971da929b3e82bc", size = 12949363, upload-time = "2024-06-18T19:05:02.773Z" }, + { url = "https://files.pythonhosted.org/packages/93/8e/b6e50d8a6572daf12e27acbf9a1722fdb5e6bfc64f04a5fefa2a71fea0c3/scikit_image-0.24.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef04360eda372ee5cd60aebe9be91258639c86ae2ea24093fb9182118008d009", size = 14083010, upload-time = "2024-06-18T19:05:07.582Z" }, + { url = "https://files.pythonhosted.org/packages/d6/6c/f528c6b80b4e9d38444d89f0d1160797d20c640b7a8cabd8b614ac600b79/scikit_image-0.24.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:e9aadb442360a7e76f0c5c9d105f79a83d6df0e01e431bd1d5757e2c5871a1f3", size = 13414235, upload-time = "2024-06-18T19:05:11.58Z" }, + { url = "https://files.pythonhosted.org/packages/52/03/59c52aa59b952aafcf19163e5d7e924e6156c3d9e9c86ea3372ad31d90f8/scikit_image-0.24.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e37de6f4c1abcf794e13c258dc9b7d385d5be868441de11c180363824192ff7", size = 14238540, upload-time = "2024-06-18T19:05:17.481Z" }, + { url = "https://files.pythonhosted.org/packages/f0/cc/1a58efefb9b17c60d15626b33416728003028d5d51f0521482151a222560/scikit_image-0.24.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4688c18bd7ec33c08d7bf0fd19549be246d90d5f2c1d795a89986629af0a1e83", size = 14883801, upload-time = "2024-06-18T19:05:23.231Z" }, + { url = "https://files.pythonhosted.org/packages/9d/63/233300aa76c65a442a301f9d2416a9b06c91631287bd6dd3d6b620040096/scikit_image-0.24.0-cp39-cp39-win_amd64.whl", hash = "sha256:56dab751d20b25d5d3985e95c9b4e975f55573554bd76b0aedf5875217c93e69", size = 12891952, upload-time = "2024-06-18T19:05:27.173Z" }, +] + +[[package]] +name = "scikit-image" +version = "0.25.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", 
+] +dependencies = [ + { name = "imageio", marker = "python_full_version >= '3.10'" }, + { name = "lazy-loader", marker = "python_full_version >= '3.10'" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging", marker = "python_full_version >= '3.10'" }, + { name = "pillow", version = "12.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "scipy", version = "1.16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "tifffile", version = "2025.5.10", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, + { name = "tifffile", version = "2025.12.12", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c7/a8/3c0f256012b93dd2cb6fda9245e9f4bff7dc0486880b248005f15ea2255e/scikit_image-0.25.2.tar.gz", hash = "sha256:e5a37e6cd4d0c018a7a55b9d601357e3382826d3888c10d0213fc63bff977dde", size = 22693594, upload-time = "2025-02-18T18:05:24.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/cb/016c63f16065c2d333c8ed0337e18a5cdf9bc32d402e4f26b0db362eb0e2/scikit_image-0.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:d3278f586793176599df6a4cf48cb6beadae35c31e58dc01a98023af3dc31c78", size = 13988922, upload-time = "2025-02-18T18:04:11.069Z" }, + { url = "https://files.pythonhosted.org/packages/30/ca/ff4731289cbed63c94a0c9a5b672976603118de78ed21910d9060c82e859/scikit_image-0.25.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:5c311069899ce757d7dbf1d03e32acb38bb06153236ae77fcd820fd62044c063", size = 13192698, upload-time = "2025-02-18T18:04:15.362Z" }, + { url = "https://files.pythonhosted.org/packages/39/6d/a2aadb1be6d8e149199bb9b540ccde9e9622826e1ab42fe01de4c35ab918/scikit_image-0.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be455aa7039a6afa54e84f9e38293733a2622b8c2fb3362b822d459cc5605e99", size = 14153634, upload-time = "2025-02-18T18:04:18.496Z" }, + { url = "https://files.pythonhosted.org/packages/96/08/916e7d9ee4721031b2f625db54b11d8379bd51707afaa3e5a29aecf10bc4/scikit_image-0.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c464b90e978d137330be433df4e76d92ad3c5f46a22f159520ce0fdbea8a09", size = 14767545, upload-time = "2025-02-18T18:04:22.556Z" }, + { url = "https://files.pythonhosted.org/packages/5f/ee/c53a009e3997dda9d285402f19226fbd17b5b3cb215da391c4ed084a1424/scikit_image-0.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:60516257c5a2d2f74387c502aa2f15a0ef3498fbeaa749f730ab18f0a40fd054", size = 12812908, upload-time = "2025-02-18T18:04:26.364Z" }, + { url = "https://files.pythonhosted.org/packages/c4/97/3051c68b782ee3f1fb7f8f5bb7d535cf8cb92e8aae18fa9c1cdf7e15150d/scikit_image-0.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f4bac9196fb80d37567316581c6060763b0f4893d3aca34a9ede3825bc035b17", size = 14003057, upload-time = "2025-02-18T18:04:30.395Z" }, + { url = "https://files.pythonhosted.org/packages/19/23/257fc696c562639826065514d551b7b9b969520bd902c3a8e2fcff5b9e17/scikit_image-0.25.2-cp311-cp311-macosx_12_0_arm64.whl", hash = 
"sha256:d989d64ff92e0c6c0f2018c7495a5b20e2451839299a018e0e5108b2680f71e0", size = 13180335, upload-time = "2025-02-18T18:04:33.449Z" }, + { url = "https://files.pythonhosted.org/packages/ef/14/0c4a02cb27ca8b1e836886b9ec7c9149de03053650e9e2ed0625f248dd92/scikit_image-0.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2cfc96b27afe9a05bc92f8c6235321d3a66499995675b27415e0d0c76625173", size = 14144783, upload-time = "2025-02-18T18:04:36.594Z" }, + { url = "https://files.pythonhosted.org/packages/dd/9b/9fb556463a34d9842491d72a421942c8baff4281025859c84fcdb5e7e602/scikit_image-0.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24cc986e1f4187a12aa319f777b36008764e856e5013666a4a83f8df083c2641", size = 14785376, upload-time = "2025-02-18T18:04:39.856Z" }, + { url = "https://files.pythonhosted.org/packages/de/ec/b57c500ee85885df5f2188f8bb70398481393a69de44a00d6f1d055f103c/scikit_image-0.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:b4f6b61fc2db6340696afe3db6b26e0356911529f5f6aee8c322aa5157490c9b", size = 12791698, upload-time = "2025-02-18T18:04:42.868Z" }, + { url = "https://files.pythonhosted.org/packages/35/8c/5df82881284459f6eec796a5ac2a0a304bb3384eec2e73f35cfdfcfbf20c/scikit_image-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8db8dd03663112783221bf01ccfc9512d1cc50ac9b5b0fe8f4023967564719fb", size = 13986000, upload-time = "2025-02-18T18:04:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/ce/e6/93bebe1abcdce9513ffec01d8af02528b4c41fb3c1e46336d70b9ed4ef0d/scikit_image-0.25.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:483bd8cc10c3d8a7a37fae36dfa5b21e239bd4ee121d91cad1f81bba10cfb0ed", size = 13235893, upload-time = "2025-02-18T18:04:51.049Z" }, + { url = "https://files.pythonhosted.org/packages/53/4b/eda616e33f67129e5979a9eb33c710013caa3aa8a921991e6cc0b22cea33/scikit_image-0.25.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9d1e80107bcf2bf1291acfc0bf0425dceb8890abe9f38d8e94e23497cbf7ee0d", size = 14178389, upload-time = "2025-02-18T18:04:54.245Z" }, + { url = "https://files.pythonhosted.org/packages/6b/b5/b75527c0f9532dd8a93e8e7cd8e62e547b9f207d4c11e24f0006e8646b36/scikit_image-0.25.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a17e17eb8562660cc0d31bb55643a4da996a81944b82c54805c91b3fe66f4824", size = 15003435, upload-time = "2025-02-18T18:04:57.586Z" }, + { url = "https://files.pythonhosted.org/packages/34/e3/49beb08ebccda3c21e871b607c1cb2f258c3fa0d2f609fed0a5ba741b92d/scikit_image-0.25.2-cp312-cp312-win_amd64.whl", hash = "sha256:bdd2b8c1de0849964dbc54037f36b4e9420157e67e45a8709a80d727f52c7da2", size = 12899474, upload-time = "2025-02-18T18:05:01.166Z" }, + { url = "https://files.pythonhosted.org/packages/e6/7c/9814dd1c637f7a0e44342985a76f95a55dd04be60154247679fd96c7169f/scikit_image-0.25.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7efa888130f6c548ec0439b1a7ed7295bc10105458a421e9bf739b457730b6da", size = 13921841, upload-time = "2025-02-18T18:05:03.963Z" }, + { url = "https://files.pythonhosted.org/packages/84/06/66a2e7661d6f526740c309e9717d3bd07b473661d5cdddef4dd978edab25/scikit_image-0.25.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dd8011efe69c3641920614d550f5505f83658fe33581e49bed86feab43a180fc", size = 13196862, upload-time = "2025-02-18T18:05:06.986Z" }, + { url = "https://files.pythonhosted.org/packages/4e/63/3368902ed79305f74c2ca8c297dfeb4307269cbe6402412668e322837143/scikit_image-0.25.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28182a9d3e2ce3c2e251383bdda68f8d88d9fff1a3ebe1eb61206595c9773341", size = 14117785, upload-time = "2025-02-18T18:05:10.69Z" }, + { url = "https://files.pythonhosted.org/packages/cd/9b/c3da56a145f52cd61a68b8465d6a29d9503bc45bc993bb45e84371c97d94/scikit_image-0.25.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:b8abd3c805ce6944b941cfed0406d88faeb19bab3ed3d4b50187af55cf24d147", size = 14977119, upload-time = "2025-02-18T18:05:13.871Z" }, + { url = "https://files.pythonhosted.org/packages/8a/97/5fcf332e1753831abb99a2525180d3fb0d70918d461ebda9873f66dcc12f/scikit_image-0.25.2-cp313-cp313-win_amd64.whl", hash = "sha256:64785a8acefee460ec49a354706db0b09d1f325674107d7fa3eadb663fb56d6f", size = 12885116, upload-time = "2025-02-18T18:05:17.844Z" }, + { url = "https://files.pythonhosted.org/packages/10/cc/75e9f17e3670b5ed93c32456fda823333c6279b144cd93e2c03aa06aa472/scikit_image-0.25.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:330d061bd107d12f8d68f1d611ae27b3b813b8cdb0300a71d07b1379178dd4cd", size = 13862801, upload-time = "2025-02-18T18:05:20.783Z" }, +] + [[package]] name = "scikit-learn" version = "1.6.1" @@ -2107,6 +2264,52 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, ] +[[package]] +name = "tifffile" +version = "2024.8.30" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +dependencies = [ + { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/54/30/7017e5560154c100cad3a801c02adb48879cd8e8cb862b82696d84187184/tifffile-2024.8.30.tar.gz", hash = "sha256:2c9508fe768962e30f87def61819183fb07692c258cb175b3c114828368485a4", size = 365714, upload-time = "2024-08-31T17:32:43.945Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/4f/73714b1c1d339b1545cac28764e39f88c69468b5e10e51f327f9aa9d55b9/tifffile-2024.8.30-py3-none-any.whl", hash = 
"sha256:8bc59a8f02a2665cd50a910ec64961c5373bee0b8850ec89d3b7b485bf7be7ad", size = 227262, upload-time = "2024-08-31T17:32:41.87Z" }, +] + +[[package]] +name = "tifffile" +version = "2025.5.10" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/d0/18fed0fc0916578a4463f775b0fbd9c5fed2392152d039df2fb533bfdd5d/tifffile-2025.5.10.tar.gz", hash = "sha256:018335d34283aa3fd8c263bae5c3c2b661ebc45548fde31504016fcae7bf1103", size = 365290, upload-time = "2025-05-10T19:22:34.386Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/06/bd0a6097da704a7a7c34a94cfd771c3ea3c2f405dd214e790d22c93f6be1/tifffile-2025.5.10-py3-none-any.whl", hash = "sha256:e37147123c0542d67bc37ba5cdd67e12ea6fbe6e86c52bee037a9eb6a064e5ad", size = 226533, upload-time = "2025-05-10T19:22:27.279Z" }, +] + +[[package]] +name = "tifffile" +version = "2025.12.12" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", +] +dependencies = [ + { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/31/b9/4253513a66f0a836ec3a5104266cf73f7812bfbbcda9d87d8c0e93b28293/tifffile-2025.12.12.tar.gz", hash = "sha256:97e11fd6b1d8dc971896a098c841d9cd4e6eb958ac040dd6fb8b332c3f7288b6", size = 373597, upload-time = "2025-12-13T03:42:53.765Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/5c/e444e1b024a519e488326525f0c154396c6b16baff17e00623f2c21dfc42/tifffile-2025.12.12-py3-none-any.whl", hash = "sha256:e3e3f1290ec6741ca248a5b5a997125209b5c2962f6bd9aef01ea9352c25d0ee", size = 232132, 
upload-time = "2025-12-13T03:42:52.072Z" }, +] + [[package]] name = "tomli" version = "2.3.0"