diff --git a/DESCRIPTION b/DESCRIPTION index 8e9f343..346ad04 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,27 +35,37 @@ Encoding: UTF-8 LazyData: true RoxygenNote: 7.3.3 Depends: - R (>= 4.0.0) + R (>= 4.1.0) Imports: - sf (>= 1.0.0), + igraph, + magrittr, + Matrix, prioritizr (>= 7.0.0), rlang, + sf (>= 1.0.0), stats, - magrittr, - igraph, - Matrix + tidyselect Suggests: dplyr, ggplot2, - prioritizrdata, - terra, - testthat (>= 3.0.0), + kableExtra, knitr, + lwgeom, + oceandatr, + parallelly, patchwork, + prioritizrdata, + purrr, rmarkdown, + shadowtext, stars, stringr, - parallelly + terra, + testthat (>= 3.0.0), + tibble, + tmap +Remotes: + github::emlab-ucsb/oceandatr VignetteBuilder: knitr URL: https://github.com/SpatialPlanning/minpatch BugReports: https://github.com/SpatialPlanning/minpatch/issues diff --git a/NAMESPACE b/NAMESPACE index cb175d5..2f7efa8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,8 +3,11 @@ export("%>%") export(compare_solutions) export(generate_minpatch_report) +export(make_minpatch_summary_table) export(plot_minpatch) +export(plot_patch_validity) export(plot_prioritizr) +export(plot_solution_grid) export(print_minpatch_summary) export(run_minpatch) importFrom(magrittr,"%>%") diff --git a/R/minpatch.R b/R/minpatch.R index 33ee34b..8755cd5 100644 --- a/R/minpatch.R +++ b/R/minpatch.R @@ -23,6 +23,8 @@ #' @param whittle_patches Logical, whether to remove unnecessary units (Stage 3, default = TRUE) #' @param solution_column Name of solution column (default = "solution_1") #' @param verbose Logical, whether to print progress (default = TRUE) +#' @param debug_boundary Logical, whether to print boundary cost debug info (default = FALSE) +#' @param debug_boundary_every Integer, print debug info every N iterations (default = 50) #' #' @details #' The MinPatch algorithm consists of three stages: @@ -109,7 +111,9 @@ run_minpatch <- function(prioritizr_problem, add_patches = TRUE, whittle_patches = TRUE, solution_column = "solution_1", - verbose = TRUE) { + verbose = TRUE, + debug_boundary = FALSE, + debug_boundary_every = 50) { # Stage 0: Checks and data preparation ----- @@ -212,6 +216,10 @@ run_minpatch <- function(prioritizr_problem, prioritizr_problem, prioritizr_solution, verbose ) + # DEBUG: pass flags into downstream functions (e.g., simulated_whittling) + minpatch_data$debug_boundary <- debug_boundary + minpatch_data$debug_boundary_every <- debug_boundary_every + # Create initial minpatch column in prioritizr_solution minpatch_data$prioritizr_solution$minpatch <- create_solution_vector(minpatch_data$unit_dict) diff --git a/R/output.R b/R/output.R index b99ea63..3d962cb 100644 --- a/R/output.R +++ b/R/output.R @@ -322,22 +322,118 @@ plot_minpatch <- function(minpatch_result, title = "MinPatch Results") { #' plot_prioritizr(s, col = "solution_1", title = "My Conservation Plan") plot_prioritizr <- function(s, col = "solution_1", title = "prioritizr Solution") { -ggplot2::ggplot(data = s) + - ggplot2::geom_sf(ggplot2::aes(fill = as.logical(.data[[col]])), color = "white", size = 0.2) + - ggplot2::scale_fill_manual( - values = c("TRUE" = "blue", - "FALSE" = "grey80"), - name = "PU's Selected", - drop = FALSE) + - ggplot2::theme_void() + - ggplot2::labs(title = title) + - ggplot2::theme( - plot.title = ggplot2::element_text(hjust = 0.5), - legend.position = "bottom" - ) + ggplot2::ggplot(data = s) + + ggplot2::geom_sf(ggplot2::aes(fill = as.logical(.data[[col]])), color = "white", size = 0.2) + + ggplot2::scale_fill_manual( + values = c("TRUE" = "blue", + "FALSE" = "grey80"), + name = "PU's Selected", + drop = FALSE) + + ggplot2::theme_void() + + ggplot2::labs(title = title) + + ggplot2::theme( + plot.title = ggplot2::element_text(hjust = 0.5), + legend.position = "bottom" + ) } +#' Plot solution grid across multiple results +#' +#' Creates a grid of prioritizr-style selection maps with a shared legend. +#' Useful for comparing solutions across different boundary penalties or +#' MinPatch multipliers. +#' +#' @param results List of MinPatch result objects to plot +#' @param get_solution_fun Function to extract solution sf object from each result +#' @param ncol Number of columns in the grid (default = 3) +#' @param legend_position Position of shared legend (default = "bottom") +#' @param title_size Font size for plot titles (default = 13) +#' @param titles Character vector of titles (one per result). Takes priority over other title options. +#' @param title_fun Function(i, results) returning title string. Used if titles is NULL. +#' @param subtitle_values Numeric vector of values (e.g., penalties or multipliers) for titles +#' @param title_prefix Prefix string for subtitle_values titles (e.g., "Boundary penalty = ") +#' @param value_label_fun Function to format subtitle_values (default formats scientifically) +#' @param include_baseline Logical, whether to include a baseline plot (default = FALSE) +#' @param baseline_solution sf object for baseline solution (required if include_baseline = TRUE) +#' @param baseline_title Title for baseline plot +#' +#' @return A patchwork object combining all plots +#' @export +plot_solution_grid <- function( + results, + get_solution_fun, + ncol = 3, + legend_position = "bottom", + title_size = 13, + + # --- Title options (pick ONE) --- + titles = NULL, # e.g., c("MinPatch: 5x median PU area", ...) + title_fun = NULL, # function(i, results) -> "..." + subtitle_values = NULL, # e.g., penalties or multipliers + title_prefix = "", # e.g., "Boundary penalty = " or "MinPatch: " + value_label_fun = function(x) format(x, scientific = TRUE, trim = TRUE), + + # --- Optional baseline --- + include_baseline = FALSE, + baseline_solution = NULL, + baseline_title = NULL +) { + + stopifnot(length(results) >= 1) + + # Extract solution objects for plotting + sol_list <- purrr::map(results, get_solution_fun) + + # ---- Build titles (priority order) ---- + if (!is.null(titles)) { + stopifnot(length(titles) == length(sol_list)) + plot_titles <- titles + + } else if (!is.null(title_fun)) { + plot_titles <- purrr::map_chr(seq_along(sol_list), ~ title_fun(.x, results)) + + } else if (!is.null(subtitle_values)) { + stopifnot(length(subtitle_values) == length(sol_list)) + plot_titles <- paste0(title_prefix, vapply(subtitle_values, value_label_fun, character(1))) + + } else { + plot_titles <- paste("Run", seq_along(sol_list)) + } + + # ---- Make plots ---- + p_list <- purrr::map2( + sol_list, + plot_titles, + ~ plot_prioritizr(.x) + + ggplot2::ggtitle(.y) + + ggplot2::theme( + plot.title = ggplot2::element_text(hjust = 0.5, size = title_size), + legend.position = "none" + ) + ) + + # ---- Optional baseline (prepended) ---- + if (isTRUE(include_baseline)) { + if (is.null(baseline_solution)) { + stop("include_baseline = TRUE, but baseline_solution is NULL.") + } + + p_base <- plot_prioritizr(baseline_solution) + + ggplot2::ggtitle(baseline_title) + + ggplot2::theme( + plot.title = ggplot2::element_text(hjust = 0.5, size = title_size), + legend.position = "none" + ) + + p_list <- c(list(p_base), p_list) + } + + # ---- Wrap and collect legend once ---- + patchwork::wrap_plots(p_list, ncol = ncol, guides = "collect") & + ggplot2::theme(legend.position = legend_position) +} + #' Generate comprehensive MinPatch report #' @@ -499,3 +595,441 @@ print_minpatch_summary <- function(minpatch_result) { cat("\n=== End Summary ===\n") } + +#' Make MinPatch summary table +#' +#' Builds a single table summarising a baseline solution and multiple MinPatch solutions. +#' Baseline patch metrics come from `compare_solutions(... )$overall[,"Original"]`, while +#' MinPatch metrics come from `compare_solutions(... )$overall[,"MinPatch"]`. +#' +#' @param region_sf sf of all planning units (used to compute median PU area for min patch size). +#' @param baseline_solution_sf sf containing the baseline solution selection column and PU costs. +#' @param minpatch_results List of MinPatch results; each must contain `$solution` (sf). +#' @param multipliers Numeric vector; same length/order as `minpatch_results`. +#' @param compare_solutions_fun Function that returns a list with `$overall`. +#' If NULL, the function will try to find `compare_solutions()` in your session. +#' @param baseline_compare_obj Object to pass into compare_solutions() for baseline "Original" metrics. +#' If NULL, defaults to `minpatch_results[[1]]`. +#' @param baseline_elapsed Baseline runtime (seconds). Optional. +#' @param minpatch_elapsed MinPatch runtimes (seconds). Optional; must match `multipliers`. +#' @param projected_epsg EPSG for perimeter calcs if inputs are lon/lat (default 32740). +#' @param baseline_solution_col Baseline selection column name (1/0). If NULL, auto-detects. +#' @param minpatch_solution_col MinPatch selection column in `$solution` (default "minpatch"). +#' @param cost_col PU cost column in `baseline_solution_sf` (default "cost"). +#' @param baseline_boundary_cost Baseline boundary cost (default 0). +#' @param make_kable If TRUE, also returns an HTML kable. +#' +#' @return list(data = tibble, kable = html or NULL) +#' @export +make_minpatch_summary_table <- function( + region_sf, + baseline_solution_sf, + minpatch_results, + multipliers, + compare_solutions_fun = NULL, + baseline_compare_obj = NULL, + baseline_elapsed = NA_real_, + minpatch_elapsed = NULL, + projected_epsg = 32740, + baseline_solution_col = NULL, + minpatch_solution_col = "minpatch", + cost_col = "cost", + baseline_boundary_cost = 0, + make_kable = TRUE +) { + if (!requireNamespace("sf", quietly = TRUE)) stop("Package 'sf' is required.") + if (!requireNamespace("dplyr", quietly = TRUE)) stop("Package 'dplyr' is required.") + if (!requireNamespace("tibble", quietly = TRUE)) stop("Package 'tibble' is required.") + if (!requireNamespace("purrr", quietly = TRUE)) stop("Package 'purrr' is required.") + if (isTRUE(make_kable)) { + if (!requireNamespace("knitr", quietly = TRUE)) stop("Package 'knitr' is required for make_kable=TRUE.") + if (!requireNamespace("kableExtra", quietly = TRUE)) stop("Package 'kableExtra' is required for make_kable=TRUE.") + } + + # --- Resolve compare_solutions() robustly --- + if (is.null(compare_solutions_fun)) { + if (exists("compare_solutions", mode = "function", inherits = TRUE)) { + compare_solutions_fun <- get("compare_solutions", mode = "function") + } else { + ga <- utils::getAnywhere("compare_solutions") + if (length(ga$objs) > 0) { + compare_solutions_fun <- ga$objs[[1]] + } else { + stop( + "compare_solutions() was not found.\n", + "Fix: load the package/script that defines compare_solutions(), or pass it in as compare_solutions_fun=." + ) + } + } + } + + # --- helper to extract a metric --- + get_metric <- function(overall, column, metric) { + out <- overall[[column]][overall$Metric == metric] + if (length(out) == 0) NA_real_ else out + } + + # --- total OUTER perimeter (km) of selected PUs --- + total_outer_perimeter_km <- function(solution_sf, solution_col) { + sel <- solution_sf %>% dplyr::filter(.data[[solution_col]] == 1) + if (nrow(sel) == 0) return(NA_real_) + u <- sf::st_union(sf::st_geometry(sel)) + per_m <- as.numeric(sf::st_length(sf::st_cast(u, "MULTILINESTRING"))) + per_m / 1000 + } + + # --- sum of PU costs over selected PUs --- + pu_cost <- function(solution_sf, solution_col, cost_col = "cost") { + if (!cost_col %in% names(solution_sf)) { + stop("Cost column '", cost_col, "' not found in baseline_solution_sf.") + } + sel <- solution_sf %>% dplyr::filter(.data[[solution_col]] == 1) + sum(sel[[cost_col]], na.rm = TRUE) + } + + # --- checks --- + if (length(minpatch_results) != length(multipliers)) { + stop("minpatch_results and multipliers must have the same length.") + } + if (!is.null(minpatch_elapsed) && length(minpatch_elapsed) != length(multipliers)) { + stop("minpatch_elapsed must be NULL or same length as multipliers.") + } + + # --- auto-detect baseline selection column if not provided --- + if (is.null(baseline_solution_col)) { + baseline_solution_col <- if ("solution_1" %in% names(baseline_solution_sf)) "solution_1" else + if ("prioritizr" %in% names(baseline_solution_sf)) "prioritizr" else + if ("solution" %in% names(baseline_solution_sf)) "solution" else + stop("Could not auto-detect baseline_solution_col; please pass it explicitly.") + } + + # --- project to metres for perimeter if inputs are lon/lat --- + to_projected_if_longlat <- function(x) { + if (inherits(x, "sf") && sf::st_is_longlat(x)) sf::st_transform(x, projected_epsg) else x + } + region_sf_proj <- to_projected_if_longlat(region_sf) + baseline_sf_proj <- to_projected_if_longlat(baseline_solution_sf) + minpatch_results_proj <- purrr::map(minpatch_results, function(res) { + res$solution <- to_projected_if_longlat(res$solution) + res + }) + + # --- min patch size lookup (km^2) from median PU area --- + median_pu_area_m2 <- stats::median(sf::st_area(region_sf_proj)) + min_patch_lookup <- tibble::tibble( + multiplier = multipliers, + min_patch_km2 = as.numeric(multipliers * median_pu_area_m2) / 1e6 + ) + + # --- runtime lookup (seconds) --- + runtime_lookup <- tibble::tibble( + scenario = c("Baseline", paste0("MinPatch \u00d7", multipliers)), + runtime_sec = c(as.numeric(baseline_elapsed), + if (is.null(minpatch_elapsed)) rep(NA_real_, length(multipliers)) else as.numeric(minpatch_elapsed)) + ) + + # --- baseline overall metrics (Original) --- + if (is.null(baseline_compare_obj)) baseline_compare_obj <- minpatch_results[[1]] + overall0 <- compare_solutions_fun(baseline_compare_obj)$overall + + baseline_pu_cost_val <- pu_cost(baseline_sf_proj, baseline_solution_col, cost_col = cost_col) + baseline_overall_cost_val <- baseline_pu_cost_val + baseline_boundary_cost + + baseline_row <- tibble::tibble( + scenario = "Baseline", + multiplier = NA_real_, + min_patch_size_km2 = NA_real_, + selected_pu = get_metric(overall0, "Original", "Selected Planning Units"), + total_area_km2 = get_metric(overall0, "Original", "Total Area") / 1e6, + total_perimeter_km = total_outer_perimeter_km(baseline_sf_proj, baseline_solution_col), + n_patches = get_metric(overall0, "Original", "Number of Patches"), + valid_patches_ge_min = get_metric(overall0, "Original", "Valid Patches (>= min size)"), + median_patch_km2 = get_metric(overall0, "Original", "Median Patch Size") / 1e6, + total_pu_cost = baseline_pu_cost_val, + boundary_cost = baseline_boundary_cost, + overall_cost = baseline_overall_cost_val + ) + + # --- MinPatch rows --- + minpatch_rows <- purrr::map2_dfr(minpatch_results_proj, multipliers, function(res, mult) { + overall <- compare_solutions_fun(res)$overall + tibble::tibble( + scenario = paste0("MinPatch \u00d7", mult), + multiplier = mult, + min_patch_size_km2 = min_patch_lookup$min_patch_km2[min_patch_lookup$multiplier == mult], + selected_pu = get_metric(overall, "MinPatch", "Selected Planning Units"), + total_area_km2 = get_metric(overall, "MinPatch", "Total Area") / 1e6, + total_perimeter_km = total_outer_perimeter_km(res$solution, minpatch_solution_col), + n_patches = get_metric(overall, "MinPatch", "Number of Patches"), + valid_patches_ge_min = get_metric(overall, "MinPatch", "Valid Patches (>= min size)"), + median_patch_km2 = get_metric(overall, "MinPatch", "Median Patch Size") / 1e6, + total_pu_cost = get_metric(overall, "MinPatch", "Planning Unit Cost"), + boundary_cost = get_metric(overall, "MinPatch", "Boundary Cost"), + overall_cost = get_metric(overall, "MinPatch", "Total Cost") + ) + }) + + # --- combine + format --- + solution_summary <- dplyr::bind_rows(baseline_row, minpatch_rows) %>% + dplyr::left_join(runtime_lookup, by = "scenario") %>% + dplyr::mutate( + multiplier = dplyr::if_else(is.na(.data$multiplier), "-", as.character(.data$multiplier)), + dplyr::across(tidyselect::all_of(c("selected_pu", "n_patches", "valid_patches_ge_min")), + ~ as.integer(round(.x))), + dplyr::across(tidyselect::all_of(c("min_patch_size_km2", "total_area_km2", "median_patch_km2", + "total_perimeter_km","total_pu_cost", "boundary_cost", + "overall_cost")), + ~ round(.x, 2)), + runtime_sec = round(.data$runtime_sec, 1) + ) %>% + dplyr::select( + .data$scenario, + `Multiplier` = .data$multiplier, + `Minimum patch size (km^2)` = .data$min_patch_size_km2, + `Selected PUs` = .data$selected_pu, + `Total area (km^2)` = .data$total_area_km2, + `Total perimeter (km)` = .data$total_perimeter_km, + `# patches` = .data$n_patches, + `Valid patches (>= min size)` = .data$valid_patches_ge_min, + `Median patch size (km^2)` = .data$median_patch_km2, + `Total PU cost` = .data$total_pu_cost, + `Boundary cost` = .data$boundary_cost, + `Overall cost` = .data$overall_cost, + `Runtime (s)` = .data$runtime_sec + ) + + out <- list(data = solution_summary, kable = NULL) + + if (isTRUE(make_kable)) { + out$kable <- knitr::kable(solution_summary, format = "html", align = "l") |> + kableExtra::kable_styling( + bootstrap_options = c("striped", "hover", "condensed", "responsive"), + full_width = FALSE, + position = "left" + ) |> + kableExtra::row_spec(0, bold = TRUE) + } + + out +} + + +#' Plot rook-adjacency patches for a MinPatch solution and flag valid/invalid patches +#' +#' A patch is a rook-connected cluster of selected planning units (edge-sharing). +#' A patch is "valid" if its area >= (multiplier x median PU area), with a small +#' numeric tolerance to avoid floating-point edge cases. +#' +#' @param multiplier Numeric. The MinPatch multiplier to plot (must exist in `multipliers`). +#' @param multipliers Numeric vector of multipliers (same order as `minpatch_results`). +#' @param minpatch_results List. Each element contains a `$solution` sf object with column `minpatch`. +#' @param pu_sf sf. Planning-unit sf used to compute median PU area (defaults to Seychelles_sf if present). +#' @param return Character. "plot" (ggplot), "counts" (tibble), or "patches" (sf patch polygons). +#' @param do_snap Logical. If TRUE, snap geometries before adjacency for robustness. +#' @param snap_size Numeric. Grid size passed to `lwgeom::st_snap_to_grid()` (CRS units). +#' @param eps_rel Numeric. Relative tolerance applied to the area threshold (default 1e-9). +#' @param debug Logical. If TRUE, prints patch table (including diff vs threshold). +#' +#' @return ggplot object, or tibble, or sf patch polygons (see `return`). +#' @export +plot_patch_validity <- function( + multiplier, + multipliers, + minpatch_results, + pu_sf = NULL, + return = c("plot", "counts", "patches"), + do_snap = TRUE, + snap_size = 1, + eps_rel = 1e-9, + debug = FALSE +) { + + return <- match.arg(return) + + if (!requireNamespace("dplyr", quietly = TRUE)) stop("Package 'dplyr' is required.") + if (!requireNamespace("sf", quietly = TRUE)) stop("Package 'sf' is required.") + if (!requireNamespace("ggplot2", quietly = TRUE)) stop("Package 'ggplot2' is required.") + if (!requireNamespace("igraph", quietly = TRUE)) stop("Package 'igraph' is required.") + if (!requireNamespace("shadowtext", quietly = TRUE)) stop("Package 'shadowtext' is required.") + if (!requireNamespace("lwgeom", quietly = TRUE)) stop("Package 'lwgeom' is required.") + + # fallback PU sf for threshold + if (is.null(pu_sf)) { + if (exists("Seychelles_sf", inherits = TRUE)) { + pu_sf <- get("Seychelles_sf", inherits = TRUE) + } else { + stop("Provide `pu_sf` (planning-unit sf) or ensure `Seychelles_sf` exists.") + } + } + + # locate run + i_run <- which(multipliers == multiplier) + stopifnot(length(i_run) == 1) + + sol <- minpatch_results[[i_run]]$solution + stopifnot("minpatch" %in% names(sol)) + + # consistent PU id for joins/labels + sol <- sol %>% + dplyr::mutate( + pu_id = dplyr::row_number(), + selected = (.data$minpatch == 1) + ) + + # ---- threshold area (m^2): multiplier x median PU area (numeric) ---- + median_pu_area_m2 <- stats::median(as.numeric(sf::st_area(pu_sf)), na.rm = TRUE) + min_patch_area_m2 <- as.numeric(multiplier) * as.numeric(median_pu_area_m2) + + # selected units only (for patch detection) + sel <- sol %>% dplyr::filter(.data$selected) + stopifnot(nrow(sel) > 0) + + # optionally snap + validate (only for patch counting/unioning) + sel2 <- sel + if (isTRUE(do_snap)) { + sel2 <- sel2 %>% lwgeom::st_snap_to_grid(size = snap_size) + } + sel2 <- sel2 %>% sf::st_make_valid() + + # rook adjacency = edge sharing only + nb_list <- sf::st_relate(sel2, sel2, pattern = "F***1****") + + edges <- do.call( + rbind, + lapply(seq_along(nb_list), function(i) { + if (length(nb_list[[i]]) == 0) return(NULL) + cbind(from = i, to = nb_list[[i]]) + }) + ) + + g <- igraph::make_empty_graph(n = nrow(sel2), directed = FALSE) + if (!is.null(edges) && nrow(edges) > 0) { + g <- igraph::add_edges(g, as.vector(t(edges))) + } + + comp <- igraph::components(g) + sel2$patch_id <- comp$membership + + # numeric tolerance to avoid floating-point edge cases + eps <- eps_rel * min_patch_area_m2 + + # ---- patch polygons + patch size (area) + validity ---- + patch_info <- sel2 %>% + dplyr::group_by(.data$patch_id) %>% + dplyr::summarise( + n_pu = dplyr::n(), + geometry = sf::st_union(.data$geometry), + .groups = "drop" + ) %>% + sf::st_as_sf() %>% + dplyr::mutate( + patch_area = as.numeric(sf::st_area(.data$geometry)), # m^2 + is_valid = (.data$patch_area + eps) >= min_patch_area_m2, + label_pt = sf::st_point_on_surface(.data$geometry) + ) + + # optional debug table + if (isTRUE(debug)) { + print( + patch_info %>% + sf::st_drop_geometry() %>% + dplyr::mutate( + patch_km2 = .data$patch_area / 1e6, + thresh_km2 = min_patch_area_m2 / 1e6, + diff_km2 = .data$patch_km2 - .data$thresh_km2 + ) %>% + dplyr::arrange(.data$is_valid, .data$patch_area) + ) + } + + # counts + n_patches <- nrow(patch_info) + n_valid_patches <- sum(patch_info$is_valid) + n_invalid_patches <- n_patches - n_valid_patches + + if (return == "counts") { + return(tibble::tibble( + scenario = paste0("MinPatch \u00d7", multiplier), + new_n_patches = as.integer(n_patches), + new_valid_patches = as.integer(n_valid_patches), + new_invalid_patches = as.integer(n_invalid_patches), + threshold_km2 = min_patch_area_m2 / 1e6, + snapped = isTRUE(do_snap) + )) + } + + if (return == "patches") { + return(patch_info) + } + + # label coords + xy <- sf::st_coordinates(patch_info$label_pt) + patch_info$x <- xy[, 1] + patch_info$y <- xy[, 2] + + # join validity back to PUs + sel_class <- sel2 %>% + sf::st_drop_geometry() %>% + dplyr::select(.data$pu_id, .data$patch_id) %>% + dplyr::left_join( + patch_info %>% sf::st_drop_geometry() %>% dplyr::select(.data$patch_id, .data$is_valid), + by = "patch_id" + ) %>% + dplyr::mutate(status = ifelse(.data$is_valid, "Valid patch", "Invalid patch")) + + sol_plot <- sol %>% + dplyr::left_join(sel_class %>% dplyr::select(.data$pu_id, .data$status), by = "pu_id") %>% + dplyr::mutate( + status = ifelse(is.na(.data$status), "Not selected", .data$status), + status = factor(.data$status, levels = c("Not selected", "Valid patch", "Invalid patch")) + ) + + cat( + "MinPatch ", multiplier, "x: ", + n_patches, " patches | ", + n_valid_patches, " valid | ", + n_invalid_patches, " invalid", + " (threshold = ", round(min_patch_area_m2 / 1e6, 3), " km\u00b2) | ", + "snap = ", isTRUE(do_snap), + "\n", + sep = "" + ) + + ggplot2::ggplot() + + ggplot2::geom_sf( + data = sol_plot, + ggplot2::aes(fill = .data$status), + colour = "white", + linewidth = 0.15 + ) + + shadowtext::geom_shadowtext( + data = patch_info, + ggplot2::aes(x = .data$x, y = .data$y, label = .data$patch_id), + size = 4, + fontface = "bold", + colour = "black", + bg.colour = "white", + bg.r = 0.12 + ) + + ggplot2::scale_fill_manual( + values = c( + "Not selected" = "grey92", + "Valid patch" = "#2ca25f", + "Invalid patch" = "red" + ) + ) + + ggplot2::labs( + title = paste0("MinPatch: ", multiplier, "\u00d7 median PU area"), + subtitle = paste0( + "Patches (rook adjacency): ", + n_patches, " total | ", + n_valid_patches, " valid | ", + n_invalid_patches, " invalid", + " | valid if patch area \u2265 ", + multiplier, "\u00d7 median PU area", + " | snap = ", isTRUE(do_snap) + ), + fill = NULL + ) + + ggplot2::theme_minimal() +} diff --git a/R/whittling_functions.R b/R/whittling_functions.R index 7e68849..8636513 100644 --- a/R/whittling_functions.R +++ b/R/whittling_functions.R @@ -10,6 +10,11 @@ #' @keywords internal simulated_whittling <- function(minpatch_data, verbose = TRUE) { + # message("[TEST SIMULATED WHITTLING]") + # DEBUG SETTINGS (turn on/off from your script) + debug_boundary <- isTRUE(minpatch_data$debug_boundary) + debug_every <- if (!is.null(minpatch_data$debug_boundary_every)) minpatch_data$debug_boundary_every else 200 + unit_dict <- minpatch_data$unit_dict boundary_matrix <- minpatch_data$boundary_matrix abundance_matrix <- minpatch_data$abundance_matrix @@ -21,17 +26,23 @@ simulated_whittling <- function(minpatch_data, verbose = TRUE) { iteration <- 0 max_iterations <- 10000 # Prevent infinite loops keystone_pu_id_set <- character(0) # Initialize keystone set - + # OPTIMIZATION: Cache feature amounts and only update when unit is removed feature_amounts <- calculate_feature_conservation(minpatch_data) while (iteration < max_iterations) { iteration <- iteration + 1 + minpatch_data$debug_iteration <- iteration if (verbose && iteration %% 100 == 0) { cat(" Whittling iteration", iteration, "\n") } + # IMPORTANT: sync minpatch_data BEFORE using helpers + # because helpers (find_edge_units, make_patch_dict, can_remove_unit) + # read minpatch_data$unit_dict, not local unit_dict + minpatch_data$unit_dict <- unit_dict + # Find edge planning units (units on the boundary of selected areas) edge_units <- find_edge_units(minpatch_data) @@ -77,12 +88,30 @@ simulated_whittling <- function(minpatch_data, verbose = TRUE) { # Find the unit with the lowest whittling score (most suitable for removal) candidate_unit <- names(whittle_scores)[which.min(unlist(whittle_scores))] + # OPTIONAL: show candidate + score + if (isTRUE(minpatch_data$debug_boundary)) { + cat("[DEBUG candidate] unit", candidate_unit, + "score:", whittle_scores[[candidate_unit]], "\n") + } + # Check if removing this unit is acceptable (pass cached feature amounts) removal_result <- can_remove_unit(candidate_unit, minpatch_data, feature_amounts) + if (removal_result) { - # Remove the unit + + # Remove the unit (LOCAL state) unit_dict[[candidate_unit]]$status <- 0 - + + # NEW: print why it was successful (only prints on success) + if (isTRUE(minpatch_data$debug_boundary)) { + cat("[DEBUG removed] unit", candidate_unit, + "PASSED: targets, min_patch_size, boundary_cost, split_patch\n") + } + + # CRITICAL: sync back into minpatch_data immediately + # so next iteration sees the removal in find_edge_units/make_patch_dict/etc. + minpatch_data$unit_dict <- unit_dict + # OPTIMIZATION: Update cached feature amounts incrementally instead of recalculating unit_abundances <- abundance_matrix[[candidate_unit]] for (feat_id in names(unit_abundances)) { @@ -94,6 +123,7 @@ simulated_whittling <- function(minpatch_data, verbose = TRUE) { if (verbose && iteration <= 10) { cat(" Removed unit", candidate_unit, "at iteration", iteration, "\n") } + } else { # Add to keystone set and remove from consideration keystone_pu_id_set <- c(keystone_pu_id_set, candidate_unit) @@ -115,6 +145,8 @@ simulated_whittling <- function(minpatch_data, verbose = TRUE) { return(minpatch_data) } + + #' Find edge planning units #' #' Identifies planning units that are on the edge of selected areas @@ -128,24 +160,28 @@ find_edge_units <- function(minpatch_data) { unit_dict <- minpatch_data$unit_dict boundary_matrix <- minpatch_data$boundary_matrix - + n_units <- length(unit_dict) - + # Pre-allocate for worst case (all selected units are edge units) edge_units <- character(n_units) edge_count <- 0 for (unit_id in names(unit_dict)) { - if (unit_dict[[unit_id]]$status %in% c(1, 2)) { # Consider both selected (1) and conserved (2) units + if (unit_dict[[unit_id]]$status %in% c(1, 2)) { # selected (1) or conserved (2) # Get neighbors from sparse matrix (units with non-zero boundary) unit_idx <- as.integer(unit_id) neighbor_indices <- which(boundary_matrix[unit_idx, ] > 0) + + # IMPORTANT: drop diagonal/self index so every PU isn't automatically an "edge" + neighbor_indices <- setdiff(neighbor_indices, unit_idx) + neighbor_ids <- colnames(boundary_matrix)[neighbor_indices] is_edge <- FALSE for (neighbor_id in neighbor_ids) { - if (neighbor_id != unit_id && neighbor_id %in% names(unit_dict)) { + if (neighbor_id %in% names(unit_dict)) { neighbor_status <- unit_dict[[neighbor_id]]$status # If neighbor is available (0) or excluded (3), this is an edge unit @@ -153,21 +189,16 @@ find_edge_units <- function(minpatch_data) { is_edge <- TRUE break } - } else if (neighbor_id == unit_id) { - # Self-reference (diagonal) indicates external boundary - is_edge <- TRUE - break } } - + if (is_edge) { edge_count <- edge_count + 1 edge_units[edge_count] <- unit_id } } } - - # Trim to actual size and return unique values + return(unique(edge_units[seq_len(edge_count)])) } @@ -265,26 +296,31 @@ can_remove_unit <- function(unit_id, minpatch_data, feature_amounts = NULL) { # Don't remove conserved units (status 2) if (unit_dict[[unit_id]]$status == 2) { + if (isTRUE(minpatch_data$debug_boundary)) cat("[DEBUG can_remove] unit", unit_id, "BLOCKED: conserved\n") return(FALSE) } # Check if removal would violate conservation targets (pass cached feature amounts) if (removal_violates_targets(unit_id, minpatch_data, feature_amounts)) { + if (isTRUE(minpatch_data$debug_boundary)) cat("[DEBUG can_remove] unit", unit_id, "BLOCKED: targets\n") return(FALSE) } # Check if removal would make patch too small if (removal_makes_patch_too_small(unit_id, minpatch_data)) { + if (isTRUE(minpatch_data$debug_boundary)) cat("[DEBUG can_remove] unit", unit_id, "BLOCKED: min_patch_size\n") return(FALSE) } # Check if removal would increase cost if (removal_increases_cost(unit_id, minpatch_data)) { + if (isTRUE(minpatch_data$debug_boundary)) cat("[DEBUG can_remove] unit", unit_id, "BLOCKED: boundary_cost\n") return(FALSE) } # Check if removal would split patch into non-viable pieces if (removal_splits_patch_nonviably(unit_id, minpatch_data)) { + if (isTRUE(minpatch_data$debug_boundary)) cat("[DEBUG can_remove] unit", unit_id, "BLOCKED: split_patch\n") return(FALSE) } @@ -381,6 +417,11 @@ removal_increases_cost <- function(unit_id, minpatch_data) { boundary_matrix <- minpatch_data$boundary_matrix boundary_penalty <- minpatch_data$boundary_penalty + debug_boundary <- isTRUE(minpatch_data$debug_boundary) + debug_every <- if (!is.null(minpatch_data$debug_boundary_every)) minpatch_data$debug_boundary_every else 200 + debug_iter <- if (!is.null(minpatch_data$debug_iteration)) minpatch_data$debug_iteration else NA_integer_ + + if (boundary_penalty == 0) { return(FALSE) # No boundary cost to consider } @@ -391,7 +432,7 @@ removal_increases_cost <- function(unit_id, minpatch_data) { unit_idx <- as.integer(unit_id) neighbor_indices <- which(boundary_matrix[unit_idx, ] > 0) neighbor_ids <- colnames(boundary_matrix)[neighbor_indices] - + # Calculate change in boundary cost boundary_change <- 0 @@ -413,8 +454,22 @@ removal_increases_cost <- function(unit_id, minpatch_data) { boundary_cost_change <- boundary_change * boundary_penalty total_cost_change <- boundary_cost_change - unit_cost + blocked <- (total_cost_change > 0) + + if (debug_boundary && !is.na(debug_iter) && (debug_iter %% debug_every == 0)) { + cat( + "[DEBUG boundary] unit:", unit_id, + "unit_cost:", round(unit_cost, 6), + "boundary_change:", round(as.numeric(boundary_change), 6), + "penalty:", boundary_penalty, + "boundary_cost_change:", round(as.numeric(boundary_cost_change), 6), + "total_cost_change:", round(as.numeric(total_cost_change), 6), + "blocked:", blocked, "\n" + ) + } + + return(blocked) - return(total_cost_change > 0) } #' Check if removing unit would split patch into non-viable pieces @@ -426,26 +481,68 @@ removal_increases_cost <- function(unit_id, minpatch_data) { #' @keywords internal removal_splits_patch_nonviably <- function(unit_id, minpatch_data) { - unit_dict <- minpatch_data$unit_dict + unit_dict <- minpatch_data$unit_dict boundary_matrix <- minpatch_data$boundary_matrix - area_dict <- minpatch_data$area_dict + area_dict <- minpatch_data$area_dict min_patch_size <- minpatch_data$min_patch_size - # Create a temporary minpatch_data without this unit - temp_minpatch_data <- minpatch_data - temp_minpatch_data$unit_dict <- unit_dict - temp_minpatch_data$unit_dict[[unit_id]]$status <- 0 + # Need igraph for connected components + if (!requireNamespace("igraph", quietly = TRUE)) { + stop("Please install igraph: install.packages('igraph')") + } - # Find patches in the modified solution - new_patch_dict <- make_patch_dict(temp_minpatch_data) + # Build CURRENT patches and find the patch that contains unit_id + old_patch_dict <- make_patch_dict(minpatch_data) - # Check if any new patches are too small - for (patch_id in names(new_patch_dict)) { - patch_area <- sum(area_dict[new_patch_dict[[patch_id]]$unit_ids]) - if (as.numeric(patch_area) < as.numeric(min_patch_size)) { - return(TRUE) + old_patch_id <- NA_character_ + for (pid in names(old_patch_dict)) { + if (unit_id %in% old_patch_dict[[pid]]$unit_ids) { + old_patch_id <- pid + break } } - return(FALSE) + # If unit is not in any patch, it can't "split a patch" + if (is.na(old_patch_id)) return(FALSE) + + old_units <- old_patch_dict[[old_patch_id]]$unit_ids + + # If patch is tiny (1 unit), removing it doesn't "split" it + remaining_units <- setdiff(old_units, unit_id) + if (length(remaining_units) <= 1) return(FALSE) + + # Build adjacency within the OLD patch only (after removing unit_id) + # Treat any non-zero off-diagonal boundary as adjacency + idx <- as.integer(remaining_units) + submat <- boundary_matrix[idx, idx, drop = FALSE] + + # Make a simple unweighted adjacency (TRUE/FALSE), drop diagonal + adj <- (submat > 0) + diag(adj) <- FALSE + + g <- igraph::graph_from_adjacency_matrix(adj, mode = "undirected") + comps <- igraph::components(g) + + n_comp <- comps$no + + # If it doesn't split into multiple components, it's not a "split_patch" + if (n_comp <= 1) return(FALSE) + + # Compute area per component + comp_ids <- comps$membership + comp_areas <- vapply(seq_len(n_comp), function(k) { + units_k <- remaining_units[comp_ids == k] + sum(area_dict[units_k]) + }, numeric(1)) + + if (isTRUE(minpatch_data$debug_boundary)) { + cat("[DEBUG split_patch] unit", unit_id, + "old_patch:", old_patch_id, + "n_components:", n_comp, + "min_comp_area:", min(comp_areas), + "min_patch_size:", as.numeric(min_patch_size), "\n") + } + + # Non-viable split if any fragment is below min patch size + any(as.numeric(comp_areas) < as.numeric(min_patch_size)) } diff --git a/README.Rmd b/README.Rmd index 5324328..460c223 100644 --- a/README.Rmd +++ b/README.Rmd @@ -14,19 +14,16 @@ knitr::opts_chunk$set( ``` # MinPatch for R + -[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) -[![Windows](https://github.com/SpatialPlanning/minpatch/actions/workflows/Windows.yaml/badge.svg)](https://github.com/SpatialPlanning/minpatch/actions/workflows/Windows.yaml) -[![Ubuntu](https://github.com/SpatialPlanning/minpatch/actions/workflows/Ubuntu.yaml/badge.svg)](https://github.com/SpatialPlanning/minpatch/actions/workflows/Ubuntu.yaml) -[![MacOS](https://github.com/SpatialPlanning/minpatch/actions/workflows/MacOS.yaml/badge.svg)](https://github.com/SpatialPlanning/minpatch/actions/workflows/MacOS.yaml) -[![issues - zoomss](https://img.shields.io/github/issues/SpatialPlanning/minpatch)](https://github.com/SpatialPlanning/minpatch/issues) -[![Codecov test coverage](https://codecov.io/gh/SpatialPlanning/minpatch/graph/badge.svg)](https://app.codecov.io/gh/SpatialPlanning/minpatch) - +[![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![Windows](https://github.com/SpatialPlanning/minpatch/actions/workflows/Windows.yaml/badge.svg)](https://github.com/SpatialPlanning/minpatch/actions/workflows/Windows.yaml) [![Ubuntu](https://github.com/SpatialPlanning/minpatch/actions/workflows/Ubuntu.yaml/badge.svg)](https://github.com/SpatialPlanning/minpatch/actions/workflows/Ubuntu.yaml) [![MacOS](https://github.com/SpatialPlanning/minpatch/actions/workflows/MacOS.yaml/badge.svg)](https://github.com/SpatialPlanning/minpatch/actions/workflows/MacOS.yaml) [![issues - zoomss](https://img.shields.io/github/issues/SpatialPlanning/minpatch)](https://github.com/SpatialPlanning/minpatch/issues) [![Codecov test coverage](https://codecov.io/gh/SpatialPlanning/minpatch/graph/badge.svg)](https://app.codecov.io/gh/SpatialPlanning/minpatch) + + **Note: This is still a work in progress and significant bugs may be present. Use with caution** -More information on the original implemention of MinPatch for Marxan and QGIS is available here: https://cluz-systematic-conservation-planning.github.io +More information on the original implemention of MinPatch for Marxan and QGIS is available here: ## Overview @@ -38,18 +35,18 @@ MinPatch is a post-processing tool for conservation planning solutions that ensu Conservation planning software like Marxan and prioritizr can produce solutions with many small, fragmented protected areas. While these solutions may be mathematically optimal, small protected areas are often: -- Less ecologically viable -- More expensive to manage -- More vulnerable to edge effects -- Less resilient to disturbances +- Less ecologically viable +- More expensive to manage +- More vulnerable to edge effects +- Less resilient to disturbances ### The Solution MinPatch addresses this by post-processing conservation solutions through three stages: -1. **Remove Small Patches**: Eliminate protected areas smaller than a minimum size threshold -2. **Add New Patches**: Add new areas to meet conservation targets using the BestPatch algorithm -3. **Simulated Whittling**: Remove unnecessary planning units while maintaining constraints +1. **Remove Small Patches**: Eliminate protected areas smaller than a minimum size threshold +2. **Add New Patches**: Add new areas to meet conservation targets using the BestPatch algorithm +3. **Simulated Whittling**: Remove unnecessary planning units while maintaining constraints ## Installation @@ -58,15 +55,14 @@ MinPatch addresses this by post-processing conservation solutions through three pak::pak("SpatialPlanning/minpatch") ``` - ## Key Features -- **Full MinPatch Algorithm**: Complete implementation of all three stages -- **prioritizr Integration**: Seamless workflow with prioritizr solutions -- **Locked Constraints Support**: Automatically respects locked-in and locked-out constraints from prioritizr -- **Flexible Parameters**: Control minimum patch sizes, patch radius, and boundary penalties -- **Comprehensive Reporting**: Detailed statistics and comparisons -- **Visualization Support**: Plot results with ggplot2 (optional) +- **Full MinPatch Algorithm**: Complete implementation of all three stages +- **prioritizr Integration**: Seamless workflow with prioritizr solutions +- **Locked Constraints Support**: Automatically respects locked-in and locked-out constraints from prioritizr +- **Flexible Parameters**: Control minimum patch sizes, patch radius, and boundary penalties +- **Comprehensive Reporting**: Detailed statistics and comparisons +- **Visualization Support**: Plot results with ggplot2 (optional) ## Algorithm Details @@ -74,9 +70,9 @@ pak::pak("SpatialPlanning/minpatch") MinPatch automatically respects locked-in and locked-out constraints from prioritizr problems: -- **Locked-in constraints** (from `add_locked_in_constraints()`): Planning units that are locked-in will never be removed, regardless of patch size or during the whittling stage. These units are treated as "conserved" areas that must be retained in the final solution. +- **Locked-in constraints** (from `add_locked_in_constraints()`): Planning units that are locked-in will never be removed, regardless of patch size or during the whittling stage. These units are treated as "conserved" areas that must be retained in the final solution. -- **Locked-out constraints** (from `add_locked_out_constraints()`): Planning units that are locked-out will never be selected, even when adding new patches to meet conservation targets. These units are completely excluded from consideration. +- **Locked-out constraints** (from `add_locked_out_constraints()`): Planning units that are locked-out will never be selected, even when adding new patches to meet conservation targets. These units are completely excluded from consideration. If locked-in units form patches smaller than `min_patch_size`, a warning will be issued, but these units will still be preserved in the solution. @@ -88,14 +84,14 @@ Identifies connected components (patches) in the solution and removes those smal Uses the BestPatch scoring system to add new patches: -1. Calculate current conservation levels for each feature -2. Identify features with unmet targets -3. Score potential patches based on their contribution to targets relative to cost -4. Add the highest-scoring patch and repeat +1. Calculate current conservation levels for each feature +2. Identify features with unmet targets +3. Score potential patches based on their contribution to targets relative to cost +4. Add the highest-scoring patch and repeat The BestPatch score is calculated as: -``` +``` Score = Σ(feature_contribution / target_gap) / patch_cost ``` @@ -103,19 +99,33 @@ Score = Σ(feature_contribution / target_gap) / patch_cost Removes unnecessary planning units through an iterative process: -1. Identify edge units (on the boundary of selected areas) -2. Calculate whittling scores based on feature importance -3. Remove units that don't violate constraints: - - Must not cause targets to be unmet - - Must not make patches too small - - Must not increase total cost (if boundary penalty > 0) - - Must not split patches into non-viable pieces +1. Identify edge units (on the boundary of selected areas) +2. Calculate whittling scores based on feature importance +3. Remove units that don't violate constraints: + - Must not cause targets to be unmet + - Must not make patches too small + - Must not increase total cost (if boundary penalty \> 0) + - Must not split patches into non-viable pieces + +### User inputs (parameters that shape patch structure) + +MinPatch uses a small set of user-defined parameters that determine how patch structure is modified to reduce fragmentation. Together, these settings control minimum patch size, how candidate patches are constructed, and how strongly irregular boundaries are penalised. + +- **`min_patch_size`**: The minimum allowable patch size. Patches smaller than this threshold are removed in Stage 1 (except where locked-in units apply), and removals in Stage 3 are blocked if they would cause any patch (or split fragment) to drop below this size. + +- **`patch_radius`**: Controls how candidate patches are constructed in Stage 2 (BestPatch). Larger radii generally favour adding more compact patches (fewer isolated units), but can also increase the total area added. + +- **`boundary_penalty`** *(optional)*: Controls how strongly MinPatch discourages irregular patch edges during Stage 3 (whittling). Conceptually, `boundary_penalty` converts patch perimeter (boundary length) into the same “currency” as planning-unit cost, so edge length can be traded off against area/opportunity cost. When `boundary_penalty > 0`, a candidate edge unit is only removed if doing so does not increase the boundary-penalised objective: + + $$ \Delta \text{Objective} = (\text{boundary_penalty} \times \Delta \text{perimeter}) - \text{unit_cost} $$ + + If $\Delta \text{Objective} > 0$, the removal is blocked because the increase in boundary penalty outweighs the cost saved by removing the unit (i.e., it would make the patch edge more “expensive”). Setting `boundary_penalty = 0` turns off this check. ## Citation If you use this package, please cite both the original paper and this implementation: -``` +``` Smith, R.J., Di Minin, E., Linke, S., Segan, D.B., Possingham, H.P. (2010). An approach for ensuring minimum protected area size in systematic conservation planning. Biological Conservation, 143(10), 2525-2531. @@ -126,7 +136,7 @@ Everett J.D., Richardson A.J., Smith R.J. (2025). _minpatch: Post-Processing for ## License -GPL (>= 3) +GPL (\>= 3) ## References @@ -134,6 +144,6 @@ Smith, R.J., Di Minin, E., Linke, S., Segan, D.B., Possingham, H.P. (2010). An a ## Getting Help -- Check the package vignette: `vignette("minpatch")` -- View function documentation: `?run_minpatch` -- Report bugs: [GitHub Issues](https://github.com/SpatialPlanning/minpatch/issues) +- Check the package vignette: `vignette("minpatch")` +- View function documentation: `?run_minpatch` +- Report bugs: [GitHub Issues](https://github.com/SpatialPlanning/minpatch/issues) diff --git a/Untitled.Rprofvis b/Untitled.Rprofvis deleted file mode 100644 index f06a1f6..0000000 --- a/Untitled.Rprofvis +++ /dev/null @@ -1,4637 +0,0 @@ - - - - -profvis - - - - - - - - - - - - - -
-
-
- - - - diff --git a/data-raw/flowchart_test.Rmd b/data-raw/flowchart_test.Rmd new file mode 100644 index 0000000..98b0f69 --- /dev/null +++ b/data-raw/flowchart_test.Rmd @@ -0,0 +1,56 @@ +--- +title: "Flowchart" +author: "Kris Esturas" +date: "2026-02-10" +output: + html_document: + self_contained: true +--- + +```{hello} +knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE) +``` + +## R Markdown + +This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . + +When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: + +```{install once if needed:} +# install.packages("DiagrammeR") + +library(DiagrammeR) + +mermaid(" +flowchart TD + A([START Stage 3: Simulated Whittling]) --> B[Initialise
iteration = 0
keystone_set = {}
feature_amounts = calculate_feature_conservation()] + B --> C{{iteration < max_iterations?}} + C -- No --> Z([STOP: max iterations reached]) + C -- Yes --> D[iteration++
sync unit_dict -> minpatch_data] + D --> E[edge_units = find_edge_units()] + E --> F[edge_units = edge_units \\\\ keystone_set] + F --> G{{edge_units empty?}} + G -- Yes --> Y([STOP: no edge units left]) + G -- No --> H[whittle_scores_raw = calculate_whittle_scores(edge_units, feature_amounts)] + H --> I[Split scores:
If score == 'needed for targets' -> add to keystone_set
Else keep numeric score] + I --> J{{Any numeric (scoreable) units?}} + J -- No --> X([STOP: all candidates keystone]) + J -- Yes --> K[candidate = unit with minimum whittling score] + K --> L{{can_remove_unit(candidate)?}} + L -- No --> M[Add candidate to keystone_set] + M --> C + L -- Yes --> N[Remove candidate:
unit_dict[candidate].status = 0
sync to minpatch_data] + N --> O[Update cached feature_amounts:
feature_amounts[f] -= abundance(candidate,f)] + O --> C +") +``` + +## Including Plots + +You can also embed plots, for example: + +```{} +``` + +Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. diff --git a/data-raw/test_Seychelles.R b/data-raw/test_Seychelles.R new file mode 100644 index 0000000..c55477c --- /dev/null +++ b/data-raw/test_Seychelles.R @@ -0,0 +1,406 @@ +# ============================================================ +# Using oceandatr + minpatch in a marine planning region +# Seychelles example (plain R script) +# ============================================================ + +# ---------------------------- +# 0. Setup +# ---------------------------- +options(scipen = 999) +set.seed(123) + +library(oceandatr) +library(terra) +library(sf) +library(dplyr) +library(ggplot2) +library(patchwork) +library(prioritizr) +#library(minpatch) +devtools::load_all() +library(purrr) +library(tibble) + +# optional (only if you actually use it later) +# install.packages("kableExtra") +# library(kableExtra) + +# ---------------------------- +# 1. Study region: Seychelles EEZ +# ---------------------------- +Seychelles_eez <- get_boundary(name = "Seychelles") + +# quick visual check +plot(Seychelles_eez[1], + col = "lightgreen", + main = "Seychelles EEZ", + axes = TRUE) + +# ---------------------------- +# 2. Choose equal-area projection +# ---------------------------- +projection_Seychelles <- "+proj=laea +lon_0=55 +lat_0=-4.5 +datum=WGS84 +units=m +no_defs" + +# ---------------------------- +# 3. Create planning-unit grid +# ---------------------------- +Seychelles_grid <- get_grid( + boundary = Seychelles_eez, + resolution = 30000, # 30 km grid for runtime; reduce for finer resolution + crs = projection_Seychelles +) + +# project EEZ for plotting +Seychelles_eez_proj <- Seychelles_eez %>% + st_transform(crs = projection_Seychelles) %>% + st_geometry() + +# plot grid +terra::plot(Seychelles_grid, + col = "gold3", + axes = FALSE, + legend = FALSE, + main = "Seychelles spatial grid (30 km)") +plot(Seychelles_eez_proj, + add = TRUE, + border = "black", + lwd = 1) + +# convert grid to sf polygons (minpatch needs sf) +Seychelles_pu <- Seychelles_grid %>% + stars::st_as_stars() %>% + sf::st_as_sf() %>% + dplyr::mutate( + id = dplyr::row_number(), + cost = as.numeric(sf::st_area(.)) / 1e6 # cost = area in km^2 + ) %>% + dplyr::select(-layer) + +# ---------------------------- +# 4. Build marine features with oceandatr +# ---------------------------- +set.seed(500) # reproducibility for enviro zones sampling + +feature_stack <- get_features(spatial_grid = Seychelles_grid) %>% + remove_empty_layers() + +# tidy names +names(feature_stack) <- gsub("_", " ", names(feature_stack)) %>% + stringr::str_to_sentence() + +# (optional) quick look: plot first few layers +# terra::plot(feature_stack[[1:4]]) + +# convert features to sf table (drop geometry) +features_df <- feature_stack %>% + stars::st_as_stars() %>% + sf::st_as_sf() %>% + dplyr::mutate(id = dplyr::row_number()) %>% + sf::st_drop_geometry() + +# combine features + PU polygons +Seychelles_sf <- Seychelles_pu %>% + dplyr::left_join( + as.data.frame(features_df), + by = "id" + ) + +# feature column names (excluding id) +feature_names <- names(features_df) %>% + dplyr::setdiff("id") + +# ---------------------------- +# 5. Build + solve prioritizr baseline +# ---------------------------- +p_base <- prioritizr::problem( + x = Seychelles_sf, + features = feature_names, + cost_column = "cost" +) %>% + add_min_set_objective() %>% + add_relative_targets(0.30) %>% + add_binary_decisions() %>% + add_rsymphony_solver(verbose = FALSE) + +t_base <- system.time({ + s_base <- solve(p_base) +}) + +p_base_plot <- plot_prioritizr(s_base) + + ggtitle("Baseline (no MinPatch)") + +print(p_base_plot) + +# ---------------------------- +# 6. MinPatch parameters +# ---------------------------- +median_pu_area_m2 <- median(st_area(Seychelles_sf)) +median_pu_area_km2 <- median_pu_area_m2 / 1e6 + +multipliers <- c(5, 10, 20) + +patch_sizes_m2 <- multipliers * median_pu_area_m2 +patch_sizes_km2 <- patch_sizes_m2 / 1e6 + +patch_radius <- sqrt(median_pu_area_m2 * 10 / pi) + +minpatch_param_summary <- tibble::tibble( + multiplier = multipliers, + min_patch_m2 = patch_sizes_m2, + min_patch_km2 = patch_sizes_km2, + median_pu_m2 = median_pu_area_m2, + median_pu_area_km2 = median_pu_area_km2 +) + +cat("\nMinPatch parameters:\n") +cat("- Median planning unit area:", round(median_pu_area_km2, 3), "km^2\n\n") + +for (i in seq_along(multipliers)) { + cat("Multiplier:", multipliers[i], "x median PU area\n") + cat(" - Minimum patch size:", round(patch_sizes_km2[i], 2), "km^2\n") + cat(" - Corresponds to ≈", round(patch_sizes_km2[i] / median_pu_area_km2, 2), "planning units\n\n") +} + +median_pu_length <- sqrt(median_pu_area_m2) +patch_radius_cells <- patch_radius / median_pu_length + +cat("Patch radius used:\n") +cat(" -", round(patch_radius, 1), "m (≈", round(patch_radius / 1000, 2), "km)\n") +cat(" - Corresponds to ≈", round(patch_radius_cells, 2), "median planning-unit lengths\n") + +# ---------------------------- +# 7. Run MinPatch across patch-size multipliers +# ---------------------------- +minpatch_results <- vector("list", length(patch_sizes_m2)) +minpatch_times <- numeric(length(patch_sizes_m2)) + +for (i in seq_along(patch_sizes_m2)) { + + cat("\n============================================\n") + cat("Running MinPatch with min patch area ~", + round(patch_sizes_km2[i], 2), "km^2 (", + multipliers[i], "x median PU)\n") + cat("============================================\n") + + t_mp <- system.time({ + minpatch_results[[i]] <- run_minpatch( + prioritizr_problem = p_base, + prioritizr_solution = s_base, + min_patch_size = patch_sizes_m2[i], + patch_radius = patch_radius, + boundary_penalty = 0, + remove_small_patches = TRUE, + add_patches = TRUE, + whittle_patches = TRUE, + verbose = TRUE + ) + }) + + minpatch_times[i] <- t_mp[["elapsed"]] +} + +names(minpatch_results) <- paste0("minpatch_", multipliers, "x") +names(minpatch_times) <- paste0("minpatch_", multipliers, "x") + +# ---------------------------- +# 8. Plot baseline vs MinPatch solutions (2x2) +# ---------------------------- +get_solution_with_solution1 <- function(x) { + sol <- x$solution + extra_cols <- setdiff(names(sol), names(x$planning_units)) + if (!"solution_1" %in% names(sol) && length(extra_cols) >= 1) { + sol$solution_1 <- sol[[extra_cols[1]]] + } + sol +} + +sol_list <- purrr::map(minpatch_results, get_solution_with_solution1) + +p_list <- purrr::map2( + sol_list, + multipliers, + ~ plot_prioritizr(.x) + + ggtitle(paste0("MinPatch: ", .y, "× median PU area")) +) + +p_1 <- p_list[[1]] +p_2 <- p_list[[2]] +p_3 <- p_list[[3]] + +p_base_plot <- plot_prioritizr(s_base) + + ggtitle("Baseline (no MinPatch)") + +print( + (p_base_plot | p_1) / + (p_2 | p_3) +) + + + + + + + + +# ---------------------------- +# 9. Boundary penalty experiments (result3, result4, result5) +# ---------------------------- +median_area <- median(st_area(Seychelles_sf)) +min_patch_size <- median_area * 5 +patch_radius_bp <- sqrt(median_area * 10) + +t3 <- system.time({ + result3 <- run_minpatch( + prioritizr_problem = p_base, + prioritizr_solution = s_base, + min_patch_size = min_patch_size, + patch_radius = patch_radius_bp, + boundary_penalty = 0, + remove_small_patches = TRUE, + add_patches = TRUE, + whittle_patches = TRUE, + verbose = TRUE, + debug_boundary = TRUE, + debug_boundary_every = 50 + ) +}) +cat("result3 runtime (sec):", t3[["elapsed"]], "\n") + +t4 <- system.time({ + result4 <- run_minpatch( + prioritizr_problem = p_base, + prioritizr_solution = s_base, + min_patch_size = min_patch_size, + patch_radius = patch_radius_bp, + boundary_penalty = 1e-5, + remove_small_patches = TRUE, + add_patches = TRUE, + whittle_patches = TRUE, + verbose = TRUE, + debug_boundary = TRUE, + debug_boundary_every = 50 + ) +}) +cat("result4 runtime (sec):", t4[["elapsed"]], "\n") + +t5 <- system.time({ + result5 <- run_minpatch( + prioritizr_problem = p_base, + prioritizr_solution = s_base, + min_patch_size = min_patch_size, + patch_radius = patch_radius_bp, + boundary_penalty = 10, + remove_small_patches = TRUE, + add_patches = TRUE, + whittle_patches = TRUE, + verbose = TRUE, + debug_boundary = TRUE, + debug_boundary_every = 50 + ) +}) +cat("result5 runtime (sec):", t5[["elapsed"]], "\n") + +# ---------------------------- +# 10. Plot baseline vs boundary-penalty MinPatch runs +# ---------------------------- +boundary_penalties <- c(0, 1e-5, 10) +boundary_results <- list(result3, result4, result5) + +sol_list_bp <- purrr::map(boundary_results, get_solution_with_solution1) + +p_base_plot <- plot_prioritizr(s_base) + + ggtitle("Baseline (no MinPatch)") + +p_list_bp <- purrr::map2( + sol_list_bp, + boundary_penalties, + ~ plot_prioritizr(.x) + + ggtitle(paste0("MinPatch: boundary penalty = ", .y)) +) + +p_bp1 <- p_list_bp[[1]] +p_bp2 <- p_list_bp[[2]] +p_bp3 <- p_list_bp[[3]] + +print( + (p_base_plot | p_bp1) / + (p_bp2 | p_bp3) +) + +# ---------------------------- +# 11. Compare solutions table helper + summary for boundary penalty runs +# ---------------------------- +get_metric <- function(overall, column, metric) { + out <- overall[[column]][overall$Metric == metric] + if (length(out) == 0) NA_real_ else out +} + +minpatch_results_bp <- list(result3, result4, result5) +minpatch_runtimes <- c(t3[["elapsed"]], t4[["elapsed"]], t5[["elapsed"]]) + +overall0 <- compare_solutions(minpatch_results_bp[[1]])$overall + +baseline_row <- tibble::tibble( + scenario = "baseline", + boundary_penalty = NA_real_, + selected_pu = get_metric(overall0, "Original", "Selected Planning Units"), + total_area = get_metric(overall0, "Original", "Total Area"), + n_patches = get_metric(overall0, "Original", "Number of Patches"), + valid_patches = get_metric(overall0, "Original", "Valid Patches (>= min size)"), + median_patch_size = get_metric(overall0, "Original", "Median Patch Size"), + planning_unit_cost = get_metric(overall0, "Original", "Planning Unit Cost"), + boundary_cost = get_metric(overall0, "Original", "Boundary Cost"), + total_cost = get_metric(overall0, "Original", "Total Cost"), + runtime_sec = t_base[["elapsed"]] +) + +minpatch_rows <- purrr::pmap_dfr( + list(minpatch_results_bp, as.list(boundary_penalties), as.list(minpatch_runtimes)), + function(res, bp, rt) { + overall <- compare_solutions(res)$overall + + tibble::tibble( + scenario = paste0("minpatch_bp", bp), + boundary_penalty = bp, + selected_pu = get_metric(overall, "MinPatch", "Selected Planning Units"), + total_area = get_metric(overall, "MinPatch", "Total Area"), + n_patches = get_metric(overall, "MinPatch", "Number of Patches"), + valid_patches = get_metric(overall, "MinPatch", "Valid Patches (>= min size)"), + median_patch_size = get_metric(overall, "MinPatch", "Median Patch Size"), + planning_unit_cost = get_metric(overall, "MinPatch", "Planning Unit Cost"), + boundary_cost = get_metric(overall, "MinPatch", "Boundary Cost"), + total_cost = get_metric(overall, "MinPatch", "Total Cost"), + runtime_sec = rt + ) + } +) + +solution_summary_bp <- dplyr::bind_rows(baseline_row, minpatch_rows) +print(solution_summary_bp) +View(solution_summary_bp) +# ---------------------------- +# 12. Individual summaries loop (optional) +# ---------------------------- +for (i in seq_along(minpatch_results_bp)) { + res <- minpatch_results_bp[[i]] + bp <- boundary_penalties[i] + + cat("\n\n====================================\n") + cat("Scenario: boundary penalty =", format(bp, scientific = TRUE), "\n") + cat("====================================\n") + + cat("\n-- MinPatch processing summary --\n") + print_minpatch_summary(res) + + comparison <- compare_solutions(res) + + cat("\n-- Overall comparison --\n") + print(comparison$overall) + + cat("\n-- Feature-level comparison --\n") + print(comparison$features) + + cat("\n-- Feature change summary --\n") + print(comparison$summary) +} + diff --git a/data-raw/test_boundary_penalty.R b/data-raw/test_boundary_penalty.R new file mode 100644 index 0000000..138cff5 --- /dev/null +++ b/data-raw/test_boundary_penalty.R @@ -0,0 +1,251 @@ +library(prioritizr) +library(prioritizrdata) +library(terra) +library(sf) +library(ggplot2) +library(dplyr) + +#library(minpatch) + +rm(list = c( + "compare_solutions", + "generate_minpatch_report", + "plot_minpatch", + "plot_prioritizr", + "print_minpatch_summary", + "run_minpatch" +)) + +devtools::load_all() +# Sourcing out the scripts locally +#source("R/utils-pipe.R") +#source("R/data_structures.R") +#source("R/cost_functions.R") +#source("R/patch_functions.R") +#source("R/whittling_functions.R") +#source("R/minpatch.R") +#source("R/output.R") + + +# load data +tas_pu <- get_tas_pu() %>% + mutate(cost = cost*10000) + +# At present minpatch works with sf objects. Here we convert the data to sf. +tas_features <- get_tas_features() %>% + stars::st_as_stars() %>% + sf::st_as_sf() + +tas <- sf::st_interpolate_aw(tas_features, tas_pu, extensive = FALSE, keep_NA = FALSE, na.rm = FALSE) %>% + st_join(tas_pu, join = st_equals) + + +features = tas %>% + sf::st_drop_geometry() %>% + dplyr::select(-all_of(c("id", "cost", "locked_in", "locked_out"))) %>% + names() + +# Convert data to binary again +tas <- tas %>% + mutate(across(all_of(features), ~ if_else(.x > 0, 1, 0))) + +p <- problem(tas, features = features, cost_column = "cost") %>% + add_min_set_objective() %>% + add_relative_targets(0.30) %>% # 30% of each feature + add_binary_decisions() %>% + add_default_solver(verbose = FALSE) + +s <- solve(p) + +base<-plot_prioritizr(s) + + labs(subtitle = "Base Solution") + + + + +# Calculate reasonable parameters based on planning unit characteristics +median_area <- median(st_area(tas)) +min_patch_size <- median_area * 4 +patch_radius <- sqrt(median_area * 10) + + + +result <- run_minpatch( + prioritizr_problem = p, + prioritizr_solution = s, + min_patch_size = min_patch_size, + patch_radius = patch_radius, + boundary_penalty = 0, + remove_small_patches = TRUE, + add_patches = TRUE, + whittle_patches = TRUE, + verbose = TRUE, + debug_boundary = TRUE, + debug_boundary_every = 50 +) + + +result3 <- run_minpatch( + prioritizr_problem = p, + prioritizr_solution = s, + min_patch_size = min_patch_size, + patch_radius = patch_radius, + boundary_penalty = 1, + remove_small_patches = TRUE, + add_patches = TRUE, + whittle_patches = TRUE, + verbose = TRUE, + debug_boundary = TRUE, + debug_boundary_every = 50 +) + +result4 <- run_minpatch( + prioritizr_problem = p, + prioritizr_solution = s, + min_patch_size = min_patch_size, + patch_radius = patch_radius, + boundary_penalty = 10, + remove_small_patches = TRUE, + add_patches = TRUE, + whittle_patches = TRUE, + verbose = TRUE, + debug_boundary = TRUE, + debug_boundary_every = 50 +) + + + +patchwork::wrap_plots(plot_minpatch(result, title = "Boundary Penalty: 0"), + plot_minpatch(result3, title = "Boundary Penalty: 1"), + plot_minpatch(result4, title = "Boundary Penalty: 10"), + guides = "collect", + ncol = 3) & + theme(legend.position = "bottom") + + +result_solution <- result$solution +result_solution <- result_solution |> + dplyr::rename(solution_1 = minpatch) + + + +result3_solution <- result3$solution +result3_solution <- result3_solution |> + dplyr::rename(solution_1 = minpatch) + + +result4_solution <- result4$solution +result4_solution <- result4_solution |> + dplyr::rename(solution_1 = minpatch) + + + +result_plot <- plot_prioritizr(result_solution) + + labs(subtitle = "MinPatch (4x): Boundary Penalty: 0") + +result3_plot <- plot_prioritizr(result3_solution) + + labs(subtitle = "MinPatch (4x): Boundary Penalty: 1") + +result4_plot <- plot_prioritizr(result4_solution) + + labs(subtitle = "MinPatch (4x): Boundary Penalty: 10") + +library(patchwork) +(base | result_plot | + result3_plot | result4_plot) + + plot_layout(nrow = 2, guides = "collect") & + theme(legend.position = "bottom") + + + + + + +# this is for result, boundary penalty = 0 +# Looking at the results +print_minpatch_summary(result) + +# Compare original vs MinPatch solutions +comparison <- compare_solutions(result) + +# Print overall comparison +cat("=== Overall Solution Comparison ===\n") +print(comparison$overall) + +# Print feature-level comparison +cat("\n=== Feature-Level Area Comparison ===\n") +print(comparison$features) + +# Print summary statistics +cat("\n=== Feature Change Summary ===\n") +print(comparison$summary) + + + +# this is for result, boundary penalty = 1 +# Looking at the results +print_minpatch_summary(result3) + +# Compare original vs MinPatch solutions +comparison <- compare_solutions(result3) + +# Print overall comparison +cat("=== Overall Solution Comparison ===\n") +print(comparison$overall) + +# Print feature-level comparison +cat("\n=== Feature-Level Area Comparison ===\n") +print(comparison$features) + +# Print summary statistics +cat("\n=== Feature Change Summary ===\n") +print(comparison$summary) + + + +# this is for result, boundary penalty = 10 +# Looking at the results +print_minpatch_summary(result4) + +# Compare original vs MinPatch solutions +comparison <- compare_solutions(result4) + +# Print overall comparison +cat("=== Overall Solution Comparison ===\n") +print(comparison$overall) + +# Print feature-level comparison +cat("\n=== Feature-Level Area Comparison ===\n") +print(comparison$features) + +# Print summary statistics +cat("\n=== Feature Change Summary ===\n") +print(comparison$summary) + + + + + +library(dplyr) +library(purrr) + +# 1. Extract MinPatch column and rename +extract_minpatch <- function(comp, name){ + comp$overall %>% + select(Metric, MinPatch) %>% + rename(!!name := MinPatch) +} + +# 2. Build summary table +summary_table <- reduce( + list( + extract_minpatch(compare_solutions(result), "result"), + extract_minpatch(compare_solutions(result3), "result3"), + extract_minpatch(compare_solutions(result4), "result4") + ), + full_join, + by = "Metric" +) + +# 3. Print +print(summary_table) + diff --git a/docs/404.html b/docs/404.html index d14a441..420744b 100644 --- a/docs/404.html +++ b/docs/404.html @@ -34,7 +34,9 @@ diff --git a/docs/articles/boundary-penalty.html b/docs/articles/boundary-penalty.html new file mode 100644 index 0000000..e52e08c --- /dev/null +++ b/docs/articles/boundary-penalty.html @@ -0,0 +1,420 @@ + + + + + + + +Boundary penalty in MinPatch • minpatch + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + +
+

Overview +

+

boundary_penalty is an optional parameter that +discourages irregular (high-perimeter) patch shapes during Stage +3 (Simulated Whittling). The key idea is that +boundary_penalty converts boundary length +(perimeter) into the same “currency” as planning-unit cost, so +MinPatch can trade off saving cost against creating more edge.

+

In practice:

+
    +
  • higher boundary_penalty → stronger preference for +smooth, compact patches (less edge)

  • +
  • lower boundary_penalty → more willingness to accept +jagged edges if they reduce planning-unit cos

  • +
+
+
+

Where it is used in MinPatch +

+

MinPatch does not re-run the optimiser in Stage 3. +Instead, it tests removing one edge planning unit at a +time and accepts the removal only if:

+
    +
  1. Targets remain met

  2. +
  3. Minimum patch-size requirements remain met (including after +splits)

  4. +
  5. The boundary-penalised objective does not +increase (only if boundary_penalty > 0)

  6. +
+

This means Stage 3 uses a local “delta” test (change +in objective) rather than solving a new optimisation problem.

+
+
+

Decision rule (the thing to remember) +

+

Conceptually, Stage 3 evaluates an objective of the form:

+

Objective=(PU cost)+boundary_penalty×(total perimeter) +\text{Objective} = \sum(\text{PU cost}) + \text{boundary_penalty} \times (\text{total perimeter}) +

+

When testing removal of a single planning unit, the code computes a +local change:

+

ΔObjective=(boundary_penalty×Δperimeter)unit_cost \Delta \text{Objective} = (\text{boundary_penalty} \times \Delta \text{perimeter}) - \text{unit_cost}

+

- If +ΔObjective>0\Delta \text{Objective} > 0 +→ objective increased → block removal
+- If +ΔObjective0\Delta \text{Objective} \le 0 +→ objective stayed the same or decreased → allow +removal (assuming constraints pass)

+
+

Why this looks like “total_cost_change” in the code +

+

In the code, you’ll often see a value like:

+

total_cost_change = boundary_cost_change - unit_cost

+

That is exactly +ΔObjective\Delta \text{Objective} +written in code form:

+
    +
  • boundary_cost_change corresponds to +boundary_penalty × Δperimeter

  • +
  • unit_cost is the cost you save by removing the +unit

  • +
+

So: total_cost_change is the change in the +objective.

+
+
+
+

What does “same currency as cost” mean? +

+

The objective combines two different things:

+
    +
  1. Planning-unit cost (whatever you choose: +dollars, area, opportunity cost, etc.)

  2. +
  3. Perimeter (boundary length: km, m, or “edge +count” depending on your data)

  4. +
+

To add these together, boundary_penalty acts like a +conversion rate: it converts boundary length into “cost +units”.

+

Objective=(PU cost)+boundary_penalty×(total perimeter)\text{Objective} = \sum(\text{PU cost}) + \text{boundary_penalty} \times (\text{total perimeter})

+
+

If your PU cost is in dollars +

+
    +
  • PU cost might be: +$/PU\$ / \text{PU} +or +$/km2\$ / \text{km}^2

  • +
  • perimeter is in km

  • +
  • so boundary_penalty behaves like dollars per +km of edge: +$/km\$ / \text{km}

  • +
+

Interpretation:
boundary_penalty is the price you are willing to +pay to avoid 1 km of extra boundary.

+

Example: if boundary_penalty = 2000, then adding 1 km of +perimeter “costs” $2000 in the objective.

+
+
+

If your PU cost is area +

+

Sometimes “cost” is literally area (e.g., each PU cost = area in +km²). Then:

+
    +
  • PU cost is in km²

  • +
  • perimeter is in km

  • +
  • boundary_penalty behaves like km² per +km

  • +
+

You can think of this as “equivalent area cost per km of edge”.

+

Interpretation:
boundary_penalty tells you how much extra area +cost you are willing to accept to avoid 1 km of extra boundary.

+

So if boundary_penalty = 0.5, then an extra 1 km of edge +is treated like adding 0.5 km² of cost.

+
+
+

The simplest way to remember it +

+
    +
  • unit_cost = the savings you get from removing a +PU

  • +
  • boundary_penalty × Δperimeter = the “edge bill” +you pay (or save) when boundary changes

  • +
+

And Stage 3 only removes a PU when:

+

ΔObjective=(boundary_penalty×Δperimeter)unit_cost0 \Delta \text{Objective} = (\text{boundary_penalty} \times \Delta \text{perimeter}) - \text{unit_cost} \le 0

+

So you can read it as:

+
+

Only remove the PU if the edge bill doesn’t outweigh the cost +saved.

+
+
+
+
+

Worked examples (with explicit total perimeter) +

+

Assumptions for both examples:

+
    +
  • 4-neighbour adjacency (up/down/left/right)
  • +
  • each shared cell edge has length 1 km +
  • +
  • +total perimeter is the perimeter of the whole +selected patch (in km)
  • +
+

A very useful identity (grid perimeter change):

+

If a removed PU has +kkselected neighbours (0–4), then removing it changes +perimeter by:

+

Δperimeter=4+2k +\Delta \text{perimeter} = -4 + 2k +

+

So perimeter increases only when +k3k \ge 3 +(i.e., the PU was “shielding” three selected neighbours).

+
+
+

Example A — removing an edge PU reduces total perimeter +(allowed) +

+

Here the patch has two selected PUs in a line. U is an +edge unit because it touches . above, +below, and right.

+

Before removing U

+
| . | . |
+| S | U |
+| . | . |
+

Compute the total perimeter:

+
    +
  • Two adjacent cells have perimeter =6 km
    +(Each cell has 4 edges → 8 total, minus 2 for the shared edge → 6)
  • +
+

So:

+
    +
  • +PbeforeP{\text{before}}= +6 km
  • +
+

After removing U

+
| . | . |
+| S | . |
+| . | . |
+
    +
  • Single cell has perimeter = 4 km
  • +
+

So:

+
    +
  • PafterP{\text{after}} += 4 km

  • +
  • Δperimeter\Delta \text{perimeter} += 4 - 6 =−2 km

  • +
+

Now apply the Stage 3 boundary check:

+

Assume:

+
    +
  • boundary_penalty = 10

  • +
  • unit_cost = 5

  • +
+

ΔObjective=(boundary_penalty×Δperimeter)unit_cost= 10×(2)5=2\Delta \text{Objective} = (\text{boundary_penalty} \times \Delta \text{perimeter}) - \text{unit_cost}\ \text{= 10}\ \times \ (-2) - 5 = -2

+

Decision:

+
    +
  • +ΔObjective0\Delta \text{Objective} \le 0 +→ objective decreased → allow removal (if other +constraints pass).
  • +
+
+
+
+

Example B — removing an edge PU increases total perimeter (blocked), +but patch stays connected +

+

Here we use a 3×2 rectangle. Let U be the middle +PU on the bottom row.
U is an edge unit because it touches +. below (outside the rectangle).

+

Before removing U

+
| S | S | S |
+| S | U | S |
+

Compute the total perimeter:

+

A 3×2 rectangle has perimeter:

+
    +
  • +PbeforeP{\text{before}}= +2(3+2)=10 km
  • +
+

After removing U

+
| S | S | S |
+| S | . | S |
+

Important: the patch is still connected (you can +still walk between all remaining S cells via shared borders +on the top row).

+

Now compute the total perimeter after.

+

We can use the neighbour rule:

+
    +
  • U had +k=3k=3 +selected neighbours (left, right, and above are selected; below is +not).

  • +
  • Therefore:

  • +
+

Δperimeter=4+2k=4+2(3)=+2 +\Delta \text{perimeter} = -4 + 2k = -4 + 2(3) = +2 +

+

So:

+
    +
  • +Pafter=10+2=12P_{\text{after}} = 10 + 2 = 12 +km
    +
  • +
  • +Δperimeter=+2\Delta \text{perimeter} = +2 +km
  • +
+

Now apply the boundary check:

+

Assume:

+
    +
  • boundary_penalty = 10

  • +
  • unit_cost = 5

  • +
+

ΔObjective=10×(+2)5=+15\Delta \text{Objective} = 10 \times (+2) - 5 = +15 +

+

Decision:

+
    +
  • +ΔObjective>0\Delta \text{Objective} > 0 +→ objective increased → block removal +
  • +
+

Interpretation:
+Removing U saves cost, but it exposes new edge on +its three neighbours, increasing total perimeter enough that the +boundary-penalised objective gets worse.

+
+
+
+

Practical guidance for choosing boundary_penalty +

+

There isn’t one universal “correct” value because it depends on the +scale and units of your costs and boundary lengths. A practical +workflow:

+
    +
  1. Start with +boundary_penalty = 0
    +This turns off boundary blocking. Whittling then only uses targets + +patch-size + split viability rules.

  2. +
  3. Increase gradually and observe how solutions +change
    +As you increase it, removals that create extra edge become more likely +to be blocked, producing smoother patches.

  4. +
  5. Do a small sensitivity check
    +Try a few values spanning a small range (e.g., 0, low, medium, high) and +compare: ```

  6. +
+
    +
  • number of patches

  • +
  • mean/median patch size

  • +
  • total perimeter

  • +
  • total planning-unit cost retained/removed ```

  • +
+

If boundary_penalty is too high, Stage 3 can become +conservative: it may keep “bridges” or edge units because removing them +would raise the boundary term too much.

+
+
+

FAQ +

+

Does Stage 3 re-run the optimiser +(prioritizr)?
+No. Stage 3 does local, one-unit-at-a-time removal tests. It never +resolves a new optimisation problem.

+

Are we comparing costs to the Stage 2 +solution?
+Not directly. Stage 3 compares the objective before vs after +removing one candidate unit. It uses +ΔobjectiveΔ objective +(local change), not a full re-optimisation.

+

Is it “per patch” or “whole solution”?
+Conceptually the objective is for the whole solution (sum of PU costs + +boundary penalty term). In code, the change is computed locally using +the candidate’s neighbour relationships, because only local boundary +edges change when you remove one unit.

+

What if my costs are not area?
+That’s fine—boundary_penalty still converts perimeter into +the same units as your chosen cost layer. The interpretation becomes: +“how much cost am I willing to pay to avoid 1 km of extra edge?”

+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/docs/articles/boundary-penalty.md b/docs/articles/boundary-penalty.md new file mode 100644 index 0000000..f0b15ec --- /dev/null +++ b/docs/articles/boundary-penalty.md @@ -0,0 +1,339 @@ +# Boundary penalty in MinPatch + +## Overview + +`boundary_penalty` is an optional parameter that discourages irregular +(high-perimeter) patch shapes during **Stage 3 (Simulated Whittling)**. +The key idea is that `boundary_penalty` converts **boundary length +(perimeter)** into the same “currency” as planning-unit cost, so +MinPatch can trade off saving cost against creating more edge. + +In practice: + +- higher `boundary_penalty` → stronger preference for smooth, compact + patches (less edge) + +- lower `boundary_penalty` → more willingness to accept jagged edges if + they reduce planning-unit cos + +## Where it is used in MinPatch + +MinPatch does **not** re-run the optimiser in Stage 3. Instead, it tests +removing one **edge** planning unit at a time and accepts the removal +only if: + +1. Targets remain met + +2. Minimum patch-size requirements remain met (including after splits) + +3. The boundary-penalised objective does **not** increase (only if + `boundary_penalty > 0`) + +This means Stage 3 uses a **local “delta” test** (change in objective) +rather than solving a new optimisation problem. + +## Decision rule (the thing to remember) + +Conceptually, Stage 3 evaluates an objective of the form: + +``` math +\text{Objective} = \sum(\text{PU cost}) + \text{boundary_penalty} \times (\text{total perimeter}) +``` + +When testing removal of a single planning unit, the code computes a +local change: + +``` math + \Delta \text{Objective} = (\text{boundary_penalty} \times \Delta \text{perimeter}) - \text{unit_cost} +``` + +\- If $`\Delta \text{Objective} > 0`$ → objective increased → **block** +removal\ +- If $`\Delta \text{Objective} \le 0`$ → objective stayed the same or +decreased → **allow** removal (assuming constraints pass) + +### Why this looks like “total_cost_change” in the code + +In the code, you’ll often see a value like: + +`total_cost_change = boundary_cost_change - unit_cost` + +That is exactly $`\Delta \text{Objective}`$ written in code form: + +- `boundary_cost_change` corresponds to `boundary_penalty × Δperimeter` + +- `unit_cost` is the cost you save by removing the unit + +So: `total_cost_change` **is the change in the objective**. + +## What does “same currency as cost” mean? + +The objective combines two different things: + +1. **Planning-unit cost** (whatever you choose: dollars, area, + opportunity cost, etc.) + +2. **Perimeter** (boundary length: km, m, or “edge count” depending on + your data) + +To add these together, `boundary_penalty` acts like a **conversion +rate**: it converts boundary length into “cost units”. + +``` math +\text{Objective} = \sum(\text{PU cost}) + \text{boundary_penalty} \times (\text{total perimeter}) +``` + +### If your PU cost is in dollars + +- PU cost might be: $`\$ / \text{PU}`$ or $`\$ / \text{km}^2`$ + +- perimeter is in km + +- so `boundary_penalty` behaves like **dollars per km of edge**: + $`\$ / \text{km}`$ + +**Interpretation:**\ +`boundary_penalty` is the *price you are willing to pay* to avoid 1 km +of extra boundary. + +Example: if `boundary_penalty = 2000`, then adding 1 km of perimeter +“costs” \$2000 in the objective. + +### If your PU cost is area + +Sometimes “cost” is literally area (e.g., each PU cost = area in km²). +Then: + +- PU cost is in km² + +- perimeter is in km + +- `boundary_penalty` behaves like **km² per km** + +You can think of this as “equivalent area cost per km of edge”. + +**Interpretation:**\ +`boundary_penalty` tells you how much extra *area cost* you are willing +to accept to avoid 1 km of extra boundary. + +So if `boundary_penalty = 0.5`, then an extra 1 km of edge is treated +like adding 0.5 km² of cost. + +### The simplest way to remember it + +- **unit_cost** = the savings you get from removing a PU + +- **boundary_penalty × Δperimeter** = the “edge bill” you pay (or save) + when boundary changes + +And Stage 3 only removes a PU when: + +``` math + \Delta \text{Objective} = (\text{boundary_penalty} \times \Delta \text{perimeter}) - \text{unit_cost} \le 0 +``` + +So you can read it as: + +> Only remove the PU if the edge bill doesn’t outweigh the cost saved. + +## Worked examples (with explicit total perimeter) + +Assumptions for both examples: + +- 4-neighbour adjacency (up/down/left/right) +- each shared cell edge has length **1 km** +- `total perimeter` is the perimeter of the **whole selected patch** (in + km) + +A very useful identity (grid perimeter change): + +If a removed PU has $`k`$**selected neighbours** (0–4), then removing it +changes perimeter by: + +``` math +\Delta \text{perimeter} = -4 + 2k +``` + +So perimeter **increases** only when $`k \ge 3`$ (i.e., the PU was +“shielding” three selected neighbours). + +------------------------------------------------------------------------ + +### Example A — removing an edge PU reduces total perimeter (allowed) + +Here the patch has two selected PUs in a line. `U` is an **edge unit** +because it touches `.` above, below, and right. + +**Before removing U** + +``` text +| . | . | +| S | U | +| . | . | +``` + +Compute the **total perimeter**: + +- Two adjacent cells have perimeter =6 km\ + (Each cell has 4 edges → 8 total, minus 2 for the shared edge → 6) + +So: + +- $`P{\text{before}}`$= 6 km + +**After removing U** + +``` text +| . | . | +| S | . | +| . | . | +``` + +- Single cell has perimeter = 4 km + +So: + +- $`P{\text{after}}`$ = 4 km + +- $`\Delta \text{perimeter}`$ = 4 - 6 =−2 km + +Now apply the Stage 3 boundary check: + +Assume: + +- `boundary_penalty = 10` + +- `unit_cost = 5` + +``` math +\Delta \text{Objective} = (\text{boundary_penalty} \times \Delta \text{perimeter}) - \text{unit_cost}\ \text{= 10}\ \times \ (-2) - 5 = -2 +``` + +Decision: + +- $`\Delta \text{Objective} \le 0`$ → objective decreased → **allow + removal** (if other constraints pass). + +------------------------------------------------------------------------ + +### Example B — removing an edge PU increases total perimeter (blocked), but patch stays connected + +Here we use a 3×2 rectangle. Let `U` be the **middle PU on the bottom +row**.\ +`U` is an **edge unit** because it touches `.` below (outside the +rectangle). + +**Before removing U** + +``` text +| S | S | S | +| S | U | S | +``` + +Compute the **total perimeter**: + +A 3×2 rectangle has perimeter: + +- $`P{\text{before}}`$= 2(3+2)=10 km + +**After removing U** + +``` text +| S | S | S | +| S | . | S | +``` + +Important: the patch is **still connected** (you can still walk between +all remaining `S` cells via shared borders on the top row). + +Now compute the **total perimeter after**. + +We can use the neighbour rule: + +- `U` had $`k=3`$ selected neighbours (left, right, and above are + selected; below is not). + +- Therefore: + +``` math +\Delta \text{perimeter} = -4 + 2k = -4 + 2(3) = +2 +``` + +So: + +- $`P_{\text{after}} = 10 + 2 = 12`$ km\ +- $`\Delta \text{perimeter} = +2`$ km + +Now apply the boundary check: + +Assume: + +- `boundary_penalty = 10` + +- `unit_cost = 5` + +``` math +\Delta \text{Objective} = 10 \times (+2) - 5 = +15 +``` + +Decision: + +- $`\Delta \text{Objective} > 0`$ → objective increased → **block + removal** + +**Interpretation:**\ +Removing `U` saves cost, but it *exposes new edge* on its three +neighbours, increasing total perimeter enough that the +boundary-penalised objective gets worse. + +## Practical guidance for choosing `boundary_penalty` + +There isn’t one universal “correct” value because it depends on the +scale and units of your costs and boundary lengths. A practical +workflow: + +1. **Start with `boundary_penalty = 0`**\ + This turns off boundary blocking. Whittling then only uses targets + + patch-size + split viability rules. + +2. **Increase gradually** and observe how solutions change\ + As you increase it, removals that create extra edge become more + likely to be blocked, producing smoother patches. + +3. **Do a small sensitivity check**\ + Try a few values spanning a small range (e.g., 0, low, medium, high) + and compare: \`\`\` + +- number of patches + +- mean/median patch size + +- total perimeter + +- total planning-unit cost retained/removed \`\`\` + +If `boundary_penalty` is too high, Stage 3 can become conservative: it +may keep “bridges” or edge units because removing them would raise the +boundary term too much. + +## FAQ + +**Does Stage 3 re-run the optimiser (prioritizr)?**\ +No. Stage 3 does local, one-unit-at-a-time removal tests. It never +resolves a new optimisation problem. + +**Are we comparing costs to the Stage 2 solution?**\ +Not directly. Stage 3 compares the objective **before vs after removing +one candidate unit**. It uses $`Δ objective`$ (local change), not a full +re-optimisation. + +**Is it “per patch” or “whole solution”?**\ +Conceptually the objective is for the whole solution (sum of PU costs + +boundary penalty term). In code, the change is computed locally using +the candidate’s neighbour relationships, because only local boundary +edges change when you remove one unit. + +**What if my costs are not area?**\ +That’s fine—`boundary_penalty` still converts perimeter into the same +units as your chosen cost layer. The interpretation becomes: “how much +cost am I willing to pay to avoid 1 km of extra edge?” diff --git a/docs/articles/index.html b/docs/articles/index.html index 0ec549b..6801937 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -19,7 +19,9 @@