From 106756324b7996cb22a5f430db23fd6596ea3521 Mon Sep 17 00:00:00 2001 From: Weber Date: Tue, 24 Mar 2026 15:51:34 -0700 Subject: [PATCH 1/7] get rid of misnamed test nlcd files --- tests/testthat/test-lc_nlcd.R | 20 -------------------- tests/testthat/test-sc_nlcd.R | 20 -------------------- 2 files changed, 40 deletions(-) delete mode 100644 tests/testthat/test-lc_nlcd.R delete mode 100644 tests/testthat/test-sc_nlcd.R diff --git a/tests/testthat/test-lc_nlcd.R b/tests/testthat/test-lc_nlcd.R deleted file mode 100644 index 74e1759..0000000 --- a/tests/testthat/test-lc_nlcd.R +++ /dev/null @@ -1,20 +0,0 @@ -context("Test that lc_nlcd is pulling in StreamCat API data") - - -test_that("lc_get_data for a sample COMID returns a data frame", { - testthat::skip_on_cran() - df <- lc_get_nlcd(year='2019', aoi='catchment', - comid='23783629,23794487,23812618') - expect_true(exists("df")) - expect_equal(nrow(df), 3) - expect_equal(ncol(df), 17) -}) - -test_that("lc_get_data for a county and ws metrics returns a data frame", { - testthat::skip_on_cran() - df <- lc_get_nlcd(year='2006, 2019', aoi='watershed', - comid='23794487',showAreaSqKm=TRUE, showPctFull=TRUE) - expect_true(exists("df")) - expect_equal(nrow(df), 1) - expect_equal(ncol(df), 36) -}) diff --git a/tests/testthat/test-sc_nlcd.R b/tests/testthat/test-sc_nlcd.R deleted file mode 100644 index da1634f..0000000 --- a/tests/testthat/test-sc_nlcd.R +++ /dev/null @@ -1,20 +0,0 @@ -context("Test that sc_nlcd is pulling in StreamCat API data") - - -test_that("sc_get_data for a sample COMID returns a data frame", { - testthat::skip_on_cran() - df <- sc_get_nlcd(year='2019', aoi='cat', - comid='179,1337,1337420') - expect_true(exists("df")) - expect_equal(nrow(df), 3) - expect_equal(ncol(df), 17) -}) - -test_that("sc_get_data for a county and ws metrics returns a data frame", { - testthat::skip_on_cran() - df <- sc_get_nlcd(year='2006, 2019', aoi='ws', - county='41003') - expect_true(exists("df")) - expect_equal(nrow(df), 
632) - expect_equal(ncol(df), 33) -}) From 28ddcc942bdff1bf31fda8a60293bfc32bccbbc0 Mon Sep 17 00:00:00 2001 From: Marc Weber Date: Mon, 11 May 2026 12:16:17 -0700 Subject: [PATCH 2/7] Lake watersheds (#89) * starting lake watershed function in lake_watersheds branch * get rid of misnamed test nlcd files * tinkering with lake watersheds * added lc_get_watershed function to get watershed polygons from S3 * added lc_get_watershed function with optimizations * added vignette section for lc_get_watershed * added vignette for lc_get_watershed * updated lc_get_watershed function and added a section in the LakeCat article to describe * added tictoc to suggests in Description * trying to get rid of geoarrow dependency issue * got rid of accidental draft code still in lc_get_data * fixed deprecated functions and NAMESPACE * fixed depreacted functions and NAMESPACE * updating get_params functions * added test coverage for new lc_get_watershed function --- .github/workflows/coverage.yaml | 1 + DESCRIPTION | 7 +- NAMESPACE | 3 +- R/lc_get_data.R | 84 ++++----- R/lc_get_params.R | 1 + R/lc_get_watershed.R | 227 +++++++++++++++++++++++++ R/sc_get_data.R | 5 +- R/sc_get_params.R | 1 + man/lc_get_nlcd.Rd | 17 +- man/sc_get_data.Rd | 2 +- man/sc_get_nlcd.Rd | 4 +- tests/testthat/test-lc_get_watershed.R | 11 ++ vignettes/Articles/LakeCat.Rmd | 45 +++++ 13 files changed, 327 insertions(+), 81 deletions(-) create mode 100644 R/lc_get_watershed.R create mode 100644 tests/testthat/test-lc_get_watershed.R diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index 23f3c55..f717132 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -30,6 +30,7 @@ jobs: with: extra-packages: any::covr, any::xml2 needs: coverage + cache-version: 2 # Increment this to bust the cache - name: Test coverage run: | diff --git a/DESCRIPTION b/DESCRIPTION index 6fd162d..85b5059 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,7 +52,7 @@ Imports: 
patchwork, cowplot, tigris, - ggplot2 + ggplot2, Suggests: dplyr, mapview, @@ -67,7 +67,10 @@ Suggests: stringr, purrr, lifecycle, - tidyselect + tidyselect, + DBI, + duckdb, + tictoc, Encoding: UTF-8 URL: https://usepa.github.io/StreamCatTools/, https://github.com/USEPA/StreamCatTools BugReports: https://github.com/USEPA/StreamCatTools/issues diff --git a/NAMESPACE b/NAMESPACE index f332a1b..567224f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,7 @@ export(lc_get_metric_names) export(lc_get_nlcd) export(lc_get_nni) export(lc_get_params) -export(lc_nlcd) +export(lc_get_watershed) export(lc_plotnni) export(sc_fullname) export(sc_get_comid) @@ -16,7 +16,6 @@ export(sc_get_metric_names) export(sc_get_nlcd) export(sc_get_nni) export(sc_get_params) -export(sc_nlcd) export(sc_plotnni) import(ggpattern) import(ggplot2) diff --git a/R/lc_get_data.R b/R/lc_get_data.R index e4aec98..16f2eea 100644 --- a/R/lc_get_data.R +++ b/R/lc_get_data.R @@ -207,7 +207,7 @@ lc_get_data <- function(comid = NULL, #' @examples #' \dontrun{ #' -#' df <- lc_nlcd(comid='23783629', year='2019', aoi='ws') # Will show a deprecation warning +#' df <- lc_get_nlcd(comid='23783629', year='2019', aoi='ws') # Will show a deprecation warning #' #' df <- lc_get_nlcd(comid='23783629', year='2019', aoi='ws') #' @@ -294,31 +294,6 @@ lc_get_nlcd <- function(year = '2019', return(final_df) } -#' @rdname lc_get_nlcd -#' @export -#' @keywords internal -lc_nlcd <- function(year = '2019', - comid = NULL, - aoi = NULL, - showAreaSqKm = NULL, - showPctFull = NULL, - state = NULL, - county = NULL, - region = NULL, - conus = NULL, - countOnly = NULL) { - lifecycle::deprecate_warn("0.10.0", "lc_nlcd()", "lc_get_nlcd()") - lc_get_nlcd(year = '2019', - comid = NULL, - aoi = NULL, - showAreaSqKm = NULL, - showPctFull = NULL, - state = NULL, - county = NULL, - region = NULL, - conus = NULL, - countOnly = NULL) -} #' @title Get NNI #' @@ -365,8 +340,8 @@ lc_nlcd <- function(year = '2019', #' } lc_get_nni <- 
function(year, aoi = NULL, comid = NULL, - showAreaSqKm = TRUE, showPctFull = NULL, - countOnly = NULL) { + showAreaSqKm = TRUE, showPctFull = NULL, + countOnly = NULL) { # year must be a character string. year_chr <- as.character(year) # split multiple years supplied as a single string into @@ -412,29 +387,29 @@ lc_get_nni <- function(year, aoi = NULL, comid = NULL, ) # Vector of NNI metric names. nni <- c( - 'n_leg_', - 'n_ags_', - 'n_ff_', - 'n_uf_', - 'n_cf_', - 'n_cr_', - 'n_hw_', - 'n_lw_', - 'p_leg_', - 'p_ags_', - 'p_ff_', - 'p_uf_', - 'p_cr_', - 'p_hw_', - 'p_lw_' - ) + 'n_leg_', + 'n_ags_', + 'n_ff_', + 'n_uf_', + 'n_cf_', + 'n_cr_', + 'n_hw_', + 'n_lw_', + 'p_leg_', + 'p_ags_', + 'p_ff_', + 'p_uf_', + 'p_cr_', + 'p_hw_', + 'p_lw_' + ) # Add n_dep for available years ndep_year_vec <- year_vec[!year_vec %in% c('1987', '1988', '1989')] ndep_comb <- expand.grid('n_dep_', ndep_year_vec) ndep_mets <- paste0(ndep_comb$Var1, - ndep_comb$Var2, - collapse = ",", - recycle0 = TRUE) + ndep_comb$Var2, + collapse = ",", + recycle0 = TRUE) # Add p_dep for available years pdep_year_vec <- year_vec[!year_vec %in% c('1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '2014', '2015', '2016', '2017')] @@ -447,17 +422,17 @@ lc_get_nni <- function(year, aoi = NULL, comid = NULL, ww_year_vec <- year_vec[year_vec %in% c('1988', '1990', '1992', '1996', '2000', '2004', '2008', '2012')] ww_comb <- expand.grid(c('p_usgsww_', 'n_usgsww_'), ww_year_vec) ww_mets <- paste0(ww_comb$Var1, - ww_comb$Var2, - collapse = ",", - recycle0 = TRUE) + ww_comb$Var2, + collapse = ",", + recycle0 = TRUE) # Create a data frame of all NNI Metric and year combinations. all_comb <- expand.grid(nni, year_vec) # Concatenate the NLCD metric name with the supplied year(s) to create # valid metric names to submit to the API. 
nni_mets <- paste0(all_comb$Var1, - all_comb$Var2, - collapse = ",", - recycle0 = TRUE) + all_comb$Var2, + collapse = ",", + recycle0 = TRUE) # Combine all NNI metrics nni_mets_all <- paste0(nni_mets, ",", ndep_mets, ",", pdep_mets, ",", ww_mets) @@ -472,5 +447,4 @@ lc_get_nni <- function(year, aoi = NULL, comid = NULL, ) # End of function. Return a data frame. return(final_df) -} - +} \ No newline at end of file diff --git a/R/lc_get_params.R b/R/lc_get_params.R index 22a3e1e..d7366e2 100644 --- a/R/lc_get_params.R +++ b/R/lc_get_params.R @@ -80,6 +80,7 @@ lc_get_params <- function(param = NULL) { } else if(param == 'county'){ params <- resp$county_options[[1]] params$fips <- as.character(params$fips) + params <- params |> dplyr::select(-fips_str) params$fips[nchar(params$fips) < 5] <- paste0('0',params$fips[nchar(params$fips) < 5]) params <- params[with(params,order(state,county_name)),] } diff --git a/R/lc_get_watershed.R b/R/lc_get_watershed.R new file mode 100644 index 0000000..cf1c902 --- /dev/null +++ b/R/lc_get_watershed.R @@ -0,0 +1,227 @@ +#' @title Get LakeCat Lake Watershed +#' +#' @description +#' Lookup function for a single COMID from S3 GeoParquet (optionally restricted to one HUC2). +#' Queries one COMID from an S3-hosted, HUC2-partitioned GeoParquet dataset and returns an sf object. +#' If `huc2` is provided, only that partition is scanned (fastest). If not, the function tries +#' a glob over all HUC2 partitions and falls back to a shallower pattern if needed. +#' The function: +#' - loads DuckDB httpfs (S3) extension, +#' - pushes an equality filter on `COMID` for row-group/file pruning, +#' - converts WKB geometry to sf with the CRS you provide (default EPSG:4326). +#' +#' @param comid Scalar COMID to query (numeric or character, required). +#' @param huc2 Optional two-digit HUC2 string (e.g., "01") to restrict search to one partition. 
+#' @param huc2_filter Optional character vector of HUC2s to read (e.g., c("01","05")) for multi-partition pruning. +#' @param bucket Character(1). S3 bucket (default "dmap-data-commons-ow"). +#' @param prefix Character(1). S3 prefix under the bucket (default "data/streamcat/LakeCatWatersheds/"). +#' @param region Character(1). S3 region (default "us-east-1"). +#' @param install_missing Logical. Install missing packages (duckdb, DBI, sf, wk) if needed (default FALSE). +#' @param keep_open Logical. Keep the DuckDB connection open (default FALSE). Note: the connection is not returned. +#' @param verbose Logical. Print progress messages (default TRUE). +#' @param progress Logical. Show a simple progress bar (default TRUE). +#' @param threads Integer or NULL. If set, `PRAGMA threads` for DuckDB (parallelism). +#' @param enable_object_cache Logical. Enable DuckDB object cache to speed repeated queries (default TRUE). +#' @param skip_describe Logical. Skip DESCRIBE step (default FALSE). +#' @param skip_counts Logical. Skip HUC2 counts step (default TRUE; no longer returned). +#' @param sf_crs Integer or character. CRS for the output sf object (default 4326). +#' +#' @return An sf object with zero or one+ rows (if multiple features share the same COMID). 
+#' @export + +lc_get_watershed <- function( + comid, + huc2 = NA_character_, + huc2_filter = NULL, + bucket = "dmap-data-commons-ow", + prefix = "data/streamcat/LakeCatWatersheds/", + region = "us-east-1", + install_missing = FALSE, + keep_open = FALSE, + verbose = TRUE, + progress = TRUE, + threads = 4, # default to moderate parallelism + enable_object_cache = TRUE, + skip_describe = FALSE, + skip_counts = TRUE, + sf_crs = 4326, + retries = 5, # NEW: retry count for transient timeouts + retry_base_delay = 0.5, # NEW: initial delay in seconds + retry_max_delay = 8, # NEW: cap delay per attempt + url_style = c("path", "virtual_hosted"), # NEW: allows changing URL style + s3_endpoint = NULL # NEW: custom endpoint (e.g., "s3.amazonaws.com") +) { + # --- COMID validation (single-value with warning if vector) --- + if (missing(comid) || length(comid) == 0L) stop("Argument 'comid' is required.") + if (length(comid) > 1L) { + warning("Currently the function only supports requesting one feature at a time by COMID; using the first value: ", + as.character(comid[1])) + comid <- comid[1] + } + if (is.na(comid)) stop("Argument 'comid' cannot be NA.") + comid_chr <- as.character(comid) + + # --- Packages --- + needed <- c("duckdb", "DBI", "sf", "wk") + have <- vapply(needed, requireNamespace, logical(1), quietly = TRUE) + if (!all(have)) { + missing <- needed[!have] + if (isTRUE(install_missing)) { + install.packages(missing, repos = "https://cloud.r-project.org") + have <- vapply(needed, requireNamespace, logical(1), quietly = TRUE) + if (!all(have)) stop("Could not load packages after installation: ", paste(needed[!have], collapse = ", ")) + } else { + stop("Missing required packages: ", paste(missing, collapse = ", "), + ". Set install_missing = TRUE to install automatically.") + } + } + + url_style <- match.arg(url_style) + msg <- function(...) 
if (isTRUE(verbose)) cat(sprintf("[%s] %s\n", format(Sys.time(), "%H:%M:%S"), paste0(...))) + + # --- Progress bar (coarse) --- + total_steps <- 6L + (!skip_describe) + (!skip_counts) + step <- 0L; pb <- NULL + bump <- function() { step <<- step + 1L; if (!is.null(pb)) utils::setTxtProgressBar(pb, step) } + if (isTRUE(progress)) { + pb <- utils::txtProgressBar(min = 0, max = total_steps, style = 3) + on.exit(try(close(pb), silent = TRUE), add = TRUE) + } + + msg("duckdb version: ", as.character(utils::packageVersion("duckdb"))); bump() + + # --- Connect (single controlled cleanup) --- + con <- DBI::dbConnect(duckdb::duckdb()) + if (!isTRUE(keep_open)) { + on.exit({ + valid <- tryCatch(DBI::dbIsValid(con), error = function(e) FALSE) + if (isTRUE(valid)) suppressWarnings(try(DBI::dbDisconnect(con, shutdown = TRUE), silent = TRUE)) + }, add = TRUE) + } + if (!is.null(threads)) DBI::dbExecute(con, sprintf("PRAGMA threads=%d;", as.integer(threads))) + bump() + + # --- Configure httpfs/S3 --- + msg("Loading and configuring httpfs ...") + DBI::dbExecute(con, "INSTALL httpfs;") + DBI::dbExecute(con, "LOAD httpfs;") + DBI::dbExecute(con, sprintf("SET s3_region = '%s';", region)) + DBI::dbExecute(con, "SET s3_use_ssl = true;") + DBI::dbExecute(con, sprintf("SET s3_url_style = '%s';", if (url_style == "path") "path" else "virtual_hosted")) + DBI::dbExecute(con, "SET s3_access_key_id = '';") + DBI::dbExecute(con, "SET s3_secret_access_key = '';") + DBI::dbExecute(con, "SET s3_session_token = '';") + if (!is.null(s3_endpoint)) { + # e.g., "s3.amazonaws.com" or a VPC endpoint; skip silently if not supported + try(DBI::dbExecute(con, sprintf("SET s3_endpoint = '%s';", s3_endpoint)), silent = TRUE) + } + if (isTRUE(enable_object_cache)) { + DBI::dbExecute(con, "SET enable_object_cache = true;") + } + # Optional secret (harmless; helps on some builds) + try(DBI::dbExecute(con, sprintf(" + CREATE OR REPLACE SECRET s3_public ( + TYPE S3, + PROVIDER CONFIG, + KEY_ID '', + SECRET '', + 
REGION '%s' + );", region)), silent = TRUE) + bump() + + # --- Retry helper for transient HTTP timeouts --- + sleep_time <- function(k) min(retry_max_delay, retry_base_delay * (2^(k - 1))) * runif(1, 0.9, 1.1) + db_get_query_retry <- function(sql) { + for (k in seq_len(retries)) { + out <- try(DBI::dbGetQuery(con, sql), silent = TRUE) + if (!inherits(out, "try-error")) return(out) + emsg <- conditionMessage(attr(out, "condition")) + # Only retry on network-ish timeouts; otherwise rethrow + retryable <- grepl("Timeout was reached", emsg, fixed = TRUE) || + grepl("Operation timed out", emsg, fixed = TRUE) || + grepl("Temporary failure in name resolution", emsg, fixed = TRUE) + if (!retryable || k == retries) { + stop(simpleError(emsg)) + } + wt <- sleep_time(k) + msg(sprintf("Transient S3 timeout; retry %d/%d in %.1fs ...", k, retries, wt)) + Sys.sleep(wt) + } + } + + # --- Build source (glob or restricted partitions) --- + norm_prefix <- paste0(sub("/+$", "", prefix), "/") + shallow_glob <- sprintf("s3://%s/%s*/*.parquet", bucket, norm_prefix) + deep_glob <- sprintf("s3://%s/%s**/*.parquet", bucket, norm_prefix) + + src_sql <- NULL + if (length(huc2_filter)) { + parts <- sprintf("s3://%s/%sHUC2=%s/*.parquet", bucket, norm_prefix, huc2_filter) + paths_sql <- paste0("[", paste(sprintf("'%s'", parts), collapse = ", "), "]") + src_sql <- sprintf("read_parquet(%s, hive_partitioning = true)", paths_sql) + } else { + pick_glob <- function(globs) { + for (g in globs) { + ok <- try({ + db_get_query_retry(sprintf("SELECT 1 FROM read_parquet('%s', hive_partitioning = true) LIMIT 1", g)) + TRUE + }, silent = TRUE) + if (isTRUE(ok)) return(g) + } + NULL + } + glob <- pick_glob(c(deep_glob, shallow_glob)) + if (is.null(glob)) { + stop("No Parquet files found under s3://", bucket, "/", norm_prefix, + " (checked patterns: ", deep_glob, " and ", shallow_glob, ").") + } + msg("Using glob: ", glob) + src_sql <- sprintf("read_parquet('%s', hive_partitioning = true)", glob) + } + bump() 
+ + # --- Optional describe/counts (not returned) --- + if (!isTRUE(skip_describe)) { + msg("Describing schema ...") + invisible(db_get_query_retry(sprintf("DESCRIBE SELECT * FROM %s LIMIT 0", src_sql))) + } + bump() + if (!isTRUE(skip_counts)) { + msg("Counting rows per HUC2 ...") + invisible(db_get_query_retry(sprintf(" + SELECT HUC2, COUNT(*) AS n + FROM %s + GROUP BY HUC2 + ORDER BY HUC2 + ", src_sql))) + } + bump() + + # --- COMID query (with retries) --- + msg("Querying COMID = ", comid_chr, if (!is.na(huc2)) paste0(" within HUC2=", huc2) else "") + where <- sprintf("CAST(COMID AS VARCHAR) = '%s'", comid_chr) + if (!is.na(huc2)) where <- sprintf("%s AND HUC2 = '%s'", where, huc2) + sql <- sprintf("SELECT * FROM %s WHERE %s", src_sql, where) + res <- db_get_query_retry(sql) + msg("Rows returned: ", nrow(res)) + bump() + + # --- Convert to sf --- + geom_candidates <- c("geometry", "wkb_geometry", "geom", "wkb") + geom_col <- intersect(names(res), geom_candidates) + if (!length(geom_col)) { + stop("No geometry column found in result; checked: ", + paste(geom_candidates, collapse = ", "), + ". 
Ensure the dataset contains WKB geometry and uses one of these column names.") + } + gcol <- geom_col[1] + gval <- res[[gcol]] + if (inherits(gval, "blob")) gval <- unclass(gval) + if (is.raw(gval)) gval <- as.list(gval) + sfc <- if (length(gval) == 0L) sf::st_sfc(crs = sf_crs) else sf::st_as_sfc(wk::wkb(gval), crs = sf_crs) + data_no_geom <- res[, setdiff(names(res), gcol), drop = FALSE] + result_sf <- sf::st_sf(data_no_geom, geometry = sfc) |> + sf::st_set_crs(sf_crs) + + return(result_sf) +} diff --git a/R/sc_get_data.R b/R/sc_get_data.R index c7fffdf..ae8261f 100644 --- a/R/sc_get_data.R +++ b/R/sc_get_data.R @@ -240,7 +240,7 @@ sc_get_data <- function(comid = NULL, #' @examples #' \dontrun{ #' -#' df <- sc_nlcd(year='2001', aoi='cat',comid='179') # Will show a deprecation warning +#' df <- sc_get_nlcd(year='2001', aoi='cat',comid='179') # Will show a deprecation warning #' #' df <- sc_get_nlcd(year='2001', aoi='cat',comid='179,1337,1337420') #' @@ -249,7 +249,7 @@ sc_get_data <- function(comid = NULL, #' df <- sc_get_nlcd(year='2001', aoi='ws', region='Region01', #' countOnly=TRUE) #' -#' df <- sc_nlcd(year='2001', aoi='ws', region='Region01', +#' df <- sc_get_nlcd(year='2001', aoi='ws', region='Region01', #' showAreaSqKm=FALSE, showPctFull=TRUE) #' #' df <- sc_get_nlcd(year='2001, 2006', aoi='cat,ws', @@ -333,7 +333,6 @@ sc_get_nlcd <- function(year = '2019', } #' @rdname sc_get_nlcd -#' @export #' @keywords internal sc_nlcd <- function(year = '2019', comid = NULL, diff --git a/R/sc_get_params.R b/R/sc_get_params.R index afcf51e..efe4859 100644 --- a/R/sc_get_params.R +++ b/R/sc_get_params.R @@ -78,6 +78,7 @@ sc_get_params <- function(param = NULL) { } else if(param == 'county'){ params <- resp$county_options[[1]] params$fips <- as.character(params$fips) + params <- params |> dplyr::select(-fips_str) params$fips[nchar(params$fips) < 5] <- paste0('0',params$fips[nchar(params$fips) < 5]) params <- params[with(params,order(state,county_name)),] } diff --git 
a/man/lc_get_nlcd.Rd b/man/lc_get_nlcd.Rd index b69a1f7..c9c302c 100644 --- a/man/lc_get_nlcd.Rd +++ b/man/lc_get_nlcd.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/lc_get_data.R \name{lc_get_nlcd} \alias{lc_get_nlcd} -\alias{lc_nlcd} \title{Get NLCD Data} \usage{ lc_get_nlcd( @@ -17,19 +16,6 @@ lc_get_nlcd( conus = NULL, countOnly = NULL ) - -lc_nlcd( - year = "2019", - comid = NULL, - aoi = NULL, - showAreaSqKm = NULL, - showPctFull = NULL, - state = NULL, - county = NULL, - region = NULL, - conus = NULL, - countOnly = NULL -) } \arguments{ \item{year}{Years(s) of NLCD metrics to query. @@ -67,7 +53,7 @@ Function to specifically retrieve all NLCD metrics for a given year using the St \examples{ \dontrun{ -df <- lc_nlcd(comid='23783629', year='2019', aoi='ws') # Will show a deprecation warning +df <- lc_get_nlcd(comid='23783629', year='2019', aoi='ws') # Will show a deprecation warning df <- lc_get_nlcd(comid='23783629', year='2019', aoi='ws') @@ -84,4 +70,3 @@ comid='23783629,23794487,23812618') \author{ Marc Weber } -\keyword{internal} diff --git a/man/sc_get_data.Rd b/man/sc_get_data.Rd index 09bccc0..3d572fd 100644 --- a/man/sc_get_data.Rd +++ b/man/sc_get_data.Rd @@ -82,7 +82,7 @@ df <- sc_get_data(metric='pctgrs2006', aoi='ws', region='Region01') df <- sc_get_data(metric='pctwdwet2006', aoi='ws', county='41003') -df <- sc_get_data(metric='pcturbmd2006', aoi='wsrp100', +df <- sc_get_data(metric='pcturbmd2006', aoi='ws,rp100', comid='1337420') df <- sc_get_data(metric='pcturbmd2006,damdens', diff --git a/man/sc_get_nlcd.Rd b/man/sc_get_nlcd.Rd index a09c47c..1b1319d 100644 --- a/man/sc_get_nlcd.Rd +++ b/man/sc_get_nlcd.Rd @@ -82,7 +82,7 @@ Function to retrieve all NLCD metrics for a given year using the StreamCat API. 
\examples{ \dontrun{ -df <- sc_nlcd(year='2001', aoi='cat',comid='179') # Will show a deprecation warning +df <- sc_get_nlcd(year='2001', aoi='cat',comid='179') # Will show a deprecation warning df <- sc_get_nlcd(year='2001', aoi='cat',comid='179,1337,1337420') @@ -91,7 +91,7 @@ df <- sc_get_nlcd(year='2001', aoi='ws', region='Region01') df <- sc_get_nlcd(year='2001', aoi='ws', region='Region01', countOnly=TRUE) -df <- sc_nlcd(year='2001', aoi='ws', region='Region01', +df <- sc_get_nlcd(year='2001', aoi='ws', region='Region01', showAreaSqKm=FALSE, showPctFull=TRUE) df <- sc_get_nlcd(year='2001, 2006', aoi='cat,ws', diff --git a/tests/testthat/test-lc_get_watershed.R b/tests/testthat/test-lc_get_watershed.R new file mode 100644 index 0000000..8108ddb --- /dev/null +++ b/tests/testthat/test-lc_get_watershed.R @@ -0,0 +1,11 @@ +context("Test that lc_get_watershed is returning an sf object") + +test_that("lc_get_watershed is returning an sf object representing a lake watershed", { + testthat::skip_on_cran() + p <- lc_get_watershed(comid = 19334077, huc2 = "01",huc2_filter = "01", + threads = parallel::detectCores()) + expect_true(exists("p")) + expect_true(class(p)[1]=='sf') +}) + + diff --git a/vignettes/Articles/LakeCat.Rmd b/vignettes/Articles/LakeCat.Rmd index b6d1bdd..1bf7e69 100644 --- a/vignettes/Articles/LakeCat.Rmd +++ b/vignettes/Articles/LakeCat.Rmd @@ -114,3 +114,48 @@ In this example we access National Land Cover Dataset (NLCD) data for 2019, just df <- lc_get_nlcd(comid='23783629,23794487,23812618', year='2019', aoi='ws') knitr::kable(df) ``` + +## Get lake and lake watersheds as `sf` objects + +`StreamCatTools` includes a function `lc_get_watershed` that will return the watershed of any lake by COMID from an AWS S3 bucket of partitioned geoparquet files for all LakeCat lake watersheds (both on-network and off-network lakes). 
The watershed requested is returned as an `sf` object and uses `duckDB` as well as optional parallelization for faster data returns and specification of a specific HUC2 if known to reduce search time. **NOTE** `lc_get_watershed` currently only returns one watershed per request. This can be used in conjunction with `nhdplusTools` function `get_waterbodies` to map lake features for which `LakeCat` metrics are requested as shown below. + +```{r get_lake} +lake <- nhdplusTools::get_waterbodies(id = 19334077) +``` + +```{r get_watershed} +tictoc::tic() +ws <- lc_get_watershed(comid = 19334077) +tictoc::toc() +``` + +### Adding a HUC2 query parameter and using multi-threading speeds the process significantly +```{r get_lake_and_watershed_faster} +tictoc::tic() +ws <- lc_get_watershed(comid = 19334077, huc2 = "01",huc2_filter = "01", + threads = 2,retries = 5) +tictoc::toc() +``` + +### View lake with watershed +```{r view_lake_watershed} +m1 <- mapview::mapview(ws, + col.regions = "blue", + alpha.regions = 0.1, + color = "blue", + alpha = 0.8, + lwd = 2, + layer.name = "Watershed") + + +m2 <- mapview::mapview(lake, + col.regions = "blue", + alpha.regions = 0.5, + color = "blue", + alpha = 0.8, + layer.name = "Lake") + + +mapview::m1 + m2 +``` + From 39938c8e8f6d48f9aa523bdc7c881a238fa9a558 Mon Sep 17 00:00:00 2001 From: Weber Date: Wed, 13 May 2026 14:23:06 -0700 Subject: [PATCH 3/7] A few updates to clear warnings with merged PR for new lakecat watershed function --- DESCRIPTION | 1 + NEWS.md | 8 +++++ R/lc_get_params.R | 2 +- R/lc_get_watershed.R | 2 +- R/sc_get_params.R | 2 +- cran-comments.md | 15 ++++----- man/lc_get_watershed.Rd | 73 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 91 insertions(+), 12 deletions(-) create mode 100644 man/lc_get_watershed.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 85b5059..19d58c3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -71,6 +71,7 @@ Suggests: DBI, duckdb, tictoc, + stats, Encoding: UTF-8 URL: 
https://usepa.github.io/StreamCatTools/, https://github.com/USEPA/StreamCatTools BugReports: https://github.com/USEPA/StreamCatTools/issues diff --git a/NEWS.md b/NEWS.md index 40491ca..40241d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# StreamCatTools 0.11.0 + +- Adds new `lc_get_watershed` function to return a lake watershed as an `sf` + feature for a LakeCat (NHDPlusV2 Waterbody) COMID from an S3-hosted, + HUC2-partitioned GeoParquet dataset +- Uses optional HUC2 query as well as retries, multi-threading and `duckdb` + to speed up retrieval of spatial data from AWS S3 bucket + # StreamCatTools 0.10.0 - Adds new functions `sc_get_nni()` and `lc_get_nni()` for ease of access to diff --git a/R/lc_get_params.R b/R/lc_get_params.R index d7366e2..4d01fd1 100644 --- a/R/lc_get_params.R +++ b/R/lc_get_params.R @@ -80,7 +80,7 @@ lc_get_params <- function(param = NULL) { } else if(param == 'county'){ params <- resp$county_options[[1]] params$fips <- as.character(params$fips) - params <- params |> dplyr::select(-fips_str) + params <- params |> dplyr::select(params, -dplyr::any_of("fips_str")) params$fips[nchar(params$fips) < 5] <- paste0('0',params$fips[nchar(params$fips) < 5]) params <- params[with(params,order(state,county_name)),] } diff --git a/R/lc_get_watershed.R b/R/lc_get_watershed.R index cf1c902..01d4819 100644 --- a/R/lc_get_watershed.R +++ b/R/lc_get_watershed.R @@ -130,7 +130,7 @@ lc_get_watershed <- function( bump() # --- Retry helper for transient HTTP timeouts --- - sleep_time <- function(k) min(retry_max_delay, retry_base_delay * (2^(k - 1))) * runif(1, 0.9, 1.1) + sleep_time <- function(k) min(retry_max_delay, retry_base_delay * (2^(k - 1))) * stats::runif(1, 0.9, 1.1) db_get_query_retry <- function(sql) { for (k in seq_len(retries)) { out <- try(DBI::dbGetQuery(con, sql), silent = TRUE) diff --git a/R/sc_get_params.R b/R/sc_get_params.R index efe4859..2091ae6 100644 --- a/R/sc_get_params.R +++ b/R/sc_get_params.R @@ -78,7 +78,7 @@ 
sc_get_params <- function(param = NULL) { } else if(param == 'county'){ params <- resp$county_options[[1]] params$fips <- as.character(params$fips) - params <- params |> dplyr::select(-fips_str) + params <- params |> dplyr::select(params, -dplyr::any_of("fips_str")) params$fips[nchar(params$fips) < 5] <- paste0('0',params$fips[nchar(params$fips) < 5]) params <- params[with(params,order(state,county_name)),] } diff --git a/cran-comments.md b/cran-comments.md index 97e3f25..fa1d76e 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,11 +1,8 @@ -This is a patch update that: +This is a minor update that: -- Adds new functions `sc_get_nni()` and `lc_get_nni()` for ease of access to - National Nutrient Inventory data in StreamCat -- Adds new functions `sc_plot()` and `lc_plot` for plotting nitrogen and - phosphorus budgets for watersheds -- Adds a new article describing functions for plotting National Nutrient - Inventory data in StreamCat +- Adds new `lc_get_watershed` function to return a lake watershed as an `sf` + feature for a LakeCat (NHDPlusV2 Waterbody) COMID from an S3-hosted, + HUC2-partitioned GeoParquet dataset ------- @@ -16,8 +13,8 @@ This is a resubmission. 
## R CMD check results Here is the output from `devtools::check()` on R Version R version 4.5.2, -devtools version 2.4.6, and Windows 11 x64 operating system +devtools version 2.5.0, and Windows 11 x64 operating system -Duration: 3m 55s +Duration: 2m 18.5s 0 errors ✔ | 0 warnings ✔ | 0 notes ✔ diff --git a/man/lc_get_watershed.Rd b/man/lc_get_watershed.Rd new file mode 100644 index 0000000..dd72eb1 --- /dev/null +++ b/man/lc_get_watershed.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lc_get_watershed.R +\name{lc_get_watershed} +\alias{lc_get_watershed} +\title{Get LakeCat Lake Watershed} +\usage{ +lc_get_watershed( + comid, + huc2 = NA_character_, + huc2_filter = NULL, + bucket = "dmap-data-commons-ow", + prefix = "data/streamcat/LakeCatWatersheds/", + region = "us-east-1", + install_missing = FALSE, + keep_open = FALSE, + verbose = TRUE, + progress = TRUE, + threads = 4, + enable_object_cache = TRUE, + skip_describe = FALSE, + skip_counts = TRUE, + sf_crs = 4326, + retries = 5, + retry_base_delay = 0.5, + retry_max_delay = 8, + url_style = c("path", "virtual_hosted"), + s3_endpoint = NULL +) +} +\arguments{ +\item{comid}{Scalar COMID to query (numeric or character, required).} + +\item{huc2}{Optional two-digit HUC2 string (e.g., "01") to restrict search to one partition.} + +\item{huc2_filter}{Optional character vector of HUC2s to read (e.g., c("01","05")) for multi-partition pruning.} + +\item{bucket}{Character(1). S3 bucket (default "dmap-data-commons-ow").} + +\item{prefix}{Character(1). S3 prefix under the bucket (default "data/streamcat/LakeCatWatersheds/").} + +\item{region}{Character(1). S3 region (default "us-east-1").} + +\item{install_missing}{Logical. Install missing packages (duckdb, DBI, sf, wk) if needed (default FALSE).} + +\item{keep_open}{Logical. Keep the DuckDB connection open (default FALSE). Note: the connection is not returned.} + +\item{verbose}{Logical. 
Print progress messages (default TRUE).} + +\item{progress}{Logical. Show a simple progress bar (default TRUE).} + +\item{threads}{Integer or NULL. If set, `PRAGMA threads` for DuckDB (parallelism).} + +\item{enable_object_cache}{Logical. Enable DuckDB object cache to speed repeated queries (default TRUE).} + +\item{skip_describe}{Logical. Skip DESCRIBE step (default FALSE).} + +\item{skip_counts}{Logical. Skip HUC2 counts step (default TRUE; no longer returned).} + +\item{sf_crs}{Integer or character. CRS for the output sf object (default 4326).} +} +\value{ +An sf object with zero or one+ rows (if multiple features share the same COMID). +} +\description{ +Lookup function for a single COMID from S3 GeoParquet (optionally restricted to one HUC2). +Queries one COMID from an S3-hosted, HUC2-partitioned GeoParquet dataset and returns an sf object. +If `huc2` is provided, only that partition is scanned (fastest). If not, the function tries +a glob over all HUC2 partitions and falls back to a shallower pattern if needed. +The function: +- loads DuckDB httpfs (S3) extension, +- pushes an equality filter on `COMID` for row-group/file pruning, +- converts WKB geometry to sf with the CRS you provide (default EPSG:4326). +} From 6193c7991fef8467bdce59bd59f5092529487c21 Mon Sep 17 00:00:00 2001 From: Weber Date: Wed, 13 May 2026 14:26:27 -0700 Subject: [PATCH 4/7] typo in CRAN comments --- cran-comments.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cran-comments.md b/cran-comments.md index fa1d76e..0e8cf2d 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -12,7 +12,7 @@ This is a resubmission. 
## R CMD check results -Here is the output from `devtools::check()` on R Version R version 4.5.2, +Here is the output from `devtools::check()` on R Version 4.5.2, devtools version 2.5.0, and Windows 11 x64 operating system Duration: 2m 18.5s From 40723a1894269deae851e8b1532b90d86ed53378 Mon Sep 17 00:00:00 2001 From: Weber Date: Wed, 13 May 2026 15:17:12 -0700 Subject: [PATCH 5/7] fixed a couple warnings --- R/lc_get_params.R | 10 +++++----- R/sc_get_params.R | 10 +++++----- R/sc_plot.R | 5 ++++- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/R/lc_get_params.R b/R/lc_get_params.R index 4d01fd1..85a5699 100644 --- a/R/lc_get_params.R +++ b/R/lc_get_params.R @@ -46,7 +46,7 @@ lc_get_params <- function(param = NULL) { } else if(param == 'variable_info') { params <- httr2::request('https://api.epa.gov/StreamCat/lakes/variable_info') |> httr2::req_perform() |> - httr2::resp_body_string() |> + httr2::resp_body_raw() |> readr::read_csv(show_col_types = FALSE) |> dplyr::select(-UUID,-DATE_DOWNLOADED,-METADATA) |> dplyr::rename(dataset=FINAL_TABLE,category=INDICATOR_CATEGORY, @@ -57,14 +57,14 @@ lc_get_params <- function(param = NULL) { } else if(param == 'categories'){ params <- httr2::request('https://api.epa.gov/StreamCat/lakes/variable_info') |> httr2::req_perform() |> - httr2::resp_body_string() |> + httr2::resp_body_raw() |> readr::read_csv(show_col_types = FALSE) |> dplyr::select(INDICATOR_CATEGORY) params <- sort(unique(params$INDICATOR_CATEGORY)) } else if(param == 'datasets'){ params <- httr2::request('https://api.epa.gov/StreamCat/lakes/variable_info') |> httr2::req_perform() |> - httr2::resp_body_string() |> + httr2::resp_body_raw() |> readr::read_csv(show_col_types = FALSE) |> dplyr::select(DSNAME) params <- sort(unique(params$DSNAME[!is.na(params$DSNAME)])) @@ -80,7 +80,7 @@ lc_get_params <- function(param = NULL) { } else if(param == 'county'){ params <- resp$county_options[[1]] params$fips <- as.character(params$fips) - params <- params |> 
dplyr::select(params, -dplyr::any_of("fips_str")) + params <- params |> dplyr::select(-dplyr::any_of("fips_str")) params$fips[nchar(params$fips) < 5] <- paste0('0',params$fips[nchar(params$fips) < 5]) params <- params[with(params,order(state,county_name)),] } @@ -168,7 +168,7 @@ lc_get_metric_names <- function(category = NULL, resp <- tryCatch({ params <- httr2::request('https://api.epa.gov/StreamCat/lakes/variable_info') |> httr2::req_perform() |> - httr2::resp_body_string() |> + httr2::resp_body_raw() |> readr::read_csv(show_col_types = FALSE) },error = function(e) { message("An error occurred during req_perform(); the service may be down or function parameters may be mis-specified: ", e$message) diff --git a/R/sc_get_params.R b/R/sc_get_params.R index 2091ae6..95ced4b 100644 --- a/R/sc_get_params.R +++ b/R/sc_get_params.R @@ -44,7 +44,7 @@ sc_get_params <- function(param = NULL) { } else if(param == 'variable_info') { params <- httr2::request('https://api.epa.gov/StreamCat/streams/variable_info') |> httr2::req_perform() |> - httr2::resp_body_string() |> + httr2::resp_body_raw() |> readr::read_csv(show_col_types = FALSE) |> dplyr::select(-UUID,-DATE_DOWNLOADED,-METADATA) |> dplyr::rename(dataset=FINAL_TABLE,category=INDICATOR_CATEGORY, @@ -55,14 +55,14 @@ sc_get_params <- function(param = NULL) { } else if(param == 'categories'){ params <- httr2::request('https://api.epa.gov/StreamCat/streams/variable_info') |> httr2::req_perform() |> - httr2::resp_body_string() |> + httr2::resp_body_raw() |> readr::read_csv(show_col_types = FALSE) |> dplyr::select(INDICATOR_CATEGORY) params <- sort(unique(params$INDICATOR_CATEGORY)) } else if(param == 'datasets'){ params <- httr2::request('https://api.epa.gov/StreamCat/streams/variable_info') |> httr2::req_perform() |> - httr2::resp_body_string() |> + httr2::resp_body_raw() |> readr::read_csv(show_col_types = FALSE) |> dplyr::select(DSNAME) params <- sort(unique(params$DSNAME[!is.na(params$DSNAME)])) @@ -78,7 +78,7 @@ 
sc_get_params <- function(param = NULL) { } else if(param == 'county'){ params <- resp$county_options[[1]] params$fips <- as.character(params$fips) - params <- params |> dplyr::select(params, -dplyr::any_of("fips_str")) + params <- params |> dplyr::select(-dplyr::any_of("fips_str")) params$fips[nchar(params$fips) < 5] <- paste0('0',params$fips[nchar(params$fips) < 5]) params <- params[with(params,order(state,county_name)),] } @@ -171,7 +171,7 @@ sc_get_metric_names <- function(category = NULL, resp <- tryCatch({ params <- httr2::request('https://api.epa.gov/StreamCat/streams/variable_info') |> httr2::req_perform() |> - httr2::resp_body_string() |> + httr2::resp_body_raw() |> readr::read_csv(show_col_types = FALSE) },error = function(e) { message("An error occurred during req_perform(); the service may be down or function parameters may be mis-specified: ", e$message) diff --git a/R/sc_plot.R b/R/sc_plot.R index 59ee018..5713f74 100644 --- a/R/sc_plot.R +++ b/R/sc_plot.R @@ -206,7 +206,10 @@ sc_plotnni <- function(comid, include.nue = FALSE, include.inset = TRUE){ #comid comidint <- as.integer(comid) flowline <- nhdplusTools::get_nhdplus(comid = comidint, realization = "flowline") - point <- sf::st_centroid(flowline) + point <- flowline |> + sf::st_geometry() |> + sf::st_centroid() |> + sf::st_as_sf() #create N bar plot nbar <- ggplot() + From 2b86804602a1c31b8a4a7fc2ede33c22a9a7023a Mon Sep 17 00:00:00 2001 From: Weber Date: Thu, 14 May 2026 10:33:39 -0700 Subject: [PATCH 6/7] A few final fixes for latest update --- DESCRIPTION | 1 + R/lc_get_data.R | 15 +++++++++++++++ R/lc_get_watershed.R | 9 ++++++++- man/lc_get_nlcd.Rd | 15 +++++++++++++++ man/lc_get_watershed.Rd | 12 ++++++++++++ 5 files changed, 51 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 19d58c3..fd5aec4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -72,6 +72,7 @@ Suggests: duckdb, tictoc, stats, + wk Encoding: UTF-8 URL: https://usepa.github.io/StreamCatTools/, 
https://github.com/USEPA/StreamCatTools BugReports: https://github.com/USEPA/StreamCatTools/issues diff --git a/R/lc_get_data.R b/R/lc_get_data.R index 16f2eea..af6f99b 100644 --- a/R/lc_get_data.R +++ b/R/lc_get_data.R @@ -191,6 +191,21 @@ lc_get_data <- function(comid = NULL, #' @param comid Return metric information for specific COMIDs #' Syntax: comid=<comid1>,<comid2> #' +#' @param state Return metric information for COMIDs within a specific state. Use a state's abbreviation to +#' query for a given state. +#' Syntax: state=<state1>,<state2> +#' +#' @param county Return metric information for COMIDs within a specific county. +#' Users must use the FIPS code, not county name, as a way to disambiguate counties. +#' Syntax: county=<county1>,<county2> +#' +#' @param region Return metric information for COMIDs within a specified hydroregion. +#' Syntax: region=<regionid1>,<regionid2> +#' +#' @param conus Return all COMIDs in the conterminous United States. +#' The default value is false. +#' Values: true|false +#' #' @param showAreaSqKm Return the area in square kilometers of a given area of interest. #' The default value is false. #' Values: true|false diff --git a/R/lc_get_watershed.R b/R/lc_get_watershed.R index 01d4819..399b32b 100644 --- a/R/lc_get_watershed.R +++ b/R/lc_get_watershed.R @@ -25,6 +25,13 @@ #' @param skip_describe Logical. Skip DESCRIBE step (default FALSE). #' @param skip_counts Logical. Skip HUC2 counts step (default TRUE; no longer returned). #' @param sf_crs Integer or character. CRS for the output sf object (default 4326). +#' @param retries Integer. Number of retries for transient S3/HTTP errors (default 5). +#' @param retry_base_delay Numeric. Initial exponential backoff delay in seconds (default 0.5). +#' @param retry_max_delay Numeric. Maximum backoff delay per attempt in seconds (default 8). +#' @param url_style Character. S3 URL style used by DuckDB httpfs, one of "path" or "virtual_hosted". +#' Passed to `match.arg()`, default "path". +#' @param s3_endpoint Optional character(1). 
Custom S3 endpoint hostname (e.g., "s3.amazonaws.com"). +#' NULL uses the default for the selected region. #' #' @return An sf object with zero or one+ rows (if multiple features share the same COMID). #' @export @@ -67,7 +74,7 @@ lc_get_watershed <- function( if (!all(have)) { missing <- needed[!have] if (isTRUE(install_missing)) { - install.packages(missing, repos = "https://cloud.r-project.org") + utils::install.packages(missing, repos = "https://cloud.r-project.org") have <- vapply(needed, requireNamespace, logical(1), quietly = TRUE) if (!all(have)) stop("Could not load packages after installation: ", paste(needed[!have], collapse = ", ")) } else { diff --git a/man/lc_get_nlcd.Rd b/man/lc_get_nlcd.Rd index c9c302c..14aba5e 100644 --- a/man/lc_get_nlcd.Rd +++ b/man/lc_get_nlcd.Rd @@ -38,6 +38,21 @@ Values: true|false} \item{showPctFull}{Return the pctfull for each dataset. The default value is false. Values: true|false} +\item{state}{Return metric information for COMIDs within a specific state. Use a state's abbreviation to +query for a given state. +Syntax: state=<state1>,<state2>} + +\item{county}{Return metric information for COMIDs within a specific county. +Users must use the FIPS code, not county name, as a way to disambiguate counties. +Syntax: county=<county1>,<county2>} + +\item{region}{Return metric information for COMIDs within a specified hydroregion. +Syntax: region=<regionid1>,<regionid2>} + +\item{conus}{Return all COMIDs in the conterminous United States. +The default value is false. +Values: true|false} + \item{countOnly}{Return a CSV containing only the row count (ROWCOUNT) and the column count (COLUMNCOUNT) that the server expects to return in a request. The default value is false. Values: true|false} diff --git a/man/lc_get_watershed.Rd b/man/lc_get_watershed.Rd index dd72eb1..292777c 100644 --- a/man/lc_get_watershed.Rd +++ b/man/lc_get_watershed.Rd @@ -57,6 +57,18 @@ lc_get_watershed( \item{skip_counts}{Logical. 
Skip HUC2 counts step (default TRUE; no longer returned).} \item{sf_crs}{Integer or character. CRS for the output sf object (default 4326).} + +\item{retries}{Integer. Number of retries for transient S3/HTTP errors (default 5).} + +\item{retry_base_delay}{Numeric. Initial exponential backoff delay in seconds (default 0.5).} + +\item{retry_max_delay}{Numeric. Maximum backoff delay per attempt in seconds (default 8).} + +\item{url_style}{Character. S3 URL style used by DuckDB httpfs, one of "path" or "virtual_hosted". +Passed to `match.arg()`, default "path".} + +\item{s3_endpoint}{Optional character(1). Custom S3 endpoint hostname (e.g., "s3.amazonaws.com"). +NULL uses the default for the selected region.} } \value{ An sf object with zero or one+ rows (if multiple features share the same COMID). From 8cb6d3a66ee17cf026dc003581dc2b5e06cb959f Mon Sep 17 00:00:00 2001 From: Weber Date: Thu, 14 May 2026 14:41:10 -0700 Subject: [PATCH 7/7] version update in Description --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index fd5aec4..c804fdb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: StreamCatTools Type: Package Title: 'StreamCatTools' -Version: 0.10.0 +Version: 0.11.0 Authors@R: c(person(given = "Marc", family = "Weber", role = c("aut", "cre"),