From c354ec8188a880646e3f258e62e80f954cdaf3e8 Mon Sep 17 00:00:00 2001
From: Fran Barton
Date: Thu, 26 Mar 2026 23:12:23 +0000
Subject: [PATCH] :nail_care: Improve list_files function

Should close #108
---
Review notes (text between the "---" separator and the diff is commentary,
not part of the commit message):
* The "not found" and "no files found" messages now interpolate `dir`; they
  still referred to `path`, which no longer exists once the argument is
  renamed.
* The extension pattern is now an escaped dot followed by `ext` with any
  leading dots removed. The earlier gsub() replacement "\\." yields a bare
  ".", and an `ext` given without a leading dot produced a pattern with no
  dot at all, so `ext = "csv"` also matched names merely ending in "csv".

 R/list_files.R    | 52 ++++++++++++++++++++---------------------------
 man/list_files.Rd | 27 +++++++++++-------------
 2 files changed, 34 insertions(+), 45 deletions(-)

diff --git a/R/list_files.R b/R/list_files.R
index 99cad18..909a9f7 100644
--- a/R/list_files.R
+++ b/R/list_files.R
@@ -1,26 +1,23 @@
 #' List files in a container
 #'
-#' Recursively (or not, if desired) lists all files found in a container. Search
-#' can be restricted to a particular 'subdirectory' of the container, and/or
-#' to files with a specific extension. The function assumes that all file names
-#' end with a ".ext" extension of some sort.
+#' Lists all files (recursively, if desired) found in a container within a
+#' given directory (`dir`). The search can be restricted to files with a
+#' specific extension.
 #'
 #' The function does not support filtering by file name, only by file extension.
 #'
 #' The returned file list (character vector) contains the full paths to the
-#' files, ready to be passed perhaps to a `read_azure_*` function, or further
-#' filtered by you. If you just want the names of the files without the folder
-#' path, use [basename()] to extract these.
+#' files, ready to be passed perhaps to a `read_azure_*` function, or filtered
+#' further. If you just want the names of the files without the folder path,
+#' use [basename] to extract these.
 #'
 #' @inheritParams read_azure_parquet
-#' @param path (optional) subdirectory of the container to list files within.
-#'   `""` (the root folder of the container) by default
+#' @param dir (optional) The directory of the container to list files within.
+#'   `""` (the root directory of the container) by default
 #' @param ext (optional) A string giving the extension of a particular file type
-#'   you want to restrict the list to. No need to include the initial ".". The
-#'   default, `""`, means no filtering by file extension will be applied. Can be
-#'   a regular expression.
-#' @param recursive A Boolean value: whether to list files recursively. `TRUE`
-#'   by default
+#'   to restrict the list to. No need to include the initial ".". The default,
+#'   `""`, means no filtering by file extension will be applied.
+#' @param recursive logical: whether to list files recursively. Default `FALSE`
 #'
 #' @importFrom rlang .data
 #' @returns A vector of file names, or an empty character vector if none found
@@ -28,28 +25,23 @@
 #' list_files(get_container("example"), ext = "csv")
 #' }
 #' @export
-list_files <- function(container, path = "", ext = "", recursive = TRUE) {
-  stopifnot(rlang::is_character(c(path, ext), 2))
+list_files <- function(container, dir = "", ext = "", recursive = FALSE) {
+  stopifnot(rlang::is_character(c(dir, ext), 2))
   stopifnot(rlang::is_bool(recursive))
-  pnf_msg <- ct_error_msg("Path {.val {path}} not found")
-  check_that(path, \(x) AzureStor::blob_dir_exists(container, x), pnf_msg)
+  pnf_msg <- ct_error_msg("Path {.val {dir}} not found")
+  check_that(dir, \(x) AzureStor::blob_dir_exists(container, x), pnf_msg)
 
-  tbl <- AzureStor::list_blobs(container, path, recursive = recursive)
-  if (nrow(tbl) > 0) {
-    ext_rx <- if (nzchar(ext)) sub("^\\.+", "", ext) else ".*" # nolint
-    tbl <- tbl |>
-      dplyr::filter(!.data[["isdir"]] & gregg(.data[["name"]], "\\.{ext_rx}$"))
-  }
+  ext_rx <- if (nzchar(ext)) paste0("\\.", sub("^\\.+", "", ext)) else ".*" # nolint
+  tbl <- AzureStor::list_blobs(container, dir, recursive = recursive) |>
+    dplyr::filter(!.data[["isdir"]] & gregg(.data[["name"]], "{ext_rx}$"))
 
-  # A zero-row tbl can result if `path` is initially empty, or via the filter
-  # step above. We handle this the same way, no matter which route led here.
+  # A zero-row tbl can result if the directory is actually empty, or via
+  # filtering out. We handle this the same way no matter which route led here.
   if (nrow(tbl) == 0) {
     fix_path <- \(p) sub("^/+$", "", sub("^([^/])(.*)", "/\\1\\2", p)) # nolint
     ext <- if (nzchar(ext)) paste0(" ", ext)
-    msg <- "No{ext} files found in {.val [{container$name}]:{fix_path(path)}}"
-    if (rlang::is_interactive()) {
-      cli::cli_alert_info(msg)
-    }
+    msg <- "No{ext} files found in {.val [{container$name}]:{fix_path(dir)}}"
+    cli::cli_alert_info(msg)
     invisible(character(0))
   } else {
     tbl[["name"]]
diff --git a/man/list_files.Rd b/man/list_files.Rd
index 12132e8..9b0214f 100644
--- a/man/list_files.Rd
+++ b/man/list_files.Rd
@@ -4,38 +4,35 @@
 \alias{list_files}
 \title{List files in a container}
 \usage{
-list_files(container, path = "", ext = "", recursive = TRUE)
+list_files(container, dir = "", ext = "", recursive = FALSE)
 }
 \arguments{
 \item{container}{An Azure container object, as returned by \link{get_container}}
 
-\item{path}{(optional) subdirectory of the container to list files within.
-\code{""} (the root folder of the container) by default}
+\item{dir}{(optional) The directory of the container to list files within.
+\code{""} (the root directory of the container) by default}
 
 \item{ext}{(optional) A string giving the extension of a particular file type
-you want to restrict the list to. No need to include the initial ".". The
-default, \code{""}, means no filtering by file extension will be applied. Can be
-a regular expression.}
+to restrict the list to. No need to include the initial ".". The default,
+\code{""}, means no filtering by file extension will be applied.}
 
-\item{recursive}{A Boolean value: whether to list files recursively. \code{TRUE}
-by default}
+\item{recursive}{logical: whether to list files recursively. Default \code{FALSE}}
 }
 \value{
 A vector of file names, or an empty character vector if none found
 }
 \description{
-Recursively (or not, if desired) lists all files found in a container. Search
-can be restricted to a particular 'subdirectory' of the container, and/or
-to files with a specific extension. The function assumes that all file names
-end with a ".ext" extension of some sort.
+Lists all files (recursively, if desired) found in a container within a
+given directory (\code{dir}). The search can be restricted to files with a
+specific extension.
 }
 \details{
 The function does not support filtering by file name, only by file extension.
 
 The returned file list (character vector) contains the full paths to the
-files, ready to be passed perhaps to a \verb{read_azure_*} function, or further
-filtered by you. If you just want the names of the files without the folder
-path, use \code{\link[=basename]{basename()}} to extract these.
+files, ready to be passed perhaps to a \verb{read_azure_*} function, or filtered
+further. If you just want the names of the files without the folder path,
+use \link{basename} to extract these.
 }
 \examples{
 \dontrun{