From ce06351c3a6803deb3d2b8732ce79ad75cbe91e0 Mon Sep 17 00:00:00 2001
From: Fran Barton <francis.barton@nhs.net>
Date: Thu, 26 Mar 2026 15:11:27 +0000
Subject: [PATCH] :fire: Remove `path`, `info` and `ext` arguments from
 `read_azure_*` functions. Should close #98 and closes #95

---
 R/read_azure_files.R      | 108 ++++++--------------------------------
 man/check_blob_exists.Rd  |  34 ------------
 man/read_azure_csv.Rd     |  19 +------
 man/read_azure_file.Rd    |  22 +-------
 man/read_azure_json.Rd    |  19 +------
 man/read_azure_jsongz.Rd  |  19 +------
 man/read_azure_parquet.Rd |  24 +--------
 man/read_azure_rds.Rd     |  19 +------
 8 files changed, 29 insertions(+), 235 deletions(-)
 delete mode 100644 man/check_blob_exists.Rd

diff --git a/R/read_azure_files.R b/R/read_azure_files.R
index 70c5d5e..5c89b43 100644
--- a/R/read_azure_files.R
+++ b/R/read_azure_files.R
@@ -1,35 +1,16 @@
 #' Read a parquet file from Azure storage
 #'
 #' @param container An Azure container object, as returned by [get_container]
-#' @param file The name of the file to be read, as a string. NB The file
-#'  extension does not need to be included (though it can be). The function
-#'  will error if multiple files are somehow matched.
-#' @param path The path to the directory where `file` is located, as a string.
-#'  Only needed if `file` does not already contain its full path. If file is
-#'  just a file name with no path, then provide the path to the directory here.
-#'  This must be the full path to the file location, as the function will not
-#'  search into subdirectories recursively. Set to `"/"` (the root of the
-#'  container) by default.
-#' @param info Boolean. Whether to print user feedback about the file that is
-#'  being read. Useful for checking the function is doing what is expected, but
-#'  can be turned off with `FALSE`. Can be set persistently with the option
-#'  "azkit.info". If `NULL` then it will default to the value of
-#'  [rlang::is_interactive] (that is, `TRUE` for interactive sessions).
+#' @param file string The path to the file to be read.
 #' @param ... optional arguments to be passed through to [arrow::read_parquet]
 #' @returns A tibble
 #' @examples \dontrun{
-#'   # if a full filepath is available then path can be ignored
 #'   read_azure_parquet(cont, "data/folder/path/1.parquet")
-#'   # you can provide a filename without the '.parquet' extension
-#'   # if you wish to use this partial file name matching it is probably easier
-#'   # to provide a 'path'
-#'   read_azure_parquet(cont, "case_details", "storage/parquet/2025/06/29")
 #' }
 #' @export
-read_azure_parquet <- function(container, file, path = "/", info = NULL, ...) {
-  check_blob_exists(container, file, "parquet", info, path) |>
-    # using `dest = NULL` means pass the data through as a raw vector
-    AzureStor::download_blob(container, src = _, dest = NULL) |>
+read_azure_parquet <- function(container, file, ...) {
+  # using `dest = NULL` means pass the data through as a raw vector
+  AzureStor::download_blob(container, file, dest = NULL) |>
     arrow::read_parquet(...)
 }
 
@@ -41,10 +22,9 @@ read_azure_parquet <- function(container, file, path = "/", info = NULL, ...) {
 #'  [yyjsonr::read_json_raw]
 #' @returns A list
 #' @export
-read_azure_json <- function(container, file, path = "/", info = NULL, ...) {
-  check_blob_exists(container, file, "json", info, path) |>
-    # using `dest = NULL` means pass the data through as a raw vector
-    AzureStor::download_blob(container, src = _, dest = NULL) |>
+read_azure_json <- function(container, file, ...) {
+  # using `dest = NULL` means pass the data through as a raw vector
+  AzureStor::download_blob(container, file, dest = NULL) |>
     yyjsonr::read_json_raw(...)
 }
 
@@ -56,13 +36,12 @@ read_azure_json <- function(container, file, path = "/", info = NULL, ...) {
 #'  [yyjsonr::read_json_file]
 #' @returns A list
 #' @export
-read_azure_jsongz <- function(container, file, path = "/", info = NULL, ...) {
-  full_path <- check_blob_exists(container, file, "json.gz", info, path)
+read_azure_jsongz <- function(container, file, ...) {
   dl <- withr::local_tempfile(
-    pattern = tools::file_path_sans_ext(basename(full_path), TRUE),
+    pattern = tools::file_path_sans_ext(basename(file), TRUE),
     fileext = "json.gz"
   )
-  AzureStor::download_blob(container, src = full_path, dest = dl)
+  AzureStor::download_blob(container, file, dest = dl)
   yyjsonr::read_json_file(dl, ...)
 }
 
@@ -78,12 +57,11 @@ read_azure_jsongz <- function(container, file, path = "/", info = NULL, ...) {
 #   If nothing is provided here, the compression type will be set to "none".
 #' @returns The data object that was stored in the rds file
 #' @export
-read_azure_rds <- function(container, file, path = "/", info = NULL, ...) {
+read_azure_rds <- function(container, file, ...) {
   # If the user doesn't specify a (de)compression type with `type` in `...`, we
   # will set a `type` of "none", as this seems to be the standard on SU Azure
   dots <- rlang::dots_list(..., type = "none", .homonyms = "first")
-  blob <- check_blob_exists(container, file, "rds", info, path)
-  rlang::inject(AzureStor::storage_load_rds(container, blob, !!!dots))
+  rlang::inject(AzureStor::storage_load_rds(container, file, !!!dots))
 }
 
 
@@ -93,71 +71,19 @@ read_azure_rds <- function(container, file, path = "/", info = NULL, ...) {
 #' @param ... optional arguments to be passed through to [readr::read_delim]
 #' @returns A tibble
 #' @export
-read_azure_csv <- function(container, file, path = "/", info = NULL, ...) {
-  check_blob_exists(container, file, "csv", info, path) |>
-    AzureStor::storage_read_csv(container, file = _, ...)
+read_azure_csv <- function(container, file, ...) {
+  AzureStor::storage_read_csv(container, file, ...)
 }
 
 
 #' Read any file from Azure storage
 #'
 #' @inheritParams read_azure_parquet
-#' @param ext If a custom extension needs to be supplied, you can specify it
-#'  here. If `NULL`, the default, the extension of `file` will be used
 #' @param ... optional arguments to be passed through to
 #'  [AzureStor::download_blob]
 #' @returns A raw data stream
 #' @export
-read_azure_file <- function(
-  container,
-  file,
-  path = "/",
-  info = NULL,
-  ext = NULL,
-  ...
-) {
-  ext <- ext %||% tools::file_ext(file)
-  check_blob_exists(container, file, ext, info, path) |>
-    # using `dest = NULL` means pass the data through as a raw vector
-    AzureStor::download_blob(container, src = _, dest = NULL, ...)
-}
-
-
-#' Ensures that the filepath for the file to read exists
-#'
-#' @inheritParams read_azure_parquet
-#' @param ext The standard file extension for the file type, e.g. "json"
-#' @keywords internal
-check_blob_exists <- function(container, file, ext, info, path) {
-  stopifnot("no container found" = inherits(container, "blob_container"))
-  path <- if (path %in% c("", "/")) "" else path
-  stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
-  dir_name <- if (dirname(file) == ".") "" else dirname(file)
-  # Potentially the user could provide a partial file path in `path` and a
-  # further sub-directory as part of `file`. This handles that eventuality,
-  # though this usage pattern should be quite rare!
-  dpath <- file.path(path, dir_name)
-  fname <- basename(file)
-  if (nzchar(ext) && !gregg(fname, "\\.{ext}$")) {
-    fname <- glue::glue("{fname}.{ext}")
-  }
-  # remove duplicate slashes and any initial slashes
-  file_path <- sub("^/", "", gsub("/+", "/", file.path(dpath, fname)))
-
-  filepath_out <- AzureStor::list_blobs(container, dpath, recursive = FALSE) |>
-    dplyr::filter(dplyr::if_any("name", \(x) x == {{ file_path }})) |>
-    dplyr::pull("name")
-
-  msg1 <- ct_error_msg("no matching {ext} file found")
-  msg2 <- cst_error_msg("multiple matching {ext} files found")
-  check_that(filepath_out, \(x) length(x) > 0, msg1) # check length > 0
-  check_scalar_type(filepath_out, "character", msg2) # check length == 1
-
-  info_option <- getOption("azkit.info")
-  stopifnot(rlang::is_scalar_logical(info) || is.null(info))
-  stopifnot(rlang::is_scalar_logical(info_option) || is.null(info_option))
-  if (info %||% info_option %||% rlang::is_interactive()) {
-    cli::cli_alert_info("File {.val {filepath_out}} will be read in")
-  }
-  filepath_out
+read_azure_file <- function(container, file, ...) {
+  # using `dest = NULL` means pass the data through as a raw vector
+  AzureStor::download_blob(container, file, dest = NULL, ...)
 }
diff --git a/man/check_blob_exists.Rd b/man/check_blob_exists.Rd
deleted file mode 100644
index 472231a..0000000
--- a/man/check_blob_exists.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/read_azure_files.R
-\name{check_blob_exists}
-\alias{check_blob_exists}
-\title{Ensures that the filepath for the file to read exists}
-\usage{
-check_blob_exists(container, file, ext, info, path)
-}
-\arguments{
-\item{container}{An Azure container object, as returned by \link{get_container}}
-
-\item{file}{The name of the file to be read, as a string. NB The file
-extension does not need to be included (though it can be). The function
-will error if multiple files are somehow matched.}
-
-\item{ext}{The standard file extension for the file type, e.g. "json"}
-
-\item{info}{Boolean. Whether to print user feedback about the file that is
-being read. Useful for checking the function is doing what is expected, but
-can be turned off with \code{FALSE}. Can be set persistently with the option
-"azkit.info". If \code{NULL} then it will default to the value of
-\link[rlang:is_interactive]{rlang::is_interactive} (that is, \code{TRUE} for interactive sessions).}
-
-\item{path}{The path to the directory where \code{file} is located, as a string.
-Only needed if \code{file} does not already contain its full path. If file is
-just a file name with no path, then provide the path to the directory here.
-This must be the full path to the file location, as the function will not
-search into subdirectories recursively. Set to \code{"/"} (the root of the
-container) by default.}
-}
-\description{
-Ensures that the filepath for the file to read exists
-}
-\keyword{internal}
diff --git a/man/read_azure_csv.Rd b/man/read_azure_csv.Rd
index 29f55ed..d4ef7da 100644
--- a/man/read_azure_csv.Rd
+++ b/man/read_azure_csv.Rd
@@ -4,27 +4,12 @@
 \alias{read_azure_csv}
 \title{Read a csv file from Azure storage}
 \usage{
-read_azure_csv(container, file, path = "/", info = NULL, ...)
+read_azure_csv(container, file, ...)
 }
 \arguments{
 \item{container}{An Azure container object, as returned by \link{get_container}}
 
-\item{file}{The name of the file to be read, as a string. NB The file
-extension does not need to be included (though it can be). The function
-will error if multiple files are somehow matched.}
-
-\item{path}{The path to the directory where \code{file} is located, as a string.
-Only needed if \code{file} does not already contain its full path. If file is
-just a file name with no path, then provide the path to the directory here.
-This must be the full path to the file location, as the function will not
-search into subdirectories recursively. Set to \code{"/"} (the root of the
-container) by default.}
-
-\item{info}{Boolean. Whether to print user feedback about the file that is
-being read. Useful for checking the function is doing what is expected, but
-can be turned off with \code{FALSE}. Can be set persistently with the option
-"azkit.info". If \code{NULL} then it will default to the value of
-\link[rlang:is_interactive]{rlang::is_interactive} (that is, \code{TRUE} for interactive sessions).}
+\item{file}{string The path to the file to be read.}
 
 \item{...}{optional arguments to be passed through to \link[readr:read_delim]{readr::read_delim}}
 }
diff --git a/man/read_azure_file.Rd b/man/read_azure_file.Rd
index eaa51ea..b61ce53 100644
--- a/man/read_azure_file.Rd
+++ b/man/read_azure_file.Rd
@@ -4,30 +4,12 @@
 \alias{read_azure_file}
 \title{Read any file from Azure storage}
 \usage{
-read_azure_file(container, file, path = "/", info = NULL, ext = NULL, ...)
+read_azure_file(container, file, ...)
 }
 \arguments{
 \item{container}{An Azure container object, as returned by \link{get_container}}
 
-\item{file}{The name of the file to be read, as a string. NB The file
-extension does not need to be included (though it can be). The function
-will error if multiple files are somehow matched.}
-
-\item{path}{The path to the directory where \code{file} is located, as a string.
-Only needed if \code{file} does not already contain its full path. If file is
-just a file name with no path, then provide the path to the directory here.
-This must be the full path to the file location, as the function will not
-search into subdirectories recursively. Set to \code{"/"} (the root of the
-container) by default.}
-
-\item{info}{Boolean. Whether to print user feedback about the file that is
-being read. Useful for checking the function is doing what is expected, but
-can be turned off with \code{FALSE}. Can be set persistently with the option
-"azkit.info". If \code{NULL} then it will default to the value of
-\link[rlang:is_interactive]{rlang::is_interactive} (that is, \code{TRUE} for interactive sessions).}
-
-\item{ext}{If a custom extension needs to be supplied, you can specify it
-here. If \code{NULL}, the default, the extension of \code{file} will be used}
+\item{file}{string The path to the file to be read.}
 
 \item{...}{optional arguments to be passed through to
 \link[AzureStor:blob]{AzureStor::download_blob}}
diff --git a/man/read_azure_json.Rd b/man/read_azure_json.Rd
index 2df1630..0f57df3 100644
--- a/man/read_azure_json.Rd
+++ b/man/read_azure_json.Rd
@@ -4,27 +4,12 @@
 \alias{read_azure_json}
 \title{Read a json file from Azure storage}
 \usage{
-read_azure_json(container, file, path = "/", info = NULL, ...)
+read_azure_json(container, file, ...)
 }
 \arguments{
 \item{container}{An Azure container object, as returned by \link{get_container}}
 
-\item{file}{The name of the file to be read, as a string. NB The file
-extension does not need to be included (though it can be). The function
-will error if multiple files are somehow matched.}
-
-\item{path}{The path to the directory where \code{file} is located, as a string.
-Only needed if \code{file} does not already contain its full path. If file is
-just a file name with no path, then provide the path to the directory here.
-This must be the full path to the file location, as the function will not
-search into subdirectories recursively. Set to \code{"/"} (the root of the
-container) by default.}
-
-\item{info}{Boolean. Whether to print user feedback about the file that is
-being read. Useful for checking the function is doing what is expected, but
-can be turned off with \code{FALSE}. Can be set persistently with the option
-"azkit.info". If \code{NULL} then it will default to the value of
-\link[rlang:is_interactive]{rlang::is_interactive} (that is, \code{TRUE} for interactive sessions).}
+\item{file}{string The path to the file to be read.}
 
 \item{...}{optional arguments to be passed through to
 \link[yyjsonr:read_json_raw]{yyjsonr::read_json_raw}}
diff --git a/man/read_azure_jsongz.Rd b/man/read_azure_jsongz.Rd
index b44a541..f4c4251 100644
--- a/man/read_azure_jsongz.Rd
+++ b/man/read_azure_jsongz.Rd
@@ -4,27 +4,12 @@
 \alias{read_azure_jsongz}
 \title{Read a json.gz file from Azure storage}
 \usage{
-read_azure_jsongz(container, file, path = "/", info = NULL, ...)
+read_azure_jsongz(container, file, ...)
 }
 \arguments{
 \item{container}{An Azure container object, as returned by \link{get_container}}
 
-\item{file}{The name of the file to be read, as a string. NB The file
-extension does not need to be included (though it can be). The function
-will error if multiple files are somehow matched.}
-
-\item{path}{The path to the directory where \code{file} is located, as a string.
-Only needed if \code{file} does not already contain its full path. If file is
-just a file name with no path, then provide the path to the directory here.
-This must be the full path to the file location, as the function will not
-search into subdirectories recursively. Set to \code{"/"} (the root of the
-container) by default.}
-
-\item{info}{Boolean. Whether to print user feedback about the file that is
-being read. Useful for checking the function is doing what is expected, but
-can be turned off with \code{FALSE}. Can be set persistently with the option
-"azkit.info". If \code{NULL} then it will default to the value of
-\link[rlang:is_interactive]{rlang::is_interactive} (that is, \code{TRUE} for interactive sessions).}
+\item{file}{string The path to the file to be read.}
 
 \item{...}{optional arguments to be passed through to
 \link[yyjsonr:read_json_file]{yyjsonr::read_json_file}}
diff --git a/man/read_azure_parquet.Rd b/man/read_azure_parquet.Rd
index 2bee797..bb47808 100644
--- a/man/read_azure_parquet.Rd
+++ b/man/read_azure_parquet.Rd
@@ -4,27 +4,12 @@
 \alias{read_azure_parquet}
 \title{Read a parquet file from Azure storage}
 \usage{
-read_azure_parquet(container, file, path = "/", info = NULL, ...)
+read_azure_parquet(container, file, ...)
 }
 \arguments{
 \item{container}{An Azure container object, as returned by \link{get_container}}
 
-\item{file}{The name of the file to be read, as a string. NB The file
-extension does not need to be included (though it can be). The function
-will error if multiple files are somehow matched.}
-
-\item{path}{The path to the directory where \code{file} is located, as a string.
-Only needed if \code{file} does not already contain its full path. If file is
-just a file name with no path, then provide the path to the directory here.
-This must be the full path to the file location, as the function will not
-search into subdirectories recursively. Set to \code{"/"} (the root of the
-container) by default.}
-
-\item{info}{Boolean. Whether to print user feedback about the file that is
-being read. Useful for checking the function is doing what is expected, but
-can be turned off with \code{FALSE}. Can be set persistently with the option
-"azkit.info". If \code{NULL} then it will default to the value of
-\link[rlang:is_interactive]{rlang::is_interactive} (that is, \code{TRUE} for interactive sessions).}
+\item{file}{string The path to the file to be read.}
 
 \item{...}{optional arguments to be passed through to \link[arrow:read_parquet]{arrow::read_parquet}}
 }
@@ -36,11 +21,6 @@ Read a parquet file from Azure storage
 }
 \examples{
 \dontrun{
-  # if a full filepath is available then path can be ignored
   read_azure_parquet(cont, "data/folder/path/1.parquet")
-  # you can provide a filename without the '.parquet' extension
-  # if you wish to use this partial file name matching it is probably easier
-  # to provide a 'path'
-  read_azure_parquet(cont, "case_details", "storage/parquet/2025/06/29")
 }
 }
diff --git a/man/read_azure_rds.Rd b/man/read_azure_rds.Rd
index 8f3e0e4..d74d13a 100644
--- a/man/read_azure_rds.Rd
+++ b/man/read_azure_rds.Rd
@@ -4,27 +4,12 @@
 \alias{read_azure_rds}
 \title{Read an rds file from Azure storage}
 \usage{
-read_azure_rds(container, file, path = "/", info = NULL, ...)
+read_azure_rds(container, file, ...)
 }
 \arguments{
 \item{container}{An Azure container object, as returned by \link{get_container}}
 
-\item{file}{The name of the file to be read, as a string. NB The file
-extension does not need to be included (though it can be). The function
-will error if multiple files are somehow matched.}
-
-\item{path}{The path to the directory where \code{file} is located, as a string.
-Only needed if \code{file} does not already contain its full path. If file is
-just a file name with no path, then provide the path to the directory here.
-This must be the full path to the file location, as the function will not
-search into subdirectories recursively. Set to \code{"/"} (the root of the
-container) by default.}
-
-\item{info}{Boolean. Whether to print user feedback about the file that is
-being read. Useful for checking the function is doing what is expected, but
-can be turned off with \code{FALSE}. Can be set persistently with the option
-"azkit.info". If \code{NULL} then it will default to the value of
-\link[rlang:is_interactive]{rlang::is_interactive} (that is, \code{TRUE} for interactive sessions).}
+\item{file}{string The path to the file to be read.}
 
 \item{...}{optional arguments to be passed through to
 \link[AzureStor:storage_save]{AzureStor::storage_load_rds}. For example, a compression type (one of