From 4aff1f99ed251e9d66bcf83df4b80bbac4c407f3 Mon Sep 17 00:00:00 2001
From: Tom Jemmett <thomas.jemmett@nhs.net>
Date: Fri, 13 Mar 2026 16:54:12 +0000
Subject: [PATCH] runs air format

---
 R/get_auth_token.R                     | 550 ++++++++++----------
 R/get_container.R                      | 166 +++---
 R/list_files.R                         | 114 ++--
 R/read_azure_files.R                   | 326 ++++++------
 R/read_azure_table.R                   |  78 +--
 tests/testthat/test-read_azure_files.R | 686 ++++++++++++-------------
 6 files changed, 960 insertions(+), 960 deletions(-)

diff --git a/R/get_auth_token.R b/R/get_auth_token.R
index 9e8a0b8..3a0e7a5 100644
--- a/R/get_auth_token.R
+++ b/R/get_auth_token.R
@@ -1,275 +1,275 @@
-#' Get Azure authentication token
-#'
-#' This function retrieves an Azure token for a specified resource.
-#'
-#' If the environment variables `AZ_TENANT_ID`, `AZ_CLIENT_ID` and
-#'  `AZ_APP_SECRET` are all set, it will try to use these to return a token.
-#'
-#' Otherwise it will try to get a managed token from a managed resource such as
-#'  Azure VM or Azure App Service.
-#'
-#' If neither of these approaches has returned a token, it will try to retrieve
-#'  a user token using the provided parameters, requiring the user to have
-#'  authenticated using their device. If `force_refresh` is set to `TRUE`, a
-#'  fresh web authentication process should be launched. Otherwise it will
-#'  attempt to use a cached token matching the given `resource`, `tenant` and
-#'  `aad_version`.
-#'
-#' @param resource For v1, a simple URL such as `"https://storage.azure.com/"`
-#'  should be supplied.For v2, a vector specifying the URL of the Azure resource
-#'  for which the token is requested as well as any desired scopes. See
-#'  [AzureAuth::get_azure_token] for details. Use [generate_resource]
-#'  to help provide an appropriate string or vector. The values default to
-#'  `c("https://storage.azure.com/.default", "openid", "offline_access")`.
-#'  If setting version to 2, ensure that the `aad_version` argument is also set
-#'  to 2. Both are set to use AAD version 1 by default.
-#' @param tenant A string specifying the Azure tenant. Defaults to
-#'  `"common"`. See [AzureAuth::get_azure_token] for other values.
-#' @param client_id A string specifying the application ID (client ID). If
-#'  `NULL`, (the default) the function attempts to obtain the client ID from the
-#'  Azure Resource Manager token, or prompts the user to log in to obtain it.
-#' @param auth_method A string specifying the authentication method. Defaults to
-#'  `"authorization_code"`. See [AzureAuth::get_azure_token] for other values.
-#' @param aad_version Numeric. The AAD version, either 1 or 2 (1 by default)
-#' @param force_refresh Boolean: whether to use a stored token if available
-#'  (`FALSE`, the default), or try to obtain a new one from Azure (`TRUE`).
-#'  This may be useful if you wish to generate a new token with the same
-#'  `resource` value as an existing token, but a different `tenant` or
-#'  `auth_method`. Note that you can also try using [refresh_token] which will
-#'  cause an existing token to refresh itself, without obtaining a new token
-#'  from Azure via online reauthentication
-#' @param ... Optional arguments (`token_args` or `use_cache`) to be passed on
-#'  to [AzureAuth::get_managed_token] or [AzureAuth::get_azure_token].
-#'
-#' @returns An Azure token object
-#' @examples
-#' \dontrun{
-#' # Get a token for the default resource
-#' token <- get_auth_token()
-#'
-#' # Force generation of a new token via online reauthentication
-#' token <- get_auth_token(force_refresh = TRUE)
-#'
-#' # Get a token for a specific resource and tenant
-#' token <- get_auth_token(
-#'  resource = "https://graph.microsoft.com",
-#'  tenant = "my-tenant-id"
-#' )
-#'
-#' # Get a token using a specific app ID
-#' token <- get_auth_token(client_id = "my-app-id")
-#' }
-#' @export
-get_auth_token <- function(
-  resource = generate_resource(),
-  tenant = "common",
-  client_id = NULL,
-  auth_method = "authorization_code",
-  aad_version = 1,
-  force_refresh = FALSE,
-  ...
-) {
-  aad_msg <- "Invalid {.arg aad_version} variable supplied (must be 1 or 2)"
-  aad_version <- check_that(aad_version, \(x) x %in% seq(2), aad_msg)
-
-  safely_get_token <- \(...) purrr::safely(AzureAuth::get_azure_token)(...)
-  get_azure_token <- purrr::partial(
-    safely_get_token,
-    resource = resource,
-    version = aad_version
-  )
-  possibly_get_mtk <- \(...) purrr::possibly(AzureAuth::get_managed_token)(...)
-
-  dots <- rlang::list2(...)
-  # If the user specifies force_refresh = TRUE we turn off `use_cache`,
-  # otherwise we leave `use_cache` as it is (or as `NULL`, its default value)
-  use_cached <- !force_refresh && (dots[["use_cache"]] %||% TRUE)
-  dots <- rlang::dots_list(!!!dots, use_cache = use_cached, .homonyms = "last")
-
-  # We have 4 approaches to get a token, depending on the context
-  # 1. Use environment variables if all three are set
-  token_resp <- rlang::inject(try_token_from_vars(get_azure_token, !!!dots))
-  token <- token_resp[["result"]]
-  token_error <- token_resp[["error"]]
-
-  # 2. Try to get a managed token (for example on Azure VM, App Service)
-  if (is.null(token)) {
-    token <- rlang::inject(possibly_get_mtk(resource, !!!dots))
-  }
-
-  # 3. If neither of those has worked, try to get an already stored user token
-  #    (unless `force_refresh` is on, in which case skip to option 4)
-  if (is.null(token) && use_cached) {
-    token <- match_cached_token(resource, tenant, aad_version)
-  }
-
-  # 4. If we still don't have a token, try to get a new one via reauthentication
-  if (is.null(token)) {
-    if (!force_refresh) {
-      cli::cli_alert_info("No matching cached token found: fetching new token")
-    }
-    client_id <- client_id %||% get_client_id()
-    token_resp <- rlang::inject(
-      get_azure_token(
-        tenant = tenant,
-        app = client_id,
-        auth_type = auth_method,
-        !!!dots
-      )
-    )
-    token <- token_resp[["result"]]
-    token_error <- token_error %||% token_resp[["error"]]
-  }
-
-  # Give some helpful feedback if the steps above have not succeeded
-  if (is.null(token) || length(token) == 0) {
-    cli::cli_alert_info("No authentication token was obtained.")
-    cli::cli_alert_info("Please check any variables you have supplied.")
-    cli::cli_alert_info(
-      "Alternatively, running {.fn AzureRMR::get_azure_login} or
-      {.fn AzureRMR::list_azure_tokens} may shed some light on the problem."
-    )
-    error_msg <- "{.fn get_auth_token}: No authentication token was obtained."
-    cli::cli_abort(as.character(token_error %||% error_msg))
-  } else {
-    if (aad_version == 2) {
-      check_that(token, AzureAuth::is_azure_v2_token, "Invalid token returned")
-    } else {
-      check_that(token, AzureAuth::is_azure_v1_token, "Invalid token returned")
-    }
-  }
-}
-
-
-#' Get token via app and secret environment variables
-#' Sub-routine for `get_auth_token()`
-#' @keywords internal
-#' @returns A list with elements `result` and `error`. If this method is
-#'  successful, the `result` element will contain a token.
-try_token_from_vars <- function(get_token_fun, ...) {
-  tenant_id_env <- Sys.getenv("AZ_TENANT_ID")
-  client_id_env <- Sys.getenv("AZ_CLIENT_ID")
-  client_secret <- Sys.getenv("AZ_APP_SECRET")
-
-  if (all(nzchar(c(tenant_id_env, client_id_env, client_secret)))) {
-    rlang::inject(
-      get_token_fun(
-        tenant = tenant_id_env,
-        app = client_id_env,
-        password = client_secret,
-        ...
-      )
-    )
-  } else {
-    list(result = NULL, error = NULL)
-  }
-}
-
-
-#' Find an already cached token that matches desired parameters
-#' Sub-routine for `get_auth_token()`
-#' @keywords internal
-#' @returns A token from local cache, or NULL if none matches
-match_cached_token <- function(resource, tenant, aad_version) {
-  # list tokens already locally cached
-  local_tokens <- AzureAuth::list_azure_tokens()
-  if (length(local_tokens) > 0) {
-    resources <- purrr::map(local_tokens, "resource")
-    scopes <- purrr::map(local_tokens, list("scope", 1))
-    resources <- purrr::map2_chr(resources, scopes, `%||%`)
-    tenants <- purrr::map_chr(local_tokens, "tenant")
-    versions <- purrr::map_int(local_tokens, "version")
-
-    resource_index <- gregg(resources, "^{resource[[1]]}")
-    tenant_index <- tenants == tenant
-    version_index <- versions == aad_version
-
-    # return a token matching `resource`, `tenant` and `version`, if any
-    token_index <- which(resource_index & tenant_index & version_index)[1]
-    if (!is.na(token_index)) local_tokens[[token_index]] else NULL
-  } else {
-    NULL
-  }
-}
-
-
-#' Sub-routine for `get_auth_token()`
-#'
-#' Pulled out mainly to tidy up the main function code a bit
-#' @keywords internal
-#' @returns A string (the client ID)
-get_client_id <- function() {
-  pluck_client_id <- function() {
-    suppressMessages(AzureRMR::get_azure_login()) |>
-      purrr::pluck("token", "client", "client_id")
-  }
-  possibly_pluck_client_id <- \(...) purrr::possibly(pluck_client_id)(...)
-  azure_cli_default_client_id <- "04b07795-8ddb-461a-bbee-02f9e1bf7b46"
-
-  client_id <- possibly_pluck_client_id()
-  # if that fails, prompt the user to log in, then try again...
-  if (is.null(client_id)) {
-    AzureRMR::create_azure_login()
-    # ...using the default Azure CLI client ID as a final fallback
-    client_id <- possibly_pluck_client_id() %||% azure_cli_default_client_id
-  }
-  client_id
-}
-
-
-#' Generate appropriate values for the `resource` parameter in [get_auth_token]
-#'
-#' A helper function to generate appropriate values. Ensure that the `version`
-#'  argument matches the `aad_version` argument to [get_auth_token].
-#'  It's unlikely that you will ever want to set `authorise` to `FALSE` but it's
-#'  here as an option since [AzureAuth::get_azure_token] supports it. Similarly,
-#'  you are likely to want to keep `refresh` turned on (this argument has no
-#'  effect on v1 tokens, it only applies to v2).
-#'
-#' @param version numeric. The AAD version, either 1 or 2 (1 by default)
-#' @param url The URL of the Azure resource host
-#' @param path For v2, the path designating the access scope
-#' @param authorise Boolean, whether to return a token with authorisation scope,
-#'  (TRUE, the default) or one that just provides authentication. You are
-#'  unlikely to want to turn this off
-#' @param refresh Boolean, applies to v2 tokens only, whether to return a token
-#'  that has a refresh token also supplied.
-#' @returns A scalar character, or (in most v2 situations) a character vector
-#' @export
-generate_resource <- function(
-  version = 1,
-  url = "https://storage.azure.com",
-  path = "/.default",
-  authorise = TRUE,
-  refresh = TRUE
-) {
-  stopifnot("version must be 1 or 2" = version %in% seq(2))
-  scopes <- if (refresh) c("openid", "offline_access") else "openid"
-  if (authorise) {
-    if (version == 2) {
-      c(paste0(url, path), scopes)
-    } else {
-      url
-    }
-  } else {
-    if (version == 2) {
-      scopes
-    } else {
-      ""
-    }
-  }
-}
-
-
-#' Use a token's internal `refresh()` method to refresh it
-#'
-#' This method avoids the need to refresh by re-authenticating online. It seems
-#'  like this only works with v1 tokens. v2 tokens always seem to refresh by
-#'  re-authenticating with Azure online. But v2 tokens _ought_ to refresh
-#'  automatically and not need manual refreshing. To instead generate a
-#'  completely fresh token, pass `use_cache = FALSE` or `force_refresh = TRUE`
-#'  to [get_auth_token].
-#' @param token An Azure authentication token
-#' @returns An Azure authentication token
-#' @export
-refresh_token <- \(token) token$refresh()
+#' Get Azure authentication token
+#'
+#' This function retrieves an Azure token for a specified resource.
+#'
+#' If the environment variables `AZ_TENANT_ID`, `AZ_CLIENT_ID` and
+#'  `AZ_APP_SECRET` are all set, it will try to use these to return a token.
+#'
+#' Otherwise it will try to get a managed token from a managed resource such as
+#'  Azure VM or Azure App Service.
+#'
+#' If neither of these approaches has returned a token, it will try to retrieve
+#'  a user token using the provided parameters, requiring the user to have
+#'  authenticated using their device. If `force_refresh` is set to `TRUE`, a
+#'  fresh web authentication process should be launched. Otherwise it will
+#'  attempt to use a cached token matching the given `resource`, `tenant` and
+#'  `aad_version`.
+#'
+#' @param resource For v1, a simple URL such as `"https://storage.azure.com/"`
+#'  should be supplied.For v2, a vector specifying the URL of the Azure resource
+#'  for which the token is requested as well as any desired scopes. See
+#'  [AzureAuth::get_azure_token] for details. Use [generate_resource]
+#'  to help provide an appropriate string or vector. The values default to
+#'  `c("https://storage.azure.com/.default", "openid", "offline_access")`.
+#'  If setting version to 2, ensure that the `aad_version` argument is also set
+#'  to 2. Both are set to use AAD version 1 by default.
+#' @param tenant A string specifying the Azure tenant. Defaults to
+#'  `"common"`. See [AzureAuth::get_azure_token] for other values.
+#' @param client_id A string specifying the application ID (client ID). If
+#'  `NULL`, (the default) the function attempts to obtain the client ID from the
+#'  Azure Resource Manager token, or prompts the user to log in to obtain it.
+#' @param auth_method A string specifying the authentication method. Defaults to
+#'  `"authorization_code"`. See [AzureAuth::get_azure_token] for other values.
+#' @param aad_version Numeric. The AAD version, either 1 or 2 (1 by default)
+#' @param force_refresh Boolean: whether to use a stored token if available
+#'  (`FALSE`, the default), or try to obtain a new one from Azure (`TRUE`).
+#'  This may be useful if you wish to generate a new token with the same
+#'  `resource` value as an existing token, but a different `tenant` or
+#'  `auth_method`. Note that you can also try using [refresh_token] which will
+#'  cause an existing token to refresh itself, without obtaining a new token
+#'  from Azure via online reauthentication
+#' @param ... Optional arguments (`token_args` or `use_cache`) to be passed on
+#'  to [AzureAuth::get_managed_token] or [AzureAuth::get_azure_token].
+#'
+#' @returns An Azure token object
+#' @examples
+#' \dontrun{
+#' # Get a token for the default resource
+#' token <- get_auth_token()
+#'
+#' # Force generation of a new token via online reauthentication
+#' token <- get_auth_token(force_refresh = TRUE)
+#'
+#' # Get a token for a specific resource and tenant
+#' token <- get_auth_token(
+#'  resource = "https://graph.microsoft.com",
+#'  tenant = "my-tenant-id"
+#' )
+#'
+#' # Get a token using a specific app ID
+#' token <- get_auth_token(client_id = "my-app-id")
+#' }
+#' @export
+get_auth_token <- function(
+  resource = generate_resource(),
+  tenant = "common",
+  client_id = NULL,
+  auth_method = "authorization_code",
+  aad_version = 1,
+  force_refresh = FALSE,
+  ...
+) {
+  aad_msg <- "Invalid {.arg aad_version} variable supplied (must be 1 or 2)"
+  aad_version <- check_that(aad_version, \(x) x %in% seq(2), aad_msg)
+
+  safely_get_token <- \(...) purrr::safely(AzureAuth::get_azure_token)(...)
+  get_azure_token <- purrr::partial(
+    safely_get_token,
+    resource = resource,
+    version = aad_version
+  )
+  possibly_get_mtk <- \(...) purrr::possibly(AzureAuth::get_managed_token)(...)
+
+  dots <- rlang::list2(...)
+  # If the user specifies force_refresh = TRUE we turn off `use_cache`,
+  # otherwise we leave `use_cache` as it is (or as `NULL`, its default value)
+  use_cached <- !force_refresh && (dots[["use_cache"]] %||% TRUE)
+  dots <- rlang::dots_list(!!!dots, use_cache = use_cached, .homonyms = "last")
+
+  # We have 4 approaches to get a token, depending on the context
+  # 1. Use environment variables if all three are set
+  token_resp <- rlang::inject(try_token_from_vars(get_azure_token, !!!dots))
+  token <- token_resp[["result"]]
+  token_error <- token_resp[["error"]]
+
+  # 2. Try to get a managed token (for example on Azure VM, App Service)
+  if (is.null(token)) {
+    token <- rlang::inject(possibly_get_mtk(resource, !!!dots))
+  }
+
+  # 3. If neither of those has worked, try to get an already stored user token
+  #    (unless `force_refresh` is on, in which case skip to option 4)
+  if (is.null(token) && use_cached) {
+    token <- match_cached_token(resource, tenant, aad_version)
+  }
+
+  # 4. If we still don't have a token, try to get a new one via reauthentication
+  if (is.null(token)) {
+    if (!force_refresh) {
+      cli::cli_alert_info("No matching cached token found: fetching new token")
+    }
+    client_id <- client_id %||% get_client_id()
+    token_resp <- rlang::inject(
+      get_azure_token(
+        tenant = tenant,
+        app = client_id,
+        auth_type = auth_method,
+        !!!dots
+      )
+    )
+    token <- token_resp[["result"]]
+    token_error <- token_error %||% token_resp[["error"]]
+  }
+
+  # Give some helpful feedback if the steps above have not succeeded
+  if (is.null(token) || length(token) == 0) {
+    cli::cli_alert_info("No authentication token was obtained.")
+    cli::cli_alert_info("Please check any variables you have supplied.")
+    cli::cli_alert_info(
+      "Alternatively, running {.fn AzureRMR::get_azure_login} or
+      {.fn AzureRMR::list_azure_tokens} may shed some light on the problem."
+    )
+    error_msg <- "{.fn get_auth_token}: No authentication token was obtained."
+    cli::cli_abort(as.character(token_error %||% error_msg))
+  } else {
+    if (aad_version == 2) {
+      check_that(token, AzureAuth::is_azure_v2_token, "Invalid token returned")
+    } else {
+      check_that(token, AzureAuth::is_azure_v1_token, "Invalid token returned")
+    }
+  }
+}
+
+
+#' Get token via app and secret environment variables
+#' Sub-routine for `get_auth_token()`
+#' @keywords internal
+#' @returns A list with elements `result` and `error`. If this method is
+#'  successful, the `result` element will contain a token.
+try_token_from_vars <- function(get_token_fun, ...) {
+  tenant_id_env <- Sys.getenv("AZ_TENANT_ID")
+  client_id_env <- Sys.getenv("AZ_CLIENT_ID")
+  client_secret <- Sys.getenv("AZ_APP_SECRET")
+
+  if (all(nzchar(c(tenant_id_env, client_id_env, client_secret)))) {
+    rlang::inject(
+      get_token_fun(
+        tenant = tenant_id_env,
+        app = client_id_env,
+        password = client_secret,
+        ...
+      )
+    )
+  } else {
+    list(result = NULL, error = NULL)
+  }
+}
+
+
+#' Find an already cached token that matches desired parameters
+#' Sub-routine for `get_auth_token()`
+#' @keywords internal
+#' @returns A token from local cache, or NULL if none matches
+match_cached_token <- function(resource, tenant, aad_version) {
+  # list tokens already locally cached
+  local_tokens <- AzureAuth::list_azure_tokens()
+  if (length(local_tokens) > 0) {
+    resources <- purrr::map(local_tokens, "resource")
+    scopes <- purrr::map(local_tokens, list("scope", 1))
+    resources <- purrr::map2_chr(resources, scopes, `%||%`)
+    tenants <- purrr::map_chr(local_tokens, "tenant")
+    versions <- purrr::map_int(local_tokens, "version")
+
+    resource_index <- gregg(resources, "^{resource[[1]]}")
+    tenant_index <- tenants == tenant
+    version_index <- versions == aad_version
+
+    # return a token matching `resource`, `tenant` and `version`, if any
+    token_index <- which(resource_index & tenant_index & version_index)[1]
+    if (!is.na(token_index)) local_tokens[[token_index]] else NULL
+  } else {
+    NULL
+  }
+}
+
+
+#' Sub-routine for `get_auth_token()`
+#'
+#' Pulled out mainly to tidy up the main function code a bit
+#' @keywords internal
+#' @returns A string (the client ID)
+get_client_id <- function() {
+  pluck_client_id <- function() {
+    suppressMessages(AzureRMR::get_azure_login()) |>
+      purrr::pluck("token", "client", "client_id")
+  }
+  possibly_pluck_client_id <- \(...) purrr::possibly(pluck_client_id)(...)
+  azure_cli_default_client_id <- "04b07795-8ddb-461a-bbee-02f9e1bf7b46"
+
+  client_id <- possibly_pluck_client_id()
+  # if that fails, prompt the user to log in, then try again...
+  if (is.null(client_id)) {
+    AzureRMR::create_azure_login()
+    # ...using the default Azure CLI client ID as a final fallback
+    client_id <- possibly_pluck_client_id() %||% azure_cli_default_client_id
+  }
+  client_id
+}
+
+
+#' Generate appropriate values for the `resource` parameter in [get_auth_token]
+#'
+#' A helper function to generate appropriate values. Ensure that the `version`
+#'  argument matches the `aad_version` argument to [get_auth_token].
+#'  It's unlikely that you will ever want to set `authorise` to `FALSE` but it's
+#'  here as an option since [AzureAuth::get_azure_token] supports it. Similarly,
+#'  you are likely to want to keep `refresh` turned on (this argument has no
+#'  effect on v1 tokens, it only applies to v2).
+#'
+#' @param version numeric. The AAD version, either 1 or 2 (1 by default)
+#' @param url The URL of the Azure resource host
+#' @param path For v2, the path designating the access scope
+#' @param authorise Boolean, whether to return a token with authorisation scope,
+#'  (TRUE, the default) or one that just provides authentication. You are
+#'  unlikely to want to turn this off
+#' @param refresh Boolean, applies to v2 tokens only, whether to return a token
+#'  that has a refresh token also supplied.
+#' @returns A scalar character, or (in most v2 situations) a character vector
+#' @export
+generate_resource <- function(
+  version = 1,
+  url = "https://storage.azure.com",
+  path = "/.default",
+  authorise = TRUE,
+  refresh = TRUE
+) {
+  stopifnot("version must be 1 or 2" = version %in% seq(2))
+  scopes <- if (refresh) c("openid", "offline_access") else "openid"
+  if (authorise) {
+    if (version == 2) {
+      c(paste0(url, path), scopes)
+    } else {
+      url
+    }
+  } else {
+    if (version == 2) {
+      scopes
+    } else {
+      ""
+    }
+  }
+}
+
+
+#' Use a token's internal `refresh()` method to refresh it
+#'
+#' This method avoids the need to refresh by re-authenticating online. It seems
+#'  like this only works with v1 tokens. v2 tokens always seem to refresh by
+#'  re-authenticating with Azure online. But v2 tokens _ought_ to refresh
+#'  automatically and not need manual refreshing. To instead generate a
+#'  completely fresh token, pass `use_cache = FALSE` or `force_refresh = TRUE`
+#'  to [get_auth_token].
+#' @param token An Azure authentication token
+#' @returns An Azure authentication token
+#' @export
+refresh_token <- \(token) token$refresh()
diff --git a/R/get_container.R b/R/get_container.R
index 31a5d3d..2370ede 100644
--- a/R/get_container.R
+++ b/R/get_container.R
@@ -1,83 +1,83 @@
-#' Get Azure storage container
-#'
-#' The environment variable "AZ_STORAGE_EP" should be set. This provides the URL
-#'  for the default Azure storage endpoint.
-#' Use [list_container_names] to get a list of available container names.
-#'
-#' @param container_name Name of the container as a string. `NULL` by default,
-#'  which means the function will look instead for a container name stored in
-#'  the environment variable "AZ_CONTAINER"
-#' @param token An Azure authentication token. If left as `NULL`, a token
-#'  returned by [get_auth_token] will be used
-#' @param endpoint_url An Azure endpoint URL. If left as `NULL`, the default,
-#'  the value of the environment variable "AZ_STORAGE_EP" will be used
-#' @param ... arguments to be passed through to [get_auth_token], if a token is
-#'  not already supplied
-#' @returns An Azure blob container (list object of class "blob_container")
-#' @export
-get_container <- function(
-  container_name = NULL,
-  token = NULL,
-  endpoint_url = NULL,
-  ...
-) {
-  msg1 <- paste0(
-    "{.var container_name} is empty. ",
-    "Did you forget to set an environment variable?"
-  )
-  msg2 <- paste0(
-    "{.var endpoint_url} is empty. ",
-    "Did you forget to set an environment variable?"
-  )
-  container_name <- (container_name %||% check_envvar("AZ_CONTAINER")) |>
-    check_nzchar(msg1)
-  endpoint_url <- (endpoint_url %||% check_envvar("AZ_STORAGE_EP")) |>
-    check_nzchar(msg2)
-  token <- token %||% get_auth_token(...)
-
-  get_azure_endpoint(token, endpoint_url) |>
-    AzureStor::blob_container(container_name)
-}
-
-
-#' Return a list of container names that are found at the endpoint
-#'
-#' @inheritParams get_container
-#' @returns A character vector of all container names found
-#' @export
-list_container_names <- function(token = NULL, endpoint_url = NULL, ...) {
-  token <- token %||% get_auth_token(...)
-  endpoint <- get_azure_endpoint(token, endpoint_url)
-  container_list <- AzureStor::list_blob_containers(endpoint)
-  stopifnot("no containers found" = length(container_list) >= 1L)
-  names(container_list)
-}
-
-
-#' Return an Azure "blob_endpoint"
-#'
-#' This function will return the endpoint specified in the environment variable
-#' "AZ_STORAGE_EP" by default
-#'
-#' @inheritParams get_container
-#' @returns An Azure blob endpoint (object of class "blob_endpoint")
-#' @keywords internal
-get_azure_endpoint <- function(token = NULL, endpoint_url = NULL, ...) {
-  token <- token %||% get_auth_token(...)
-  endpoint_url <- endpoint_url %||% check_envvar("AZ_STORAGE_EP")
-  AzureStor::blob_endpoint(endpoint_url, token = token)
-}
-
-
-#' Check that an environment variable exists
-#'
-#' The function prints a helpful error if the variable is not found, else
-#'  it returns the value of `Sys.getenv(x)`
-#'
-#' @param x the *name* of the environment variable to be found and checked
-#' @returns the value of the environment variable named in `x`
-#' @export
-check_envvar <- function(x) {
-  cst_msg <- cst_error_msg("The environment variable {.envvar {x}} is not set")
-  check_scalar_type(Sys.getenv(x, NA_character_), "string", cst_msg)
-}
+#' Get Azure storage container
+#'
+#' The environment variable "AZ_STORAGE_EP" should be set. This provides the URL
+#'  for the default Azure storage endpoint.
+#' Use [list_container_names] to get a list of available container names.
+#'
+#' @param container_name Name of the container as a string. `NULL` by default,
+#'  which means the function will look instead for a container name stored in
+#'  the environment variable "AZ_CONTAINER"
+#' @param token An Azure authentication token. If left as `NULL`, a token
+#'  returned by [get_auth_token] will be used
+#' @param endpoint_url An Azure endpoint URL. If left as `NULL`, the default,
+#'  the value of the environment variable "AZ_STORAGE_EP" will be used
+#' @param ... arguments to be passed through to [get_auth_token], if a token is
+#'  not already supplied
+#' @returns An Azure blob container (list object of class "blob_container")
+#' @export
+get_container <- function(
+  container_name = NULL,
+  token = NULL,
+  endpoint_url = NULL,
+  ...
+) {
+  msg1 <- paste0(
+    "{.var container_name} is empty. ",
+    "Did you forget to set an environment variable?"
+  )
+  msg2 <- paste0(
+    "{.var endpoint_url} is empty. ",
+    "Did you forget to set an environment variable?"
+  )
+  container_name <- (container_name %||% check_envvar("AZ_CONTAINER")) |>
+    check_nzchar(msg1)
+  endpoint_url <- (endpoint_url %||% check_envvar("AZ_STORAGE_EP")) |>
+    check_nzchar(msg2)
+  token <- token %||% get_auth_token(...)
+
+  get_azure_endpoint(token, endpoint_url) |>
+    AzureStor::blob_container(container_name)
+}
+
+
+#' Return a list of container names that are found at the endpoint
+#'
+#' @inheritParams get_container
+#' @returns A character vector of all container names found
+#' @export
+list_container_names <- function(token = NULL, endpoint_url = NULL, ...) {
+  token <- token %||% get_auth_token(...)
+  endpoint <- get_azure_endpoint(token, endpoint_url)
+  container_list <- AzureStor::list_blob_containers(endpoint)
+  stopifnot("no containers found" = length(container_list) >= 1L)
+  names(container_list)
+}
+
+
+#' Return an Azure "blob_endpoint"
+#'
+#' This function will return the endpoint specified in the environment variable
+#' "AZ_STORAGE_EP" by default
+#'
+#' @inheritParams get_container
+#' @returns An Azure blob endpoint (object of class "blob_endpoint")
+#' @keywords internal
+get_azure_endpoint <- function(token = NULL, endpoint_url = NULL, ...) {
+  token <- token %||% get_auth_token(...)
+  endpoint_url <- endpoint_url %||% check_envvar("AZ_STORAGE_EP")
+  AzureStor::blob_endpoint(endpoint_url, token = token)
+}
+
+
+#' Check that an environment variable exists
+#'
+#' The function prints a helpful error if the variable is not found, else
+#'  it returns the value of `Sys.getenv(x)`
+#'
+#' @param x the *name* of the environment variable to be found and checked
+#' @returns the value of the environment variable named in `x`
+#' @export
+check_envvar <- function(x) {
+  cst_msg <- cst_error_msg("The environment variable {.envvar {x}} is not set")
+  check_scalar_type(Sys.getenv(x, NA_character_), "string", cst_msg)
+}
diff --git a/R/list_files.R b/R/list_files.R
index cfff3f3..99cad18 100644
--- a/R/list_files.R
+++ b/R/list_files.R
@@ -1,57 +1,57 @@
-#' List files in a container
-#'
-#' Recursively (or not, if desired) lists all files found in a container. Search
-#' can be restricted to a particular 'subdirectory' of the container, and/or
-#' to files with a specific extension. The function assumes that all file names
-#' end with a ".ext" extension of some sort.
-#'
-#' The function does not support filtering by file name, only by file extension.
-#'
-#' The returned file list (character vector) contains the full paths to the
-#' files, ready to be passed perhaps to a `read_azure_*` function, or further
-#' filtered by you. If you just want the names of the files without the folder
-#' path, use [basename()] to extract these.
-#'
-#' @inheritParams read_azure_parquet
-#' @param path (optional) subdirectory of the container to list files within.
-#'  `""` (the root folder of the container) by default
-#' @param ext (optional) A string giving the extension of a particular file type
-#'  you want to restrict the list to. No need to include the initial ".". The
-#'  default, `""`, means no filtering by file extension will be applied. Can be
-#'  a regular expression.
-#' @param recursive A Boolean value: whether to list files recursively. `TRUE`
-#'  by default
-#'
-#' @importFrom rlang .data
-#' @returns A vector of file names, or an empty character vector if none found
-#' @examples \dontrun{
-#'   list_files(get_container("example"), ext = "csv")
-#' }
-#' @export
-list_files <- function(container, path = "", ext = "", recursive = TRUE) {
-  stopifnot(rlang::is_character(c(path, ext), 2))
-  stopifnot(rlang::is_bool(recursive))
-  pnf_msg <- ct_error_msg("Path {.val {path}} not found")
-  check_that(path, \(x) AzureStor::blob_dir_exists(container, x), pnf_msg)
-
-  tbl <- AzureStor::list_blobs(container, path, recursive = recursive)
-  if (nrow(tbl) > 0) {
-    ext_rx <- if (nzchar(ext)) sub("^\\.+", "", ext) else ".*" # nolint
-    tbl <- tbl |>
-      dplyr::filter(!.data[["isdir"]] & gregg(.data[["name"]], "\\.{ext_rx}$"))
-  }
-
-  # A zero-row tbl can result if `path` is initially empty, or via the filter
-  # step above. We handle this the same way, no matter which route led here.
-  if (nrow(tbl) == 0) {
-    fix_path <- \(p) sub("^/+$", "", sub("^([^/])(.*)", "/\\1\\2", p)) # nolint
-    ext <- if (nzchar(ext)) paste0(" ", ext)
-    msg <- "No{ext} files found in {.val [{container$name}]:{fix_path(path)}}"
-    if (rlang::is_interactive()) {
-      cli::cli_alert_info(msg)
-    }
-    invisible(character(0))
-  } else {
-    tbl[["name"]]
-  }
-}
+#' List files in a container
+#'
+#' Recursively (or not, if desired) lists all files found in a container. Search
+#' can be restricted to a particular 'subdirectory' of the container, and/or
+#' to files with a specific extension. The function assumes that all file names
+#' end with a ".ext" extension of some sort.
+#'
+#' The function does not support filtering by file name, only by file extension.
+#'
+#' The returned file list (character vector) contains the full paths to the
+#' files, ready to be passed perhaps to a `read_azure_*` function, or further
+#' filtered by you. If you just want the names of the files without the folder
+#' path, use [basename()] to extract these.
+#'
+#' @inheritParams read_azure_parquet
+#' @param path (optional) subdirectory of the container to list files within.
+#'  `""` (the root folder of the container) by default
+#' @param ext (optional) A string giving the extension of a particular file type
+#'  you want to restrict the list to. No need to include the initial ".". The
+#'  default, `""`, means no filtering by file extension will be applied. Can be
+#'  a regular expression.
+#' @param recursive A Boolean value: whether to list files recursively. `TRUE`
+#'  by default
+#'
+#' @importFrom rlang .data
+#' @returns A vector of file names, or an empty character vector if none found
+#' @examples \dontrun{
+#'   list_files(get_container("example"), ext = "csv")
+#' }
+#' @export
+list_files <- function(container, path = "", ext = "", recursive = TRUE) {
+  stopifnot(rlang::is_character(c(path, ext), 2))
+  stopifnot(rlang::is_bool(recursive))
+  pnf_msg <- ct_error_msg("Path {.val {path}} not found")
+  check_that(path, \(x) AzureStor::blob_dir_exists(container, x), pnf_msg)
+
+  tbl <- AzureStor::list_blobs(container, path, recursive = recursive)
+  if (nrow(tbl) > 0) {
+    ext_rx <- if (nzchar(ext)) sub("^\\.+", "", ext) else ".*" # nolint
+    tbl <- tbl |>
+      dplyr::filter(!.data[["isdir"]] & gregg(.data[["name"]], "\\.{ext_rx}$"))
+  }
+
+  # A zero-row tbl can result if `path` is initially empty, or via the filter
+  # step above. We handle this the same way, no matter which route led here.
+  if (nrow(tbl) == 0) {
+    fix_path <- \(p) sub("^/+$", "", sub("^([^/])(.*)", "/\\1\\2", p)) # nolint
+    ext <- if (nzchar(ext)) paste0(" ", ext)
+    msg <- "No{ext} files found in {.val [{container$name}]:{fix_path(path)}}"
+    if (rlang::is_interactive()) {
+      cli::cli_alert_info(msg)
+    }
+    invisible(character(0))
+  } else {
+    tbl[["name"]]
+  }
+}
diff --git a/R/read_azure_files.R b/R/read_azure_files.R
index e76183b..70c5d5e 100644
--- a/R/read_azure_files.R
+++ b/R/read_azure_files.R
@@ -1,163 +1,163 @@
-#' Read a parquet file from Azure storage
-#'
-#' @param container An Azure container object, as returned by [get_container]
-#' @param file The name of the file to be read, as a string. NB The file
-#'  extension does not need to be included (though it can be). The function
-#'  will error if multiple files are somehow matched.
-#' @param path The path to the directory where `file` is located, as a string.
-#'  Only needed if `file` does not already contain its full path. If file is
-#'  just a file name with no path, then provide the path to the directory here.
-#'  This must be the full path to the file location, as the function will not
-#'  search into subdirectories recursively. Set to `"/"` (the root of the
-#'  container) by default.
-#' @param info Boolean. Whether to print user feedback about the file that is
-#'  being read. Useful for checking the function is doing what is expected, but
-#'  can be turned off with `FALSE`. Can be set persistently with the option
-#'  "azkit.info". If `NULL` then it will default to the value of
-#'  [rlang::is_interactive] (that is, `TRUE` for interactive sessions).
-#' @param ... optional arguments to be passed through to [arrow::read_parquet]
-#' @returns A tibble
-#' @examples \dontrun{
-#'   # if a full filepath is available then path can be ignored
-#'   read_azure_parquet(cont, "data/folder/path/1.parquet")
-#'   # you can provide a filename without the '.parquet' extension
-#'   # if you wish to use this partial file name matching it is probably easier
-#'   # to provide a 'path'
-#'   read_azure_parquet(cont, "case_details", "storage/parquet/2025/06/29")
-#' }
-#' @export
-read_azure_parquet <- function(container, file, path = "/", info = NULL, ...) {
-  check_blob_exists(container, file, "parquet", info, path) |>
-    # using `dest = NULL` means pass the data through as a raw vector
-    AzureStor::download_blob(container, src = _, dest = NULL) |>
-    arrow::read_parquet(...)
-}
-
-
-#' Read a json file from Azure storage
-#'
-#' @inheritParams read_azure_parquet
-#' @param ... optional arguments to be passed through to
-#'  [yyjsonr::read_json_raw]
-#' @returns A list
-#' @export
-read_azure_json <- function(container, file, path = "/", info = NULL, ...) {
-  check_blob_exists(container, file, "json", info, path) |>
-    # using `dest = NULL` means pass the data through as a raw vector
-    AzureStor::download_blob(container, src = _, dest = NULL) |>
-    yyjsonr::read_json_raw(...)
-}
-
-
-#' Read a json.gz file from Azure storage
-#'
-#' @inheritParams read_azure_parquet
-#' @param ... optional arguments to be passed through to
-#'  [yyjsonr::read_json_file]
-#' @returns A list
-#' @export
-read_azure_jsongz <- function(container, file, path = "/", info = NULL, ...) {
-  full_path <- check_blob_exists(container, file, "json.gz", info, path)
-  dl <- withr::local_tempfile(
-    pattern = tools::file_path_sans_ext(basename(full_path), TRUE),
-    fileext = "json.gz"
-  )
-  AzureStor::download_blob(container, src = full_path, dest = dl)
-  yyjsonr::read_json_file(dl, ...)
-}
-
-
-#' Read an rds file from Azure storage
-#'
-#' @inheritParams read_azure_parquet
-#' @param ... optional arguments to be passed through to
-#'  [AzureStor::storage_load_rds]. For example, a compression type (one of
-#'  c("unknown", "gzip", "bzip2", "xz", "zstd", "none")) can be provided using
-#'  the argument `type`, which will be passed on to [memDecompress] via
-#'  [AzureStor::storage_load_rds].
-#   If nothing is provided here, the compression type will be set to "none".
-#' @returns The data object that was stored in the rds file
-#' @export
-read_azure_rds <- function(container, file, path = "/", info = NULL, ...) {
-  # If the user doesn't specify a (de)compression type with `type` in `...`, we
-  # will set a `type` of "none", as this seems to be the standard on SU Azure
-  dots <- rlang::dots_list(..., type = "none", .homonyms = "first")
-  blob <- check_blob_exists(container, file, "rds", info, path)
-  rlang::inject(AzureStor::storage_load_rds(container, blob, !!!dots))
-}
-
-
-#' Read a csv file from Azure storage
-#'
-#' @inheritParams read_azure_parquet
-#' @param ... optional arguments to be passed through to [readr::read_delim]
-#' @returns A tibble
-#' @export
-read_azure_csv <- function(container, file, path = "/", info = NULL, ...) {
-  check_blob_exists(container, file, "csv", info, path) |>
-    AzureStor::storage_read_csv(container, file = _, ...)
-}
-
-
-#' Read any file from Azure storage
-#'
-#' @inheritParams read_azure_parquet
-#' @param ext If a custom extension needs to be supplied, you can specify it
-#'  here. If `NULL`, the default, the extension of `file` will be used
-#' @param ... optional arguments to be passed through to
-#'  [AzureStor::download_blob]
-#' @returns A raw data stream
-#' @export
-read_azure_file <- function(
-  container,
-  file,
-  path = "/",
-  info = NULL,
-  ext = NULL,
-  ...
-) {
-  ext <- ext %||% tools::file_ext(file)
-  check_blob_exists(container, file, ext, info, path) |>
-    # using `dest = NULL` means pass the data through as a raw vector
-    AzureStor::download_blob(container, src = _, dest = NULL, ...)
-}
-
-
-#' Ensures that the filepath for the file to read exists
-#'
-#' @inheritParams read_azure_parquet
-#' @param ext The standard file extension for the file type, e.g. "json"
-#' @keywords internal
-check_blob_exists <- function(container, file, ext, info, path) {
-  stopifnot("no container found" = inherits(container, "blob_container"))
-  path <- if (path %in% c("", "/")) "" else path
-  stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
-  dir_name <- if (dirname(file) == ".") "" else dirname(file)
-  # Potentially the user could provide a partial file path in `path` and a
-  # further sub-directory as part of `file`. This handles that eventuality,
-  # though this usage pattern should be quite rare!
-  dpath <- file.path(path, dir_name)
-  fname <- basename(file)
-  if (nzchar(ext) && !gregg(fname, "\\.{ext}$")) {
-    fname <- glue::glue("{fname}.{ext}")
-  }
-  # remove duplicate slashes and any initial slashes
-  file_path <- sub("^/", "", gsub("/+", "/", file.path(dpath, fname)))
-
-  filepath_out <- AzureStor::list_blobs(container, dpath, recursive = FALSE) |>
-    dplyr::filter(dplyr::if_any("name", \(x) x == {{ file_path }})) |>
-    dplyr::pull("name")
-
-  msg1 <- ct_error_msg("no matching {ext} file found")
-  msg2 <- cst_error_msg("multiple matching {ext} files found")
-  check_that(filepath_out, \(x) length(x) > 0, msg1) # check length > 0
-  check_scalar_type(filepath_out, "character", msg2) # check length == 1
-
-  info_option <- getOption("azkit.info")
-  stopifnot(rlang::is_scalar_logical(info) || is.null(info))
-  stopifnot(rlang::is_scalar_logical(info_option) || is.null(info_option))
-  if (info %||% info_option %||% rlang::is_interactive()) {
-    cli::cli_alert_info("File {.val {filepath_out}} will be read in")
-  }
-  filepath_out
-}
+#' Read a parquet file from Azure storage
+#'
+#' @param container An Azure container object, as returned by [get_container]
+#' @param file The name of the file to be read, as a string. NB The file
+#'  extension does not need to be included (though it can be). The function
+#'  will error if multiple files are somehow matched.
+#' @param path The path to the directory where `file` is located, as a string.
+#'  Only needed if `file` does not already contain its full path. If file is
+#'  just a file name with no path, then provide the path to the directory here.
+#'  This must be the full path to the file location, as the function will not
+#'  search into subdirectories recursively. Set to `"/"` (the root of the
+#'  container) by default.
+#' @param info Boolean. Whether to print user feedback about the file that is
+#'  being read. Useful for checking the function is doing what is expected, but
+#'  can be turned off with `FALSE`. Can be set persistently with the option
+#'  "azkit.info". If `NULL` then it will default to the value of
+#'  [rlang::is_interactive] (that is, `TRUE` for interactive sessions).
+#' @param ... optional arguments to be passed through to [arrow::read_parquet]
+#' @returns A tibble
+#' @examples \dontrun{
+#'   # if a full filepath is available then path can be ignored
+#'   read_azure_parquet(cont, "data/folder/path/1.parquet")
+#'   # you can provide a filename without the '.parquet' extension
+#'   # if you wish to use this partial file name matching it is probably easier
+#'   # to provide a 'path'
+#'   read_azure_parquet(cont, "case_details", "storage/parquet/2025/06/29")
+#' }
+#' @export
+read_azure_parquet <- function(container, file, path = "/", info = NULL, ...) {
+  check_blob_exists(container, file, "parquet", info, path) |>
+    # using `dest = NULL` means pass the data through as a raw vector
+    AzureStor::download_blob(container, src = _, dest = NULL) |>
+    arrow::read_parquet(...)
+}
+
+
+#' Read a json file from Azure storage
+#'
+#' @inheritParams read_azure_parquet
+#' @param ... optional arguments to be passed through to
+#'  [yyjsonr::read_json_raw]
+#' @returns A list
+#' @export
+read_azure_json <- function(container, file, path = "/", info = NULL, ...) {
+  check_blob_exists(container, file, "json", info, path) |>
+    # using `dest = NULL` means pass the data through as a raw vector
+    AzureStor::download_blob(container, src = _, dest = NULL) |>
+    yyjsonr::read_json_raw(...)
+}
+
+
+#' Read a json.gz file from Azure storage
+#'
+#' @inheritParams read_azure_parquet
+#' @param ... optional arguments to be passed through to
+#'  [yyjsonr::read_json_file]
+#' @returns A list
+#' @export
+read_azure_jsongz <- function(container, file, path = "/", info = NULL, ...) {
+  full_path <- check_blob_exists(container, file, "json.gz", info, path)
+  dl <- withr::local_tempfile(
+    pattern = tools::file_path_sans_ext(basename(full_path), TRUE),
+    fileext = "json.gz"
+  )
+  AzureStor::download_blob(container, src = full_path, dest = dl)
+  yyjsonr::read_json_file(dl, ...)
+}
+
+
+#' Read an rds file from Azure storage
+#'
+#' @inheritParams read_azure_parquet
+#' @param ... optional arguments to be passed through to
+#'  [AzureStor::storage_load_rds]. For example, a compression type (one of
+#'  c("unknown", "gzip", "bzip2", "xz", "zstd", "none")) can be provided using
+#'  the argument `type`, which will be passed on to [memDecompress] via
+#'  [AzureStor::storage_load_rds].
+#   If nothing is provided here, the compression type will be set to "none".
+#' @returns The data object that was stored in the rds file
+#' @export
+read_azure_rds <- function(container, file, path = "/", info = NULL, ...) {
+  # If the user doesn't specify a (de)compression type with `type` in `...`, we
+  # will set a `type` of "none", as this seems to be the standard on SU Azure
+  dots <- rlang::dots_list(..., type = "none", .homonyms = "first")
+  blob <- check_blob_exists(container, file, "rds", info, path)
+  rlang::inject(AzureStor::storage_load_rds(container, blob, !!!dots))
+}
+
+
+#' Read a csv file from Azure storage
+#'
+#' @inheritParams read_azure_parquet
+#' @param ... optional arguments to be passed through to [readr::read_delim]
+#' @returns A tibble
+#' @export
+read_azure_csv <- function(container, file, path = "/", info = NULL, ...) {
+  check_blob_exists(container, file, "csv", info, path) |>
+    AzureStor::storage_read_csv(container, file = _, ...)
+}
+
+
+#' Read any file from Azure storage
+#'
+#' @inheritParams read_azure_parquet
+#' @param ext If a custom extension needs to be supplied, you can specify it
+#'  here. If `NULL`, the default, the extension of `file` will be used
+#' @param ... optional arguments to be passed through to
+#'  [AzureStor::download_blob]
+#' @returns A raw data stream
+#' @export
+read_azure_file <- function(
+  container,
+  file,
+  path = "/",
+  info = NULL,
+  ext = NULL,
+  ...
+) {
+  ext <- ext %||% tools::file_ext(file)
+  check_blob_exists(container, file, ext, info, path) |>
+    # using `dest = NULL` means pass the data through as a raw vector
+    AzureStor::download_blob(container, src = _, dest = NULL, ...)
+}
+
+
+#' Ensures that the filepath for the file to read exists
+#'
+#' @inheritParams read_azure_parquet
+#' @param ext The standard file extension for the file type, e.g. "json"
+#' @keywords internal
+check_blob_exists <- function(container, file, ext, info, path) {
+  stopifnot("no container found" = inherits(container, "blob_container"))
+  path <- if (path %in% c("", "/")) "" else path
+  stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
+  dir_name <- if (dirname(file) == ".") "" else dirname(file)
+  # Potentially the user could provide a partial file path in `path` and a
+  # further sub-directory as part of `file`. This handles that eventuality,
+  # though this usage pattern should be quite rare!
+  dpath <- file.path(path, dir_name)
+  fname <- basename(file)
+  if (nzchar(ext) && !gregg(fname, "\\.{ext}$")) {
+    fname <- glue::glue("{fname}.{ext}")
+  }
+  # remove duplicate slashes and any initial slashes
+  file_path <- sub("^/", "", gsub("/+", "/", file.path(dpath, fname)))
+
+  filepath_out <- AzureStor::list_blobs(container, dpath, recursive = FALSE) |>
+    dplyr::filter(dplyr::if_any("name", \(x) x == {{ file_path }})) |>
+    dplyr::pull("name")
+
+  msg1 <- ct_error_msg("no matching {ext} file found")
+  msg2 <- cst_error_msg("multiple matching {ext} files found")
+  check_that(filepath_out, \(x) length(x) > 0, msg1) # check length > 0
+  check_scalar_type(filepath_out, "character", msg2) # check length == 1
+
+  info_option <- getOption("azkit.info")
+  stopifnot(rlang::is_scalar_logical(info) || is.null(info))
+  stopifnot(rlang::is_scalar_logical(info_option) || is.null(info_option))
+  if (info %||% info_option %||% rlang::is_interactive()) {
+    cli::cli_alert_info("File {.val {filepath_out}} will be read in")
+  }
+  filepath_out
+}
diff --git a/R/read_azure_table.R b/R/read_azure_table.R
index 5a85452..d95c3b4 100644
--- a/R/read_azure_table.R
+++ b/R/read_azure_table.R
@@ -1,39 +1,39 @@
-#' Read in data from an Azure table
-#'
-#' @param table_name name of the table to be read. If left as `NULL`,
-#'  the default, the function will look instead for a value stored in the
-#'  environment variable "AZ_TABLE_NAME"
-#' @param table_endpoint URL of the Azure table endpoint. If left as `NULL`,
-#'  the default, the function will look instead for a value stored in the
-#'  environment variable "AZ_TABLE_EP"
-#' @param ... parameters to be passed through to [get_auth_token]
-#' @inheritParams get_container
-#' @returns A tibble
-#' @export
-read_azure_table <- function(
-  table_name = NULL,
-  table_endpoint = NULL,
-  token = NULL,
-  ...
-) {
-  table_name <- table_name %||% check_envvar("AZ_TABLE_NAME")
-  table_ep <- table_endpoint %||% check_envvar("AZ_TABLE_EP")
-  token <- token %||% get_auth_token(...)
-  access_token <- token |>
-    purrr::pluck("credentials", "access_token")
-  headers <- list("2025-11-05", "application/json;odata=nometadata") |>
-    purrr::set_names(c("x-ms-version", "Accept"))
-
-  resp <- httr2::request(table_ep) |>
-    httr2::req_url_path_append(table_name) |>
-    httr2::req_auth_bearer_token(access_token) |>
-    httr2::req_headers(!!!headers) |>
-    httr2::req_perform() |>
-    httr2::resp_check_status()
-
-  resp |>
-    httr2::resp_body_json() |>
-    purrr::pluck("value") |>
-    purrr::map(tibble::as_tibble) |>
-    purrr::list_rbind()
-}
+#' Read in data from an Azure table
+#'
+#' @param table_name name of the table to be read. If left as `NULL`,
+#'  the default, the function will look instead for a value stored in the
+#'  environment variable "AZ_TABLE_NAME"
+#' @param table_endpoint URL of the Azure table endpoint. If left as `NULL`,
+#'  the default, the function will look instead for a value stored in the
+#'  environment variable "AZ_TABLE_EP"
+#' @param ... parameters to be passed through to [get_auth_token]
+#' @inheritParams get_container
+#' @returns A tibble
+#' @export
+read_azure_table <- function(
+  table_name = NULL,
+  table_endpoint = NULL,
+  token = NULL,
+  ...
+) {
+  table_name <- table_name %||% check_envvar("AZ_TABLE_NAME")
+  table_ep <- table_endpoint %||% check_envvar("AZ_TABLE_EP")
+  token <- token %||% get_auth_token(...)
+  access_token <- token |>
+    purrr::pluck("credentials", "access_token")
+  headers <- list("2025-11-05", "application/json;odata=nometadata") |>
+    purrr::set_names(c("x-ms-version", "Accept"))
+
+  resp <- httr2::request(table_ep) |>
+    httr2::req_url_path_append(table_name) |>
+    httr2::req_auth_bearer_token(access_token) |>
+    httr2::req_headers(!!!headers) |>
+    httr2::req_perform() |>
+    httr2::resp_check_status()
+
+  resp |>
+    httr2::resp_body_json() |>
+    purrr::pluck("value") |>
+    purrr::map(tibble::as_tibble) |>
+    purrr::list_rbind()
+}
diff --git a/tests/testthat/test-read_azure_files.R b/tests/testthat/test-read_azure_files.R
index 1f9e0fc..de7432e 100644
--- a/tests/testthat/test-read_azure_files.R
+++ b/tests/testthat/test-read_azure_files.R
@@ -1,343 +1,343 @@
-test_that("basic success", {
-  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
-  # only run the test if this variable is set (ie locally, but not on GitHub)
-  if (nzchar(endpoint_uri)) {
-    token <- get_auth_token()
-    expect_false(is.null(token))
-    expect_s3_class(token, "AzureToken")
-
-    # explore behaviour with blob_endpoint
-    ep <- AzureStor::blob_endpoint(endpoint_uri, token = token) |>
-      expect_no_error()
-    expect_s3_class(ep, "blob_endpoint")
-    inputs_data <- AzureStor::blob_container(ep, "inputs-data") |>
-      expect_no_error()
-    expect_s3_class(inputs_data, "blob_container")
-    files <- AzureStor::list_blobs(inputs_data, "dev", recursive = FALSE) |>
-      dplyr::filter(grepl("\\.parquet$", .data[["name"]])) |>
-      dplyr::pull("name")
-    name <- "baseline"
-    pqt_file <- grep(name, files, value = TRUE)
-    expect_length(pqt_file, 1)
-    expect_true(AzureStor::blob_exists(inputs_data, pqt_file))
-    out <- AzureStor::download_blob(inputs_data, pqt_file, dest = NULL) |>
-      arrow::read_parquet()
-    expect_s3_class(out, "tbl_df")
-    expect_length(out, 6) # ncol
-  }
-})
-
-
-test_that("understand some new errors in check_blob_exists", {
-  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
-  # only run the test if this variable is set (i.e. locally, but not on GitHub)
-  if (nzchar(endpoint_uri)) {
-    inp <- expect_no_error(get_container("inputs-data"))
-    path <- "dev"
-    file <- "baseline"
-    ext <- "parquet"
-    path <- if (path %in% c("", "/")) "" else path
-    expect_equal(path, "dev")
-    dir_name <- if (dirname(file) == ".") "" else dirname(file)
-    expect_equal(dir_name, "")
-    p2 <- file.path(path, dir_name)
-    expect_equal(p2, "dev")
-    file_name <- paste0(basename(file), ".", ext)
-    expect_equal(file_name, "baseline.parquet")
-    file_path <- sub("^/", "", sub("/+", "/", file.path(p2, file_name)))
-    expect_equal(file_path, "dev/baseline.parquet")
-    dir_list <- AzureStor::list_blobs(inp, p2, recursive = FALSE)
-    file_name_out <- dir_list |>
-      dplyr::filter(dplyr::if_any("name", \(x) x == file_path)) |>
-      dplyr::pull("name")
-    expect_equal(file_name_out, "dev/baseline.parquet")
-    expect_no_error(check_blob_exists(inp, file, ext, FALSE, path))
-
-    # check still works if full filepath is passed to file arg
-    path <- ""
-    file <- "dev/baseline.parquet"
-    path <- if (path %in% c("", "/")) "" else path
-    expect_equal(path, "")
-    dir_name <- if (dirname(file) == ".") "" else dirname(file)
-    expect_equal(dir_name, "dev")
-    p2 <- glue::glue("{path}/{dir_name}")
-    expect_equal(p2, "/dev")
-    file_name <- basename(file)
-    expect_equal(file_name, "baseline.parquet")
-    file_path <- sub("^/", "", sub("/+", "/", glue::glue("{p2}/{file_name}")))
-    expect_equal(file_path, "dev/baseline.parquet")
-    dir_list <- AzureStor::list_blobs(inp, p2, recursive = FALSE)
-    file_name_out <- dir_list |>
-      dplyr::filter(dplyr::if_any("name", \(x) x == {{ file_path }})) |>
-      dplyr::pull("name")
-    expect_equal(file_name_out, file)
-    expect_no_error(check_blob_exists(inp, file, ext, FALSE, path))
-  }
-})
-
-
-test_that("whole read_parquet function works", {
-  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
-  # only run the test if this variable is set (i.e. locally, but not on GitHub)
-  if (nzchar(endpoint_uri)) {
-    inputs_container <- expect_no_error(get_container("inputs-data"))
-    expect_no_error(read_azure_parquet(
-      inputs_container,
-      "baseline",
-      path = "dev"
-    ))
-    out1 <- read_azure_parquet(inputs_container, "baseline", path = "dev")
-    expect_s3_class(out1, "tbl_df")
-    # check that it works with the file extension included
-    out2 <- read_azure_parquet(
-      inputs_container,
-      "baseline.parquet",
-      path = "dev"
-    )
-    expect_length(out2, 6) # ncol
-
-    res <- get_container(Sys.getenv("AZ_RESULTS_CONTAINER"))
-    pqt_file <- Sys.getenv("TEST_PARQUET_FILE")
-    path <- "/"
-    file_ext <- "parquet"
-
-    # experiment with changes to code:
-    path <- if (path %in% c("", "/")) "" else path
-    expect_equal(path, "")
-    dir_name <- if (dirname(pqt_file) == ".") "" else dirname(pqt_file)
-    dpath <- glue::glue("{path}/{dir_name}")
-    file_name <- sub(glue::glue("\\.{file_ext}$"), "", basename(pqt_file))
-
-    file_name <- if (gregg(basename(pqt_file), "\\.{file_ext}$")) {
-      basename(pqt_file)
-    } else {
-      glue::glue("{basename(pqt_file)}.{file_ext}")
-    }
-    # remove duplicate slashes and any initial slashes
-    fpath <- sub("^/", "", sub("/+", "/", glue::glue("{dpath}/{file_name}")))
-    expect_equal(fpath, pqt_file)
-    # now function should run without error
-    expect_no_error(check_blob_exists(res, pqt_file, "parquet", FALSE, "/"))
-    # we want this to error if the file_ext doesn't match the file
-    expect_error(check_blob_exists(res, pqt_file, "rds", FALSE, "/"))
-  }
-})
-
-
-test_that("read_azure_json basically works", {
-  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
-  # only run the test if this variable is set (i.e. locally, but not on GitHub)
-  if (nzchar(endpoint_uri)) {
-    download_azure_blob <- function(container, file, file_ext, path = "") {
-      check_blob_exists(container, file, file_ext, FALSE, path) |>
-        AzureStor::download_blob(container, src = _, dest = NULL)
-    }
-    expect_no_error(support_container <- get_container("supporting-data"))
-    raw_out <- support_container |>
-      download_azure_blob("providers", "json")
-    expect_type(raw_out, "raw")
-    expect_no_error(yyjsonr::read_json_raw(raw_out))
-    out <- yyjsonr::read_json_raw(raw_out)
-    expect_type(out, "character")
-    expect_length(out, 138)
-  }
-})
-
-test_that("dirname and basename logic works", {
-  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
-  # only run the test if this variable is set (i.e. locally, but not on GitHub)
-  if (nzchar(endpoint_uri)) {
-    expect_no_error(support_container <- get_container("supporting-data"))
-    file <- "neecom_table.rds"
-    path <- ""
-    expect_no_error(AzureStor::blob_exists(support_container, file))
-    file2 <- paste0("/", file)
-    file3 <- paste0("//", file)
-    expect_no_error(AzureStor::blob_exists(support_container, file2))
-    expect_no_error(AzureStor::blob_exists(support_container, file3))
-    expect_equal(dirname(file), ".")
-    file <- paste0(path, "/", file)
-    expect_equal(dirname(file), "/")
-    path <- "/"
-    file <- paste0(path, "/", file)
-    expect_equal(dirname(file), "/")
-
-    new_path <- sub("^\\.$", "/", dirname(file))
-    expect_equal(new_path, "/")
-    path <- "QA"
-    file <- "none.txt"
-    file <- paste0(path, "/", file)
-    new_path <- sub("^\\.$", "/", dirname(file))
-    expect_equal(new_path, "QA")
-
-    res <- get_container("results")
-    path <- "prod/dev/national/national"
-    file <- "test-2025"
-    file_ext <- "json.gz"
-    filepath <- sub("^/+", "", paste0(path, "/", file))
-    expect_equal(filepath, "prod/dev/national/national/test-2025")
-
-    path <- "/"
-    filepath <- sub("^/+", "", paste0(path, "/", file))
-    expect_equal(dirname(filepath), ".")
-
-    path <- "prod/dev/national/national"
-    filepath <- sub("^/+", "", paste0(path, "/", file))
-    expect_equal(filepath, "prod/dev/national/national/test-2025")
-    path <- sub("^\\.$", "/", dirname(filepath))
-
-    filepath_out <- AzureStor::list_blobs(res, path, recursive = FALSE) |>
-      dplyr::filter(
-        !dplyr::if_any("isdir") &
-          dplyr::if_any("name", \(x) {
-            gregg(x, "\\.{file_ext}$") & gregg(x, "^{filepath}")
-          })
-      ) |>
-      dplyr::pull("name")
-    expect_match(filepath_out, "prod/dev/national/national/test-2025.*json.gz")
-  }
-})
-
-test_that("tdd of check_blob_exists", {
-  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
-  # only run the test if this variable is set (i.e. locally, but not on GitHub)
-  if (nzchar(endpoint_uri)) {
-    expect_no_error(support_container <- get_container("supporting-data"))
-    check_blob_exists <- function(container, file, path = "/") {
-      stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
-    }
-    expect_no_error(check_blob_exists(support_container, "file"))
-
-    check_blob_exists <- function(container, file, path = "/") {
-      stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
-      file_ext <- "json"
-      AzureStor::list_blobs(container, path, recursive = FALSE) |>
-        dplyr::filter(
-          !dplyr::if_any("isdir") &
-            dplyr::if_any("name", \(x) {
-              grepl(glue::glue("\\.{file_ext}$"), x) & grepl(file, x)
-            })
-        ) |>
-        dplyr::pull("name")
-    }
-    expect_no_error(check_blob_exists(support_container, "sites"))
-    expect_length(check_blob_exists(support_container, "sites"), 2)
-
-    check_blob_exists <- function(container, file, path = "/") {
-      stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
-      file_ext <- "json"
-      filepath <- AzureStor::list_blobs(container, path, recursive = FALSE) |>
-        dplyr::filter(
-          !dplyr::if_any("isdir") &
-            dplyr::if_any("name", \(x) {
-              grepl(glue::glue("\\.{file_ext}$"), x) & grepl(file, x)
-            })
-        ) |>
-        dplyr::pull("name")
-      stop_msg1 <- glue::glue("no matching {file_ext} file found")
-      stop_msg2 <- glue::glue("multiple matching {file_ext} files found")
-      check_that(filepath, \(x) length(x) > 0, stop_msg1) # check length > 0
-      check_scalar_type(filepath, "character", stop_msg2) # check length == 1
-    }
-    expect_error(check_blob_exists(support_container, "unmatched"), "matching")
-    expect_error(check_blob_exists(support_container, "sites"), "multiple")
-
-    check_blob_exists <- function(container, file, path = "/") {
-      stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
-      file_ext <- "json"
-      filepath <- AzureStor::list_blobs(container, path, recursive = FALSE) |>
-        dplyr::filter(
-          !dplyr::if_any("isdir") &
-            dplyr::if_any("name", \(x) {
-              grepl(glue::glue("\\.{file_ext}$"), x) & grepl(file, x)
-            })
-        ) |>
-        dplyr::pull("name")
-      stop_msg1 <- glue::glue("no matching {file_ext} file found")
-      stop_msg2 <- glue::glue("multiple matching {file_ext} files found")
-      check_that(filepath, \(x) length(x) > 0, stop_msg1) # check length > 0
-      check_scalar_type(filepath, "character", stop_msg2) # check length == 1
-      filepath
-    }
-    expect_no_error(check_blob_exists(support_container, "providers"))
-    expect_no_error(check_blob_exists(support_container, "providers.json"))
-  }
-})
-
-
-# parquet and json read functions need `download_blob` first before reading in.
-# However there are 'native' {AzureStor} functions for csv and rds files, so we
-# should use those instead. Requiring a slightly different (simpler) workflow.
-
-test_that("read_azure_csv basically works", {
-  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
-  # only run the test if this variable is set (i.e. locally, but not on GitHub)
-  if (nzchar(endpoint_uri)) {
-    download_azure_blob <- function(container, file, file_ext, path = "") {
-      check_blob_exists(container, file, file_ext, FALSE, path) |>
-        AzureStor::download_blob(container, src = _, dest = NULL)
-    }
-    expect_no_error(support_container <- get_container("supporting-data"))
-    support_container |>
-      # should construct the file name OK
-      download_azure_blob("mitigator-lookup", "csv") |>
-      expect_no_error()
-    support_container |>
-      download_azure_blob("mitigator-lookup.csv", "csv") |>
-      expect_no_error()
-    raw_out <- support_container |>
-      download_azure_blob("mitigator-lookup.csv", "csv")
-    expect_type(raw_out, "raw")
-    expect_no_error(readr::read_csv(raw_out, show_col_types = FALSE))
-    dat <- readr::read_csv(raw_out, show_col_types = FALSE)
-    expect_type(dat, "list")
-    expect_s3_class(dat, "tbl_df")
-  }
-})
-
-
-test_that("... parameters are passed through", {
-  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
-  # only run the test if this variable is set (i.e. locally, but not on GitHub)
-  if (nzchar(endpoint_uri)) {
-    download_azure_blob <- function(container, file, file_ext, path = "") {
-      check_blob_exists(container, file, file_ext, FALSE, path) |>
-        AzureStor::download_blob(container, src = _, dest = NULL)
-    }
-    expect_no_error(support_container <- get_container("supporting-data"))
-    col_types <- "ccc------"
-    csv_out1 <- support_container |>
-      download_azure_blob("mitigator-lookup.csv", "csv") |>
-      readr::read_csv(col_types = col_types) |>
-      expect_no_error()
-    csv_out2 <- support_container |>
-      AzureStor::storage_read_csv(
-        "mitigator-lookup.csv",
-        col_types = col_types
-      ) |>
-      expect_no_error()
-    expect_identical(csv_out1, csv_out2)
-    expect_length(csv_out1, 3) # ncol
-  }
-})
-
-test_that("read functions all work a bit at least", {
-  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
-  # only run the test if this variable is set (i.e. locally, but not on GitHub)
-  if (nzchar(endpoint_uri)) {
-    res <- get_container(Sys.getenv("AZ_RESULTS_CONTAINER"))
-    expect_no_error(read_azure_parquet(res, Sys.getenv("TEST_PARQUET_FILE")))
-    supp <- get_container(Sys.getenv("AZ_SUPPORT_CONTAINER"))
-    expect_no_error(read_azure_json(supp, Sys.getenv("TEST_JSON_FILE")))
-    expect_no_error(read_azure_rds(supp, Sys.getenv("TEST_RDS_FILE")))
-    read_azure_csv(supp, Sys.getenv("TEST_CSV_FILE"), show_col_types = FALSE) |>
-      expect_no_error()
-  }
-})
-
-
-test_that("check that sub-functions inherit options in R!", {
-  yes <- \() getOption("do_it") %||% "nope"
-  wrap <- \() yes()
-  expect_identical(withr::with_options(list(do_it = "yes"), wrap()), "yes")
-})
+test_that("basic success", {
+  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
+  # only run the test if this variable is set (ie locally, but not on GitHub)
+  if (nzchar(endpoint_uri)) {
+    token <- get_auth_token()
+    expect_false(is.null(token))
+    expect_s3_class(token, "AzureToken")
+
+    # explore behaviour with blob_endpoint
+    ep <- AzureStor::blob_endpoint(endpoint_uri, token = token) |>
+      expect_no_error()
+    expect_s3_class(ep, "blob_endpoint")
+    inputs_data <- AzureStor::blob_container(ep, "inputs-data") |>
+      expect_no_error()
+    expect_s3_class(inputs_data, "blob_container")
+    files <- AzureStor::list_blobs(inputs_data, "dev", recursive = FALSE) |>
+      dplyr::filter(grepl("\\.parquet$", .data[["name"]])) |>
+      dplyr::pull("name")
+    name <- "baseline"
+    pqt_file <- grep(name, files, value = TRUE)
+    expect_length(pqt_file, 1)
+    expect_true(AzureStor::blob_exists(inputs_data, pqt_file))
+    out <- AzureStor::download_blob(inputs_data, pqt_file, dest = NULL) |>
+      arrow::read_parquet()
+    expect_s3_class(out, "tbl_df")
+    expect_length(out, 6) # ncol
+  }
+})
+
+
+test_that("understand some new errors in check_blob_exists", {
+  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
+  # only run the test if this variable is set (i.e. locally, but not on GitHub)
+  if (nzchar(endpoint_uri)) {
+    inp <- expect_no_error(get_container("inputs-data"))
+    path <- "dev"
+    file <- "baseline"
+    ext <- "parquet"
+    path <- if (path %in% c("", "/")) "" else path
+    expect_equal(path, "dev")
+    dir_name <- if (dirname(file) == ".") "" else dirname(file)
+    expect_equal(dir_name, "")
+    p2 <- file.path(path, dir_name)
+    expect_equal(p2, "dev")
+    file_name <- paste0(basename(file), ".", ext)
+    expect_equal(file_name, "baseline.parquet")
+    file_path <- sub("^/", "", sub("/+", "/", file.path(p2, file_name)))
+    expect_equal(file_path, "dev/baseline.parquet")
+    dir_list <- AzureStor::list_blobs(inp, p2, recursive = FALSE)
+    file_name_out <- dir_list |>
+      dplyr::filter(dplyr::if_any("name", \(x) x == file_path)) |>
+      dplyr::pull("name")
+    expect_equal(file_name_out, "dev/baseline.parquet")
+    expect_no_error(check_blob_exists(inp, file, ext, FALSE, path))
+
+    # check still works if full filepath is passed to file arg
+    path <- ""
+    file <- "dev/baseline.parquet"
+    path <- if (path %in% c("", "/")) "" else path
+    expect_equal(path, "")
+    dir_name <- if (dirname(file) == ".") "" else dirname(file)
+    expect_equal(dir_name, "dev")
+    p2 <- glue::glue("{path}/{dir_name}")
+    expect_equal(p2, "/dev")
+    file_name <- basename(file)
+    expect_equal(file_name, "baseline.parquet")
+    file_path <- sub("^/", "", sub("/+", "/", glue::glue("{p2}/{file_name}")))
+    expect_equal(file_path, "dev/baseline.parquet")
+    dir_list <- AzureStor::list_blobs(inp, p2, recursive = FALSE)
+    file_name_out <- dir_list |>
+      dplyr::filter(dplyr::if_any("name", \(x) x == {{ file_path }})) |>
+      dplyr::pull("name")
+    expect_equal(file_name_out, file)
+    expect_no_error(check_blob_exists(inp, file, ext, FALSE, path))
+  }
+})
+
+
+test_that("whole read_parquet function works", {
+  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
+  # only run the test if this variable is set (i.e. locally, but not on GitHub)
+  if (nzchar(endpoint_uri)) {
+    inputs_container <- expect_no_error(get_container("inputs-data"))
+    expect_no_error(read_azure_parquet(
+      inputs_container,
+      "baseline",
+      path = "dev"
+    ))
+    out1 <- read_azure_parquet(inputs_container, "baseline", path = "dev")
+    expect_s3_class(out1, "tbl_df")
+    # check that it works with the file extension included
+    out2 <- read_azure_parquet(
+      inputs_container,
+      "baseline.parquet",
+      path = "dev"
+    )
+    expect_length(out2, 6) # ncol
+
+    res <- get_container(Sys.getenv("AZ_RESULTS_CONTAINER"))
+    pqt_file <- Sys.getenv("TEST_PARQUET_FILE")
+    path <- "/"
+    file_ext <- "parquet"
+
+    # experiment with changes to code:
+    path <- if (path %in% c("", "/")) "" else path
+    expect_equal(path, "")
+    dir_name <- if (dirname(pqt_file) == ".") "" else dirname(pqt_file)
+    dpath <- glue::glue("{path}/{dir_name}")
+    file_name <- sub(glue::glue("\\.{file_ext}$"), "", basename(pqt_file))
+
+    file_name <- if (gregg(basename(pqt_file), "\\.{file_ext}$")) {
+      basename(pqt_file)
+    } else {
+      glue::glue("{basename(pqt_file)}.{file_ext}")
+    }
+    # remove duplicate slashes and any initial slashes
+    fpath <- sub("^/", "", sub("/+", "/", glue::glue("{dpath}/{file_name}")))
+    expect_equal(fpath, pqt_file)
+    # now function should run without error
+    expect_no_error(check_blob_exists(res, pqt_file, "parquet", FALSE, "/"))
+    # we want this to error if the file_ext doesn't match the file
+    expect_error(check_blob_exists(res, pqt_file, "rds", FALSE, "/"))
+  }
+})
+
+
+test_that("read_azure_json basically works", {
+  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
+  # only run the test if this variable is set (i.e. locally, but not on GitHub)
+  if (nzchar(endpoint_uri)) {
+    download_azure_blob <- function(container, file, file_ext, path = "") {
+      check_blob_exists(container, file, file_ext, FALSE, path) |>
+        AzureStor::download_blob(container, src = _, dest = NULL)
+    }
+    expect_no_error(support_container <- get_container("supporting-data"))
+    raw_out <- support_container |>
+      download_azure_blob("providers", "json")
+    expect_type(raw_out, "raw")
+    expect_no_error(yyjsonr::read_json_raw(raw_out))
+    out <- yyjsonr::read_json_raw(raw_out)
+    expect_type(out, "character")
+    expect_length(out, 138)
+  }
+})
+
+test_that("dirname and basename logic works", {
+  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
+  # only run the test if this variable is set (i.e. locally, but not on GitHub)
+  if (nzchar(endpoint_uri)) {
+    expect_no_error(support_container <- get_container("supporting-data"))
+    file <- "neecom_table.rds"
+    path <- ""
+    expect_no_error(AzureStor::blob_exists(support_container, file))
+    file2 <- paste0("/", file)
+    file3 <- paste0("//", file)
+    expect_no_error(AzureStor::blob_exists(support_container, file2))
+    expect_no_error(AzureStor::blob_exists(support_container, file3))
+    expect_equal(dirname(file), ".")
+    file <- paste0(path, "/", file)
+    expect_equal(dirname(file), "/")
+    path <- "/"
+    file <- paste0(path, "/", file)
+    expect_equal(dirname(file), "/")
+
+    new_path <- sub("^\\.$", "/", dirname(file))
+    expect_equal(new_path, "/")
+    path <- "QA"
+    file <- "none.txt"
+    file <- paste0(path, "/", file)
+    new_path <- sub("^\\.$", "/", dirname(file))
+    expect_equal(new_path, "QA")
+
+    res <- get_container("results")
+    path <- "prod/dev/national/national"
+    file <- "test-2025"
+    file_ext <- "json.gz"
+    filepath <- sub("^/+", "", paste0(path, "/", file))
+    expect_equal(filepath, "prod/dev/national/national/test-2025")
+
+    path <- "/"
+    filepath <- sub("^/+", "", paste0(path, "/", file))
+    expect_equal(dirname(filepath), ".")
+
+    path <- "prod/dev/national/national"
+    filepath <- sub("^/+", "", paste0(path, "/", file))
+    expect_equal(filepath, "prod/dev/national/national/test-2025")
+    path <- sub("^\\.$", "/", dirname(filepath))
+
+    filepath_out <- AzureStor::list_blobs(res, path, recursive = FALSE) |>
+      dplyr::filter(
+        !dplyr::if_any("isdir") &
+          dplyr::if_any("name", \(x) {
+            gregg(x, "\\.{file_ext}$") & gregg(x, "^{filepath}")
+          })
+      ) |>
+      dplyr::pull("name")
+    expect_match(filepath_out, "prod/dev/national/national/test-2025.*json.gz")
+  }
+})
+
+test_that("tdd of check_blob_exists", {
+  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
+  # only run the test if this variable is set (i.e. locally, but not on GitHub)
+  if (nzchar(endpoint_uri)) {
+    expect_no_error(support_container <- get_container("supporting-data"))
+    check_blob_exists <- function(container, file, path = "/") {
+      stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
+    }
+    expect_no_error(check_blob_exists(support_container, "file"))
+
+    check_blob_exists <- function(container, file, path = "/") {
+      stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
+      file_ext <- "json"
+      AzureStor::list_blobs(container, path, recursive = FALSE) |>
+        dplyr::filter(
+          !dplyr::if_any("isdir") &
+            dplyr::if_any("name", \(x) {
+              grepl(glue::glue("\\.{file_ext}$"), x) & grepl(file, x)
+            })
+        ) |>
+        dplyr::pull("name")
+    }
+    expect_no_error(check_blob_exists(support_container, "sites"))
+    expect_length(check_blob_exists(support_container, "sites"), 2)
+
+    check_blob_exists <- function(container, file, path = "/") {
+      stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
+      file_ext <- "json"
+      filepath <- AzureStor::list_blobs(container, path, recursive = FALSE) |>
+        dplyr::filter(
+          !dplyr::if_any("isdir") &
+            dplyr::if_any("name", \(x) {
+              grepl(glue::glue("\\.{file_ext}$"), x) & grepl(file, x)
+            })
+        ) |>
+        dplyr::pull("name")
+      stop_msg1 <- glue::glue("no matching {file_ext} file found")
+      stop_msg2 <- glue::glue("multiple matching {file_ext} files found")
+      check_that(filepath, \(x) length(x) > 0, stop_msg1) # check length > 0
+      check_scalar_type(filepath, "character", stop_msg2) # check length == 1
+    }
+    expect_error(check_blob_exists(support_container, "unmatched"), "matching")
+    expect_error(check_blob_exists(support_container, "sites"), "multiple")
+
+    check_blob_exists <- function(container, file, path = "/") {
+      stopifnot("path not found" = AzureStor::blob_dir_exists(container, path))
+      file_ext <- "json"
+      filepath <- AzureStor::list_blobs(container, path, recursive = FALSE) |>
+        dplyr::filter(
+          !dplyr::if_any("isdir") &
+            dplyr::if_any("name", \(x) {
+              grepl(glue::glue("\\.{file_ext}$"), x) & grepl(file, x)
+            })
+        ) |>
+        dplyr::pull("name")
+      stop_msg1 <- glue::glue("no matching {file_ext} file found")
+      stop_msg2 <- glue::glue("multiple matching {file_ext} files found")
+      check_that(filepath, \(x) length(x) > 0, stop_msg1) # check length > 0
+      check_scalar_type(filepath, "character", stop_msg2) # check length == 1
+      filepath
+    }
+    expect_no_error(check_blob_exists(support_container, "providers"))
+    expect_no_error(check_blob_exists(support_container, "providers.json"))
+  }
+})
+
+
+# parquet and json read functions need `download_blob` first before reading in.
+# However there are 'native' {AzureStor} functions for csv and rds files, so we
+# should use those instead. Requiring a slightly different (simpler) workflow.
+
+test_that("read_azure_csv basically works", {
+  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
+  # only run the test if this variable is set (i.e. locally, but not on GitHub)
+  if (nzchar(endpoint_uri)) {
+    download_azure_blob <- function(container, file, file_ext, path = "") {
+      check_blob_exists(container, file, file_ext, FALSE, path) |>
+        AzureStor::download_blob(container, src = _, dest = NULL)
+    }
+    expect_no_error(support_container <- get_container("supporting-data"))
+    support_container |>
+      # should construct the file name OK
+      download_azure_blob("mitigator-lookup", "csv") |>
+      expect_no_error()
+    support_container |>
+      download_azure_blob("mitigator-lookup.csv", "csv") |>
+      expect_no_error()
+    raw_out <- support_container |>
+      download_azure_blob("mitigator-lookup.csv", "csv")
+    expect_type(raw_out, "raw")
+    expect_no_error(readr::read_csv(raw_out, show_col_types = FALSE))
+    dat <- readr::read_csv(raw_out, show_col_types = FALSE)
+    expect_type(dat, "list")
+    expect_s3_class(dat, "tbl_df")
+  }
+})
+
+
+test_that("... parameters are passed through", {
+  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
+  # only run the test if this variable is set (i.e. locally, but not on GitHub)
+  if (nzchar(endpoint_uri)) {
+    download_azure_blob <- function(container, file, file_ext, path = "") {
+      check_blob_exists(container, file, file_ext, FALSE, path) |>
+        AzureStor::download_blob(container, src = _, dest = NULL)
+    }
+    expect_no_error(support_container <- get_container("supporting-data"))
+    col_types <- "ccc------"
+    csv_out1 <- support_container |>
+      download_azure_blob("mitigator-lookup.csv", "csv") |>
+      readr::read_csv(col_types = col_types) |>
+      expect_no_error()
+    csv_out2 <- support_container |>
+      AzureStor::storage_read_csv(
+        "mitigator-lookup.csv",
+        col_types = col_types
+      ) |>
+      expect_no_error()
+    expect_identical(csv_out1, csv_out2)
+    expect_length(csv_out1, 3) # ncol
+  }
+})
+
+test_that("read functions all work a bit at least", {
+  endpoint_uri <- Sys.getenv("AZ_STORAGE_EP")
+  # only run the test if this variable is set (i.e. locally, but not on GitHub)
+  if (nzchar(endpoint_uri)) {
+    res <- get_container(Sys.getenv("AZ_RESULTS_CONTAINER"))
+    expect_no_error(read_azure_parquet(res, Sys.getenv("TEST_PARQUET_FILE")))
+    supp <- get_container(Sys.getenv("AZ_SUPPORT_CONTAINER"))
+    expect_no_error(read_azure_json(supp, Sys.getenv("TEST_JSON_FILE")))
+    expect_no_error(read_azure_rds(supp, Sys.getenv("TEST_RDS_FILE")))
+    read_azure_csv(supp, Sys.getenv("TEST_CSV_FILE"), show_col_types = FALSE) |>
+      expect_no_error()
+  }
+})
+
+
+test_that("check that sub-functions inherit options in R!", {
+  yes <- \() getOption("do_it") %||% "nope"
+  wrap <- \() yes()
+  expect_identical(withr::with_options(list(do_it = "yes"), wrap()), "yes")
+})