diff --git a/R/get_auth_token.R b/R/get_auth_token.R index 9e8a0b8..cf8044e 100644 --- a/R/get_auth_token.R +++ b/R/get_auth_token.R @@ -1,275 +1,270 @@ -#' Get Azure authentication token -#' -#' This function retrieves an Azure token for a specified resource. -#' -#' If the environment variables `AZ_TENANT_ID`, `AZ_CLIENT_ID` and -#' `AZ_APP_SECRET` are all set, it will try to use these to return a token. -#' -#' Otherwise it will try to get a managed token from a managed resource such as -#' Azure VM or Azure App Service. -#' -#' If neither of these approaches has returned a token, it will try to retrieve -#' a user token using the provided parameters, requiring the user to have -#' authenticated using their device. If `force_refresh` is set to `TRUE`, a -#' fresh web authentication process should be launched. Otherwise it will -#' attempt to use a cached token matching the given `resource`, `tenant` and -#' `aad_version`. -#' -#' @param resource For v1, a simple URL such as `"https://storage.azure.com/"` -#' should be supplied.For v2, a vector specifying the URL of the Azure resource -#' for which the token is requested as well as any desired scopes. See -#' [AzureAuth::get_azure_token] for details. Use [generate_resource] -#' to help provide an appropriate string or vector. The values default to -#' `c("https://storage.azure.com/.default", "openid", "offline_access")`. -#' If setting version to 2, ensure that the `aad_version` argument is also set -#' to 2. Both are set to use AAD version 1 by default. -#' @param tenant A string specifying the Azure tenant. Defaults to -#' `"common"`. See [AzureAuth::get_azure_token] for other values. -#' @param client_id A string specifying the application ID (client ID). If -#' `NULL`, (the default) the function attempts to obtain the client ID from the -#' Azure Resource Manager token, or prompts the user to log in to obtain it. -#' @param auth_method A string specifying the authentication method. Defaults to -#' `"authorization_code"`. See [AzureAuth::get_azure_token] for other values. -#' @param aad_version Numeric. The AAD version, either 1 or 2 (1 by default) -#' @param force_refresh Boolean: whether to use a stored token if available -#' (`FALSE`, the default), or try to obtain a new one from Azure (`TRUE`). -#' This may be useful if you wish to generate a new token with the same -#' `resource` value as an existing token, but a different `tenant` or -#' `auth_method`. Note that you can also try using [refresh_token] which will -#' cause an existing token to refresh itself, without obtaining a new token -#' from Azure via online reauthentication -#' @param ... Optional arguments (`token_args` or `use_cache`) to be passed on -#' to [AzureAuth::get_managed_token] or [AzureAuth::get_azure_token]. -#' -#' @returns An Azure token object -#' @examples -#' \dontrun{ -#' # Get a token for the default resource -#' token <- get_auth_token() -#' -#' # Force generation of a new token via online reauthentication -#' token <- get_auth_token(force_refresh = TRUE) -#' -#' # Get a token for a specific resource and tenant -#' token <- get_auth_token( -#' resource = "https://graph.microsoft.com", -#' tenant = "my-tenant-id" -#' ) -#' -#' # Get a token using a specific app ID -#' token <- get_auth_token(client_id = "my-app-id") -#' } -#' @export -get_auth_token <- function( - resource = generate_resource(), - tenant = "common", - client_id = NULL, - auth_method = "authorization_code", - aad_version = 1, - force_refresh = FALSE, - ... -) { - aad_msg <- "Invalid {.arg aad_version} variable supplied (must be 1 or 2)" - aad_version <- check_that(aad_version, \(x) x %in% seq(2), aad_msg) - - safely_get_token <- \(...) purrr::safely(AzureAuth::get_azure_token)(...) - get_azure_token <- purrr::partial( - safely_get_token, - resource = resource, - version = aad_version - ) - possibly_get_mtk <- \(...) purrr::possibly(AzureAuth::get_managed_token)(...) - - dots <- rlang::list2(...) - # If the user specifies force_refresh = TRUE we turn off `use_cache`, - # otherwise we leave `use_cache` as it is (or as `NULL`, its default value) - use_cached <- !force_refresh && (dots[["use_cache"]] %||% TRUE) - dots <- rlang::dots_list(!!!dots, use_cache = use_cached, .homonyms = "last") - - # We have 4 approaches to get a token, depending on the context - # 1. Use environment variables if all three are set - token_resp <- rlang::inject(try_token_from_vars(get_azure_token, !!!dots)) - token <- token_resp[["result"]] - token_error <- token_resp[["error"]] - - # 2. Try to get a managed token (for example on Azure VM, App Service) - if (is.null(token)) { - token <- rlang::inject(possibly_get_mtk(resource, !!!dots)) - } - - # 3. If neither of those has worked, try to get an already stored user token - # (unless `force_refresh` is on, in which case skip to option 4) - if (is.null(token) && use_cached) { - token <- match_cached_token(resource, tenant, aad_version) - } - - # 4. If we still don't have a token, try to get a new one via reauthentication - if (is.null(token)) { - if (!force_refresh) { - cli::cli_alert_info("No matching cached token found: fetching new token") - } - client_id <- client_id %||% get_client_id() - token_resp <- rlang::inject( - get_azure_token( - tenant = tenant, - app = client_id, - auth_type = auth_method, - !!!dots - ) - ) - token <- token_resp[["result"]] - token_error <- token_error %||% token_resp[["error"]] - } - - # Give some helpful feedback if the steps above have not succeeded - if (is.null(token) || length(token) == 0) { - cli::cli_alert_info("No authentication token was obtained.") - cli::cli_alert_info("Please check any variables you have supplied.") - cli::cli_alert_info( - "Alternatively, running {.fn AzureRMR::get_azure_login} or - {.fn AzureRMR::list_azure_tokens} may shed some light on the problem." - ) - error_msg <- "{.fn get_auth_token}: No authentication token was obtained." - cli::cli_abort(as.character(token_error %||% error_msg)) - } else { - if (aad_version == 2) { - check_that(token, AzureAuth::is_azure_v2_token, "Invalid token returned") - } else { - check_that(token, AzureAuth::is_azure_v1_token, "Invalid token returned") - } - } -} - - -#' Get token via app and secret environment variables -#' Sub-routine for `get_auth_token()` -#' @keywords internal -#' @returns A list with elements `result` and `error`. If this method is -#' successful, the `result` element will contain a token. -try_token_from_vars <- function(get_token_fun, ...) { - tenant_id_env <- Sys.getenv("AZ_TENANT_ID") - client_id_env <- Sys.getenv("AZ_CLIENT_ID") - client_secret <- Sys.getenv("AZ_APP_SECRET") - - if (all(nzchar(c(tenant_id_env, client_id_env, client_secret)))) { - rlang::inject( - get_token_fun( - tenant = tenant_id_env, - app = client_id_env, - password = client_secret, - ... - ) - ) - } else { - list(result = NULL, error = NULL) - } -} - - -#' Find an already cached token that matches desired parameters -#' Sub-routine for `get_auth_token()` -#' @keywords internal -#' @returns A token from local cache, or NULL if none matches -match_cached_token <- function(resource, tenant, aad_version) { - # list tokens already locally cached - local_tokens <- AzureAuth::list_azure_tokens() - if (length(local_tokens) > 0) { - resources <- purrr::map(local_tokens, "resource") - scopes <- purrr::map(local_tokens, list("scope", 1)) - resources <- purrr::map2_chr(resources, scopes, `%||%`) - tenants <- purrr::map_chr(local_tokens, "tenant") - versions <- purrr::map_int(local_tokens, "version") - - resource_index <- gregg(resources, "^{resource[[1]]}") - tenant_index <- tenants == tenant - version_index <- versions == aad_version - - # return a token matching `resource`, `tenant` and `version`, if any - token_index <- which(resource_index & tenant_index & version_index)[1] - if (!is.na(token_index)) local_tokens[[token_index]] else NULL - } else { - NULL - } -} - - -#' Sub-routine for `get_auth_token()` -#' -#' Pulled out mainly to tidy up the main function code a bit -#' @keywords internal -#' @returns A string (the client ID) -get_client_id <- function() { - pluck_client_id <- function() { - suppressMessages(AzureRMR::get_azure_login()) |> - purrr::pluck("token", "client", "client_id") - } - possibly_pluck_client_id <- \(...) purrr::possibly(pluck_client_id)(...) - azure_cli_default_client_id <- "04b07795-8ddb-461a-bbee-02f9e1bf7b46" - - client_id <- possibly_pluck_client_id() - # if that fails, prompt the user to log in, then try again... - if (is.null(client_id)) { - AzureRMR::create_azure_login() - # ...using the default Azure CLI client ID as a final fallback - client_id <- possibly_pluck_client_id() %||% azure_cli_default_client_id - } - client_id -} - - -#' Generate appropriate values for the `resource` parameter in [get_auth_token] -#' -#' A helper function to generate appropriate values. Ensure that the `version` -#' argument matches the `aad_version` argument to [get_auth_token]. -#' It's unlikely that you will ever want to set `authorise` to `FALSE` but it's -#' here as an option since [AzureAuth::get_azure_token] supports it. Similarly, -#' you are likely to want to keep `refresh` turned on (this argument has no -#' effect on v1 tokens, it only applies to v2). -#' -#' @param version numeric. The AAD version, either 1 or 2 (1 by default) -#' @param url The URL of the Azure resource host -#' @param path For v2, the path designating the access scope -#' @param authorise Boolean, whether to return a token with authorisation scope, -#' (TRUE, the default) or one that just provides authentication. You are -#' unlikely to want to turn this off -#' @param refresh Boolean, applies to v2 tokens only, whether to return a token -#' that has a refresh token also supplied. -#' @returns A scalar character, or (in most v2 situations) a character vector -#' @export -generate_resource <- function( - version = 1, - url = "https://storage.azure.com", - path = "/.default", - authorise = TRUE, - refresh = TRUE -) { - stopifnot("version must be 1 or 2" = version %in% seq(2)) - scopes <- if (refresh) c("openid", "offline_access") else "openid" - if (authorise) { - if (version == 2) { - c(paste0(url, path), scopes) - } else { - url - } - } else { - if (version == 2) { - scopes - } else { - "" - } - } -} - - -#' Use a token's internal `refresh()` method to refresh it -#' -#' This method avoids the need to refresh by re-authenticating online. It seems -#' like this only works with v1 tokens. v2 tokens always seem to refresh by -#' re-authenticating with Azure online. But v2 tokens _ought_ to refresh -#' automatically and not need manual refreshing. To instead generate a -#' completely fresh token, pass `use_cache = FALSE` or `force_refresh = TRUE` -#' to [get_auth_token]. -#' @param token An Azure authentication token -#' @returns An Azure authentication token -#' @export -refresh_token <- \(token) token$refresh() +#' Get Azure authentication token +#' +#' This function retrieves an Azure token for a specified resource. +#' +#' If the environment variables `AZ_TENANT_ID`, `AZ_CLIENT_ID` and +#' `AZ_APP_SECRET` are all set, it will try to use these to return a token. +#' +#' Otherwise it will try to get a managed token from a managed resource such as +#' Azure VM or Azure App Service. +#' +#' If neither of these approaches has returned a token, it will try to retrieve +#' a user token using the provided parameters, requiring the user to have +#' authenticated using their device. If `force_refresh` is set to `TRUE`, a +#' fresh web authentication process should be launched. Otherwise it will +#' attempt to use a cached token matching the given `resource`, `tenant` and +#' `aad_version`. +#' +#' @param resource For v1, a simple URL such as `"https://storage.azure.com/"` +#' should be supplied.For v2, a vector specifying the URL of the Azure resource +#' for which the token is requested as well as any desired scopes. See +#' [AzureAuth::get_azure_token] for details. Use [generate_resource] +#' to help provide an appropriate string or vector. The values default to +#' `c("https://storage.azure.com/.default", "openid", "offline_access")`. +#' If setting version to 2, ensure that the `aad_version` argument is also set +#' to 2. Both are set to use AAD version 1 by default. +#' @param tenant A string specifying the Azure tenant. Defaults to +#' `"common"`. See [AzureAuth::get_azure_token] for other values. +#' @param client_id A string specifying the application ID (client ID). If +#' `NULL`, (the default) the function attempts to obtain the client ID from the +#' Azure Resource Manager token, or prompts the user to log in to obtain it. +#' @param auth_method A string specifying the authentication method. Defaults to +#' `"authorization_code"`. See [AzureAuth::get_azure_token] for other values. +#' @param aad_version Numeric. The AAD version, either 1 or 2 (1 by default) +#' @param force_refresh Boolean: whether to use a stored token if available +#' (`FALSE`, the default), or try to obtain a new one from Azure (`TRUE`). +#' This may be useful if you wish to generate a new token with the same +#' `resource` value as an existing token, but a different `tenant` or +#' `auth_method`. Note that you can also try using [refresh_token] which will +#' cause an existing token to refresh itself, without obtaining a new token +#' from Azure via online reauthentication +#' @param ... Optional arguments (`token_args` or `use_cache`) to be passed on +#' to [AzureAuth::get_managed_token] or [AzureAuth::get_azure_token]. +#' +#' @returns An Azure token object +#' @examples +#' \dontrun{ +#' # Get a token for the default resource +#' token <- get_auth_token() +#' +#' # Force generation of a new token via online reauthentication +#' token <- get_auth_token(force_refresh = TRUE) +#' +#' # Get a token for a specific resource and tenant +#' token <- get_auth_token( +#' resource = "https://graph.microsoft.com", +#' tenant = "my-tenant-id" +#' ) +#' +#' # Get a token using a specific app ID +#' token <- get_auth_token(client_id = "my-app-id") +#' } +#' @export +get_auth_token <- function( + resource = generate_resource(), + tenant = "common", + client_id = NULL, + auth_method = "authorization_code", + aad_version = 1, + force_refresh = FALSE, + ... +) { + aad_msg <- "Invalid {.arg aad_version} variable supplied (must be 1 or 2)" + aad_version <- check_that(aad_version, \(x) x %in% seq(2), aad_msg) + + safely_get_token <- \(...) purrr::safely(AzureAuth::get_azure_token)(...) + get_azure_token <- purrr::partial( + safely_get_token, + resource = resource, + version = aad_version + ) + possibly_get_mtk <- \(...) purrr::possibly(AzureAuth::get_managed_token)(...) + + dots <- rlang::list2(...) + # If the user specifies force_refresh = TRUE we turn off `use_cache`, + # otherwise we leave `use_cache` as it is (or as `NULL`, its default value) + use_cached <- !force_refresh && (dots[["use_cache"]] %||% TRUE) + dots <- rlang::dots_list(!!!dots, use_cache = use_cached, .homonyms = "last") + + # We have 4 approaches to get a token, depending on the context + # 1. Use environment variables if all three are set + token_resp <- rlang::inject(try_token_from_vars(get_azure_token, !!!dots)) + token <- token_resp[["result"]] + token_error <- token_resp[["error"]] + + # 2. Try to get a managed token (for example on Azure VM, App Service) + if (is.null(token)) { + token <- rlang::inject(possibly_get_mtk(resource, !!!dots)) + } + + # 3. If neither of those has worked, try to get an already stored user token + # (unless `force_refresh` is on, in which case skip to option 4) + if (is.null(token) && use_cached) { + token <- match_cached_token(resource, tenant, aad_version) + } + + # 4. If we still don't have a token, try to get a new one via reauthentication + if (is.null(token)) { + if (!force_refresh) { + cli::cli_alert_info("No matching cached token found: fetching new token") + } + client_id <- client_id %||% get_client_id() + token_resp <- rlang::inject( + get_azure_token( + tenant = tenant, + app = client_id, + auth_type = auth_method, + !!!dots + ) + ) + token <- token_resp[["result"]] + token_error <- token_error %||% token_resp[["error"]] + } + + # Give some helpful feedback if the steps above have not succeeded + if (is.null(token) || length(token) == 0) { + cli::cli_alert_info("No authentication token was obtained.") + cli::cli_alert_info("Please check any variables you have supplied.") + cli::cli_alert_info( + "Alternatively, running {.fn AzureRMR::get_azure_login} or + {.fn AzureRMR::list_azure_tokens} may shed some light on the problem." + ) + error_msg <- "{.fn get_auth_token}: No authentication token was obtained." + cli::cli_abort(as.character(token_error %||% error_msg)) + } else { + if (aad_version == 2) { + check_that(token, AzureAuth::is_azure_v2_token, "Invalid token returned") + } else { + check_that(token, AzureAuth::is_azure_v1_token, "Invalid token returned") + } + } +} + +#' Get token via app and secret environment variables +#' Sub-routine for `get_auth_token()` +#' @keywords internal +#' @returns A list with elements `result` and `error`. If this method is +#' successful, the `result` element will contain a token. +try_token_from_vars <- function(get_token_fun, ...) { + tenant_id_env <- Sys.getenv("AZ_TENANT_ID") + client_id_env <- Sys.getenv("AZ_CLIENT_ID") + client_secret <- Sys.getenv("AZ_APP_SECRET") + + if (all(nzchar(c(tenant_id_env, client_id_env, client_secret)))) { + rlang::inject( + get_token_fun( + tenant = tenant_id_env, + app = client_id_env, + password = client_secret, + ... + ) + ) + } else { + list(result = NULL, error = NULL) + } +} + +#' Find an already cached token that matches desired parameters +#' Sub-routine for `get_auth_token()` +#' @keywords internal +#' @returns A token from local cache, or NULL if none matches +match_cached_token <- function(resource, tenant, aad_version) { + # list tokens already locally cached + local_tokens <- AzureAuth::list_azure_tokens() + if (length(local_tokens) > 0) { + resources <- purrr::map(local_tokens, "resource") + scopes <- purrr::map(local_tokens, list("scope", 1)) + resources <- purrr::map2_chr(resources, scopes, `%||%`) + tenants <- purrr::map_chr(local_tokens, "tenant") + versions <- purrr::map_int(local_tokens, "version") + + resource_index <- gregg(resources, "^{resource[[1]]}") + tenant_index <- tenants == tenant + version_index <- versions == aad_version + + # return a token matching `resource`, `tenant` and `version`, if any + token_index <- which(resource_index & tenant_index & version_index)[1] + if (!is.na(token_index)) local_tokens[[token_index]] else NULL + } else { + NULL + } +} + +#' Sub-routine for `get_auth_token()` +#' +#' Pulled out mainly to tidy up the main function code a bit +#' @keywords internal +#' @returns A string (the client ID) +get_client_id <- function() { + pluck_client_id <- function() { + suppressMessages(AzureRMR::get_azure_login()) |> + purrr::pluck("token", "client", "client_id") + } + possibly_pluck_client_id <- \(...) purrr::possibly(pluck_client_id)(...) + azure_cli_default_client_id <- "04b07795-8ddb-461a-bbee-02f9e1bf7b46" + + client_id <- possibly_pluck_client_id() + # if that fails, prompt the user to log in, then try again... + if (is.null(client_id)) { + AzureRMR::create_azure_login() + # ...using the default Azure CLI client ID as a final fallback + client_id <- possibly_pluck_client_id() %||% azure_cli_default_client_id + } + client_id +} + +#' Generate appropriate values for the `resource` parameter in [get_auth_token] +#' +#' A helper function to generate appropriate values. Ensure that the `version` +#' argument matches the `aad_version` argument to [get_auth_token]. +#' It's unlikely that you will ever want to set `authorise` to `FALSE` but it's +#' here as an option since [AzureAuth::get_azure_token] supports it. Similarly, +#' you are likely to want to keep `refresh` turned on (this argument has no +#' effect on v1 tokens, it only applies to v2). +#' +#' @param version numeric. The AAD version, either 1 or 2 (1 by default) +#' @param url The URL of the Azure resource host +#' @param path For v2, the path designating the access scope +#' @param authorise Boolean, whether to return a token with authorisation scope, +#' (TRUE, the default) or one that just provides authentication. You are +#' unlikely to want to turn this off +#' @param refresh Boolean, applies to v2 tokens only, whether to return a token +#' that has a refresh token also supplied. +#' @returns A scalar character, or (in most v2 situations) a character vector +#' @export +generate_resource <- function( + version = 1, + url = "https://storage.azure.com", + path = "/.default", + authorise = TRUE, + refresh = TRUE +) { + stopifnot("version must be 1 or 2" = version %in% seq(2)) + scopes <- if (refresh) c("openid", "offline_access") else "openid" + if (authorise) { + if (version == 2) { + c(paste0(url, path), scopes) + } else { + url + } + } else { + if (version == 2) { + scopes + } else { + "" + } + } +} + +#' Use a token's internal `refresh()` method to refresh it +#' +#' This method avoids the need to refresh by re-authenticating online. It seems +#' like this only works with v1 tokens. v2 tokens always seem to refresh by +#' re-authenticating with Azure online. But v2 tokens _ought_ to refresh +#' automatically and not need manual refreshing. To instead generate a +#' completely fresh token, pass `use_cache = FALSE` or `force_refresh = TRUE` +#' to [get_auth_token]. +#' @param token An Azure authentication token +#' @returns An Azure authentication token +#' @export +refresh_token <- \(token) token$refresh() diff --git a/R/get_container.R b/R/get_container.R index 31a5d3d..249276f 100644 --- a/R/get_container.R +++ b/R/get_container.R @@ -1,83 +1,80 @@ -#' Get Azure storage container -#' -#' The environment variable "AZ_STORAGE_EP" should be set. This provides the URL -#' for the default Azure storage endpoint. -#' Use [list_container_names] to get a list of available container names. -#' -#' @param container_name Name of the container as a string. `NULL` by default, -#' which means the function will look instead for a container name stored in -#' the environment variable "AZ_CONTAINER" -#' @param token An Azure authentication token. If left as `NULL`, a token -#' returned by [get_auth_token] will be used -#' @param endpoint_url An Azure endpoint URL. If left as `NULL`, the default, -#' the value of the environment variable "AZ_STORAGE_EP" will be used -#' @param ... arguments to be passed through to [get_auth_token], if a token is -#' not already supplied -#' @returns An Azure blob container (list object of class "blob_container") -#' @export -get_container <- function( - container_name = NULL, - token = NULL, - endpoint_url = NULL, - ... -) { - msg1 <- paste0( - "{.var container_name} is empty. ", - "Did you forget to set an environment variable?" - ) - msg2 <- paste0( - "{.var endpoint_url} is empty. ", - "Did you forget to set an environment variable?" - ) - container_name <- (container_name %||% check_envvar("AZ_CONTAINER")) |> - check_nzchar(msg1) - endpoint_url <- (endpoint_url %||% check_envvar("AZ_STORAGE_EP")) |> - check_nzchar(msg2) - token <- token %||% get_auth_token(...) - - get_azure_endpoint(token, endpoint_url) |> - AzureStor::blob_container(container_name) -} - - -#' Return a list of container names that are found at the endpoint -#' -#' @inheritParams get_container -#' @returns A character vector of all container names found -#' @export -list_container_names <- function(token = NULL, endpoint_url = NULL, ...) { - token <- token %||% get_auth_token(...) - endpoint <- get_azure_endpoint(token, endpoint_url) - container_list <- AzureStor::list_blob_containers(endpoint) - stopifnot("no containers found" = length(container_list) >= 1L) - names(container_list) -} - - -#' Return an Azure "blob_endpoint" -#' -#' This function will return the endpoint specified in the environment variable -#' "AZ_STORAGE_EP" by default -#' -#' @inheritParams get_container -#' @returns An Azure blob endpoint (object of class "blob_endpoint") -#' @keywords internal -get_azure_endpoint <- function(token = NULL, endpoint_url = NULL, ...) { - token <- token %||% get_auth_token(...) - endpoint_url <- endpoint_url %||% check_envvar("AZ_STORAGE_EP") - AzureStor::blob_endpoint(endpoint_url, token = token) -} - - -#' Check that an environment variable exists -#' -#' The function prints a helpful error if the variable is not found, else -#' it returns the value of `Sys.getenv(x)` -#' -#' @param x the *name* of the environment variable to be found and checked -#' @returns the value of the environment variable named in `x` -#' @export -check_envvar <- function(x) { - cst_msg <- cst_error_msg("The environment variable {.envvar {x}} is not set") - check_scalar_type(Sys.getenv(x, NA_character_), "string", cst_msg) -} +#' Get Azure storage container +#' +#' The environment variable "AZ_STORAGE_EP" should be set. This provides the URL +#' for the default Azure storage endpoint. +#' Use [list_container_names] to get a list of available container names. +#' +#' @param container_name Name of the container as a string. `NULL` by default, +#' which means the function will look instead for a container name stored in +#' the environment variable "AZ_CONTAINER" +#' @param token An Azure authentication token. If left as `NULL`, a token +#' returned by [get_auth_token] will be used +#' @param endpoint_url An Azure endpoint URL. If left as `NULL`, the default, +#' the value of the environment variable "AZ_STORAGE_EP" will be used +#' @param ... arguments to be passed through to [get_auth_token], if a token is +#' not already supplied +#' @returns An Azure blob container (list object of class "blob_container") +#' @export +get_container <- function( + container_name = NULL, + token = NULL, + endpoint_url = NULL, + ... +) { + msg1 <- paste0( + "{.var container_name} is empty. ", + "Did you forget to set an environment variable?" + ) + msg2 <- paste0( + "{.var endpoint_url} is empty. ", + "Did you forget to set an environment variable?" + ) + container_name <- (container_name %||% check_envvar("AZ_CONTAINER")) |> + check_nzchar(msg1) + endpoint_url <- (endpoint_url %||% check_envvar("AZ_STORAGE_EP")) |> + check_nzchar(msg2) + token <- token %||% get_auth_token(...) + + get_azure_endpoint(token, endpoint_url) |> + AzureStor::blob_container(container_name) +} + +#' Return a list of container names that are found at the endpoint +#' +#' @inheritParams get_container +#' @returns A character vector of all container names found +#' @export +list_container_names <- function(token = NULL, endpoint_url = NULL, ...) { + token <- token %||% get_auth_token(...) + endpoint <- get_azure_endpoint(token, endpoint_url) + container_list <- AzureStor::list_blob_containers(endpoint) + stopifnot("no containers found" = length(container_list) >= 1L) + names(container_list) +} + +#' Return an Azure "blob_endpoint" +#' +#' This function will return the endpoint specified in the environment variable +#' "AZ_STORAGE_EP" by default +#' +#' @inheritParams get_container +#' @returns An Azure blob endpoint (object of class "blob_endpoint") +#' @keywords internal +get_azure_endpoint <- function(token = NULL, endpoint_url = NULL, ...) { + token <- token %||% get_auth_token(...) + endpoint_url <- endpoint_url %||% check_envvar("AZ_STORAGE_EP") + AzureStor::blob_endpoint(endpoint_url, token = token) +} + +#' Check that an environment variable exists +#' +#' The function prints a helpful error if the variable is not found, else +#' it returns the value of `Sys.getenv(x)` +#' +#' @param x the *name* of the environment variable to be found and checked +#' @returns the value of the environment variable named in `x` +#' @export +check_envvar <- function(x) { + cst_msg <- cst_error_msg("The environment variable {.envvar {x}} is not set") + check_scalar_type(Sys.getenv(x, NA_character_), "string", cst_msg) +} diff --git a/R/helpers.R b/R/helpers.R index 18afcd0..05b7392 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -14,13 +14,12 @@ #' @export check_that <- function(x, predicate, message, pf = parent.frame()) { if (predicate(x)) { - x + x } else { - cli::cli_abort(message, call = rlang::caller_call(), .envir = pf) + cli::cli_abort(message, call = rlang::caller_call(), .envir = pf) } } - #' @export ct_error_msg <- \(text) paste0("{.fn check_that}: ", text) @@ -57,16 +56,15 @@ check_vec <- function( w <- rlang::arg_match(which) test_call <- rlang::call2(w, .x = x, .p = predicate, .ns = "purrr") if (eval(test_call)) { - x + x } else { - cli::cli_abort(message, call = rlang::caller_call(), .envir = pf) + cli::cli_abort(message, call = rlang::caller_call(), .envir = pf) } } #' @export cv_error_msg <- \(text) paste0("{.fn check_vec}: ", text) - #' An alternative to stopifnot/assert_that etc #' #' This function makes it easy to use the `is_scalar_*` functions from `{rlang}` @@ -86,32 +84,31 @@ check_scalar_type <- function( pf = parent.frame() ) { opts <- c( - "character", - "logical", - "integer", - "double", - "string", - "bool", - "list", - "bytes", - "raw", - "vector", - "complex" + "character", + "logical", + "integer", + "double", + "string", + "bool", + "list", + "bytes", + "raw", + "vector", + "complex" ) t <- rlang::arg_match(type, opts) t <- if (t %in% c("string", "bool")) t else paste0("scalar_", t) test_call <- rlang::call2(paste0("is_", t), x = x, .ns = "rlang") if (eval(test_call)) { - x + x } else { - cli::cli_abort(message, call = rlang::caller_call(), .envir = pf) + cli::cli_abort(message, call = rlang::caller_call(), .envir = pf) } } #' @export cst_error_msg <- \(text) paste0("{.fn check_scalar_type}: ", text) - #' Check if a supplied non-NULL value is a string with >0 characters #' #' Will error if x is equal to `""`, or if it is otherwise missing or invalid. @@ -125,19 +122,18 @@ cst_error_msg <- \(text) paste0("{.fn check_scalar_type}: ", text) #' @export check_nzchar <- function(x, message, pf = parent.frame()) { if (is.null(x)) { - NULL + NULL } cnz <- "check_nzchar" # nolint check_scalar_type(x, "string", "{.fn {cnz}}: {.var x} is not a string") if (nzchar(x)) { - x + x } else { - message <- paste0("{.fn {cnz}}: ", message) - cli::cli_abort(message, call = rlang::caller_call(), .envir = pf) + message <- paste0("{.fn {cnz}}: ", message) + cli::cli_abort(message, call = rlang::caller_call(), .envir = pf) } } - #' grepl a glued regex #' #' Use \{glue\} expressions in grepl (and put the arguments the right way round) @@ -151,16 +147,15 @@ check_nzchar <- function(x, message, pf = parent.frame()) { #' @keywords internal gregg <- \(x, rx, ..., g = parent.frame()) grepl(glue::glue_data(g, rx), x, ...) - #' Check that a container looks like a real container #' @inheritParams read_azure_parquet #' @export check_container_class <- function(container) { if (inherits(container, "blob_container")) { - container + container } else { - ccc <- "check_container_class" # nolint - cc <- rlang::caller_call() - cli::cli_abort("{.fn {ccc}}: This is not a valid blob container", call = cc) + ccc <- "check_container_class" # nolint + cc <- rlang::caller_call() + cli::cli_abort("{.fn {ccc}}: This is not a valid blob container", call = cc) } } diff --git a/R/list_files.R b/R/list_files.R index cfff3f3..b15e5ce 100644 --- a/R/list_files.R +++ b/R/list_files.R @@ -1,57 +1,57 @@ -#' List files in a container -#' -#' Recursively (or not, if desired) lists all files found in a container. Search -#' can be restricted to a particular 'subdirectory' of the container, and/or -#' to files with a specific extension. The function assumes that all file names -#' end with a ".ext" extension of some sort. -#' -#' The function does not support filtering by file name, only by file extension. -#' -#' The returned file list (character vector) contains the full paths to the -#' files, ready to be passed perhaps to a `read_azure_*` function, or further -#' filtered by you. If you just want the names of the files without the folder -#' path, use [basename()] to extract these. -#' -#' @inheritParams read_azure_parquet -#' @param path (optional) subdirectory of the container to list files within. -#' `""` (the root folder of the container) by default -#' @param ext (optional) A string giving the extension of a particular file type -#' you want to restrict the list to. No need to include the initial ".". The -#' default, `""`, means no filtering by file extension will be applied. Can be -#' a regular expression. -#' @param recursive A Boolean value: whether to list files recursively. `TRUE` -#' by default -#' -#' @importFrom rlang .data -#' @returns A vector of file names, or an empty character vector if none found -#' @examples \dontrun{ -#' list_files(get_container("example"), ext = "csv") -#' } -#' @export -list_files <- function(container, path = "", ext = "", recursive = TRUE) { - stopifnot(rlang::is_character(c(path, ext), 2)) - stopifnot(rlang::is_bool(recursive)) - pnf_msg <- ct_error_msg("Path {.val {path}} not found") - check_that(path, \(x) AzureStor::blob_dir_exists(container, x), pnf_msg) - - tbl <- AzureStor::list_blobs(container, path, recursive = recursive) - if (nrow(tbl) > 0) { - ext_rx <- if (nzchar(ext)) sub("^\\.+", "", ext) else ".*" # nolint - tbl <- tbl |> - dplyr::filter(!.data[["isdir"]] & gregg(.data[["name"]], "\\.{ext_rx}$")) - } - - # A zero-row tbl can result if `path` is initially empty, or via the filter - # step above. We handle this the same way, no matter which route led here. - if (nrow(tbl) == 0) { - fix_path <- \(p) sub("^/+$", "", sub("^([^/])(.*)", "/\\1\\2", p)) # nolint - ext <- if (nzchar(ext)) paste0(" ", ext) - msg <- "No{ext} files found in {.val [{container$name}]:{fix_path(path)}}" - if (rlang::is_interactive()) { - cli::cli_alert_info(msg) - } - invisible(character(0)) - } else { - tbl[["name"]] - } -} +#' List files in a container +#' +#' Recursively (or not, if desired) lists all files found in a container. Search +#' can be restricted to a particular 'subdirectory' of the container, and/or +#' to files with a specific extension. The function assumes that all file names +#' end with a ".ext" extension of some sort. +#' +#' The function does not support filtering by file name, only by file extension. +#' +#' The returned file list (character vector) contains the full paths to the +#' files, ready to be passed perhaps to a `read_azure_*` function, or further +#' filtered by you. If you just want the names of the files without the folder +#' path, use [basename()] to extract these. +#' +#' @inheritParams read_azure_parquet +#' @param path (optional) subdirectory of the container to list files within. +#' `""` (the root folder of the container) by default +#' @param ext (optional) A string giving the extension of a particular file type +#' you want to restrict the list to. No need to include the initial ".". The +#' default, `""`, means no filtering by file extension will be applied. Can be +#' a regular expression. +#' @param recursive A Boolean value: whether to list files recursively. `TRUE` +#' by default +#' +#' @importFrom rlang .data +#' @returns A vector of file names, or an empty character vector if none found +#' @examples \dontrun{ +#' list_files(get_container("example"), ext = "csv") +#' } +#' @export +list_files <- function(container, path = "", ext = "", recursive = TRUE) { + stopifnot(rlang::is_character(c(path, ext), 2)) + stopifnot(rlang::is_bool(recursive)) + pnf_msg <- ct_error_msg("Path {.val {path}} not found") + check_that(path, \(x) AzureStor::blob_dir_exists(container, x), pnf_msg) + + tbl <- AzureStor::list_blobs(container, path, recursive = recursive) + if (nrow(tbl) > 0) { + ext_rx <- if (nzchar(ext)) sub("^\\.+", "", ext) else ".*" # nolint + tbl <- tbl |> + dplyr::filter(!.data[["isdir"]] & gregg(.data[["name"]], "\\.{ext_rx}$")) + } + + # A zero-row tbl can result if `path` is initially empty, or via the filter + # step above. We handle this the same way, no matter which route led here. + if (nrow(tbl) == 0) { + fix_path <- \(p) sub("^/+$", "", sub("^([^/])(.*)", "/\\1\\2", p)) # nolint + ext <- if (nzchar(ext)) paste0(" ", ext) + msg <- "No{ext} files found in {.val [{container$name}]:{fix_path(path)}}" + if (rlang::is_interactive()) { + cli::cli_alert_info(msg) + } + invisible(character(0)) + } else { + tbl[["name"]] + } +} diff --git a/R/read_azure_files.R b/R/read_azure_files.R index e76183b..7b58120 100644 --- a/R/read_azure_files.R +++ b/R/read_azure_files.R @@ -1,163 +1,157 @@ -#' Read a parquet file from Azure storage -#' -#' @param container An Azure container object, as returned by [get_container] -#' @param file The name of the file to be read, as a string. NB The file -#' extension does not need to be included (though it can be). The function -#' will error if multiple files are somehow matched. -#' @param path The path to the directory where `file` is located, as a string. -#' Only needed if `file` does not already contain its full path. If file is -#' just a file name with no path, then provide the path to the directory here. -#' This must be the full path to the file location, as the function will not -#' search into subdirectories recursively. Set to `"/"` (the root of the -#' container) by default. -#' @param info Boolean. Whether to print user feedback about the file that is -#' being read. Useful for checking the function is doing what is expected, but -#' can be turned off with `FALSE`. Can be set persistently with the option -#' "azkit.info". If `NULL` then it will default to the value of -#' [rlang::is_interactive] (that is, `TRUE` for interactive sessions). -#' @param ... optional arguments to be passed through to [arrow::read_parquet] -#' @returns A tibble -#' @examples \dontrun{ -#' # if a full filepath is available then path can be ignored -#' read_azure_parquet(cont, "data/folder/path/1.parquet") -#' # you can provide a filename without the '.parquet' extension -#' # if you wish to use this partial file name matching it is probably easier -#' # to provide a 'path' -#' read_azure_parquet(cont, "case_details", "storage/parquet/2025/06/29") -#' } -#' @export -read_azure_parquet <- function(container, file, path = "/", info = NULL, ...) { - check_blob_exists(container, file, "parquet", info, path) |> - # using `dest = NULL` means pass the data through as a raw vector - AzureStor::download_blob(container, src = _, dest = NULL) |> - arrow::read_parquet(...) -} - - -#' Read a json file from Azure storage -#' -#' @inheritParams read_azure_parquet -#' @param ... optional arguments to be passed through to -#' [yyjsonr::read_json_raw] -#' @returns A list -#' @export -read_azure_json <- function(container, file, path = "/", info = NULL, ...) { - check_blob_exists(container, file, "json", info, path) |> - # using `dest = NULL` means pass the data through as a raw vector - AzureStor::download_blob(container, src = _, dest = NULL) |> - yyjsonr::read_json_raw(...) -} - - -#' Read a json.gz file from Azure storage -#' -#' @inheritParams read_azure_parquet -#' @param ... optional arguments to be passed through to -#' [yyjsonr::read_json_file] -#' @returns A list -#' @export -read_azure_jsongz <- function(container, file, path = "/", info = NULL, ...) { - full_path <- check_blob_exists(container, file, "json.gz", info, path) - dl <- withr::local_tempfile( - pattern = tools::file_path_sans_ext(basename(full_path), TRUE), - fileext = "json.gz" - ) - AzureStor::download_blob(container, src = full_path, dest = dl) - yyjsonr::read_json_file(dl, ...) -} - - -#' Read an rds file from Azure storage -#' -#' @inheritParams read_azure_parquet -#' @param ... optional arguments to be passed through to -#' [AzureStor::storage_load_rds]. For example, a compression type (one of -#' c("unknown", "gzip", "bzip2", "xz", "zstd", "none")) can be provided using -#' the argument `type`, which will be passed on to [memDecompress] via -#' [AzureStor::storage_load_rds]. -# If nothing is provided here, the compression type will be set to "none". -#' @returns The data object that was stored in the rds file -#' @export -read_azure_rds <- function(container, file, path = "/", info = NULL, ...) { - # If the user doesn't specify a (de)compression type with `type` in `...`, we - # will set a `type` of "none", as this seems to be the standard on SU Azure - dots <- rlang::dots_list(..., type = "none", .homonyms = "first") - blob <- check_blob_exists(container, file, "rds", info, path) - rlang::inject(AzureStor::storage_load_rds(container, blob, !!!dots)) -} - - -#' Read a csv file from Azure storage -#' -#' @inheritParams read_azure_parquet -#' @param ... optional arguments to be passed through to [readr::read_delim] -#' @returns A tibble -#' @export -read_azure_csv <- function(container, file, path = "/", info = NULL, ...) { - check_blob_exists(container, file, "csv", info, path) |> - AzureStor::storage_read_csv(container, file = _, ...) -} - - -#' Read any file from Azure storage -#' -#' @inheritParams read_azure_parquet -#' @param ext If a custom extension needs to be supplied, you can specify it -#' here. If `NULL`, the default, the extension of `file` will be used -#' @param ... optional arguments to be passed through to -#' [AzureStor::download_blob] -#' @returns A raw data stream -#' @export -read_azure_file <- function( - container, - file, - path = "/", - info = NULL, - ext = NULL, - ... -) { - ext <- ext %||% tools::file_ext(file) - check_blob_exists(container, file, ext, info, path) |> - # using `dest = NULL` means pass the data through as a raw vector - AzureStor::download_blob(container, src = _, dest = NULL, ...) -} - - -#' Ensures that the filepath for the file to read exists -#' -#' @inheritParams read_azure_parquet -#' @param ext The standard file extension for the file type, e.g. "json" -#' @keywords internal -check_blob_exists <- function(container, file, ext, info, path) { - stopifnot("no container found" = inherits(container, "blob_container")) - path <- if (path %in% c("", "/")) "" else path - stopifnot("path not found" = AzureStor::blob_dir_exists(container, path)) - dir_name <- if (dirname(file) == ".") "" else dirname(file) - # Potentially the user could provide a partial file path in `path` and a - # further sub-directory as part of `file`. This handles that eventuality, - # though this usage pattern should be quite rare! - dpath <- file.path(path, dir_name) - fname <- basename(file) - if (nzchar(ext) && !gregg(fname, "\\.{ext}$")) { - fname <- glue::glue("{fname}.{ext}") - } - # remove duplicate slashes and any initial slashes - file_path <- sub("^/", "", gsub("/+", "/", file.path(dpath, fname))) - - filepath_out <- AzureStor::list_blobs(container, dpath, recursive = FALSE) |> - dplyr::filter(dplyr::if_any("name", \(x) x == {{ file_path }})) |> - dplyr::pull("name") - - msg1 <- ct_error_msg("no matching {ext} file found") - msg2 <- cst_error_msg("multiple matching {ext} files found") - check_that(filepath_out, \(x) length(x) > 0, msg1) # check length > 0 - check_scalar_type(filepath_out, "character", msg2) # check length == 1 - - info_option <- getOption("azkit.info") - stopifnot(rlang::is_scalar_logical(info) || is.null(info)) - stopifnot(rlang::is_scalar_logical(info_option) || is.null(info_option)) - if (info %||% info_option %||% rlang::is_interactive()) { - cli::cli_alert_info("File {.val {filepath_out}} will be read in") - } - filepath_out -} +#' Read a parquet file from Azure storage +#' +#' @param container An Azure container object, as returned by [get_container] +#' @param file The name of the file to be read, as a string. NB The file +#' extension does not need to be included (though it can be). The function +#' will error if multiple files are somehow matched. +#' @param path The path to the directory where `file` is located, as a string. +#' Only needed if `file` does not already contain its full path. If file is +#' just a file name with no path, then provide the path to the directory here. +#' This must be the full path to the file location, as the function will not +#' search into subdirectories recursively. Set to `"/"` (the root of the +#' container) by default. +#' @param info Boolean. Whether to print user feedback about the file that is +#' being read. Useful for checking the function is doing what is expected, but +#' can be turned off with `FALSE`. Can be set persistently with the option +#' "azkit.info". If `NULL` then it will default to the value of +#' [rlang::is_interactive] (that is, `TRUE` for interactive sessions). +#' @param ... optional arguments to be passed through to [arrow::read_parquet] +#' @returns A tibble +#' @examples \dontrun{ +#' # if a full filepath is available then path can be ignored +#' read_azure_parquet(cont, "data/folder/path/1.parquet") +#' # you can provide a filename without the '.parquet' extension +#' # if you wish to use this partial file name matching it is probably easier +#' # to provide a 'path' +#' read_azure_parquet(cont, "case_details", "storage/parquet/2025/06/29") +#' } +#' @export +read_azure_parquet <- function(container, file, path = "/", info = NULL, ...) { + check_blob_exists(container, file, "parquet", info, path) |> + # using `dest = NULL` means pass the data through as a raw vector + AzureStor::download_blob(container, src = _, dest = NULL) |> + arrow::read_parquet(...) +} + +#' Read a json file from Azure storage +#' +#' @inheritParams read_azure_parquet +#' @param ... optional arguments to be passed through to +#' [yyjsonr::read_json_raw] +#' @returns A list +#' @export +read_azure_json <- function(container, file, path = "/", info = NULL, ...) { + check_blob_exists(container, file, "json", info, path) |> + # using `dest = NULL` means pass the data through as a raw vector + AzureStor::download_blob(container, src = _, dest = NULL) |> + yyjsonr::read_json_raw(...) +} + +#' Read a json.gz file from Azure storage +#' +#' @inheritParams read_azure_parquet +#' @param ... optional arguments to be passed through to +#' [yyjsonr::read_json_file] +#' @returns A list +#' @export +read_azure_jsongz <- function(container, file, path = "/", info = NULL, ...) { + full_path <- check_blob_exists(container, file, "json.gz", info, path) + dl <- withr::local_tempfile( + pattern = tools::file_path_sans_ext(basename(full_path), TRUE), + fileext = "json.gz" + ) + AzureStor::download_blob(container, src = full_path, dest = dl) + yyjsonr::read_json_file(dl, ...) +} + +#' Read an rds file from Azure storage +#' +#' @inheritParams read_azure_parquet +#' @param ... optional arguments to be passed through to +#' [AzureStor::storage_load_rds]. For example, a compression type (one of +#' c("unknown", "gzip", "bzip2", "xz", "zstd", "none")) can be provided using +#' the argument `type`, which will be passed on to [memDecompress] via +#' [AzureStor::storage_load_rds]. +# If nothing is provided here, the compression type will be set to "none". +#' @returns The data object that was stored in the rds file +#' @export +read_azure_rds <- function(container, file, path = "/", info = NULL, ...) { + # If the user doesn't specify a (de)compression type with `type` in `...`, we + # will set a `type` of "none", as this seems to be the standard on SU Azure + dots <- rlang::dots_list(..., type = "none", .homonyms = "first") + blob <- check_blob_exists(container, file, "rds", info, path) + rlang::inject(AzureStor::storage_load_rds(container, blob, !!!dots)) +} + +#' Read a csv file from Azure storage +#' +#' @inheritParams read_azure_parquet +#' @param ... optional arguments to be passed through to [readr::read_delim] +#' @returns A tibble +#' @export +read_azure_csv <- function(container, file, path = "/", info = NULL, ...) { + check_blob_exists(container, file, "csv", info, path) |> + AzureStor::storage_read_csv(container, file = _, ...) +} + +#' Read any file from Azure storage +#' +#' @inheritParams read_azure_parquet +#' @param ext If a custom extension needs to be supplied, you can specify it +#' here. If `NULL`, the default, the extension of `file` will be used +#' @param ... optional arguments to be passed through to +#' [AzureStor::download_blob] +#' @returns A raw data stream +#' @export +read_azure_file <- function( + container, + file, + path = "/", + info = NULL, + ext = NULL, + ... +) { + ext <- ext %||% tools::file_ext(file) + check_blob_exists(container, file, ext, info, path) |> + # using `dest = NULL` means pass the data through as a raw vector + AzureStor::download_blob(container, src = _, dest = NULL, ...) +} + +#' Ensures that the filepath for the file to read exists +#' +#' @inheritParams read_azure_parquet +#' @param ext The standard file extension for the file type, e.g. "json" +#' @keywords internal +check_blob_exists <- function(container, file, ext, info, path) { + stopifnot("no container found" = inherits(container, "blob_container")) + path <- if (path %in% c("", "/")) "" else path + stopifnot("path not found" = AzureStor::blob_dir_exists(container, path)) + dir_name <- if (dirname(file) == ".") "" else dirname(file) + # Potentially the user could provide a partial file path in `path` and a + # further sub-directory as part of `file`. This handles that eventuality, + # though this usage pattern should be quite rare! + dpath <- file.path(path, dir_name) + fname <- basename(file) + if (nzchar(ext) && !gregg(fname, "\\.{ext}$")) { + fname <- glue::glue("{fname}.{ext}") + } + # remove duplicate slashes and any initial slashes + file_path <- sub("^/", "", gsub("/+", "/", file.path(dpath, fname))) + + filepath_out <- AzureStor::list_blobs(container, dpath, recursive = FALSE) |> + dplyr::filter(dplyr::if_any("name", \(x) x == {{ file_path }})) |> + dplyr::pull("name") + + msg1 <- ct_error_msg("no matching {ext} file found") + msg2 <- cst_error_msg("multiple matching {ext} files found") + check_that(filepath_out, \(x) length(x) > 0, msg1) # check length > 0 + check_scalar_type(filepath_out, "character", msg2) # check length == 1 + + info_option <- getOption("azkit.info") + stopifnot(rlang::is_scalar_logical(info) || is.null(info)) + stopifnot(rlang::is_scalar_logical(info_option) || is.null(info_option)) + if (info %||% info_option %||% rlang::is_interactive()) { + cli::cli_alert_info("File {.val {filepath_out}} will be read in") + } + filepath_out +} diff --git a/R/read_azure_table.R b/R/read_azure_table.R index 5a85452..3dd3998 100644 --- a/R/read_azure_table.R +++ b/R/read_azure_table.R @@ -1,39 +1,39 @@ -#' Read in data from an Azure table -#' -#' @param table_name name of the table to be read. If left as `NULL`, -#' the default, the function will look instead for a value stored in the -#' environment variable "AZ_TABLE_NAME" -#' @param table_endpoint URL of the Azure table endpoint. If left as `NULL`, -#' the default, the function will look instead for a value stored in the -#' environment variable "AZ_TABLE_EP" -#' @param ... parameters to be passed through to [get_auth_token] -#' @inheritParams get_container -#' @returns A tibble -#' @export -read_azure_table <- function( - table_name = NULL, - table_endpoint = NULL, - token = NULL, - ... -) { - table_name <- table_name %||% check_envvar("AZ_TABLE_NAME") - table_ep <- table_endpoint %||% check_envvar("AZ_TABLE_EP") - token <- token %||% get_auth_token(...) - access_token <- token |> - purrr::pluck("credentials", "access_token") - headers <- list("2025-11-05", "application/json;odata=nometadata") |> - purrr::set_names(c("x-ms-version", "Accept")) - - resp <- httr2::request(table_ep) |> - httr2::req_url_path_append(table_name) |> - httr2::req_auth_bearer_token(access_token) |> - httr2::req_headers(!!!headers) |> - httr2::req_perform() |> - httr2::resp_check_status() - - resp |> - httr2::resp_body_json() |> - purrr::pluck("value") |> - purrr::map(tibble::as_tibble) |> - purrr::list_rbind() -} +#' Read in data from an Azure table +#' +#' @param table_name name of the table to be read. If left as `NULL`, +#' the default, the function will look instead for a value stored in the +#' environment variable "AZ_TABLE_NAME" +#' @param table_endpoint URL of the Azure table endpoint. If left as `NULL`, +#' the default, the function will look instead for a value stored in the +#' environment variable "AZ_TABLE_EP" +#' @param ... parameters to be passed through to [get_auth_token] +#' @inheritParams get_container +#' @returns A tibble +#' @export +read_azure_table <- function( + table_name = NULL, + table_endpoint = NULL, + token = NULL, + ... +) { + table_name <- table_name %||% check_envvar("AZ_TABLE_NAME") + table_ep <- table_endpoint %||% check_envvar("AZ_TABLE_EP") + token <- token %||% get_auth_token(...) + access_token <- token |> + purrr::pluck("credentials", "access_token") + headers <- list("2025-11-05", "application/json;odata=nometadata") |> + purrr::set_names(c("x-ms-version", "Accept")) + + resp <- httr2::request(table_ep) |> + httr2::req_url_path_append(table_name) |> + httr2::req_auth_bearer_token(access_token) |> + httr2::req_headers(!!!headers) |> + httr2::req_perform() |> + httr2::resp_check_status() + + resp |> + httr2::resp_body_json() |> + purrr::pluck("value") |> + purrr::map(tibble::as_tibble) |> + purrr::list_rbind() +} diff --git a/azkit.Rproj b/azkit.Rproj index 38b9011..69fafd4 100644 --- a/azkit.Rproj +++ b/azkit.Rproj @@ -1,22 +1,22 @@ -Version: 1.0 - -RestoreWorkspace: No -SaveWorkspace: No -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -AutoAppendNewline: Yes -StripTrailingWhitespace: Yes -LineEndingConversion: Posix - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: rd,collate,namespace +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes +LineEndingConversion: Posix + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/azkit.code-workspace b/azkit.code-workspace index 7f23b68..01efb84 100644 --- a/azkit.code-workspace +++ b/azkit.code-workspace @@ -1,59 +1,59 @@ { - "folders": [ - { - "path": "." - } - ], - "settings": { - "workbench.colorCustomizations": { - "editorRuler.foreground": "#ff4081", - "statusBarItem.warningBackground": "#41e6ec", - "statusBarItem.warningForeground": "#000000", - "statusBarItem.warningHoverBackground": "#41e6ec", - "statusBarItem.warningHoverForeground": "#00000090", - "statusBarItem.remoteBackground": "#4ef3f9", - "statusBarItem.remoteForeground": "#000000", - "statusBarItem.remoteHoverBackground": "#5bffff", - "statusBarItem.remoteHoverForeground": "#00000090", - "statusBar.background": "#41e6ec", - "statusBar.foreground": "#000000", - "statusBar.border": "#41e6ec", - "statusBar.debuggingBackground": "#41e6ec", - "statusBar.debuggingForeground": "#000000", - "statusBar.debuggingBorder": "#41e6ec", - "statusBar.noFolderBackground": "#41e6ec", - "statusBar.noFolderForeground": "#000000", - "statusBar.noFolderBorder": "#41e6ec", - "statusBar.prominentBackground": "#41e6ec", - "statusBar.prominentForeground": "#000000", - "statusBar.prominentHoverBackground": "#41e6ec", - "statusBar.prominentHoverForeground": "#00000090", - "focusBorder": "#41e6ec99", - "progressBar.background": "#41e6ec", - "textLink.foreground": "#81ffff", - "textLink.activeForeground": "#8effff", - "selection.background": "#34d9df", - "list.highlightForeground": "#41e6ec", - "list.focusAndSelectionOutline": "#41e6ec99", - "button.background": "#41e6ec", - "button.foreground": "#000000", - "button.hoverBackground": "#4ef3f9", - "tab.activeBorderTop": "#4ef3f9", - "pickerGroup.foreground": "#4ef3f9", - "list.activeSelectionBackground": "#41e6ec4d", - "panelTitle.activeBorder": "#4ef3f9", - "activityBar.activeBorder": "#41e6ec", - "activityBarBadge.foreground": "#000000", - "activityBarBadge.background": "#41e6ec" - }, - "window.title": "{azkit}", - "projectColors.name": "{azkit}", - "projectColors.mainColor": "#41e6ec", - "projectColors.isActivityBarColored": false, - "projectColors.isTitleBarColored": false, - "projectColors.isStatusBarColored": true, - "projectColors.isProjectNameColored": true, - "projectColors.isActiveItemsColored": true, - "projectColors.setWindowTitle": true - } + "folders": [ + { + "path": "." + } + ], + "settings": { + "workbench.colorCustomizations": { + "editorRuler.foreground": "#ff4081", + "statusBarItem.warningBackground": "#41e6ec", + "statusBarItem.warningForeground": "#000000", + "statusBarItem.warningHoverBackground": "#41e6ec", + "statusBarItem.warningHoverForeground": "#00000090", + "statusBarItem.remoteBackground": "#4ef3f9", + "statusBarItem.remoteForeground": "#000000", + "statusBarItem.remoteHoverBackground": "#5bffff", + "statusBarItem.remoteHoverForeground": "#00000090", + "statusBar.background": "#41e6ec", + "statusBar.foreground": "#000000", + "statusBar.border": "#41e6ec", + "statusBar.debuggingBackground": "#41e6ec", + "statusBar.debuggingForeground": "#000000", + "statusBar.debuggingBorder": "#41e6ec", + "statusBar.noFolderBackground": "#41e6ec", + "statusBar.noFolderForeground": "#000000", + "statusBar.noFolderBorder": "#41e6ec", + "statusBar.prominentBackground": "#41e6ec", + "statusBar.prominentForeground": "#000000", + "statusBar.prominentHoverBackground": "#41e6ec", + "statusBar.prominentHoverForeground": "#00000090", + "focusBorder": "#41e6ec99", + "progressBar.background": "#41e6ec", + "textLink.foreground": "#81ffff", + "textLink.activeForeground": "#8effff", + "selection.background": "#34d9df", + "list.highlightForeground": "#41e6ec", + "list.focusAndSelectionOutline": "#41e6ec99", + "button.background": "#41e6ec", + "button.foreground": "#000000", + "button.hoverBackground": "#4ef3f9", + "tab.activeBorderTop": "#4ef3f9", + "pickerGroup.foreground": "#4ef3f9", + "list.activeSelectionBackground": "#41e6ec4d", + "panelTitle.activeBorder": "#4ef3f9", + "activityBar.activeBorder": "#41e6ec", + "activityBarBadge.foreground": "#000000", + "activityBarBadge.background": "#41e6ec" + }, + "window.title": "{azkit}", + "projectColors.name": "{azkit}", + "projectColors.mainColor": "#41e6ec", + "projectColors.isActivityBarColored": false, + "projectColors.isTitleBarColored": false, + "projectColors.isStatusBarColored": true, + "projectColors.isProjectNameColored": true, + "projectColors.isActiveItemsColored": true, + "projectColors.setWindowTitle": true + } } diff --git a/inst/azkit.svg b/inst/azkit.svg index d0427bf..f845cb4 100644 --- a/inst/azkit.svg +++ b/inst/azkit.svg @@ -260,8 +260,8 @@ inkscape:label="cloud" style="display:none" sodipodi:insensitive="true"> - - + +