diff --git a/DESCRIPTION b/DESCRIPTION index 35e9bce..0327e21 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -63,7 +63,7 @@ Imports: graph, Matrix, methods, - Rarr, + Rarr (>= 2.1.9), RBGL, rlang, sf, diff --git a/R/read.R b/R/read.R index 3a3831d..fb2c9c0 100644 --- a/R/read.R +++ b/R/read.R @@ -44,30 +44,37 @@ NULL #' @importFrom Rarr read_zarr_attributes #' @importFrom ZarrArray ZarrArray -.readArray <- function(x, ...) { - md <- read_zarr_attributes(x) +.readArray <- function(x, md = NULL, ...) { + md <- md %||% read_zarr_attributes(x) mdattr <- SpatialDataAttrs(md) # TODO: paths to datasets have to be validated properly in the future # https://ngff.openmicroscopy.org/specifications/0.5/index.html#images # The name of the array is arbitrary with the ordering defined by # by the "multiscales" metadata, but is often a sequence starting at 0. - ds <- .validate_multiscales_paths(x, datasets(mdattr)) - ds <- file.path(x, as.character(ds)) + if (!any(startsWith(x, c("http://", "https://", "s3://")))) { + # Until we have a complete store interface (https://github.com/Huber-group-EMBL/Rarr/pull/176), + # only local objects can be fully validated. + ds <- .validate_multiscales_paths(x, datasets(mdattr)) + } else { + # For remote objects, we skip validation and assume that the datasets are in the expected location. + ds <- datasets(mdattr) + } + ds <- paste0(x, ds) as <- lapply(ds, ZarrArray) list(array=as, mdattr=mdattr) } #' @rdname readSpatialData #' @export -readImage <- function(x, ...) { - l <- .readArray(x, ...) +readImage <- function(x, md = NULL, ...) { + l <- .readArray(x, md = md, ...) SpatialDataImage(data=l$array, meta=l$mdattr, ...) } #' @rdname readSpatialData #' @export -readLabel <- function(x, ...) { - l <- .readArray(x, ...) +readLabel <- function(x, md = NULL, ...) { + l <- .readArray(x, md = md, ...) SpatialDataLabel(data=l$array, meta=l$mdattr, ...) } @@ -76,9 +83,9 @@ readLabel <- function(x, ...) { #' @importFrom Rarr read_zarr_attributes #' @importFrom dplyr sql #' @export -readPoint <- function(x, ...) { - pq <- list.files(x, "\\.parquet$", full.names=TRUE) - md <- read_zarr_attributes(x) +readPoint <- function(x, md = NULL, ...) { + pq <- paste0(x, file.path("points.parquet", "part.0.parquet")) + md <- md %||% read_zarr_attributes(x) ax <- unlist(md$axes) df <- ddbs_open_dataset(pq, conn=.conn()) |> mutate(geometry=sql(sprintf("ST_Point(%s, %s)", ax[1], ax[2]))) |> @@ -105,9 +112,10 @@ readPoint <- function(x, ...) { #' @importFrom duckspatial ddbs_open_dataset #' @import geoarrow #' @export -readShape <- function(x, ...) { - md <- read_zarr_attributes(x) - pq <- list.files(x, "\\.parquet$", full.names=TRUE) +readShape <- function(x, md = NULL, ...) { + md <- md %||% read_zarr_attributes(x) + # "shapes.parquet" currently hardcoded in SpatialData.io + pq <- paste0(x, "shapes.parquet") df <- ddbs_open_dataset(pq, conn=.conn(), crs=NA_character_) SpatialDataShape(data=df, meta=SpatialDataAttrs(md)) } @@ -118,7 +126,7 @@ readShape <- function(x, ...) { #' @importFrom S4Vectors metadata metadata<- #' @importFrom SummarizedExperiment colData colData<- #' @importFrom SingleCellExperiment int_colData int_colData<- int_metadata int_metadata<- -readTable <- function(x) { +readTable <- function(x, ...) { suppressWarnings({ # suppress warnings related to hidden files sce <- anndataR::read_zarr(x, as="SingleCellExperiment") }) @@ -145,10 +153,19 @@ readSpatialData <- function(x, args <- as.list(environment())[.LAYERS] skip <- vapply(args, isFALSE, logical(1)) + x <- Rarr:::.normalize_array_path(x) + store_meta <- Rarr:::.read_consolidated_metadata(x)$metadata + # is.null(.$data_type) is a hack that works for both v2 and v3 Zarr stores, to keep only + # groups, but not arrays + # In v3, we could just do .$node_type == "group", but in v2, there is no node_type. + store_groups <- names(store_meta[vapply(store_meta, \(.) is.null(.$data_type), logical(1))]) + # helper for layer reading .readLayer <- \(l) { - j <- list.dirs(file.path(x, l), recursive=FALSE, full.names=TRUE) + message(" reading ", l, "...") + j <- store_groups[startsWith(store_groups, paste0(l, "/"))] names(j) <- basename(j) + opt <- args[[l]] if (!isTRUE(opt)) { if (is.numeric(opt) && opt > (. <- length(j))) @@ -157,8 +174,12 @@ readSpatialData <- function(x, stop("couldn't find ", l, " of name", .) j <- j[opt] } - f <- get(paste0("read", toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1))) - lapply(j, \(.) do.call(f, list(.))) + reader <- get(paste0("read", toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1))) + lapply(j, function(el) { + md <- store_meta[[el]]$attributes + el <- paste0(x, el, "/", recycle0 = TRUE) + reader(el, md=md) + }) } names(ls) <- ls <- .LAYERS[!skip]