From 67f60c25e0428cdac5fbcccc2552832ba499ce23 Mon Sep 17 00:00:00 2001 From: Hugo Gruson Date: Fri, 15 May 2026 16:43:31 +0200 Subject: [PATCH 1/5] S3 reading support --- R/read.R | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/R/read.R b/R/read.R index 3a3831d..0f6605c 100644 --- a/R/read.R +++ b/R/read.R @@ -51,8 +51,8 @@ NULL # https://ngff.openmicroscopy.org/specifications/0.5/index.html#images # The name of the array is arbitrary with the ordering defined by # by the "multiscales" metadata, but is often a sequence starting at 0. - ds <- .validate_multiscales_paths(x, datasets(mdattr)) - ds <- file.path(x, as.character(ds)) + # ds <- .validate_multiscales_paths(x, datasets(mdattr)) + ds <- paste0(x, ds) as <- lapply(ds, ZarrArray) list(array=as, mdattr=mdattr) } @@ -77,7 +77,7 @@ readLabel <- function(x, ...) { #' @importFrom dplyr sql #' @export readPoint <- function(x, ...) { - pq <- list.files(x, "\\.parquet$", full.names=TRUE) + pq <- paste0(x, file.path("points.parquet", "part.0.parquet")) md <- read_zarr_attributes(x) ax <- unlist(md$axes) df <- ddbs_open_dataset(pq, conn=.conn()) |> @@ -107,7 +107,8 @@ readPoint <- function(x, ...) { #' @export readShape <- function(x, ...) { md <- read_zarr_attributes(x) - pq <- list.files(x, "\\.parquet$", full.names=TRUE) + # "shapes.parquet" currently hardcoded in SpatialData.io + pq <- paste0(x, "shapes.parquet") df <- ddbs_open_dataset(pq, conn=.conn(), crs=NA_character_) SpatialDataShape(data=df, meta=SpatialDataAttrs(md)) } @@ -145,10 +146,22 @@ readSpatialData <- function(x, args <- as.list(environment())[.LAYERS] skip <- vapply(args, isFALSE, logical(1)) + x <- Rarr:::.normalize_array_path(x) + store_meta <- Rarr:::.read_consolidated_metadata(x)$metadata + # is.null(.$data_type) is a hack that works for both v2 and v3 Zarr stores, to keep only + # groups, but not arrays + # In v3, we could just do .$node_type == "group", but in v2, there is no node_type. + store_groups <- names(store_meta[vapply(store_meta, \(.) is.null(.$data_type), logical(1))]) + # helper for layer reading .readLayer <- \(l) { - j <- list.dirs(file.path(x, l), recursive=FALSE, full.names=TRUE) - names(j) <- basename(j) + message(" reading ", l, "...") + j <- store_groups[startsWith(store_groups, paste0(l, "/"))] + j <- setNames( + paste0(x, j, "/", recycle0 = TRUE), + basename(j) + ) + opt <- args[[l]] if (!isTRUE(opt)) { if (is.numeric(opt) && opt > (. <- length(j))) @@ -157,8 +170,8 @@ readSpatialData <- function(x, stop("couldn't find ", l, " of name", .) j <- j[opt] } - f <- get(paste0("read", toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1))) - lapply(j, \(.) do.call(f, list(.))) + reader <- get(paste0("read", toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1))) + lapply(j, reader) } names(ls) <- ls <- .LAYERS[!skip] From b4ce49a8856825ee41381d7c1ca922d5d9c6083a Mon Sep 17 00:00:00 2001 From: Hugo Gruson Date: Fri, 15 May 2026 16:58:50 +0200 Subject: [PATCH 2/5] Restore validation on local paths to avoid regression --- R/read.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/read.R b/R/read.R index 0f6605c..5483a8d 100644 --- a/R/read.R +++ b/R/read.R @@ -51,7 +51,11 @@ NULL # https://ngff.openmicroscopy.org/specifications/0.5/index.html#images # The name of the array is arbitrary with the ordering defined by # by the "multiscales" metadata, but is often a sequence starting at 0. - # ds <- .validate_multiscales_paths(x, datasets(mdattr)) + if (!any(startsWith(x, c("http://", "https://", "s3://")))) { + # Until we have a complete store interface (https://github.com/Huber-group-EMBL/Rarr/pull/176), + # only local objects can be fully validated. + ds <- .validate_multiscales_paths(x, datasets(mdattr)) + } ds <- paste0(x, ds) as <- lapply(ds, ZarrArray) list(array=as, mdattr=mdattr) From 4d35c5bb9377826db5902a16f1f6ecb0c1a58b65 Mon Sep 17 00:00:00 2001 From: Hugo Gruson Date: Fri, 15 May 2026 17:13:52 +0200 Subject: [PATCH 3/5] Set minimum Rarr version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 35e9bce..0327e21 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -63,7 +63,7 @@ Imports: graph, Matrix, methods, - Rarr, + Rarr (>= 2.1.9), RBGL, rlang, sf, From c7ea1f63962e1b678d58f4a3eb0b352e0454240f Mon Sep 17 00:00:00 2001 From: Hugo Gruson Date: Fri, 15 May 2026 22:13:05 +0200 Subject: [PATCH 4/5] Ensure ds is defined for remote stores --- R/read.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/read.R b/R/read.R index 5483a8d..59a2c79 100644 --- a/R/read.R +++ b/R/read.R @@ -55,6 +55,9 @@ NULL # Until we have a complete store interface (https://github.com/Huber-group-EMBL/Rarr/pull/176), # only local objects can be fully validated. ds <- .validate_multiscales_paths(x, datasets(mdattr)) + } else { + # For remote objects, we skip validation and assume that the datasets are in the expected location. + ds <- datasets(mdattr) } ds <- paste0(x, ds) as <- lapply(ds, ZarrArray) From 78efbc2f517bcc85c33b6d8d05533f5dc7ec2ede Mon Sep 17 00:00:00 2001 From: Hugo Gruson Date: Mon, 18 May 2026 09:20:57 +0200 Subject: [PATCH 5/5] Pass attributes from consolidated metadata for better performance --- R/read.R | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/R/read.R b/R/read.R index 59a2c79..fb2c9c0 100644 --- a/R/read.R +++ b/R/read.R @@ -44,8 +44,8 @@ NULL #' @importFrom Rarr read_zarr_attributes #' @importFrom ZarrArray ZarrArray -.readArray <- function(x, ...) { - md <- read_zarr_attributes(x) +.readArray <- function(x, md = NULL, ...) { + md <- md %||% read_zarr_attributes(x) mdattr <- SpatialDataAttrs(md) # TODO: paths to datasets have to be validated properly in the future # https://ngff.openmicroscopy.org/specifications/0.5/index.html#images @@ -66,15 +66,15 @@ NULL #' @rdname readSpatialData #' @export -readImage <- function(x, ...) { - l <- .readArray(x, ...) +readImage <- function(x, md = NULL, ...) { + l <- .readArray(x, md = md, ...) SpatialDataImage(data=l$array, meta=l$mdattr, ...) } #' @rdname readSpatialData #' @export -readLabel <- function(x, ...) { - l <- .readArray(x, ...) +readLabel <- function(x, md = NULL, ...) { + l <- .readArray(x, md = md, ...) SpatialDataLabel(data=l$array, meta=l$mdattr, ...) } @@ -83,9 +83,9 @@ readLabel <- function(x, ...) { #' @importFrom Rarr read_zarr_attributes #' @importFrom dplyr sql #' @export -readPoint <- function(x, ...) { +readPoint <- function(x, md = NULL, ...) { pq <- paste0(x, file.path("points.parquet", "part.0.parquet")) - md <- read_zarr_attributes(x) + md <- md %||% read_zarr_attributes(x) ax <- unlist(md$axes) df <- ddbs_open_dataset(pq, conn=.conn()) |> mutate(geometry=sql(sprintf("ST_Point(%s, %s)", ax[1], ax[2]))) |> @@ -112,8 +112,8 @@ readPoint <- function(x, ...) { #' @importFrom duckspatial ddbs_open_dataset #' @import geoarrow #' @export -readShape <- function(x, ...) { - md <- read_zarr_attributes(x) +readShape <- function(x, md = NULL, ...) { + md <- md %||% read_zarr_attributes(x) # "shapes.parquet" currently hardcoded in SpatialData.io pq <- paste0(x, "shapes.parquet") df <- ddbs_open_dataset(pq, conn=.conn(), crs=NA_character_) @@ -126,7 +126,7 @@ readShape <- function(x, ...) { #' @importFrom S4Vectors metadata metadata<- #' @importFrom SummarizedExperiment colData colData<- #' @importFrom SingleCellExperiment int_colData int_colData<- int_metadata int_metadata<- -readTable <- function(x) { +readTable <- function(x, ...) { suppressWarnings({ # suppress warnings related to hidden files sce <- anndataR::read_zarr(x, as="SingleCellExperiment") }) @@ -164,10 +164,7 @@ readSpatialData <- function(x, .readLayer <- \(l) { message(" reading ", l, "...") j <- store_groups[startsWith(store_groups, paste0(l, "/"))] - j <- setNames( - paste0(x, j, "/", recycle0 = TRUE), - basename(j) - ) + names(j) <- basename(j) opt <- args[[l]] if (!isTRUE(opt)) { @@ -178,7 +175,11 @@ readSpatialData <- function(x, j <- j[opt] } reader <- get(paste0("read", toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1))) - lapply(j, reader) + lapply(j, function(el) { + md <- store_meta[[el]]$attributes + el <- paste0(x, el, "/", recycle0 = TRUE) + reader(el, md=md) + }) } names(ls) <- ls <- .LAYERS[!skip]