Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Imports:
graph,
Matrix,
methods,
Rarr,
Rarr (>= 2.1.9),
RBGL,
rlang,
sf,
Expand Down
57 changes: 39 additions & 18 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,30 +44,37 @@ NULL

#' @importFrom Rarr read_zarr_attributes
#' @importFrom ZarrArray ZarrArray
.readArray <- function(x, ...) {
md <- read_zarr_attributes(x)
.readArray <- function(x, md = NULL, ...) {
md <- md %||% read_zarr_attributes(x)
mdattr <- SpatialDataAttrs(md)
# TODO: paths to datasets have to be validated properly in the future
# https://ngff.openmicroscopy.org/specifications/0.5/index.html#images
# The name of the array is arbitrary with the ordering defined by
# by the "multiscales" metadata, but is often a sequence starting at 0.
ds <- .validate_multiscales_paths(x, datasets(mdattr))
ds <- file.path(x, as.character(ds))
if (!any(startsWith(x, c("http://", "https://", "s3://")))) {
# Until we have a complete store interface (https://github.com/Huber-group-EMBL/Rarr/pull/176),
# only local objects can be fully validated.
Comment on lines +55 to +56
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will be handled by rome eventually anyways.

ds <- .validate_multiscales_paths(x, datasets(mdattr))
} else {
# For remote objects, we skip validation and assume that the datasets are in the expected location.
ds <- datasets(mdattr)
}
ds <- paste0(x, ds)
as <- lapply(ds, ZarrArray)
list(array=as, mdattr=mdattr)
}

#' @rdname readSpatialData
#' @export
readImage <- function(x, ...) {
l <- .readArray(x, ...)
readImage <- function(x, md = NULL, ...) {
l <- .readArray(x, md = md, ...)
SpatialDataImage(data=l$array, meta=l$mdattr, ...)
}

#' @rdname readSpatialData
#' @export
readLabel <- function(x, ...) {
l <- .readArray(x, ...)
readLabel <- function(x, md = NULL, ...) {
l <- .readArray(x, md = md, ...)
SpatialDataLabel(data=l$array, meta=l$mdattr, ...)
}

Expand All @@ -76,9 +83,9 @@ readLabel <- function(x, ...) {
#' @importFrom Rarr read_zarr_attributes
#' @importFrom dplyr sql
#' @export
readPoint <- function(x, ...) {
pq <- list.files(x, "\\.parquet$", full.names=TRUE)
md <- read_zarr_attributes(x)
readPoint <- function(x, md = NULL, ...) {
pq <- paste0(x, file.path("points.parquet", "part.0.parquet"))
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: open an issue in Spatialdata python. Why do points have an extra nesting level?

  • shapes are shapes/layer_name/shapes.parquet
  • points are points/layer_name/points.parquet/part.0.parquet

Is this really intended? If so, if this desirable?

md <- md %||% read_zarr_attributes(x)
ax <- unlist(md$axes)
df <- ddbs_open_dataset(pq, conn=.conn()) |>
mutate(geometry=sql(sprintf("ST_Point(%s, %s)", ax[1], ax[2]))) |>
Expand All @@ -105,9 +112,10 @@ readPoint <- function(x, ...) {
#' @importFrom duckspatial ddbs_open_dataset
#' @import geoarrow
#' @export
readShape <- function(x, ...) {
md <- read_zarr_attributes(x)
pq <- list.files(x, "\\.parquet$", full.names=TRUE)
readShape <- function(x, md = NULL, ...) {
md <- md %||% read_zarr_attributes(x)
# "shapes.parquet" currently hardcoded in SpatialData.io
pq <- paste0(x, "shapes.parquet")
df <- ddbs_open_dataset(pq, conn=.conn(), crs=NA_character_)
SpatialDataShape(data=df, meta=SpatialDataAttrs(md))
}
Expand All @@ -118,7 +126,7 @@ readShape <- function(x, ...) {
#' @importFrom S4Vectors metadata metadata<-
#' @importFrom SummarizedExperiment colData colData<-
#' @importFrom SingleCellExperiment int_colData int_colData<- int_metadata int_metadata<-
readTable <- function(x) {
readTable <- function(x, ...) {
suppressWarnings({ # suppress warnings related to hidden files
sce <- anndataR::read_zarr(x, as="SingleCellExperiment")
})
Expand All @@ -145,10 +153,19 @@ readSpatialData <- function(x,
args <- as.list(environment())[.LAYERS]
skip <- vapply(args, isFALSE, logical(1))

x <- Rarr:::.normalize_array_path(x)
store_meta <- Rarr:::.read_consolidated_metadata(x)$metadata
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Bioconductor, it seems acceptable to use internal functions (see, e.g., ZarrArray) but ideally, this will be eventually exported: Huber-group-EMBL/Rarr#116

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Spatialdata python uses zmetadata instead of .zmetadata. Is there a special reason for this? AFAIK, zarr python uses the hidden version by default.

# is.null(.$data_type) is a hack that works for both v2 and v3 Zarr stores, to keep only
# groups, but not arrays
# In v3, we could just do .$node_type == "group", but in v2, there is no node_type.
store_groups <- names(store_meta[vapply(store_meta, \(.) is.null(.$data_type), logical(1))])

# helper for layer reading
.readLayer <- \(l) {
j <- list.dirs(file.path(x, l), recursive=FALSE, full.names=TRUE)
message(" reading ", l, "...")
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I put this for now because S3 reading is quite slow and I wanted to see what was happening but I don't usually love super verbose functions.

j <- store_groups[startsWith(store_groups, paste0(l, "/"))]
names(j) <- basename(j)

opt <- args[[l]]
if (!isTRUE(opt)) {
if (is.numeric(opt) && opt > (. <- length(j)))
Expand All @@ -157,8 +174,12 @@ readSpatialData <- function(x,
stop("couldn't find ", l, " of name", .)
j <- j[opt]
}
f <- get(paste0("read", toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1)))
lapply(j, \(.) do.call(f, list(.)))
reader <- get(paste0("read", toupper(substr(l, 1, 1)), substr(l, 2, nchar(l)-1)))
lapply(j, function(el) {
md <- store_meta[[el]]$attributes
el <- paste0(x, el, "/", recycle0 = TRUE)
reader(el, md=md)
})
}

names(ls) <- ls <- .LAYERS[!skip]
Expand Down
Loading