diff --git a/src/cpp/interface/interface.cpp b/src/cpp/interface/interface.cpp index ed7916f..96374ef 100644 --- a/src/cpp/interface/interface.cpp +++ b/src/cpp/interface/interface.cpp @@ -114,14 +114,21 @@ PYBIND11_MODULE(libbfiocpp, m) { py::enum_(m, "FileType") .value("OmeTiff", bfiocpp::FileType::OmeTiff) - .value("OmeZarr", bfiocpp::FileType::OmeZarr) + .value("OmeZarrV2", bfiocpp::FileType::OmeZarrV2) + .value("OmeZarrV3", bfiocpp::FileType::OmeZarrV3) .export_values(); m.def("get_ome_xml", &bfiocpp::GetOmeXml); // Writer class - py::class_>(m, "TsWriterCPP") - .def(py::init&, const std::vector&, const std::string&, const std::string&>()) + py::class_>(m, "TsWriterCPP") + .def(py::init&, const std::vector&, const std::string&, const std::string&, bfiocpp::FileType>(), + py::arg("filename"), + py::arg("image_shape"), + py::arg("chunk_shape"), + py::arg("dtype"), + py::arg("dimension_order"), + py::arg("file_type") = bfiocpp::FileType::OmeZarrV2) .def("write_image_data", &bfiocpp::TsWriterCPP::WriteImageData); } diff --git a/src/cpp/reader/tsreader.cpp b/src/cpp/reader/tsreader.cpp index 0db5423..5f759bd 100644 --- a/src/cpp/reader/tsreader.cpp +++ b/src/cpp/reader/tsreader.cpp @@ -21,7 +21,7 @@ TsReaderCPP::TsReaderCPP(const std::string& fname, FileType ft, const std::strin if (ft == FileType::OmeTiff){ return GetOmeTiffSpecToRead(fname); } else { - return GetZarrSpecToRead(fname); + return GetZarrSpecToRead(fname, ft); } }(); diff --git a/src/cpp/reader/tsreader.h b/src/cpp/reader/tsreader.h index 4062856..503e496 100644 --- a/src/cpp/reader/tsreader.h +++ b/src/cpp/reader/tsreader.h @@ -4,12 +4,10 @@ #include #include #include -#include -#include #include -#include #include "tensorstore/tensorstore.h" #include "../utilities/sequence.h" +#include "../utilities/utilities.h" using image_data = std::variant, std::vector, std::vector, @@ -26,9 +24,6 @@ using iter_indicies = std::tuple& image_shape, - const std::vector& chunk_shape, - const std::string& dtype){ +std::string GetZarrV3DataType(uint16_t data_type_code) { + // Zarr v3 uses plain dtype names instead of encoded format like "& image_shape, + const std::vector& chunk_shape, + const std::string& dtype, + FileType ft){ + + if (ft == FileType::OmeZarrV3) { + // Zarr v3 spec + return tensorstore::Spec::FromJson({{"driver", "zarr3"}, + {"kvstore", {{"driver", "file"}, + {"path", filename}} + }, + {"context", { + {"cache_pool", {{"total_bytes_limit", 1000000000}}}, + {"data_copy_concurrency", {{"limit", std::thread::hardware_concurrency()}}}, + {"file_io_concurrency", {{"limit", std::thread::hardware_concurrency()}}}, + }}, + {"metadata", { + {"shape", image_shape}, + {"chunk_grid", { + {"name", "regular"}, + {"configuration", {{"chunk_shape", chunk_shape}}} + }}, + {"chunk_key_encoding", {{"name", "default"}}}, + {"data_type", dtype}, + {"codecs", {{{"name", "bytes"}, {"configuration", {{"endian", "little"}}}}}} + }, + }}).value(); + } else { + // Zarr v2 spec (existing) + // valid values for dtype are subset of + // https://google.github.io/tensorstore/spec.html#json-dtype + return tensorstore::Spec::FromJson({{"driver", "zarr"}, + {"kvstore", {{"driver", "file"}, + {"path", filename}} + }, + {"context", { + {"cache_pool", {{"total_bytes_limit", 1000000000}}}, + {"data_copy_concurrency", {{"limit", std::thread::hardware_concurrency()}}}, + {"file_io_concurrency", {{"limit", std::thread::hardware_concurrency()}}}, + }}, + {"metadata", { + {"zarr_format", 2}, + {"shape", image_shape}, + {"chunks", chunk_shape}, + {"dtype", dtype}, + }, + }}).value(); + } } } // ns bfiocpp \ No newline at end of file diff --git a/src/cpp/utilities/utilities.h b/src/cpp/utilities/utilities.h index 7516a08..8195b34 100644 --- a/src/cpp/utilities/utilities.h +++ b/src/cpp/utilities/utilities.h @@ -11,16 +11,20 @@ namespace bfiocpp { +enum class FileType {OmeTiff, OmeZarrV2, OmeZarrV3}; + tensorstore::Spec GetOmeTiffSpecToRead(const std::string& filename); -tensorstore::Spec GetZarrSpecToRead(const std::string& filename); +tensorstore::Spec GetZarrSpecToRead(const std::string& filename, FileType ft); uint16_t GetDataTypeCode (std::string_view type_name); std::string GetEncodedType(uint16_t data_type_code); std::string GetUTCString(); std::string GetOmeXml(const std::string& file_path); std::tuple, std::optional, std::optional>ParseMultiscaleMetadata(const std::string& axes_list, int len); -tensorstore::Spec GetZarrSpecToWrite(const std::string& filename, - const std::vector& image_shape, +tensorstore::Spec GetZarrSpecToWrite(const std::string& filename, + const std::vector& image_shape, const std::vector& chunk_shape, - const std::string& dtype); + const std::string& dtype, + FileType ft); +std::string GetZarrV3DataType(uint16_t data_type_code); } // ns bfiocpp \ No newline at end of file diff --git a/src/cpp/writer/tswriter.cpp b/src/cpp/writer/tswriter.cpp index 121ecd0..69d641b 100644 --- a/src/cpp/writer/tswriter.cpp +++ b/src/cpp/writer/tswriter.cpp @@ -15,15 +15,20 @@ TsWriterCPP::TsWriterCPP( const std::vector& image_shape, const std::vector& chunk_shape, const std::string& dtype_str, - const std::string& dimension_order + const std::string& dimension_order, + FileType file_type ): _filename(fname), _image_shape(image_shape), _chunk_shape(chunk_shape), _dtype_code(GetDataTypeCode(dtype_str)) { - + // Use appropriate dtype encoding based on file type + std::string encoded_dtype = (file_type == FileType::OmeZarrV3) + ? GetZarrV3DataType(_dtype_code) + : GetEncodedType(_dtype_code); + TENSORSTORE_CHECK_OK_AND_ASSIGN(_source, tensorstore::Open( - GetZarrSpecToWrite(_filename, _image_shape, _chunk_shape, GetEncodedType(_dtype_code)), + GetZarrSpecToWrite(_filename, _image_shape, _chunk_shape, encoded_dtype, file_type), tensorstore::OpenMode::create | tensorstore::OpenMode::delete_existing, tensorstore::ReadWriteMode::write).result() diff --git a/src/cpp/writer/tswriter.h b/src/cpp/writer/tswriter.h index 98130a4..0120388 100644 --- a/src/cpp/writer/tswriter.h +++ b/src/cpp/writer/tswriter.h @@ -2,8 +2,10 @@ #include #include +#include #include "tensorstore/tensorstore.h" #include "../utilities/sequence.h" +#include "../utilities/utilities.h" #include namespace py = pybind11; @@ -13,11 +15,12 @@ namespace bfiocpp{ class TsWriterCPP{ public: TsWriterCPP ( - const std::string& fname, - const std::vector& image_shape, + const std::string& fname, + const std::vector& image_shape, const std::vector& chunk_shape, const std::string& dtype_str, - const std::string& dimension_order + const std::string& dimension_order, + FileType file_type = FileType::OmeZarrV2 ); void WriteImageData ( diff --git a/src/python/bfiocpp/tswriter.py b/src/python/bfiocpp/tswriter.py index 4867201..d91e672 100644 --- a/src/python/bfiocpp/tswriter.py +++ b/src/python/bfiocpp/tswriter.py @@ -1,6 +1,6 @@ import numpy as np from typing import Optional -from .libbfiocpp import TsWriterCPP, Seq +from .libbfiocpp import TsWriterCPP, Seq, FileType class TSWriter: @@ -12,14 +12,20 @@ def __init__( chunk_shape: list, dtype: np.dtype, dimension_order: str, + file_type: FileType = FileType.OmeZarrV2, ): """Initialize tensorstore Zarr writer file_name: Path to write file to + image_shape: Shape of the image [T, C, Z, Y, X] + chunk_shape: Shape of chunks [T, C, Z, Y, X] + dtype: Data type of the image + dimension_order: Order of dimensions (e.g., "TCZYX") + file_type: FileType.OmeZarrV2 (default) or FileType.OmeZarrV3 """ self._image_writer: TsWriterCPP = TsWriterCPP( - file_name, image_shape, chunk_shape, str(dtype), dimension_order + file_name, image_shape, chunk_shape, str(dtype), dimension_order, file_type ) def write_image_data( diff --git a/tests/test_read.py b/tests/test_read.py index f59bf30..e97adc6 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -211,7 +211,7 @@ def test_read_zarr_2d_slice(self): """test_read_zarr_2d_slice - Read tiff using TSReader""" br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) assert br._X == 2702 @@ -235,7 +235,7 @@ def test_read_zarr_4d_slice(self): """test_read_zarr_4d_slice - Read tiff using TSReader""" br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) diff --git a/tests/test_write.py b/tests/test_write.py index aef1c55..92eb662 100644 --- a/tests/test_write.py +++ b/tests/test_write.py @@ -59,7 +59,7 @@ def test_write_zarr_5d(self): br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -92,7 +92,7 @@ def test_write_zarr_5d(self): br = TSReader( str(test_file_path), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -107,7 +107,7 @@ def test_write_zarr_3d(self): br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -140,7 +140,7 @@ def test_write_zarr_3d(self): br = TSReader( str(test_file_path), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -155,7 +155,7 @@ def test_write_zarr_chunk_5d(self): br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -202,7 +202,7 @@ def test_write_zarr_chunk_5d(self): br = TSReader( str(test_file_path), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -225,7 +225,7 @@ def test_write_zarr_3d(self): br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) assert br._X == 2702 @@ -260,7 +260,7 @@ def test_write_zarr_3d(self): br = TSReader( str(test_file_path), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -276,7 +276,7 @@ def test_write_zarr_chunk_3d(self): br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) assert br._X == 2702 @@ -331,7 +331,7 @@ def test_write_zarr_chunk_3d(self): br = TSReader( str(test_file_path), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -351,7 +351,7 @@ def test_invalid_dimension_order_no_X(self): br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -374,7 +374,7 @@ def test_invalid_dimension_order_no_Y(self): br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -397,7 +397,7 @@ def test_invalid_dimension_order_character(self): br = TSReader( str(TEST_DIR.joinpath("5025551.zarr/0")), - FileType.OmeZarr, + FileType.OmeZarrV2, "", ) @@ -414,4 +414,146 @@ def test_invalid_dimension_order_character(self): chunk_size = (1,1,1,2700,2702) with self.assertRaises(Exception): - TSWriter(test_file_path, tmp.shape, chunk_size, str(tmp.dtype), "ATCZYX") \ No newline at end of file + TSWriter(test_file_path, tmp.shape, chunk_size, str(tmp.dtype), "ATCZYX") + + +class TestZarrV3Write(unittest.TestCase): + """Tests for Zarr v3 write support""" + + def test_write_zarr_v3_basic(self): + """Test basic Zarr v3 write and read-back""" + with tempfile.TemporaryDirectory() as dir: + test_file_path = os.path.join(dir, 'test_v3.zarr') + + # Create test data + shape = [1, 1, 1, 100, 100] + chunk_shape = [1, 1, 1, 64, 64] + test_data = np.arange(100 * 100, dtype=np.uint16).reshape(shape) + + # Write using v3 format + bw = TSWriter(test_file_path, shape, chunk_shape, "uint16", "TCZYX", FileType.OmeZarrV3) + rows = Seq(0, 99, 1) + cols = Seq(0, 99, 1) + layers = Seq(0, 0, 1) + channels = Seq(0, 0, 1) + tsteps = Seq(0, 0, 1) + bw.write_image_data(test_data, rows, cols, layers, channels, tsteps) + bw.close() + + # Verify zarr.json exists (v3 format indicator) + zarr_json_path = os.path.join(test_file_path, 'zarr.json') + self.assertTrue(os.path.exists(zarr_json_path), "zarr.json should exist for v3 format") + + # Verify .zarray does NOT exist (v2 format indicator) + zarray_path = os.path.join(test_file_path, '.zarray') + self.assertFalse(os.path.exists(zarray_path), ".zarray should not exist for v3 format") + + # Read back using v3 reader + br = TSReader(test_file_path, FileType.OmeZarrV3, "TCZYX") + read_data = br.data(rows, cols, layers, channels, tsteps) + + # Verify data integrity + self.assertEqual(read_data.shape, tuple(shape)) + self.assertEqual(read_data.dtype, np.uint16) + self.assertTrue(np.array_equal(read_data, test_data)) + + def test_write_zarr_v3_multiple_dtypes(self): + """Test Zarr v3 write with different data types""" + dtypes_to_test = [ + ("uint8", np.uint8), + ("uint16", np.uint16), + ("uint32", np.uint32), + ("float32", np.float32), + ] + + with tempfile.TemporaryDirectory() as dir: + for dtype_str, np_dtype in dtypes_to_test: + test_file_path = os.path.join(dir, f'test_v3_{dtype_str}.zarr') + + shape = [1, 1, 1, 50, 50] + chunk_shape = [1, 1, 1, 32, 32] + test_data = np.ones(shape, dtype=np_dtype) * 42 + + bw = TSWriter(test_file_path, shape, chunk_shape, dtype_str, "TCZYX", FileType.OmeZarrV3) + rows = Seq(0, 49, 1) + cols = Seq(0, 49, 1) + layers = Seq(0, 0, 1) + channels = Seq(0, 0, 1) + tsteps = Seq(0, 0, 1) + bw.write_image_data(test_data, rows, cols, layers, channels, tsteps) + bw.close() + + # Verify v3 format + self.assertTrue(os.path.exists(os.path.join(test_file_path, 'zarr.json')), + f"zarr.json should exist for {dtype_str}") + + # Read back and verify + br = TSReader(test_file_path, FileType.OmeZarrV3, "TCZYX") + read_data = br.data(rows, cols, layers, channels, tsteps) + self.assertEqual(read_data.dtype, np_dtype, f"dtype mismatch for {dtype_str}") + self.assertTrue(np.allclose(read_data, test_data), f"data mismatch for {dtype_str}") + + def test_write_zarr_v3_chunked(self): + """Test Zarr v3 write with chunked writes""" + with tempfile.TemporaryDirectory() as dir: + test_file_path = os.path.join(dir, 'test_v3_chunked.zarr') + + shape = [1, 1, 1, 200, 200] + chunk_shape = [1, 1, 1, 100, 100] + + bw = TSWriter(test_file_path, shape, chunk_shape, "uint16", "TCZYX", FileType.OmeZarrV3) + + # Write in 4 chunks (2x2 grid) + for y_start in [0, 100]: + for x_start in [0, 100]: + chunk_data = np.full([1, 1, 1, 100, 100], + fill_value=(y_start + x_start), dtype=np.uint16) + rows = Seq(y_start, y_start + 99, 1) + cols = Seq(x_start, x_start + 99, 1) + layers = Seq(0, 0, 1) + channels = Seq(0, 0, 1) + tsteps = Seq(0, 0, 1) + bw.write_image_data(chunk_data, rows, cols, layers, channels, tsteps) + + bw.close() + + # Read back full image + br = TSReader(test_file_path, FileType.OmeZarrV3, "TCZYX") + rows = Seq(0, 199, 1) + cols = Seq(0, 199, 1) + layers = Seq(0, 0, 1) + channels = Seq(0, 0, 1) + tsteps = Seq(0, 0, 1) + read_data = br.data(rows, cols, layers, channels, tsteps) + + # Verify each quadrant has correct value + self.assertTrue(np.all(read_data[0, 0, 0, :100, :100] == 0)) # top-left + self.assertTrue(np.all(read_data[0, 0, 0, :100, 100:] == 100)) # top-right + self.assertTrue(np.all(read_data[0, 0, 0, 100:, :100] == 100)) # bottom-left + self.assertTrue(np.all(read_data[0, 0, 0, 100:, 100:] == 200)) # bottom-right + + def test_write_zarr_v2_default(self): + """Test that default write (no FileType) creates v2 format""" + with tempfile.TemporaryDirectory() as dir: + test_file_path = os.path.join(dir, 'test_v2_default.zarr') + + shape = [1, 1, 1, 50, 50] + chunk_shape = [1, 1, 1, 32, 32] + test_data = np.ones(shape, dtype=np.uint16) + + # Write without specifying FileType (should default to v2) + bw = TSWriter(test_file_path, shape, chunk_shape, "uint16", "TCZYX") + rows = Seq(0, 49, 1) + cols = Seq(0, 49, 1) + layers = Seq(0, 0, 1) + channels = Seq(0, 0, 1) + tsteps = Seq(0, 0, 1) + bw.write_image_data(test_data, rows, cols, layers, channels, tsteps) + bw.close() + + # Verify .zarray exists (v2 format) + self.assertTrue(os.path.exists(os.path.join(test_file_path, '.zarray')), + ".zarray should exist for default v2 format") + # Verify zarr.json does NOT exist + self.assertFalse(os.path.exists(os.path.join(test_file_path, 'zarr.json')), + "zarr.json should not exist for v2 format") \ No newline at end of file