From 5c588fcd7cda9bc868dcf25743b2ebbe7955a414 Mon Sep 17 00:00:00 2001
From: Martin Helm <martinh@miltenyi.com>
Date: Wed, 10 Dec 2025 09:51:14 +0000
Subject: [PATCH 1/5] Parse cycle from ame in tiff metadata, not actual
 filename

---
 src/spatialdata_io/readers/macsima.py | 15 ++++++++++++++-
 tests/test_macsima.py                 | 25 +++++++++++++++++++++----
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py
index 8bbe0532..b128eeea 100644
--- a/src/spatialdata_io/readers/macsima.py
+++ b/src/spatialdata_io/readers/macsima.py
@@ -13,6 +13,7 @@
 import pandas as pd
 import spatialdata as sd
 from dask_image.imread import imread
+from ome_types import from_tiff
 from spatialdata import SpatialData
 from spatialdata._logging import logger
 
@@ -64,7 +65,11 @@ def from_paths(
         cycles = []
         channels = []
         for p in path_files:
-            cycle = parse_name_to_cycle(p.stem)
+            try:
+                cycle = parse_cycle_from_tiff(p)
+            except ValueError:
+                logger.debug(f"Cannot parse cycle from OME metadata from {p}. Falling back to parsing from file name.")
+                cycle = parse_name_to_cycle(p.stem)
             cycles.append(cycle)
             try:
                 with warnings.catch_warnings():
@@ -300,6 +305,14 @@ def macsima(
         raise NotImplementedError("Parsing raw MACSima data is not yet implemented.")
 
 
+def parse_cycle_from_tiff(path: Path) -> int:
+    """Parse the cycle number from image name stored in tiff metadata."""
+    # Parsing from the metadata is more robust, as files can be easily renamed by users
+    images = from_tiff(path).images
+    name = images[0].name
+    return parse_name_to_cycle(name)
+
+
 def parse_name_to_cycle(name: str) -> int:
     """Parse the cycle number from the name of the image."""
     cycle = name.split("_")[0]
diff --git a/tests/test_macsima.py b/tests/test_macsima.py
index 63d6ca24..abe4f76d 100644
--- a/tests/test_macsima.py
+++ b/tests/test_macsima.py
@@ -15,16 +15,20 @@
     ChannelMetadata,
     MultiChannelImage,
     macsima,
+    parse_cycle_from_tiff,
     parse_name_to_cycle,
 )
 from tests._utils import skip_if_below_python_version
 
 RNG = da.random.default_rng(seed=0)
 
-if not (Path("./data/Lung_adc_demo").exists() or Path("./data/MACSimaData_HCA").exists()):
+if not (
+    Path("./data/Lung_adc_demo").exists() or Path("./data/MACSimaData_HCA").exists() or Path("./data/OMAP23").exists()
+):
     pytest.skip(
-        "Requires the Lung_adc_demo or MACSimaData_HCA datasets, please check "
-        "https://github.com/giovp/spatialdata-sandbox/macsima/Readme.md for instructions on how to get the data.",
+        "Requires the Lung_adc_demo or MACSimaData_HCA or OMAP23 datasets, please check "
+        "https://github.com/giovp/spatialdata-sandbox/blob/main/macsima_io/Readme.md for instructions on how to get the data."
+        "The OMAP23 dataset can be downloaded from https://zenodo.org/records/14008816",
         allow_module_level=True,
     )
 
@@ -140,7 +144,7 @@ def test_cycle_metadata(dataset: str, expected: list[str]) -> None:
     sdata = macsima(f, c_subset=3)
     table = sdata[list(sdata.tables.keys())[0]]
 
-    # get the channel names
+    # get the channel cycles
     cycles = table.var["cycle"]
     assert list(cycles) == expected
 
@@ -150,6 +154,19 @@ def test_parsing_style() -> None:
         macsima(Path(), parsing_style="not_a_parsing_style")
 
 
+@pytest.mark.parametrize(
+    "dataset,image,expected",
+    [("OMAP23", "OMAP23_CD3E.ome.tif", 1), ("OMAP23", "OMAP23_CD4.ome.tif", 4)],
+)
+def test_parsing_of_tiff_metadata_to_cycle(dataset: str, image: str, expected: int) -> None:
+    f = Path("./data") / dataset / image
+    assert f.is_file()
+
+    cycles = parse_cycle_from_tiff(f)
+
+    assert cycles == expected
+
+
 @pytest.mark.parametrize(
     "name,expected",
     [

From 8b49711f06a9356a122ef82816de4974afd5363a Mon Sep 17 00:00:00 2001
From: Martin Helm <martin@bio-ai.org>
Date: Mon, 15 Dec 2025 22:16:36 +0000
Subject: [PATCH 2/5] Parse additional metadata from filename stored in ome
 tiff

---
 src/spatialdata_io/readers/macsima.py | 238 +++++++++++++++++++++++---
 tests/test_macsima.py                 |  10 +-
 2 files changed, 221 insertions(+), 27 deletions(-)

diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py
index b128eeea..461cfd07 100644
--- a/src/spatialdata_io/readers/macsima.py
+++ b/src/spatialdata_io/readers/macsima.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+import os
+import re
 import warnings
 from collections import defaultdict
 from copy import deepcopy
@@ -45,6 +47,15 @@ class ChannelMetadata:
 
     name: str
     cycle: int
+    stainnumber: int | None = None
+    imagetype: str | None = None
+    region: int | None = None
+    well: str | None = None
+    roi: int | None = None
+    fluorophore: str | None = None
+    field: str | None = None
+    clone: str | None = None
+    exposure: float | None = None
 
 
 @dataclass
@@ -62,15 +73,9 @@ def from_paths(
         imread_kwargs: Mapping[str, Any],
         skip_rounds: list[int] | None = None,
     ) -> MultiChannelImage:
-        cycles = []
-        channels = []
+        channel_metadata: list[ChannelMetadata] = []
         for p in path_files:
-            try:
-                cycle = parse_cycle_from_tiff(p)
-            except ValueError:
-                logger.debug(f"Cannot parse cycle from OME metadata from {p}. Falling back to parsing from file name.")
-                cycle = parse_name_to_cycle(p.stem)
-            cycles.append(cycle)
+            cycle = parse_cycle(p)
             try:
                 with warnings.catch_warnings():
                     warnings.simplefilter("ignore")
@@ -81,24 +86,57 @@ def from_paths(
                         UserWarning,
                         stacklevel=2,
                     )
-                channels.append(channel_names[0])
+                channel = channel_names[0]
             except ValueError as e:
                 warnings.warn(
                     f"Cannot parse OME metadata from {p}. Error: {e}. Skipping this file.", UserWarning, stacklevel=2
                 )
+                continue
+
+            # Try to parse additional metadata
+            # If it succeeds, prefer the channelname from this additional metadata over the simple parsing.
+            try:
+                additional_metadata = parse_additional_metadata(p, convert_numerics=True)
+            except Exception as e:
+                warnings.warn(f"Cannot parse additional metadata from {p.stem}. Error: {e}.", stacklevel=2)
+                additional_metadata = None
+
+            if additional_metadata is not None:
+                # Rename markername for a background image, to prevent it from duplicating the stain image name
+                if additional_metadata["imagetype"] == "bleach":
+                    additional_metadata["marker"] = additional_metadata["marker"] + "_background"
+
+                channel_metadata.append(
+                    ChannelMetadata(
+                        name=additional_metadata["marker"],
+                        cycle=cycle,
+                        stainnumber=additional_metadata["stainnumber"],
+                        imagetype=additional_metadata["imagetype"],
+                        region=additional_metadata["region"],
+                        well=additional_metadata["well"],
+                        roi=additional_metadata["roi"],
+                        fluorophore=additional_metadata["channel"],
+                        field=additional_metadata["field"],
+                        clone=additional_metadata["clone"],
+                        exposure=additional_metadata["exposure"],
+                    )
+                )
+            else:
+                # Only the minimal required metadata
+                channel_metadata.append(ChannelMetadata(name=channel, cycle=cycle))
 
-        if len(path_files) != len(cycles) or len(path_files) != len(channels):
-            raise ValueError("Length of path_files, cycles and channels must be the same.")
+        if len(path_files) != len(channel_metadata):
+            raise ValueError("Length of path_files and metadata must be the same.")
         # if any of round_channels is in skip_rounds, remove that round from the list and from path_files
         if skip_rounds:
             logger.info(f"Skipping cycles: {skip_rounds}")
-            path_files, cycles, channels = map(
+            path_files, channel_metadata = map(
                 list,
                 zip(
                     *[
-                        (p, c, ch)
-                        for p, c, ch in zip(path_files, cycles, channels, strict=True)
-                        if c not in skip_rounds
+                        (p, ch_meta)
+                        for p, ch_meta in zip(path_files, channel_metadata, strict=True)
+                        if ch_meta.cycle not in skip_rounds
                     ],
                     strict=True,
                 ),
@@ -113,7 +151,7 @@ def from_paths(
         # create MultiChannelImage object with imgs and metadata
         output = cls(
             data=imgs,
-            metadata=[ChannelMetadata(name=ch, cycle=c) for c, ch in zip(cycles, channels, strict=True)],
+            metadata=channel_metadata,
         )
         return output
 
@@ -155,6 +193,42 @@ def get_cycles(self) -> list[int]:
         """Get the cycle numbers."""
         return [c.cycle for c in self.metadata]
 
+    def get_stainnumbers(self) -> list[int | None]:
+        """Get the staining numbers."""
+        return [c.stainnumber for c in self.metadata]
+
+    def get_image_types(self) -> list[str | None]:
+        """Get the staining types (stain or bleach)."""
+        return [c.imagetype for c in self.metadata]
+
+    def get_regions(self) -> list[int | None]:
+        """Get the regions."""
+        return [c.region for c in self.metadata]
+
+    def get_wells(self) -> list[str | None]:
+        """Get the wells."""
+        return [c.well for c in self.metadata]
+
+    def get_rois(self) -> list[int | None]:
+        """Get the ROIs."""
+        return [c.roi for c in self.metadata]
+
+    def get_fluorophores(self) -> list[str | None]:
+        """Get the fluorophores."""
+        return [c.fluorophore for c in self.metadata]
+
+    def get_fields(self) -> list[str | None]:
+        """Get the fields."""
+        return [c.field for c in self.metadata]
+
+    def get_clones(self) -> list[str | None]:
+        """Get the clones."""
+        return [c.clone for c in self.metadata]
+
+    def get_exposures(self) -> list[float | None]:
+        """Get the exposures."""
+        return [c.exposure for c in self.metadata]
+
     def sort_by_channel(self) -> None:
         """Sort the channels by cycle number."""
         self.data = [d for _, d in sorted(zip(self.metadata, self.data, strict=True), key=lambda x: x[0].cycle)]
@@ -305,15 +379,16 @@ def macsima(
         raise NotImplementedError("Parsing raw MACSima data is not yet implemented.")
 
 
-def parse_cycle_from_tiff(path: Path) -> int:
-    """Parse the cycle number from image name stored in tiff metadata."""
-    # Parsing from the metadata is more robust, as files can be easily renamed by users
-    images = from_tiff(path).images
+def _extract_filename_from_tif_metadata(path: Path) -> str | Any:
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        images = from_tiff(path).images
+
     name = images[0].name
-    return parse_name_to_cycle(name)
+    return name
 
 
-def parse_name_to_cycle(name: str) -> int:
+def _parse_name_to_cycle(name: str) -> int:
     """Parse the cycle number from the name of the image."""
     cycle = name.split("_")[0]
     if "-" in cycle:
@@ -321,6 +396,102 @@ def parse_name_to_cycle(name: str) -> int:
     return int(cycle)
 
 
+def parse_cycle(path: Path) -> int:
+    """Parse the cycle number.
+
+    Ideally the image name stored in tiff metadata is used. If this is not available it falls back to the filename itself
+    """
+    # Parsing from the metadata is more robust, as files can be easily renamed by users
+    try:
+        name = _extract_filename_from_tif_metadata(path)
+    except Exception:
+        logger.debug(f"Cannot parse cycle from OME metadata from {path.stem}. Falling back to parsing from file name.")
+        name = path.stem
+    return _parse_name_to_cycle(name)
+
+
+def parse_additional_metadata(path: Path, convert_numerics: bool = True) -> dict[str, Any]:
+    """Parse metadata from filename stored in tiff metadata."""
+    try:
+        filename = _extract_filename_from_tif_metadata(path)
+    except Exception:
+        logger.debug(
+            f"Cannot parse complete metadata from OME metadata from {path.stem}. "
+            f"Falling back to parsing from file name."
+        )
+        filename = path.name
+
+    lower = filename.lower()
+    if lower.endswith(".ome.tiff"):
+        filename = filename[: -len(".ome.tiff")]
+    elif lower.endswith(".ome.tif"):
+        filename = filename[: -len(".ome.tif")]
+    else:
+        filename, _ = os.path.splitext(filename)
+
+    pattern = re.compile(
+        r"^"
+        r"(?:CYC-(?P<cycle>\d+)_)?"  # cycle is optional for the autofluorescence images
+        r"SCN-(?P<stainnumber>\d+)_"
+        r"ST-(?P<imagetype>[A-Z]+)_"
+        r"R-(?P<region>\d+)_"
+        r"W-(?P<well>[A-Z]\d+)_"
+        r"ROI-(?P<roi>\d+)_"
+        r"F-(?P<field>\d+)"
+        r"(?:_A-(?P<marker>.*?)(?=(?:_C-|_D-)))?"  # marker optional; capture until _C- or _D-
+        r"(?:_C-(?P<clone>.*?)(?=_D-))?"  # clone optional; capture until _D-
+        r"_D-(?P<channel>[A-Za-z0-9-]+)_"
+        r"EXP-(?P<exposure>\d+(?:\.\d+)?)"  # Allow integer or float exposures
+        r"$",
+        re.ASCII,
+    )
+
+    m = pattern.match(filename)
+    if m is None:
+        raise ValueError(f"Error when trying to extract metadata for {path.stem}")
+
+    metadata = m.groupdict()
+
+    # If cycle was not matched we assume these are autofluorescence images
+    # The true cycle number in this case will be extracted from parse_cycle
+    if metadata["cycle"] is None:
+        if metadata["imagetype"] != "AF":
+            raise ValueError("Expected a cycle for non-autofluorescence channels, but didnt find one")
+
+    # For DAPI stain images, the marker metadata is empty. In these cases replace it with the channel, which also says "DAPI"
+    if metadata["marker"] is None:
+        metadata["marker"] = metadata["channel"]
+
+    # Rename imagetype to something more easy to understand
+    if metadata["imagetype"]:
+        imagetype_rename_dict = {"S": "stain", "B": "bleach", "AF": "autofluorescence"}
+        metadata["imagetype"] = imagetype_rename_dict[metadata["imagetype"]]
+
+    if convert_numerics:
+
+        def to_int(field_name: str, s: str) -> int:
+            m = re.search(r"\d+", s)
+            if not m:
+                raise ValueError(f"Expected numeric part in {field_name!r}: '{s}'")
+            return int(m.group(0))
+
+        integers_to_convert = ["cycle", "stainnumber", "region", "roi", "field"]
+        for int_field in integers_to_convert:
+            if metadata[int_field] is not None:
+                metadata[int_field] = to_int(int_field, metadata[int_field])
+
+        def to_float(field_name: str, s: str) -> float:
+            m = re.search(r"\d+", s)
+            if not m:
+                raise ValueError(f"Expected numeric part in {field_name!r}: '{s}'")
+            return float(m.group(0))
+
+        if metadata["exposure"] is not None:
+            metadata["exposure"] = to_float("exposure", metadata["exposure"])
+
+    return metadata
+
+
 def parse_processed_folder(
     path: Path,
     imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
@@ -453,12 +624,35 @@ def create_sdata(
 def create_table(mci: MultiChannelImage) -> ad.AnnData:
     cycles = mci.get_cycles()
     names = mci.get_channel_names()
+    stainnumbers = mci.get_stainnumbers()
+    imagetypes = mci.get_image_types()
+    regions = mci.get_regions()
+    wells = mci.get_wells()
+    rois = mci.get_rois()
+    fluorophores = mci.get_fluorophores()
+    fields = mci.get_fields()
+    clones = mci.get_clones()
+    exposures = mci.get_exposures()
+
     df = pd.DataFrame(
         {
             "name": names,
             "cycle": cycles,
+            "stainnumber": stainnumbers,
+            "imagetype": imagetypes,
+            "region": regions,
+            "well": wells,
+            "ROI": rois,
+            "fluorophore": fluorophores,
+            "field": fields,
+            "clone": clones,
+            "exposure": exposures,
         }
     )
+
+    # Replace missing data. This happens mostly in the clone column.
+    df = df.replace({None: pd.NA, "": pd.NA})
+
     table = ad.AnnData(var=df)
     table.var_names = names
     return sd.models.TableModel.parse(table)
diff --git a/tests/test_macsima.py b/tests/test_macsima.py
index abe4f76d..1f1dc898 100644
--- a/tests/test_macsima.py
+++ b/tests/test_macsima.py
@@ -14,9 +14,9 @@
 from spatialdata_io.readers.macsima import (
     ChannelMetadata,
     MultiChannelImage,
+    _parse_name_to_cycle,
     macsima,
-    parse_cycle_from_tiff,
-    parse_name_to_cycle,
+    parse_cycle,
 )
 from tests._utils import skip_if_below_python_version
 
@@ -162,7 +162,7 @@ def test_parsing_of_tiff_metadata_to_cycle(dataset: str, image: str, expected: i
     f = Path("./data") / dataset / image
     assert f.is_file()
 
-    cycles = parse_cycle_from_tiff(f)
+    cycles = parse_cycle(f)
 
     assert cycles == expected
 
@@ -175,7 +175,7 @@ def test_parsing_of_tiff_metadata_to_cycle(dataset: str, image: str, expected: i
     ],
 )
 def test_parsing_of_name_to_cycle(name: str, expected: int) -> None:
-    result = parse_name_to_cycle(name)
+    result = _parse_name_to_cycle(name)
     assert result == expected
 
 
@@ -224,7 +224,7 @@ def test_mci_array_reference() -> None:
 
 @skip_if_below_python_version()
 @pytest.mark.parametrize("dataset", ["Lung_adc_demo", "MACSimaData_HCA/HumanLiverH35"])
-def test_cli_macimsa(runner: CliRunner, dataset: str) -> None:
+def test_cli_macsima(runner: CliRunner, dataset: str) -> None:
     f = Path("./data") / dataset
     assert f.is_dir()
     with TemporaryDirectory() as tmpdir:

From 82d304f83b37cac1e3b975d904230292a9d93a0b Mon Sep 17 00:00:00 2001
From: Martin Helm <martin@bio-ai.org>
Date: Tue, 16 Dec 2025 11:49:17 +0000
Subject: [PATCH 3/5] Parse all metadata from ome metadata, nothing from
 filename

---
 src/spatialdata_io/readers/macsima.py | 394 +++++++++++++++-----------
 tests/test_macsima.py                 | 311 ++++++++++++++++++--
 2 files changed, 514 insertions(+), 191 deletions(-)

diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py
index 461cfd07..7c04bb2f 100644
--- a/src/spatialdata_io/readers/macsima.py
+++ b/src/spatialdata_io/readers/macsima.py
@@ -15,7 +15,7 @@
 import pandas as pd
 import spatialdata as sd
 from dask_image.imread import imread
-from ome_types import from_tiff
+from ome_types import OME, from_tiff
 from spatialdata import SpatialData
 from spatialdata._logging import logger
 
@@ -47,15 +47,12 @@ class ChannelMetadata:
 
     name: str
     cycle: int
-    stainnumber: int | None = None
-    imagetype: str | None = None
-    region: int | None = None
-    well: str | None = None
-    roi: int | None = None
-    fluorophore: str | None = None
-    field: str | None = None
-    clone: str | None = None
-    exposure: float | None = None
+    imagetype: str
+    well: str
+    roi: int
+    fluorophore: str
+    exposure: float
+    clone: str | None = None  # For example DAPI doesnt have a clone
 
 
 @dataclass
@@ -75,55 +72,26 @@ def from_paths(
     ) -> MultiChannelImage:
         channel_metadata: list[ChannelMetadata] = []
         for p in path_files:
-            cycle = parse_cycle(p)
             try:
-                with warnings.catch_warnings():
-                    warnings.simplefilter("ignore")
-                    channel_names = parse_channels(p)
-                if len(channel_names) > 1:
-                    warnings.warn(
-                        f"Found multiple channels in OME-TIFF file {p}. Only the first one will be used.",
-                        UserWarning,
-                        stacklevel=2,
-                    )
-                channel = channel_names[0]
+                metadata = parse_metadata(p)
             except ValueError as e:
                 warnings.warn(
                     f"Cannot parse OME metadata from {p}. Error: {e}. Skipping this file.", UserWarning, stacklevel=2
                 )
                 continue
 
-            # Try to parse additional metadata
-            # If it succeeds, prefer the channelname from this additional metadata over the simple parsing.
-            try:
-                additional_metadata = parse_additional_metadata(p, convert_numerics=True)
-            except Exception as e:
-                warnings.warn(f"Cannot parse additional metadata from {p.stem}. Error: {e}.", stacklevel=2)
-                additional_metadata = None
-
-            if additional_metadata is not None:
-                # Rename markername for a background image, to prevent it from duplicating the stain image name
-                if additional_metadata["imagetype"] == "bleach":
-                    additional_metadata["marker"] = additional_metadata["marker"] + "_background"
-
-                channel_metadata.append(
-                    ChannelMetadata(
-                        name=additional_metadata["marker"],
-                        cycle=cycle,
-                        stainnumber=additional_metadata["stainnumber"],
-                        imagetype=additional_metadata["imagetype"],
-                        region=additional_metadata["region"],
-                        well=additional_metadata["well"],
-                        roi=additional_metadata["roi"],
-                        fluorophore=additional_metadata["channel"],
-                        field=additional_metadata["field"],
-                        clone=additional_metadata["clone"],
-                        exposure=additional_metadata["exposure"],
-                    )
+            channel_metadata.append(
+                ChannelMetadata(
+                    name=metadata["name"],
+                    cycle=metadata["cycle"],
+                    imagetype=metadata["imagetype"],
+                    well=metadata["well"],
+                    roi=metadata["roi"],
+                    fluorophore=metadata["fluorophore"],
+                    clone=metadata["clone"],
+                    exposure=metadata["exposure"],
                 )
-            else:
-                # Only the minimal required metadata
-                channel_metadata.append(ChannelMetadata(name=channel, cycle=cycle))
+            )
 
         if len(path_files) != len(channel_metadata):
             raise ValueError("Length of path_files and metadata must be the same.")
@@ -193,18 +161,10 @@ def get_cycles(self) -> list[int]:
         """Get the cycle numbers."""
         return [c.cycle for c in self.metadata]
 
-    def get_stainnumbers(self) -> list[int | None]:
-        """Get the staining numbers."""
-        return [c.stainnumber for c in self.metadata]
-
     def get_image_types(self) -> list[str | None]:
         """Get the staining types (stain or bleach)."""
         return [c.imagetype for c in self.metadata]
 
-    def get_regions(self) -> list[int | None]:
-        """Get the regions."""
-        return [c.region for c in self.metadata]
-
     def get_wells(self) -> list[str | None]:
         """Get the wells."""
         return [c.well for c in self.metadata]
@@ -217,10 +177,6 @@ def get_fluorophores(self) -> list[str | None]:
         """Get the fluorophores."""
         return [c.fluorophore for c in self.metadata]
 
-    def get_fields(self) -> list[str | None]:
-        """Get the fields."""
-        return [c.field for c in self.metadata]
-
     def get_clones(self) -> list[str | None]:
         """Get the clones."""
         return [c.clone for c in self.metadata]
@@ -379,117 +335,234 @@ def macsima(
         raise NotImplementedError("Parsing raw MACSima data is not yet implemented.")
 
 
-def _extract_filename_from_tif_metadata(path: Path) -> str | Any:
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        images = from_tiff(path).images
+def _collect_map_annotation_values(ome: OME) -> dict[str, Any]:
+    """Collapse structured_annotations from OME into dictionary.
 
-    name = images[0].name
-    return name
+    Collects all key/value pairs from all map_annotations ins structured_annotations into a single flat dictionary.
+    If a key appears multiple times across annotations, the *first*
+    occurrence wins and later occurrences are ignored.
+    """
+    merged: dict[str, Any] = {}
+
+    sa = getattr(ome, "structured_annotations", None)
+    map_annotations = getattr(sa, "map_annotations", []) if sa else []
+
+    for ma in map_annotations:
+        raw_value = ma.value
+        value = raw_value.dict()
 
+        for k, v in value.items():
+            if k not in merged:
+                merged[k] = v
 
-def _parse_name_to_cycle(name: str) -> int:
-    """Parse the cycle number from the name of the image."""
-    cycle = name.split("_")[0]
-    if "-" in cycle:
-        cycle = cycle.split("-")[1]
-    return int(cycle)
+    return merged
 
 
-def parse_cycle(path: Path) -> int:
-    """Parse the cycle number.
+def _get_software_version(ma_values: dict[str, Any]) -> str:
+    """Extract the software version string from the flattened map-annotation values.
 
-    Ideally the image name stored in tiff metadata is used. If this is not available it falls back to the filename itself
+    Supports both:
+      - 'Software version'  (v0)
+      - 'SoftwareVersion'   (v1)
     """
-    # Parsing from the metadata is more robust, as files can be easily renamed by users
-    try:
-        name = _extract_filename_from_tif_metadata(path)
-    except Exception:
-        logger.debug(f"Cannot parse cycle from OME metadata from {path.stem}. Falling back to parsing from file name.")
-        name = path.stem
-    return _parse_name_to_cycle(name)
-
-
-def parse_additional_metadata(path: Path, convert_numerics: bool = True) -> dict[str, Any]:
-    """Parse metadata from filename stored in tiff metadata."""
-    try:
-        filename = _extract_filename_from_tif_metadata(path)
-    except Exception:
-        logger.debug(
-            f"Cannot parse complete metadata from OME metadata from {path.stem}. "
-            f"Falling back to parsing from file name."
-        )
-        filename = path.name
+    for key in ("SoftwareVersion", "Software version"):
+        v = ma_values.get(key)
+        if isinstance(v, str) and v.strip():
+            return v.strip()
+
+    raise ValueError("Could not extract Software Version from OME metadata.")
+
+
+def _get_software_major_version(version: str) -> int:
+    """Parse the major component of a semantic version string."""
+    s = version.strip()
+    if s.startswith(("v", "V")):
+        s = s[1:]
+    parts = s.split(".")
+    if not parts:
+        raise ValueError("Could not extract major software version part from version string.")
+
+    return int(parts[0])
+
+
+def _parse_v0_ome_metadata(ome: OME) -> dict[str, Any]:
+    """Parse ome."""
+    metadata: dict[str, Any] = {
+        "name": None,
+        "clone": None,
+        "fluorophore": None,
+        "cycle": None,
+        "imagetype": None,
+        "well": None,
+        "roi": None,
+        "exposure": None,
+    }
 
-    lower = filename.lower()
-    if lower.endswith(".ome.tiff"):
-        filename = filename[: -len(".ome.tiff")]
-    elif lower.endswith(".ome.tif"):
-        filename = filename[: -len(".ome.tif")]
-    else:
-        filename, _ = os.path.splitext(filename)
-
-    pattern = re.compile(
-        r"^"
-        r"(?:CYC-(?P<cycle>\d+)_)?"  # cycle is optional for the autofluorescence images
-        r"SCN-(?P<stainnumber>\d+)_"
-        r"ST-(?P<imagetype>[A-Z]+)_"
-        r"R-(?P<region>\d+)_"
-        r"W-(?P<well>[A-Z]\d+)_"
-        r"ROI-(?P<roi>\d+)_"
-        r"F-(?P<field>\d+)"
-        r"(?:_A-(?P<marker>.*?)(?=(?:_C-|_D-)))?"  # marker optional; capture until _C- or _D-
-        r"(?:_C-(?P<clone>.*?)(?=_D-))?"  # clone optional; capture until _D-
-        r"_D-(?P<channel>[A-Za-z0-9-]+)_"
-        r"EXP-(?P<exposure>\d+(?:\.\d+)?)"  # Allow integer or float exposures
-        r"$",
-        re.ASCII,
-    )
+    antigen = None
+    clone = None
+
+    if ome.screens:
+        screen0 = ome.screens[0]
+        reagents = getattr(screen0, "reagents", [])
+        if reagents:
+            r0 = reagents[0]
+            name = getattr(r0, "name", None)
+            if isinstance(name, str) and name:
+                if "__" in name:
+                    antigen, clone = name.split("__", 1)
+                else:
+                    antigen = name
+                    clone = None
+
+    metadata["name"] = antigen
+    metadata["clone"] = clone
+
+    ma_values = _collect_map_annotation_values(ome)
+
+    if "Fluorochrome" in ma_values:
+        metadata["fluorophore"] = ma_values["Fluorochrome"]
+
+    if "Exposure time" in ma_values:
+        exp_time = ma_values["Exposure time"]
+        try:
+            metadata["exposure"] = float(exp_time)
+        except (TypeError, ValueError):
+            metadata["exposure"] = None
+
+    if "Cycle" in ma_values:
+        cyc = ma_values["Cycle"]
+        try:
+            metadata["cycle"] = int(cyc)
+        except (TypeError, ValueError):
+            metadata["cycle"] = None
+
+    if "ROI ID" in ma_values:
+        roi = ma_values["ROI ID"]
+        try:
+            metadata["roi"] = int(roi)
+        except (TypeError, ValueError):
+            metadata["roi"] = None
+
+    if "MICS cycle type" in ma_values:
+        metadata["imagetype"] = ma_values["MICS cycle type"]
+
+    well = None
+    if ome.plates:
+        plate0 = ome.plates[0]
+        wells = getattr(plate0, "wells", [])
+        if wells:
+            w0 = wells[0]
+            ext_id = getattr(w0, "external_identifier", None)
+            if isinstance(ext_id, str) and ext_id:
+                well = ext_id
+
+    metadata["well"] = well
+
+    # Add _background suffix to marker name of bleach images, to distinguis them from stain image
+    if metadata["imagetype"] == "BleachCycle":
+        metadata["name"] = metadata["name"] + "_background"
 
-    m = pattern.match(filename)
-    if m is None:
-        raise ValueError(f"Error when trying to extract metadata for {path.stem}")
+    return metadata
 
-    metadata = m.groupdict()
 
-    # If cycle was not matched we assume these are autofluorescence images
-    # The true cycle number in this case will be extracted from parse_cycle
-    if metadata["cycle"] is None:
-        if metadata["imagetype"] != "AF":
-            raise ValueError("Expected a cycle for non-autofluorescence channels, but didnt find one")
+def _parse_v1_ome_metadata(ome: OME) -> dict[str, Any]:
+    """Your existing MACSima-style parser (1.x.x layout)."""
+    metadata: dict[str, Any] = {
+        "name": None,
+        "clone": None,
+        "fluorophore": None,
+        "cycle": None,
+        "imagetype": None,
+        "well": None,
+        "roi": None,
+        "exposure": None,
+    }
 
-    # For DAPI stain images, the marker metadata is empty. In these cases replace it with the channel, which also says "DAPI"
-    if metadata["marker"] is None:
-        metadata["marker"] = metadata["channel"]
+    ma_values = _collect_map_annotation_values(ome)
+
+    if "Clone" in ma_values:
+        metadata["clone"] = ma_values["Clone"]
+
+    antigen_name = None
+    if "Biomarker" in ma_values and ma_values["Biomarker"]:
+        antigen_name = ma_values["Biomarker"]
+    elif "Dye" in ma_values and ma_values["Dye"]:
+        antigen_name = ma_values["Dye"]
+
+    metadata["name"] = antigen_name
+
+    if "Fluorochrome" in ma_values and ma_values["Fluorochrome"]:
+        metadata["fluorophore"] = ma_values["Fluorochrome"]
+    elif "Dye" in ma_values and ma_values["Dye"]:
+        metadata["fluorophore"] = ma_values["Dye"]
+
+    if "ExposureTime" in ma_values:
+        exp_time = ma_values["ExposureTime"]
+        try:
+            metadata["exposure"] = float(exp_time)
+        except (TypeError, ValueError):
+            metadata["exposure"] = None
+
+    if "Cycle" in ma_values:
+        cyc = ma_values["Cycle"]
+        try:
+            metadata["cycle"] = int(cyc)
+        except (TypeError, ValueError):
+            metadata["cycle"] = None
+
+    if "RoiId" in ma_values:
+        roi = ma_values["RoiId"]
+        try:
+            metadata["roi"] = int(roi)
+        except (TypeError, ValueError):
+            metadata["roi"] = None
+
+    if "ScanType" in ma_values:
+        metadata["imagetype"] = ma_values["ScanType"]
+
+    well = None
+    if ome.plates:
+        plate0 = ome.plates[0]
+        wells = getattr(plate0, "wells", [])
+        if wells:
+            w0 = wells[0]
+            ext_id = getattr(w0, "external_identifier", None)
+            if isinstance(ext_id, str) and ext_id:
+                well = ext_id
+
+    metadata["well"] = well
+
+    # Add _background suffix to marker name of bleach images, to distinguis them from stain image
+    if metadata["imagetype"] == "B":
+        metadata["name"] = metadata["name"] + "_background"
 
-    # Rename imagetype to something more easy to understand
-    if metadata["imagetype"]:
-        imagetype_rename_dict = {"S": "stain", "B": "bleach", "AF": "autofluorescence"}
-        metadata["imagetype"] = imagetype_rename_dict[metadata["imagetype"]]
+    return metadata
 
-    if convert_numerics:
 
-        def to_int(field_name: str, s: str) -> int:
-            m = re.search(r"\d+", s)
-            if not m:
-                raise ValueError(f"Expected numeric part in {field_name!r}: '{s}'")
-            return int(m.group(0))
+def _parse_ome_metadata(ome: OME) -> dict[str, Any]:
+    """Extract the software version from OME metadata and parse with appropriate parser."""
+    ma_values = _collect_map_annotation_values(ome)
+    version_str = _get_software_version(ma_values)
+    major = _get_software_major_version(version_str)
 
-        integers_to_convert = ["cycle", "stainnumber", "region", "roi", "field"]
-        for int_field in integers_to_convert:
-            if metadata[int_field] is not None:
-                metadata[int_field] = to_int(int_field, metadata[int_field])
+    if major == 0:
+        return _parse_v0_ome_metadata(ome)
+    elif major == 1:
+        return _parse_v1_ome_metadata(ome)
+    else:
+        raise ValueError("Unknown software version, cannot determine parser")
 
-        def to_float(field_name: str, s: str) -> float:
-            m = re.search(r"\d+", s)
-            if not m:
-                raise ValueError(f"Expected numeric part in {field_name!r}: '{s}'")
-            return float(m.group(0))
 
-        if metadata["exposure"] is not None:
-            metadata["exposure"] = to_float("exposure", metadata["exposure"])
+def parse_metadata(path: Path) -> dict[str, Any]:
+    """Parse metadata for a file.
 
-    return metadata
+    All metadata is extracted from the OME metadata.
+    """
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        ome = from_tiff(path)
+
+    return _parse_ome_metadata(ome)
 
 
 def parse_processed_folder(
@@ -580,7 +653,9 @@ def create_sdata(
             [i for i in range(len(mci.metadata)) if i not in nuclei_idx_without_first_and_last],
         )
 
-    pixels_to_microns = parse_physical_size(path_files[0])
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        pixels_to_microns = parse_physical_size(path_files[0])
 
     image_element = create_image_element(
         mci,
@@ -624,13 +699,10 @@ def create_sdata(
 def create_table(mci: MultiChannelImage) -> ad.AnnData:
     cycles = mci.get_cycles()
     names = mci.get_channel_names()
-    stainnumbers = mci.get_stainnumbers()
     imagetypes = mci.get_image_types()
-    regions = mci.get_regions()
     wells = mci.get_wells()
     rois = mci.get_rois()
     fluorophores = mci.get_fluorophores()
-    fields = mci.get_fields()
     clones = mci.get_clones()
     exposures = mci.get_exposures()
 
@@ -638,13 +710,10 @@ def create_table(mci: MultiChannelImage) -> ad.AnnData:
         {
             "name": names,
             "cycle": cycles,
-            "stainnumber": stainnumbers,
             "imagetype": imagetypes,
-            "region": regions,
             "well": wells,
             "ROI": rois,
             "fluorophore": fluorophores,
-            "field": fields,
             "clone": clones,
             "exposure": exposures,
         }
@@ -652,6 +721,7 @@ def create_table(mci: MultiChannelImage) -> ad.AnnData:
 
     # Replace missing data. This happens mostly in the clone column.
     df = df.replace({None: pd.NA, "": pd.NA})
+    df.index = df.index.astype(str)
 
     table = ad.AnnData(var=df)
     table.var_names = names
diff --git a/tests/test_macsima.py b/tests/test_macsima.py
index 1f1dc898..1f557d23 100644
--- a/tests/test_macsima.py
+++ b/tests/test_macsima.py
@@ -7,6 +7,15 @@
 import dask.array as da
 import pytest
 from click.testing import CliRunner
+from ome_types import OME
+from ome_types.model import (
+    MapAnnotation,
+    Plate,
+    Reagent,
+    Screen,
+    StructuredAnnotations,
+    Well,
+)
 from spatialdata import read_zarr
 from spatialdata.models import get_channel_names
 
@@ -14,9 +23,13 @@
 from spatialdata_io.readers.macsima import (
     ChannelMetadata,
     MultiChannelImage,
-    _parse_name_to_cycle,
+    _collect_map_annotation_values,
+    _get_software_major_version,
+    _get_software_version,
+    _parse_ome_metadata,
+    _parse_v0_ome_metadata,
+    _parse_v1_ome_metadata,
     macsima,
-    parse_cycle,
 )
 from tests._utils import skip_if_below_python_version
 
@@ -33,6 +46,28 @@
     )
 
 
+# Helper to create ChannelMetadata with some defaults
+def make_ChannelMetadata(
+    name: str,
+    cycle: int,
+    fluorophore: str | None = None,
+    exposure: float | None = None,
+    imagetype: str | None = None,
+    well: str | None = None,
+    roi: int | None = None,
+) -> ChannelMetadata:
+    """Helper to construct ChannelMetadata with required defaults."""
+    return ChannelMetadata(
+        name=name,
+        cycle=cycle,
+        fluorophore=fluorophore or "",
+        exposure=exposure if exposure is not None else 0.0,
+        imagetype=imagetype or "StainCycle",
+        well=well or "A01",
+        roi=roi if roi is not None else 0,
+    )
+
+
 @skip_if_below_python_version()
 @pytest.mark.parametrize(
     "dataset,expected",
@@ -154,38 +189,15 @@ def test_parsing_style() -> None:
         macsima(Path(), parsing_style="not_a_parsing_style")
 
 
-@pytest.mark.parametrize(
-    "dataset,image,expected",
-    [("OMAP23", "OMAP23_CD3E.ome.tif", 1), ("OMAP23", "OMAP23_CD4.ome.tif", 4)],
-)
-def test_parsing_of_tiff_metadata_to_cycle(dataset: str, image: str, expected: int) -> None:
-    f = Path("./data") / dataset / image
-    assert f.is_file()
-
-    cycles = parse_cycle(f)
-
-    assert cycles == expected
-
-
-@pytest.mark.parametrize(
-    "name,expected",
-    [
-        ("C-002_S-000_S_FITC_R-01_W-C-1_ROI-01_A-CD147_C-REA282.tif", 2),
-        ("001_S_R-01_W-B-1_ROI-01_A-CD14REA599ROI1_C-REA599.ome.tif", 1),
-    ],
-)
-def test_parsing_of_name_to_cycle(name: str, expected: int) -> None:
-    result = _parse_name_to_cycle(name)
-    assert result == expected
-
-
 def test_mci_sort_by_channel() -> None:
     sizes = [100, 200, 300]
     c_names = ["test11", "test3", "test2"]
     cycles = [2, 0, 1]
     mci = MultiChannelImage(
         data=[RNG.random((size, size), chunks=(10, 10)) for size in sizes],
-        metadata=[ChannelMetadata(name=c_name, cycle=cycle) for c_name, cycle in zip(c_names, cycles, strict=False)],
+        metadata=[
+            make_ChannelMetadata(name=c_name, cycle=cycle) for c_name, cycle in zip(c_names, cycles, strict=False)
+        ],
     )
     assert mci.get_channel_names() == c_names
     assert [x.shape[0] for x in mci.data] == sizes
@@ -199,7 +211,7 @@ def test_mci_array_reference() -> None:
     arr2 = RNG.random((200, 200), chunks=(10, 10))
     mci = MultiChannelImage(
         data=[arr1, arr2],
-        metadata=[ChannelMetadata(name="test1", cycle=0), ChannelMetadata(name="test2", cycle=1)],
+        metadata=[make_ChannelMetadata(name="test1", cycle=0), make_ChannelMetadata(name="test2", cycle=1)],
     )
     orig_arr1 = arr1.copy()
 
@@ -246,3 +258,244 @@ def test_cli_macsima(runner: CliRunner, dataset: str) -> None:
         )
         assert result.exit_code == 0, result.output
         _ = read_zarr(output_zarr)
+
+
+def test_collect_map_annotation_values_merges_all_keys_first_wins() -> None:
+    ome = OME(
+        structured_annotations=StructuredAnnotations(
+            map_annotations=[
+                MapAnnotation(value={"a": "1", "b": "2"}),
+                MapAnnotation(value={"b": "99", "c": "3"}),
+            ]
+        )
+    )
+
+    result = _collect_map_annotation_values(ome)
+
+    assert result == {"a": "1", "b": "2", "c": "3"}
+    # ensure later duplicate "b" did not overwrite
+    assert result["b"] == "2"
+
+
+def test_collect_map_annotation_values_handles_missing_sa_and_empty_list() -> None:
+    # No structured_annotations at all
+    ome1 = OME()
+    assert _collect_map_annotation_values(ome1) == {}
+
+    # structured_annotations present but with empty map_annotation list
+    ome2 = OME(structured_annotations=StructuredAnnotations(map_annotation=[]))
+    assert _collect_map_annotation_values(ome2) == {}
+
+
+@pytest.mark.parametrize(
+    "ma_values, expected",
+    [
+        ({"SoftwareVersion": " 1.2.3 "}, "1.2.3"),
+        ({"Software version": " v0.9.0"}, "v0.9.0"),
+    ],
+)
+def test_get_software_version_success(ma_values: dict[str, str], expected: str) -> None:
+    assert _get_software_version(ma_values) == expected
+
+
+@pytest.mark.parametrize(
+    "ma_values",
+    [
+        ({}),
+        ({"SoftwareVersion": ""}),
+        ({"SoftwareVersion": "    "}),
+        ({"Software version": ""}),
+        ({"Software version": None}),
+    ],
+)
+def test_get_software_version_failure(ma_values: dict[str, str | None]) -> None:
+    with pytest.raises(ValueError, match="Could not extract Software Version"):
+        _get_software_version(ma_values)
+
+
+@pytest.mark.parametrize(
+    "version, expected",
+    [
+        ("1.2.3", 1),
+        ("  2.0.0 ", 2),
+        ("v3.4.5", 3),
+        ("V4.0.1", 4),
+        ("10", 10),
+    ],
+)
+def test_get_software_major_version_success(version: str, expected: int) -> None:
+    assert _get_software_major_version(version) == expected
+
+
+def test_get_software_major_version_empty_parts_raises() -> None:
+    with pytest.raises(ValueError):
+        _get_software_major_version("")
+
+
+def test_parse_v0_ome_metadata_basic_extraction_and_conversions() -> None:
+    ome = OME(
+        structured_annotations=StructuredAnnotations(
+            map_annotations=[
+                MapAnnotation(
+                    value={
+                        "Fluorochrome": "AF488",
+                        "Exposure time": "123.4",
+                        "Cycle": "5",
+                        "ROI ID": "7",
+                        "MICS cycle type": "StainCycle",
+                    }
+                )
+            ]
+        ),
+        screens=[Screen(reagents=[Reagent(name="CD3__OKT3")])],
+        plates=[Plate(wells=[Well(column=0, row=0, external_identifier="A01")])],
+    )
+
+    md = _parse_v0_ome_metadata(ome)
+
+    assert md["name"] == "CD3"
+    assert md["clone"] == "OKT3"
+    assert md["fluorophore"] == "AF488"
+    assert md["exposure"] == pytest.approx(123.4)
+    assert md["cycle"] == 5
+    assert md["roi"] == 7
+    assert md["imagetype"] == "StainCycle"
+    assert md["well"] == "A01"
+
+
+def test_parse_v0_ome_metadata_handles_missing_or_invalid_numeric_fields() -> None:
+    ome = OME(
+        structured_annotations=StructuredAnnotations(
+            map_annotations=[
+                MapAnnotation(
+                    value={
+                        "Exposure time": "not-a-number",
+                        "Cycle": "NaN",
+                        "ROI ID": "x",
+                    }
+                )
+            ]
+        ),
+        screens=[Screen(reagents=[Reagent(name="MarkerOnly")])],
+        plates=[Plate(wells=[Well(column=0, row=0, external_identifier=None)])],
+    )
+
+    md = _parse_v0_ome_metadata(ome)
+
+    # name from reagent without "__"
+    assert md["name"] == "MarkerOnly"
+    assert md["clone"] is None
+    assert md["exposure"] is None
+    assert md["cycle"] is None
+    assert md["roi"] is None
+    # well remains None
+    assert md["well"] is None
+
+
+def test_parse_v0_ome_metadata_bleach_cycle_appends_background() -> None:
+    ome = OME(
+        structured_annotations=StructuredAnnotations(
+            map_annotations=[
+                MapAnnotation(
+                    value={
+                        "MICS cycle type": "BleachCycle",
+                    }
+                )
+            ]
+        ),
+        screens=[Screen(reagents=[Reagent(name="CD4__RPA-T4")])],
+    )
+
+    md = _parse_v0_ome_metadata(ome)
+
+    assert md["imagetype"] == "BleachCycle"
+    assert md["name"] == "CD4_background"
+
+
+def test_parse_v1_ome_metadata_primary_keys_biomarker_and_clone() -> None:
+    ome = OME(
+        structured_annotations=StructuredAnnotations(
+            map_annotations=[
+                MapAnnotation(
+                    value={
+                        "Clone": "OKT3",
+                        "Biomarker": "CD3",
+                        "Fluorochrome": "AF488",
+                        "ExposureTime": "45.6",
+                        "Cycle": "3",
+                        "RoiId": "10",
+                        "ScanType": "S",
+                    }
+                )
+            ]
+        ),
+        plates=[Plate(wells=[Well(column=0, row=0, external_identifier="B02")])],
+    )
+
+    md = _parse_v1_ome_metadata(ome)
+
+    assert md["name"] == "CD3"
+    assert md["clone"] == "OKT3"
+    assert md["fluorophore"] == "AF488"
+    assert md["exposure"] == pytest.approx(45.6)
+    assert md["cycle"] == 3
+    assert md["roi"] == 10
+    assert md["imagetype"] == "S"
+    assert md["well"] == "B02"
+
+
+def test_parse_v1_ome_metadata_invalid_numerics_become_none() -> None:
+    ome = OME(
+        structured_annotations=StructuredAnnotations(
+            map_annotations=[
+                MapAnnotation(
+                    value={
+                        "ExposureTime": "x",
+                        "Cycle": "NaN",
+                        "RoiId": "ABC",
+                    }
+                )
+            ]
+        ),
+    )
+
+    md = _parse_v1_ome_metadata(ome)
+
+    assert md["exposure"] is None
+    assert md["cycle"] is None
+    assert md["roi"] is None
+
+
+def make_ome_with_version(version_value: str, extra_ma: dict[str, Any] | None = None) -> OME:
+    base = {"SoftwareVersion": version_value}
+    if extra_ma:
+        base.update(extra_ma)
+    return OME(structured_annotations=StructuredAnnotations(map_annotations=[MapAnnotation(value=base)]))
+
+
+def test_parse_ome_metadata_dispatches_to_v0() -> None:
+    ome = make_ome_with_version("0.9.0")
+    # enrich some so v0 parser has something to see
+    ome.screens = [Screen(reagents=[Reagent(name="Marker0")])]
+
+    md = _parse_ome_metadata(ome)
+
+    # Assert that the v0 parser was used by checking a field
+    # that can only come from v0 parsing logic (e.g. name from reagent)
+    assert "name" in md
+    assert md["name"] == "Marker0"
+
+
+def test_parse_ome_metadata_dispatches_to_v1() -> None:
+    ome = make_ome_with_version("1.0.0", extra_ma={"Biomarker": "CD3"})
+
+    md = _parse_ome_metadata(ome)
+
+    assert md["name"] == "CD3"
+
+
+def test_parse_ome_metadata_unknown_major_raises() -> None:
+    ome = make_ome_with_version("2.0.0")
+
+    with pytest.raises(ValueError, match="Unknown software version"):
+        _parse_ome_metadata(ome)

From cef71707661c88f77fd2875305f333d7c46578e5 Mon Sep 17 00:00:00 2001
From: Martin Helm <martin@bio-ai.org>
Date: Tue, 16 Dec 2025 14:53:08 +0000
Subject: [PATCH 4/5] Add public OMAP data sets for testing and adjust tests

---
 .github/workflows/prepare_test_data.yaml | 16 ++++++---
 tests/test_macsima.py                    | 42 +++++++++++-------------
 2 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/prepare_test_data.yaml b/.github/workflows/prepare_test_data.yaml
index e694d2de..dfd182e1 100644
--- a/.github/workflows/prepare_test_data.yaml
+++ b/.github/workflows/prepare_test_data.yaml
@@ -4,10 +4,10 @@ on:
     schedule:
         - cron: "0 0 1 * *" # run once a month to prevent artifact expiration
     workflow_dispatch:
-# uncomment and adjust the branch name if you need to add new datasets to the artifact
-#    push:
-#        branches:
-#            - main
+    # uncomment and adjust the branch name if you need to add new datasets to the artifact
+    push:
+        branches:
+            - fix/macsima-cycle-parsing
 
 jobs:
     prepare-data:
@@ -57,6 +57,14 @@ jobs:
                   # Spatial Genomics seqFISH v2
                   curl -O https://s3.embl.de/spatialdata/raw_data/seqfish-2-test-dataset.zip
 
+                  # -------
+                  # MACSima OMAP datasets are licensed as CC BY 4.0
+                  # OMAP23 for current format
+                  curl https://zenodo.org/api/records/14008816/files-archive -o OMAP23.zip
+
+                  # OMAP10 for legacy format
+                  curl https://zenodo.org/api/records/7875938/files-archive -o OMAP10.zip
+
             - name: Unzip files
               run: |
                   cd ./data
diff --git a/tests/test_macsima.py b/tests/test_macsima.py
index 1f557d23..c0af099e 100644
--- a/tests/test_macsima.py
+++ b/tests/test_macsima.py
@@ -35,12 +35,10 @@
 
 RNG = da.random.default_rng(seed=0)
 
-if not (
-    Path("./data/Lung_adc_demo").exists() or Path("./data/MACSimaData_HCA").exists() or Path("./data/OMAP23").exists()
-):
+if not (Path("./data/OMAP10").exists() or Path("./data/OMAP23").exists()):
     pytest.skip(
-        "Requires the Lung_adc_demo or MACSimaData_HCA or OMAP23 datasets, please check "
-        "https://github.com/giovp/spatialdata-sandbox/blob/main/macsima_io/Readme.md for instructions on how to get the data."
+        "Requires the OMAP10 or OMAP23 datasets. "
+        "The OMAP10 dataset can be downloaded from https://zenodo.org/records/7875938 "
         "The OMAP23 dataset can be downloaded from https://zenodo.org/records/14008816",
         allow_module_level=True,
     )
@@ -72,8 +70,8 @@ def make_ChannelMetadata(
 @pytest.mark.parametrize(
     "dataset,expected",
     [
-        ("Lung_adc_demo", {"y": (0, 15460), "x": (0, 13864)}),
-        ("MACSimaData_HCA/HumanLiverH35", {"y": (0, 1154), "x": (0, 1396)}),
+        ("OMAP10", {"y": (0, 7797), "x": (0, 9407)}),
+        ("OMAP23", {"y": (0, 7703), "x": (0, 9329)}),
     ],
 )
 def test_image_size(dataset: str, expected: dict[str, Any]) -> None:
@@ -81,7 +79,7 @@ def test_image_size(dataset: str, expected: dict[str, Any]) -> None:
 
     f = Path("./data") / dataset
     assert f.is_dir()
-    sdata = macsima(f)
+    sdata = macsima(f, transformations=False)  # Do not transform to make it easier to compare against pixel dimensions
     el = sdata[list(sdata.images.keys())[0]]
     cs = sdata.coordinate_systems[0]
 
@@ -93,7 +91,7 @@ def test_image_size(dataset: str, expected: dict[str, Any]) -> None:
 @skip_if_below_python_version()
 @pytest.mark.parametrize(
     "dataset,expected",
-    [("Lung_adc_demo", 116), ("MACSimaData_HCA/HumanLiverH35", 102)],
+    [("OMAP10", 31), ("OMAP23", 29)],
 )
 def test_total_channels(dataset: str, expected: int) -> None:
     f = Path("./data") / dataset
@@ -106,12 +104,12 @@ def test_total_channels(dataset: str, expected: int) -> None:
     assert channels == expected
 
 
-@skip_if_below_python_version()
+# @skip_if_below_python_version()
 @pytest.mark.parametrize(
     "dataset,expected",
     [
-        ("Lung_adc_demo", ["R0 DAPI", "R1 CD68", "R1 CD163"]),
-        ("MACSimaData_HCA/HumanLiverH35", ["R0 DAPI", "R1 PE", "R1 DAPI"]),
+        ("OMAP10", ["R1 DAPI", "R1 CD11c", "R1 Actin"]),
+        ("OMAP23", ["R1 CD3", "R1 DAPI", "R1 CD274"]),
     ],
 )
 def test_channel_names(dataset: str, expected: list[str]) -> None:
@@ -129,8 +127,8 @@ def test_channel_names(dataset: str, expected: list[str]) -> None:
 @pytest.mark.parametrize(
     "dataset,expected",
     [
-        ("Lung_adc_demo", 68),
-        ("MACSimaData_HCA/HumanLiverH35", 51),
+        ("OMAP10", 22),
+        ("OMAP23", 15),
     ],
 )
 def test_total_rounds(dataset: str, expected: list[int]) -> None:
@@ -146,11 +144,11 @@ def test_total_rounds(dataset: str, expected: list[int]) -> None:
 @pytest.mark.parametrize(
     "dataset,skip_rounds,expected",
     [
-        ("Lung_adc_demo", list(range(2, 68)), ["DAPI (1)", "CD68", "CD163", "DAPI (2)", "Control"]),
+        ("OMAP10", list(range(2, 23)), ["DAPI", "CD11c", "Actin", "CD15"]),
         (
-            "MACSimaData_HCA/HumanLiverH35",
-            list(range(2, 51)),
-            ["DAPI (1)", "PE", "CD14", "Vimentin", "DAPI (2)", "WT1"],
+            "OMAP23",
+            list(range(3, 16)),
+            ["CD3", "DAPI", "CD274", "CD279"],
         ),
     ],
 )
@@ -169,8 +167,8 @@ def test_skip_rounds(dataset: str, skip_rounds: list[int], expected: list[str])
 @pytest.mark.parametrize(
     "dataset,expected",
     [
-        ("Lung_adc_demo", [0, 1, 1]),
-        ("MACSimaData_HCA/HumanLiverH35", [0, 1, 1]),
+        ("OMAP10", [1, 1, 1]),
+        ("OMAP23", [1, 1, 1]),
     ],
 )
 def test_cycle_metadata(dataset: str, expected: list[str]) -> None:
@@ -235,7 +233,7 @@ def test_mci_array_reference() -> None:
 
 
 @skip_if_below_python_version()
-@pytest.mark.parametrize("dataset", ["Lung_adc_demo", "MACSimaData_HCA/HumanLiverH35"])
+@pytest.mark.parametrize("dataset", ["OMAP10", "OMAP23"])
 def test_cli_macsima(runner: CliRunner, dataset: str) -> None:
     f = Path("./data") / dataset
     assert f.is_dir()
@@ -283,7 +281,7 @@ def test_collect_map_annotation_values_handles_missing_sa_and_empty_list() -> No
     assert _collect_map_annotation_values(ome1) == {}
 
     # structured_annotations present but with empty map_annotation list
-    ome2 = OME(structured_annotations=StructuredAnnotations(map_annotation=[]))
+    ome2 = OME(structured_annotations=StructuredAnnotations(map_annotations=[]))
     assert _collect_map_annotation_values(ome2) == {}
 
 

From 904abfcbf8f912c3e6ff4618fc3f56367ddec458 Mon Sep 17 00:00:00 2001
From: Martin Helm <martin@bio-ai.org>
Date: Mon, 5 Jan 2026 10:07:40 +0000
Subject: [PATCH 5/5] Emitt warning when duplicate key with different value is
 found in OME metadata

---
 src/spatialdata_io/readers/macsima.py | 21 +++++++++++--
 tests/test_macsima.py                 | 43 ++++++++++++++++++++++++---
 2 files changed, 57 insertions(+), 7 deletions(-)

diff --git a/src/spatialdata_io/readers/macsima.py b/src/spatialdata_io/readers/macsima.py
index 7c04bb2f..fce16a9c 100644
--- a/src/spatialdata_io/readers/macsima.py
+++ b/src/spatialdata_io/readers/macsima.py
@@ -354,6 +354,14 @@ def _collect_map_annotation_values(ome: OME) -> dict[str, Any]:
         for k, v in value.items():
             if k not in merged:
                 merged[k] = v
+            else:
+                # For instances where we have different values for the same repeated key, raise a warning.
+                if v != merged[k]:
+                    warnings.warn(
+                        f"Found different value for {k}: {v}. The parser will only use the first found value, which is {merged[k]}!",
+                        UserWarning,
+                        stacklevel=2,
+                    )
 
     return merged
 
@@ -382,11 +390,16 @@ def _get_software_major_version(version: str) -> int:
     if not parts:
         raise ValueError("Could not extract major software version part from version string.")
 
-    return int(parts[0])
+    major = int(parts[0])
+    logger.debug(f"Found major software version {major}")
+
+    return major
 
 
 def _parse_v0_ome_metadata(ome: OME) -> dict[str, Any]:
-    """Parse ome."""
+    """Parse Legacy Format of OME Metadata (software version 0.x.x)."""
+    logger.debug("Parsing OME metadata expecting version 0 format")
+
     metadata: dict[str, Any] = {
         "name": None,
         "clone": None,
@@ -466,7 +479,9 @@ def _parse_v0_ome_metadata(ome: OME) -> dict[str, Any]:
 
 
 def _parse_v1_ome_metadata(ome: OME) -> dict[str, Any]:
-    """Your existing MACSima-style parser (1.x.x layout)."""
+    """Parse v1 format of OME metadata (software version 1.x.x)."""
+    logger.debug("Parsing OME metadata expecting version 1 format")
+
     metadata: dict[str, Any] = {
         "name": None,
         "clone": None,
diff --git a/tests/test_macsima.py b/tests/test_macsima.py
index c0af099e..d05710b5 100644
--- a/tests/test_macsima.py
+++ b/tests/test_macsima.py
@@ -258,21 +258,56 @@ def test_cli_macsima(runner: CliRunner, dataset: str) -> None:
         _ = read_zarr(output_zarr)
 
 
-def test_collect_map_annotation_values_merges_all_keys_first_wins() -> None:
+def test_collect_map_annotation_values_with_no_duplicate_keys() -> None:
     ome = OME(
         structured_annotations=StructuredAnnotations(
             map_annotations=[
                 MapAnnotation(value={"a": "1", "b": "2"}),
-                MapAnnotation(value={"b": "99", "c": "3"}),
+                MapAnnotation(value={"c": "3"}),
+            ]
+        )
+    )
+
+    result = _collect_map_annotation_values(ome)
+
+    assert result == {"a": "1", "b": "2", "c": "3"}
+
+
+def test_collect_map_annotations_values_with_duplicate_keys_identical_values() -> None:
+    ome = OME(
+        structured_annotations=StructuredAnnotations(
+            map_annotations=[
+                MapAnnotation(value={"a": "1", "b": "2"}),
+                MapAnnotation(value={"b": "2", "c": "3"}),
             ]
         )
     )
 
     result = _collect_map_annotation_values(ome)
+    # Key should only be returned once
+    assert result == {"a": "1", "b": "2", "c": "3"}
+
+
+def test_collect_map_annotations_values_with_duplicate_keys_different_values() -> None:
+    ome = OME(
+        structured_annotations=StructuredAnnotations(
+            map_annotations=[
+                MapAnnotation(value={"a": "1", "b": "2"}),
+                MapAnnotation(value={"b": "99", "c": "3"}),
+            ]
+        )
+    )
+    import re
+
+    # The parser should throw a warning when duplicate keys with different values are found
+    with pytest.warns(
+        UserWarning,
+        match=re.escape("Found different value for b: 99. The parser will only use the first found value, which is 2!"),
+    ):
+        result = _collect_map_annotation_values(ome)
 
+    # The parser should return only the first found value.
     assert result == {"a": "1", "b": "2", "c": "3"}
-    # ensure later duplicate "b" did not overwrite
-    assert result["b"] == "2"
 
 
 def test_collect_map_annotation_values_handles_missing_sa_and_empty_list() -> None: