Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/reference/parsers/empaia.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# ratiopath.parsers.EMPAIAParser

::: ratiopath.parsers.EMPAIAParser

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
[project]
name = "ratiopath"
version = "1.3.0"
version = "1.3.1"
description = "A library for efficient processing and analysis of whole-slide pathology images."
authors = [
{ name = "Matěj Pekár", email = "matejpekar@mail.muni.cz" },
{ name = "Jakub Pekár", email = "jakubpekar@mail.muni.cz" },
{ name = "Adam Kukučka", email = "adamkukucka@mail.muni.cz" },
{ name = "Vít Musil", email = "musil@fi.muni.cz" },
]
readme = "README.md"
license = "MIT"
Expand Down
3 changes: 2 additions & 1 deletion ratiopath/parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from ratiopath.parsers.asap_parser import ASAPParser
from ratiopath.parsers.darwin7_json_parser import Darwin7JSONParser
from ratiopath.parsers.empaia_parser import EMPAIAParser
from ratiopath.parsers.geojson_parser import GeoJSONParser


__all__ = ["ASAPParser", "Darwin7JSONParser", "GeoJSONParser"]
__all__ = ["ASAPParser", "Darwin7JSONParser", "EMPAIAParser", "GeoJSONParser"]
4 changes: 2 additions & 2 deletions ratiopath/parsers/asap_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from typing import TextIO

from shapely.geometry import Point, Polygon
from shapely import Point, Polygon


class ASAPParser:
Expand All @@ -16,7 +16,7 @@ class ASAPParser:
This parser supports both polygon and point annotations.
"""

def __init__(self, file_path: Path | str | TextIO):
def __init__(self, file_path: Path | str | TextIO) -> None:
self.tree = ET.parse(file_path)
self.root = self.tree.getroot()

Expand Down
78 changes: 78 additions & 0 deletions ratiopath/parsers/empaia_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import json
import re
from collections.abc import Iterable
from pathlib import Path
from typing import TextIO

from shapely import Point, Polygon


class EMPAIAParser:
    """Parser for EMPAIA format annotation files.

    EMPAIA uses JSON format for storing annotations. This parser supports
    both polygon and point geometry features from the EMPAIA standardized schema.

    The whole document is loaded once in the constructor and kept in
    ``self.annotations``. The accessors read its top-level ``"items"`` list
    and, for every item, the ``"name"``, ``"type"`` and ``"coordinates"``
    keys; a missing key surfaces as ``KeyError`` while iterating.
    """

    def __init__(self, file_path: Path | str | TextIO) -> None:
        """Initialize the EMPAIA parser.

        Args:
            file_path: Path to the EMPAIA JSON annotation file or a file-like
                object. A file-like object is read as-is and is not closed.
        """
        if isinstance(file_path, (Path, str)):
            # JSON interchange text is UTF-8 (RFC 8259); pin the encoding so
            # parsing does not depend on the platform's locale default.
            with open(file_path, encoding="utf-8") as f:
                self.annotations = json.load(f)
        else:
            self.annotations = json.load(file_path)

    def _get_filtered_annotations(
        self, name: str, annotation_type: str
    ) -> Iterable[dict]:
        """Get annotations that match the name pattern and the annotation type.

        Args:
            name: Regex pattern applied with ``re.match``, i.e. anchored at the
                start of the annotation name only (a prefix match unless the
                pattern itself ends with ``$``).
            annotation_type: Exact value the annotation's ``"type"`` must equal
                (e.g., 'polygon', 'point').

        Yields:
            Dictionary annotation elements that match both filters.
        """
        # Compile once so the pattern is not re-resolved for every item.
        name_regex = re.compile(name)
        for annotation in self.annotations["items"]:
            if (
                name_regex.match(annotation["name"])
                and annotation["type"] == annotation_type
            ):
                yield annotation

    def get_polygons(self, name: str = ".*") -> Iterable[Polygon]:
        """Get polygon annotations that match the given name pattern.

        Args:
            name: Regex pattern to match annotation names. Default is ".*" (all).

        Yields:
            Polygon representations of the matching annotations, built from
            the first two entries of every coordinate, converted to float.
        """
        for annotation in self._get_filtered_annotations(name, "polygon"):
            yield Polygon(
                [
                    (float(coordinate[0]), float(coordinate[1]))
                    for coordinate in annotation["coordinates"]
                ]
            )

    def get_points(self, name: str = ".*") -> Iterable[Point]:
        """Get point annotations that match the given name pattern.

        Args:
            name: Regex pattern to match annotation names. Default is ".*" (all).

        Yields:
            Point representations of the matching annotations, built from the
            first two entries of ``"coordinates"``, converted to float.
        """
        for annotation in self._get_filtered_annotations(name, "point"):
            x, y = annotation["coordinates"][0], annotation["coordinates"][1]
            yield Point(float(x), float(y))
79 changes: 78 additions & 1 deletion tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pandas as pd
import pytest

from ratiopath.parsers import ASAPParser, Darwin7JSONParser, GeoJSONParser
from ratiopath.parsers import ASAPParser, Darwin7JSONParser, EMPAIAParser, GeoJSONParser


class TestASAPParser:
Expand Down Expand Up @@ -307,6 +307,83 @@ def test_extract_nested(self):
)


class TestEMPAIAParser:
    """Exercise EMPAIAParser against a small in-memory EMPAIA document."""

    @pytest.fixture
    def empaia_json_content(self):
        """Return an EMPAIA-style payload holding one polygon and one point."""
        polygon_item = {
            "name": "Annotation 1",
            "type": "polygon",
            "coordinates": [[100.0, 200.0], [150.0, 200.0], [125.0, 250.0]],
        }
        point_item = {
            "name": "Annotation 2",
            "type": "point",
            "coordinates": [300.0, 400.0],
        }
        return {"items_count": 2, "items": [polygon_item, point_item]}

    def test_get_polygons(self, empaia_json_content):
        """The single polygon item is returned with a closed exterior ring."""
        stream = io.StringIO(json.dumps(empaia_json_content))
        found = list(EMPAIAParser(stream).get_polygons())

        assert len(found) == 1
        shape = found[0]
        # Duck-type check: the result must expose a polygon exterior.
        assert hasattr(shape, "exterior")
        expected_ring = [
            (100.0, 200.0),
            (150.0, 200.0),
            (125.0, 250.0),
            (100.0, 200.0),
        ]
        assert list(shape.exterior.coords) == expected_ring

    def test_get_points(self, empaia_json_content):
        """The single point item is returned with its x/y coordinates."""
        stream = io.StringIO(json.dumps(empaia_json_content))
        found = list(EMPAIAParser(stream).get_points())

        assert len(found) == 1
        location = found[0]
        # Duck-type check: the result must expose point coordinates.
        assert hasattr(location, "x")
        assert hasattr(location, "y")
        assert location.x == 300.0
        assert location.y == 400.0

    def test_get_polygons_with_filters(self, empaia_json_content):
        """The name pattern selects or excludes polygon items."""
        parser = EMPAIAParser(io.StringIO(json.dumps(empaia_json_content)))

        matching = list(parser.get_polygons(name="Annotation 1"))
        assert len(matching) == 1

        missing = list(parser.get_polygons(name="Nonexistent"))
        assert len(missing) == 0

    def test_get_points_with_filters(self, empaia_json_content):
        """The name pattern selects or excludes point items."""
        parser = EMPAIAParser(io.StringIO(json.dumps(empaia_json_content)))

        matching = list(parser.get_points(name="Annotation 2"))
        assert len(matching) == 1

        missing = list(parser.get_points(name="Nonexistent"))
        assert len(missing) == 0


def test_safe_to_dict():
"""Test the safe_to_dict utility function."""
from ratiopath.parsers.geojson_parser import safe_to_dict
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading