Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions sectra_dicom_to_imagescope.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""Convert DICOM annotations to Aperio ImageScope XML.

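This script reads annotation data from a DICOM file and
writes a minimal ImageScope-compatible XML file. The points of every
graphic object are written as the vertices of a single region; no
shape-specific conversion is applied (circles, for example, are not
expanded into outlines).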
"""

import argparse
import os
import xml.etree.ElementTree as ET
from typing import Iterable, List, Tuple

import pydicom
from pydicom.dataset import Dataset


def _parse_float(val: object) -> float:
"""Safely parse a value as float."""
try:
return float(val)
except Exception:
return 0.0


def _extract_coordinates(data: object) -> List[Tuple[float, float]]:
    """Extract (x, y) pairs from a DICOM GraphicData item.

    Args:
        data: Either an iterable of numbers (list, tuple or any other
            sequence-like container) or a backslash-separated string of
            numbers. ``None`` is accepted and produces no coordinates.

    Returns:
        The values grouped into consecutive ``(x, y)`` tuples. A trailing
        value without a partner is dropped, and entries that cannot be
        parsed are replaced by ``0.0`` (the ``_parse_float`` fallback).
    """
    if data is None:
        return []
    if isinstance(data, (str, bytes, bytearray)):
        # Text-like input keeps the original backslash-separated handling.
        parts = str(data).split("\\")
    else:
        try:
            # Accept any iterable rather than only list/tuple. pydicom
            # hands back multi-valued elements as its own sequence type
            # (MultiValue), which is not a list subclass in current
            # releases; stringifying it yields "[1.0, 2.0, ...]" with no
            # backslashes, so every coordinate used to be lost.
            parts = list(data)
        except TypeError:
            # Non-iterable scalar: fall back to its text representation.
            parts = str(data).split("\\")
    # Pair each even-indexed value with the following odd-indexed one; zip
    # stops at the shorter slice, so an unpaired trailing value is ignored.
    return [
        (_parse_float(x), _parse_float(y))
        for x, y in zip(parts[0::2], parts[1::2])
    ]


def _extract_graphics(seq: Dataset) -> Iterable[List[Tuple[float, float]]]:
    """Generate one list of (x, y) points per graphic object in ``seq``.

    Objects whose ``GraphicData`` is absent or produces no coordinate pairs
    are skipped. Nothing is yielded when ``seq`` has no
    ``GraphicObjectSequence`` attribute.
    """
    for graphic in getattr(seq, "GraphicObjectSequence", ()):
        points = _extract_coordinates(getattr(graphic, "GraphicData", None))
        if not points:
            continue
        yield points


def dicom_to_imagescope_xml(ds: Dataset) -> ET.Element:
    """Build an ImageScope ``Annotations`` tree from a DICOM dataset.

    Each item of ``ds.GraphicAnnotationSequence`` that yields at least one
    graphic becomes an ``Annotation`` element, and each graphic becomes a
    ``Region`` holding one ``Vertex`` per point. Annotation ids count only
    the annotations actually emitted; region ids run continuously across
    the whole document instead of restarting per annotation.
    """
    annotations = ET.Element("Annotations")
    annotation_count = 0
    region_count = 0
    for item in getattr(ds, "GraphicAnnotationSequence", []):
        shapes = list(_extract_graphics(item))
        if shapes:
            annotation_count += 1
            annotation = ET.SubElement(
                annotations,
                "Annotation",
                Id=str(annotation_count),
                Name=f"Annotation {annotation_count}",
                ReadOnly="0",
            )
            region_parent = ET.SubElement(annotation, "Regions")
            for shape in shapes:
                region_count += 1
                region = ET.SubElement(
                    region_parent,
                    "Region",
                    Id=str(region_count),
                    Type="0",
                    LineColor="65280",
                )
                vertex_parent = ET.SubElement(region, "Vertices")
                for px, py in shape:
                    ET.SubElement(vertex_parent, "Vertex", X=str(px), Y=str(py))
    return annotations


def dicom_file_to_imagescope(dicom_path: str, output: str | None = None) -> None:
    """Read ``dicom_path`` and write its annotations as ImageScope XML.

    When ``output`` is ``None`` the XML is written to the input path with
    its extension replaced by ``_aperio.xml``.
    """
    dataset = pydicom.dcmread(dicom_path)
    payload = ET.tostring(dicom_to_imagescope_xml(dataset), encoding="utf-8")
    target = output
    if target is None:
        stem, _extension = os.path.splitext(dicom_path)
        target = stem + "_aperio.xml"
    # The payload is already encoded, so the file is opened in binary mode.
    with open(target, "wb") as handle:
        handle.write(payload)


def main() -> None:
    """Parse the command line and convert the given DICOM file."""
    cli = argparse.ArgumentParser(
        description="Convert DICOM annotations to Aperio ImageScope XML"
    )
    cli.add_argument("-i", "--input", required=True, help="Input DICOM file")
    cli.add_argument("-o", "--output", help="Output XML file")
    options = cli.parse_args()
    dicom_file_to_imagescope(options.input, options.output)


# Run the converter only when executed as a script, not on import.
if __name__ == "__main__":
    main()
65 changes: 65 additions & 0 deletions sectra_dicom_to_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env python3
"""Convert a DICOM file to a simple XML representation.

The script reads a `.dcm` file using ``pydicom`` and writes the contents to an
XML file. Nested sequences are preserved so the resulting XML mirrors the
structure of the DICOM dataset.

Usage::

python sectra_dicom_to_xml.py -i path/to/file.dcm -o output.xml

If no output file is given the name of the input file is used with the
``.xml`` extension.
"""

import argparse
import os
import xml.etree.ElementTree as ET
from xml.dom import minidom

import pydicom
from pydicom.dataset import Dataset


def _add_dataset_to_element(dataset: Dataset, parent: ET.Element) -> None:
    """Append one ``Element`` child to ``parent`` per data element.

    Every child carries ``tag``, ``vr`` and ``name`` attributes; ``name``
    falls back to the tag text when the element has no keyword. Elements
    with VR ``SQ`` receive one ``Item`` child per contained dataset, filled
    in recursively. All other elements store ``str(value)`` as their text.
    """
    for data_elem in dataset:
        tag_text = "({:04X},{:04X})".format(
            data_elem.tag.group, data_elem.tag.element
        )
        name = data_elem.keyword or tag_text
        node = ET.SubElement(
            parent,
            "Element",
            {"tag": tag_text, "vr": data_elem.VR, "name": name},
        )
        if data_elem.VR != "SQ":
            node.text = str(data_elem.value)
            continue
        for nested in data_elem.value:
            _add_dataset_to_element(nested, ET.SubElement(node, "Item"))


def dataset_to_xml(dataset: Dataset) -> ET.Element:
    """Return a ``DicomDataset`` element populated from ``dataset``."""
    document = ET.Element("DicomDataset")
    _add_dataset_to_element(dataset, document)
    return document


def dicom_to_xml(dicom_file: str, output_file: str | None = None) -> None:
    """Read ``dicom_file`` and write it out as pretty-printed XML.

    When ``output_file`` is ``None`` the XML is written to the input path
    with its extension replaced by ``.xml``.
    """
    dataset = pydicom.dcmread(dicom_file)
    raw_xml = ET.tostring(dataset_to_xml(dataset))
    # Round-trip through minidom purely to obtain indented output.
    pretty_xml = minidom.parseString(raw_xml).toprettyxml(indent=" ")
    destination = output_file
    if destination is None:
        stem, _extension = os.path.splitext(dicom_file)
        destination = stem + ".xml"
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(pretty_xml)


def main() -> None:
    """Parse the command line and convert the given DICOM file to XML."""
    cli = argparse.ArgumentParser(description="Convert DICOM to XML")
    cli.add_argument("-i", "--input", required=True, help="Input DICOM file")
    cli.add_argument("-o", "--output", help="Output XML file")
    options = cli.parse_args()
    dicom_to_xml(options.input, options.output)


# Run the converter only when executed as a script, not on import.
if __name__ == "__main__":
    main()