@dataclass(frozen=True)
class SourceSpec:
    """A named folder of label files that mapping rules can refer to."""

    name: str  # identifier used as ``<source>`` in mapping rules
    folder: Path  # directory that holds the label files of this source


@dataclass(frozen=True)
class MappingRule:
    """Map a set of labels from one source onto a single output label."""

    source_name: str  # name of the SourceSpec the labels are read from
    source_labels: tuple[int, ...]  # label values selected in that source
    target_label: int  # value written to the output for the selected voxels


def _parse_sources(source_args: list[str]) -> list[SourceSpec]:
    """Parse ``name=/path/to/labels`` strings into source specifications.

    Args:
        source_args: Raw ``name=folder`` strings, one per source.

    Returns:
        One ``SourceSpec`` per argument, in input order, with the folder
        expanded (``~``) and resolved to an absolute path.

    Raises:
        ValueError: If an item has no ``=``, an empty name or folder, a name
            that was already used, or a folder that is not an existing
            directory.
    """
    sources: list[SourceSpec] = []
    seen_names: set[str] = set()
    for item in source_args:
        name, sep, folder = item.partition("=")
        if not sep:
            raise ValueError(f"Invalid source format: {item}. Expected name=/path/to/labels")
        name = name.strip()
        folder = folder.strip()
        if not name:
            raise ValueError(f"Invalid source name in: {item}")
        if not folder:
            # Path("") resolves to the current working directory, which would
            # silently turn a typo such as "A=" into a valid source.
            raise ValueError(f"Missing source folder in: {item}")
        if name in seen_names:
            raise ValueError(f"Duplicate source name: {name}")
        folder_path = Path(folder).expanduser().resolve()
        if not folder_path.is_dir():
            raise ValueError(f"Source folder not found: {folder_path}")
        sources.append(SourceSpec(name=name, folder=folder_path))
        seen_names.add(name)
    return sources


def _parse_mapping_rule(rule: str) -> MappingRule:
    """Parse one ``<source>:<labels>-><target>`` rule, e.g. ``A:1,2->3``.

    Args:
        rule: Rule text. ``<labels>`` is a comma-separated list of integers,
            ``<target>`` a single integer. Whitespace around the parts is
            ignored and empty entries in the label list are skipped.

    Returns:
        The parsed ``MappingRule``.

    Raises:
        ValueError: If the rule does not follow the expected layout or a
            label is not an integer.
    """
    expected = "<source>:<labels>-><target>"

    # Split on the first "->" and then on the first ":" of the left part, so a
    # rule with the separators in the wrong order is reported as malformed
    # instead of failing with an opaque tuple-unpacking error.
    left, arrow, target_str = rule.partition("->")
    source_name, colon, labels_str = left.partition(":")
    source_name = source_name.strip()
    labels_str = labels_str.strip()
    target_str = target_str.strip()
    if not (arrow and colon and source_name and labels_str and target_str):
        raise ValueError(f"Invalid mapping rule: {rule}. Expected {expected}")

    try:
        target_label = int(target_str)
    except ValueError as exc:
        raise ValueError(f"Invalid target label in rule: {rule}") from exc

    label_parts = [part.strip() for part in labels_str.split(",") if part.strip()]
    if not label_parts:
        raise ValueError(f"No source labels specified in rule: {rule}")

    source_labels: list[int] = []
    for part in label_parts:
        try:
            source_labels.append(int(part))
        except ValueError as exc:
            raise ValueError(f"Invalid source label '{part}' in rule: {rule}") from exc

    return MappingRule(
        source_name=source_name,
        source_labels=tuple(source_labels),
        target_label=target_label,
    )
class CombineProcessor(BaseITKProcessor):
    """Combine label maps from several source folders into one map per file.

    Files are matched across sources by file name; only ``*.mha`` files that
    are present in every source folder are processed. For each file the
    mapping rules are applied in order and a voxel keeps the first non-zero
    label assigned to it.
    """

    # The combined volume is stored as uint8, so target labels must fit in it.
    _MAX_TARGET_LABEL = int(np.iinfo(np.uint8).max)

    def __init__(
        self,
        sources: list[SourceSpec],
        dest_folder: Path,
        mapping_rules: list[MappingRule],
        mp: bool = False,
        workers: int | None = None,
    ):
        """Store the configuration and validate the mapping rules.

        Args:
            sources: Label sources; their order defines the order of the
                paths handed to ``process_one``.
            dest_folder: Folder the combined label files are written to.
            mapping_rules: Ordered rules; earlier rules have priority.
            mp: Forwarded to the base processor.
            workers: Forwarded to the base processor.

        Raises:
            ValueError: If a rule references a source that is not in
                ``sources`` or its target label does not fit into ``uint8``.
        """
        super().__init__(task_description="Combining labels", mp=mp, workers=workers)
        self.sources = sources
        self.dest_folder = dest_folder
        self.mapping_rules = mapping_rules
        self.source_index = {src.name: idx for idx, src in enumerate(self.sources)}
        self._validate_rules()

    def _validate_rules(self) -> None:
        """Reject rules that could only fail (or wrap around) during processing."""
        for rule in self.mapping_rules:
            if rule.source_name not in self.source_index:
                raise ValueError(f"Mapping rule references unknown source: {rule.source_name}")
            if not 0 <= rule.target_label <= self._MAX_TARGET_LABEL:
                raise ValueError(
                    f"Target label {rule.target_label} of source {rule.source_name} "
                    f"is outside the uint8 range 0..{self._MAX_TARGET_LABEL}"
                )

    def get_items_to_process(self) -> list[tuple[str, list[str]]]:
        """Return ``(file name, [path per source])`` for files present in all sources.

        Paths are listed in the order of ``self.sources`` and items are sorted
        by file name. The result is empty when the sources share no file.
        """
        files_per_source = [
            {path.name: str(path) for path in src.folder.glob("*.mha")}
            for src in self.sources
        ]
        if not files_per_source:
            return []
        common_names = set(files_per_source[0]).intersection(*files_per_source[1:])
        return [
            (name, [files[name] for files in files_per_source])
            for name in sorted(common_names)
        ]

    def process_one(self, args: tuple[str, list[str]]) -> SeriesMetadata | None:
        """Combine the label maps of one file and write the result.

        Args:
            args: ``(file name, [path per source])`` as produced by
                ``get_items_to_process``.

        Returns:
            Metadata built from the written image via
            ``SeriesMetadata.from_sitk_image``.

        Raises:
            ValueError: If the sources disagree in image size or spacing.
        """
        name, paths = args
        images = [sitk.ReadImage(path) for path in paths]
        reference = images[0]
        base_size = reference.GetSize()
        base_spacing = reference.GetSpacing()

        for path, image in zip(paths[1:], images[1:]):
            if image.GetSize() != base_size:
                raise ValueError(f"Size mismatch for {name}: {paths[0]} vs {path}")
            if not np.allclose(image.GetSpacing(), base_spacing):
                raise ValueError(f"Spacing mismatch for {name}: {paths[0]} vs {path}")

        arrays = [sitk.GetArrayFromImage(image) for image in images]
        output = np.zeros(arrays[0].shape, dtype=np.uint8)

        for rule in self.mapping_rules:
            source_array = arrays[self.source_index[rule.source_name]]
            # Only voxels that are still 0 are filled, so earlier rules win
            # wherever several rules match the same voxel.
            mask = np.isin(source_array, rule.source_labels) & (output == 0)
            output[mask] = rule.target_label

        out_image = sitk.GetImageFromArray(output)
        # Geometry is taken from the first source.
        out_image.CopyInformation(reference)

        output_path = self.dest_folder / name
        output_path.parent.mkdir(parents=True, exist_ok=True)
        sitk.WriteImage(out_image, str(output_path), useCompression=True)

        return SeriesMetadata.from_sitk_image(out_image, name)
and merging labels " + "according to ordered mapping rules." + ), + ) + parser.add_argument( + "-i", "--source", + action="append", + required=True, + help="Label source in form name=/path/to/label_folder (repeatable)", + ) + parser.add_argument( + "--map", + dest="mapping_rules", + action="append", + required=True, + help="Mapping rule in form `:->`, e.g., `A:1,2->3` (repeatable)", + ) + parser.add_argument( + "-o", "dest_folder", + type=Path, + help="Destination folder for combined labels", + ) + parser.add_argument("--mp", action="store_true", help="Enable multiprocessing") + parser.add_argument("--workers", type=int, default=None, help="Number of worker processes") + return parser.parse_args() + + +def main(): + args = parse_args() + + sources = _parse_sources(args.source) + rules = [_parse_mapping_rule(rule) for rule in args.mapping_rules] + + source_names = {s.name for s in sources} + for rule in rules: + if rule.source_name not in source_names: + raise ValueError(f"Mapping rule references unknown source: {rule.source_name}") + + if not rules: + raise ValueError("At least one mapping rule is required.") + + dest_folder = args.dest_folder.expanduser().resolve() + os.makedirs(dest_folder, exist_ok=True) + + print("Combining label sources:") + for src in sources: + print(f" - {src.name}: {src.folder}") + print("Mapping rules (ordered, earlier has higher priority):") + for rule in rules: + print(f" - {rule.source_name}: {list(rule.source_labels)} -> {rule.target_label}") + print(f"Output: {dest_folder}") + print(f"Multiprocessing: {args.mp} | Workers: {args.workers}") + + processor = CombineProcessor( + sources=sources, + dest_folder=dest_folder, + mapping_rules=rules, + mp=args.mp, + workers=args.workers, + ) + + processor.process("Combining labels") + processor.save_meta(dest_folder / "meta.json") + + print("Combine completed.") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 33719bf..f315a27 100755 --- 
@pytest.fixture
def temp_dir():
    """Yield a scratch directory that is removed when the test finishes."""
    with tempfile.TemporaryDirectory() as scratch:
        yield scratch


def _write_label(folder: str, name: str, array: np.ndarray) -> str:
    """Save *array* as a compressed uint8 image with unit spacing; return its path."""
    target = os.path.join(folder, name)
    label_image = sitk.GetImageFromArray(array.astype(np.uint8))
    label_image.SetSpacing((1.0, 1.0, 1.0))
    sitk.WriteImage(label_image, target, True)
    return target


def _make_array(shape=(3, 3, 3)) -> np.ndarray:
    """Return an all-background uint8 volume."""
    return np.zeros(shape, dtype=np.uint8)


def _slab_volume(slabs: dict[int, int]) -> np.ndarray:
    """Return a volume whose slice ``index`` along axis 0 is filled with ``label``."""
    volume = _make_array()
    for index, label in slabs.items():
        volume[index, :, :] = label
    return volume


def _rule(source: str, labels: tuple[int, ...], target: int) -> MappingRule:
    """Shorthand for building a mapping rule."""
    return MappingRule(source_name=source, source_labels=labels, target_label=target)


def _combine_case(temp_dir: str, slabs_by_source: dict[str, dict[int, int]], rules) -> np.ndarray:
    """Write one ``case.mha`` per source, run the processor, return the combined array."""
    folders = {name: os.path.join(temp_dir, name) for name in slabs_by_source}
    out_dir = os.path.join(temp_dir, "out")
    for folder in folders.values():
        os.makedirs(folder)
    os.makedirs(out_dir)

    for name, slabs in slabs_by_source.items():
        _write_label(folders[name], "case.mha", _slab_volume(slabs))

    sources = [SourceSpec(name=name, folder=Path(folder)) for name, folder in folders.items()]
    processor = CombineProcessor(sources=sources, dest_folder=Path(out_dir), mapping_rules=rules)
    processor.process("Combine")

    return sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(out_dir, "case.mha")))


def _assert_slabs(output: np.ndarray, expected: tuple[int, int, int]) -> None:
    """Check that every slice along axis 0 holds exactly the expected label."""
    for index, label in enumerate(expected):
        assert np.all(output[index, :, :] == label)


@pytest.mark.itk_process
class TestCombineProcessor:
    def test_two_sources_simple_mapping(self, temp_dir):
        output = _combine_case(
            temp_dir,
            {"A": {0: 1}, "B": {1: 1}},
            [_rule("A", (1,), 1), _rule("B", (1,), 2)],
        )
        _assert_slabs(output, (1, 2, 0))

    def test_two_sources_complex_mapping(self, temp_dir):
        output = _combine_case(
            temp_dir,
            {"A": {0: 1, 1: 2}, "B": {0: 1, 2: 2}},
            [
                _rule("A", (1,), 1),
                _rule("A", (2,), 4),
                _rule("B", (1,), 2),
                _rule("B", (2,), 3),
            ],
        )
        _assert_slabs(output, (1, 4, 3))

    def test_three_sources_mapping(self, temp_dir):
        output = _combine_case(
            temp_dir,
            {"A": {0: 1}, "B": {1: 1}, "C": {2: 1}},
            [_rule("A", (1,), 1), _rule("B", (1,), 2), _rule("C", (1,), 3)],
        )
        _assert_slabs(output, (1, 2, 3))

    def test_four_sources_priority(self, temp_dir):
        # B is listed before A and D before C, so their targets win on overlap.
        output = _combine_case(
            temp_dir,
            {"A": {0: 1}, "B": {0: 1}, "C": {1: 2}, "D": {1: 2}},
            [
                _rule("B", (1,), 5),
                _rule("A", (1,), 1),
                _rule("D", (2,), 6),
                _rule("C", (2,), 2),
            ],
        )
        _assert_slabs(output, (5, 6, 0))
dest_folder=Path(out_dir), mapping_rules=rules) + processor.process("Combine") + + output = sitk.GetArrayFromImage(sitk.ReadImage(os.path.join(out_dir, "case.mha"))) + assert np.all(output[0, :, :] == 5) + assert np.all(output[1, :, :] == 6) + assert np.all(output[2, :, :] == 0) From 5f7a8b8dd19b2d4ec4c95157b127a46326572772 Mon Sep 17 00:00:00 2001 From: mgam <312065559@qq.com> Date: Sat, 17 Jan 2026 18:44:15 +0800 Subject: [PATCH 2/3] [doc] itk_combine --- docs/index.md | 1 + docs/itk_combine.md | 57 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 docs/itk_combine.md diff --git a/docs/index.md b/docs/index.md index 26819f7..15caa70 100644 --- a/docs/index.md +++ b/docs/index.md @@ -21,6 +21,7 @@ Welcome to the ITKIT documentation! ITKIT is a user-friendly toolkit built on `S - **[itk_patch](itk_patch.md)** - Patch extraction - **[itk_aug](itk_aug.md)** - Data augmentation - **[itk_extract](itk_extract.md)** - Label extraction +- **[itk_combine](itk_combine.md)** - Label merging and intersection - **[itk_convert](itk_convert.md)** - Format conversion ### Advanced Topics diff --git a/docs/itk_combine.md b/docs/itk_combine.md new file mode 100644 index 0000000..bdf42f1 --- /dev/null +++ b/docs/itk_combine.md @@ -0,0 +1,57 @@ +# itk_combine + +Combine multiple label folders by intersecting filenames and merging labels according to ordered mapping rules. This tool is useful when you have multiple specialized segmentations for the same cases and want to create a unified label map. + +## Usage + +```bash +itk_combine --source <name>=<folder> --map "<source>:<labels>-><target>" -o <dest_folder> [options] +``` + +## Parameters + +- `--source`: Specify a label source in the format `name=/path/to/folder`. Can be specified multiple times for different sources. +- `--map`: Specify a mapping rule in the format `<source>:<labels>-><target>`. Quote the rule in the shell so that `>` is not interpreted as an output redirection. + - `<source>` must match one of the names defined in `--source`. + - `<labels>` can be a single integer or a comma-separated list of integers. + - Multiple `--map` rules are allowed. 
**Priority is determined by order**: the first rule that matches a voxel determines its value in the output. +- `-o`, `--dest-folder`: Destination folder for the combined label files. +- `--mp`: Enable multiprocessing. +- `--workers`: Number of worker processes (defaults to half of CPU cores). + +## Mapping Priority and Logic + +1. **Intersection**: Only `.mha` files that exist in **all** specified source folders (with the same file name) will be processed. + +2. **Validation**: For each file, the tool ensures that the image size and spacing are identical across all sources. If a mismatch is found, the process will fail. + +3. **Merging**: + + - The output label map is initialized to 0 (Background). + - Rules are applied sequentially in the order they appear in the command line. + - Once a voxel is assigned a non-zero value, it will not be overwritten by subsequent rules. This allows for clear priority management between overlapping sources. + +## Example + +Suppose you have: + +- `Source A`: Organ segmentations (1: Liver, 2: Spleen) +- `Source B`: Tumor segmentations (1: Liver Tumor) + +To combine them into a single map where Background=0, Liver=1, Spleen=2, and Liver Tumor=3 (with tumor taking priority over the organ label): + +```bash +itk_combine \ + --source organs=/path/to/organs \ + --source tumors=/path/to/tumors \ + --map "tumors:1->3" \ + --map "organs:1->1" \ + --map "organs:2->2" \ + -o /path/to/combined_output \ + --mp +``` + +## Output + +- Combined label maps (normalized to `.mha` format and `uint8` data type). +- `meta.json`: Standard ITKIT metadata file containing size, spacing, origin, and unique classes for each combined file. 
From f95dba0e43f75753235d3154e4538bf765365ec0 Mon Sep 17 00:00:00 2001 From: mgam <312065559@qq.com> Date: Sat, 17 Jan 2026 20:08:32 +0800 Subject: [PATCH 3/3] [fix] fix an argument bug --- itkit/process/itk_combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/itkit/process/itk_combine.py b/itkit/process/itk_combine.py index e743e5a..f1fb2af 100644 --- a/itkit/process/itk_combine.py +++ b/itkit/process/itk_combine.py @@ -161,7 +161,7 @@ def parse_args(): help="Mapping rule in form `:->`, e.g., `A:1,2->3` (repeatable)", ) parser.add_argument( - "-o", "dest_folder", + "-o", "--dest-folder", type=Path, help="Destination folder for combined labels", )