From ae8892470b63b91aa8f963ce0f0f88d80c7d851a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 7 Jan 2026 14:11:51 +0000 Subject: [PATCH 1/8] Initial plan From 82514f8526cf610ea4ab66ba7eb797fd55a1a071 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 7 Jan 2026 14:16:44 +0000 Subject: [PATCH 2/8] Add load_and_merge method to MetadataManager and update processors to preserve metadata Co-authored-by: MGAMZ <57469116+MGAMZ@users.noreply.github.com> --- itkit/process/base_processor.py | 12 ++++++++++++ itkit/process/metadata_models.py | 19 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/itkit/process/base_processor.py b/itkit/process/base_processor.py index 3eed9c2..334020e 100644 --- a/itkit/process/base_processor.py +++ b/itkit/process/base_processor.py @@ -232,6 +232,12 @@ def source_meta_path(self) -> Path | None: return Path(self.source_folder) / "meta.json" def process(self, desc: str | None = None): + # Load existing destination metadata to preserve metadata for skipped files + if self.dest_folder is not None: + dest_meta_path = Path(self.dest_folder) / "meta.json" + if dest_meta_path.exists(): + self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) + super().process(desc) if self.dest_folder is not None: os.makedirs(self.dest_folder, exist_ok=True) @@ -365,6 +371,12 @@ def source_meta_path(self) -> Path: return Path(self.source_folder) / "meta.json" def process(self, desc: str | None = None): + # Load existing destination metadata to preserve metadata for skipped files + if self.dest_folder is not None: + dest_meta_path = Path(self.dest_folder) / "meta.json" + if dest_meta_path.exists(): + self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) + super().process(desc) if self.dest_folder is not None: self.save_meta(Path(self.dest_folder) / "meta.json") diff --git 
a/itkit/process/metadata_models.py b/itkit/process/metadata_models.py index 3219efb..2bbfab7 100644 --- a/itkit/process/metadata_models.py +++ b/itkit/process/metadata_models.py @@ -116,6 +116,25 @@ def update(self, image_meta:SeriesMetadata, allow_and_overwrite_existed:bool=Tru else: pass + def load_and_merge(self, meta_file_path: str | Path, allow_and_overwrite_existed: bool = False): + """Load metadata from a file and merge it with existing metadata. + + Args: + meta_file_path: Path to the metadata file to load + allow_and_overwrite_existed: If True, overwrite existing metadata with loaded data. + If False (default), keep existing metadata when conflicts occur. + """ + if not Path(meta_file_path).exists(): + return + + try: + data = json.loads(Path(meta_file_path).read_text()) + for name, meta_dict in data.items(): + meta = SeriesMetadata.model_validate({"name": name, **meta_dict}) + self.update(meta, allow_and_overwrite_existed=allow_and_overwrite_existed) + except (json.JSONDecodeError, Exception) as e: + print(f"Warning: Could not load metadata from {meta_file_path}: {e}") + def save(self, path: str|Path): data = { name: meta.model_dump(mode="json", exclude={'name'}) From d84608e60a6a47078ffc0d484c85b359e1197fc9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 7 Jan 2026 14:21:56 +0000 Subject: [PATCH 3/8] Add metadata generation for existing files and tests for metadata preservation Co-authored-by: MGAMZ <57469116+MGAMZ@users.noreply.github.com> --- itkit/process/base_processor.py | 87 ++++++++++++++++++++++++ tests/test_itk_resample.py | 117 ++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) diff --git a/itkit/process/base_processor.py b/itkit/process/base_processor.py index 334020e..f66da77 100644 --- a/itkit/process/base_processor.py +++ b/itkit/process/base_processor.py @@ -54,6 +54,7 @@ from pathlib import Path from typing import Any +import SimpleITK as sitk from tqdm 
import tqdm from .metadata_models import MetadataManager, SeriesMetadata @@ -175,6 +176,17 @@ def process_one(self, args) -> list[SeriesMetadata] | SeriesMetadata | Any | Non Return `None` if no metadata should be recorded for this item. """ + def generate_metadata_for_existing_files(self): + """ + Generate metadata for files that already exist in the destination folder. + + This method should be called before processing to ensure that metadata + is generated for files that will be skipped during processing. + + Subclasses should override this if they skip existing files. + """ + pass + def _normalize_filename(self, filepath: str) -> str: base = os.path.splitext(filepath)[0] # Handle double extensions like .nii.gz @@ -231,6 +243,31 @@ def get_items_to_process(self) -> list[str]: def source_meta_path(self) -> Path | None: return Path(self.source_folder) / "meta.json" + def generate_metadata_for_existing_files(self): + """Generate metadata for files that already exist in destination folder.""" + if self.dest_folder is None or not os.path.exists(self.dest_folder): + return + + # Find all existing files in destination + existing_files = self.find_files_flat(self.dest_folder) + + # For each existing file, check if it will be skipped during processing + source_files_set = {os.path.basename(f) for f in self.find_files_flat(self.source_folder)} + + for dest_file in existing_files: + dest_basename = os.path.basename(dest_file) + # If this file would be skipped (because it exists and source has it) + if dest_basename in source_files_set: + # Check if metadata already exists + if dest_basename not in self.meta_manager.meta: + # Generate metadata from the existing file + try: + img = sitk.ReadImage(dest_file) + meta = SeriesMetadata.from_sitk_image(img, dest_basename) + self.meta_manager.update(meta, allow_and_overwrite_existed=False) + except Exception as e: + print(f"Warning: Could not generate metadata for {dest_file}: {e}") + def process(self, desc: str | None = None): # 
Load existing destination metadata to preserve metadata for skipped files if self.dest_folder is not None: @@ -238,6 +275,9 @@ def process(self, desc: str | None = None): if dest_meta_path.exists(): self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) + # Generate metadata for files that already exist and will be skipped + self.generate_metadata_for_existing_files() + super().process(desc) if self.dest_folder is not None: os.makedirs(self.dest_folder, exist_ok=True) @@ -370,6 +410,50 @@ def start_from_arg(cls): def source_meta_path(self) -> Path: return Path(self.source_folder) / "meta.json" + def generate_metadata_for_existing_files(self): + """Generate metadata for files that already exist in destination folder.""" + if self.dest_folder is None: + return + + # Check image and label folders + for subfolder in ['image', 'label']: + dest_subfolder = os.path.join(self.dest_folder, subfolder) + if not os.path.exists(dest_subfolder): + continue + + # Find all existing files + existing_files = [] + for f in os.listdir(dest_subfolder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + existing_files.append(os.path.join(dest_subfolder, f)) + + # Get source files for comparison + source_subfolder = os.path.join(self.source_folder, subfolder) + if not os.path.exists(source_subfolder): + continue + + source_files_set = set() + for f in os.listdir(source_subfolder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + source_files_set.add(self._normalize_filename(f)) + + # Generate metadata for files that exist and would be skipped + for dest_file in existing_files: + dest_basename = os.path.basename(dest_file) + dest_normalized = self._normalize_filename(dest_basename) + + # If this file would be skipped (exists and source has it) + if dest_normalized in source_files_set: + # Check if metadata already exists + if dest_basename not in self.meta_manager.meta: + # Generate metadata from the existing file + try: + img = sitk.ReadImage(dest_file) + meta = 
SeriesMetadata.from_sitk_image(img, dest_basename) + self.meta_manager.update(meta, allow_and_overwrite_existed=False) + except Exception as e: + print(f"Warning: Could not generate metadata for {dest_file}: {e}") + def process(self, desc: str | None = None): # Load existing destination metadata to preserve metadata for skipped files if self.dest_folder is not None: @@ -377,6 +461,9 @@ def process(self, desc: str | None = None): if dest_meta_path.exists(): self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) + # Generate metadata for files that already exist and will be skipped + self.generate_metadata_for_existing_files() + super().process(desc) if self.dest_folder is not None: self.save_meta(Path(self.dest_folder) / "meta.json") diff --git a/tests/test_itk_resample.py b/tests/test_itk_resample.py index d64f8c5..80ba183 100644 --- a/tests/test_itk_resample.py +++ b/tests/test_itk_resample.py @@ -200,3 +200,120 @@ def test_full_io_processing_label_field(self, shared_temp_data, tmp_path): for name, expected_meta in metadata_manager.meta.items(): assert name in loaded_manager.meta assert loaded_manager.meta[name] == expected_meta + + def test_metadata_preservation_on_skip(self, shared_temp_data, tmp_path): + """Test that metadata is preserved when files are skipped due to already existing. + + This tests the fix for the issue where skipped files disappear from meta.json. 
+ """ + dest_folder = tmp_path / "dst" + dest_folder.mkdir() + + # Randomly generate target spacing + target_spacing = [np.random.uniform(0.5, 3.0) for _ in range(3)] + + # First pass: Process all files + processor1 = itk_resample.SingleResampleProcessor( + source_folder=str(shared_temp_data / "image"), + dest_folder=str(dest_folder), + target_spacing=target_spacing, + target_size=[-1, -1, -1], + field="image" + ) + processor1.process() + + # Save metadata from first pass + first_meta_path = dest_folder / "meta.json" + processor1.save_meta(first_meta_path) + first_manager = MetadataManager(meta_file_path=first_meta_path) + first_metadata_count = len(first_manager.meta) + + # Get list of files processed in first pass + first_pass_files = set(first_manager.meta.keys()) + assert first_metadata_count > 0, "First pass should process some files" + + # Second pass: Process again (all files should be skipped) + processor2 = itk_resample.SingleResampleProcessor( + source_folder=str(shared_temp_data / "image"), + dest_folder=str(dest_folder), + target_spacing=target_spacing, + target_size=[-1, -1, -1], + field="image" + ) + processor2.process() + + # Check that metadata is preserved after second pass + second_meta_path = dest_folder / "meta.json" + second_manager = MetadataManager(meta_file_path=second_meta_path) + second_metadata_count = len(second_manager.meta) + + # Verify metadata count is the same + assert second_metadata_count == first_metadata_count, \ + f"Metadata count should be preserved: first={first_metadata_count}, second={second_metadata_count}" + + # Verify all files from first pass are still in metadata + second_pass_files = set(second_manager.meta.keys()) + assert first_pass_files == second_pass_files, \ + f"Files in metadata should be the same: missing={first_pass_files - second_pass_files}, extra={second_pass_files - first_pass_files}" + + # Verify metadata values are the same + for name in first_pass_files: + assert first_manager.meta[name] == 
second_manager.meta[name], \ + f"Metadata for {name} should be the same" + + def test_partial_processing_metadata_preservation(self, shared_temp_data, tmp_path): + """Test metadata preservation when only some files are processed. + + Simulates the scenario where some files already exist and are skipped. + """ + dest_folder = tmp_path / "dst" + dest_folder.mkdir() + + # Get source files + source_files = list((shared_temp_data / "image").glob("*.mha")) + assert len(source_files) >= 3, "Need at least 3 source files for this test" + + # Randomly generate target spacing + target_spacing = [np.random.uniform(0.5, 3.0) for _ in range(3)] + + # First pass: Process only the first 2 files manually + first_batch_files = source_files[:2] + for src_file in first_batch_files: + img = sitk.ReadImage(str(src_file)) + # Simple processing without actual resampling for speed + dest_file = dest_folder / src_file.name + sitk.WriteImage(img, str(dest_file), useCompression=True) + + # Create metadata for first batch + first_manager = MetadataManager() + for src_file in first_batch_files: + dest_file = dest_folder / src_file.name + img = sitk.ReadImage(str(dest_file)) + meta = itk_resample.SeriesMetadata.from_sitk_image(img, dest_file.name) + first_manager.update(meta) + first_manager.save(dest_folder / "meta.json") + + first_file_names = {f.name for f in first_batch_files} + + # Second pass: Process all files (first 2 should be skipped) + processor = itk_resample.SingleResampleProcessor( + source_folder=str(shared_temp_data / "image"), + dest_folder=str(dest_folder), + target_spacing=target_spacing, + target_size=[-1, -1, -1], + field="image" + ) + processor.process() + + # Check final metadata + final_manager = MetadataManager(meta_file_path=dest_folder / "meta.json") + final_file_names = set(final_manager.meta.keys()) + + # Verify first batch files are still in metadata + assert first_file_names.issubset(final_file_names), \ + f"First batch files should be preserved in metadata: 
missing={first_file_names - final_file_names}" + + # Verify all source files are in final metadata + source_file_names = {f.name for f in source_files} + assert source_file_names == final_file_names, \ + f"All source files should be in final metadata: missing={source_file_names - final_file_names}, extra={final_file_names - source_file_names}" From f7045d286e99bb14f72ff13110d7d4d00247faae Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 7 Jan 2026 14:27:19 +0000 Subject: [PATCH 4/8] Add metadata preservation to all process modules with skip logic Co-authored-by: MGAMZ <57469116+MGAMZ@users.noreply.github.com> --- itkit/process/itk_convert_format.py | 50 +++++++++++ itkit/process/itk_convert_monai.py | 49 +++++++++++ itkit/process/itk_convert_torchio.py | 50 +++++++++++ tests/test_itk_extract.py | 42 +++++++++ tests/test_itk_orient.py | 126 +++++++++++++++++++++++++++ 5 files changed, 317 insertions(+) diff --git a/itkit/process/itk_convert_format.py b/itkit/process/itk_convert_format.py index df74002..0320a8b 100644 --- a/itkit/process/itk_convert_format.py +++ b/itkit/process/itk_convert_format.py @@ -18,6 +18,7 @@ """ import os +from pathlib import Path from typing import Any import SimpleITK as sitk @@ -237,6 +238,14 @@ def process(self, desc: str | None = None): os.makedirs(os.path.join(self.dest_folder, "image"), exist_ok=True) os.makedirs(os.path.join(self.dest_folder, "label"), exist_ok=True) + # Load existing destination metadata to preserve metadata for skipped files + dest_meta_path = Path(self.dest_folder) / "meta.json" + if dest_meta_path.exists(): + self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) + + # Generate metadata for files that already exist and will be skipped + self._generate_metadata_for_existing_files() + desc = desc or f"{self.task_description} to {self.target_format}" # Process items @@ -264,6 +273,47 @@ def process(self, desc: str | None = 
None): self.save_meta(os.path.join(self.dest_folder, "image", "meta.json")) self.save_meta(os.path.join(self.dest_folder, "label", "meta.json")) + def _generate_metadata_for_existing_files(self): + """Generate metadata for files that already exist in destination folder.""" + # Check image and label folders + for subfolder in ['image', 'label']: + dest_subfolder = os.path.join(self.dest_folder, subfolder) + if not os.path.exists(dest_subfolder): + continue + + # Find all existing files + existing_files = [] + for f in os.listdir(dest_subfolder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + existing_files.append(os.path.join(dest_subfolder, f)) + + # Get source files for comparison + source_subfolder = os.path.join(self.source_folder, subfolder) + if not os.path.exists(source_subfolder): + continue + + source_files_set = set() + for f in os.listdir(source_subfolder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + source_files_set.add(self._normalize_filename(f)) + + # Generate metadata for files that exist and would be skipped + for dest_file in existing_files: + dest_basename = os.path.basename(dest_file) + dest_normalized = self._normalize_filename(dest_basename) + + # If this file would be skipped (exists and source has it) + if dest_normalized in source_files_set: + # Check if metadata already exists + if dest_basename not in self.meta_manager.meta: + # Generate metadata from the existing file + try: + img = sitk.ReadImage(dest_file) + meta = SeriesMetadata.from_sitk_image(img, dest_basename) + self.meta_manager.update(meta, allow_and_overwrite_existed=False) + except Exception as e: + print(f"Warning: Could not generate metadata for {dest_file}: {e}") + def convert_format( source_folder: str, diff --git a/itkit/process/itk_convert_monai.py b/itkit/process/itk_convert_monai.py index f754df4..126e894 100644 --- a/itkit/process/itk_convert_monai.py +++ b/itkit/process/itk_convert_monai.py @@ -306,6 +306,14 @@ def process(self, desc: str | None = None): # Create 
output directories os.makedirs(self.dest_folder, exist_ok=True) + # Load existing destination metadata to preserve metadata for skipped files + dest_meta_path = Path(self.dest_folder) / "meta.json" + if dest_meta_path.exists(): + self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) + + # Generate metadata for files that already exist and will be skipped + self._generate_metadata_for_existing_files() + desc = desc or self.task_description if self.mp: # For multiprocessing, we need to handle class discovery differently @@ -339,6 +347,47 @@ def process(self, desc: str | None = None): # Save metadata self.save_meta(os.path.join(self.dest_folder, "meta.json")) + def _generate_metadata_for_existing_files(self): + """Generate metadata for files that already exist in destination folder.""" + if not os.path.exists(self.dest_folder): + return + + # Find all existing files in destination + existing_files = [] + for f in os.listdir(self.dest_folder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + existing_files.append(os.path.join(self.dest_folder, f)) + + # Get source files for comparison + if not os.path.exists(self.source_folder): + return + + # Build source file set from image and label folders + source_files_set = set() + for subfolder in ['image', 'label']: + source_subfolder = os.path.join(self.source_folder, subfolder) + if os.path.exists(source_subfolder): + for f in os.listdir(source_subfolder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + source_files_set.add(self._normalize_filename(f)) + + # Generate metadata for files that exist and would be skipped + for dest_file in existing_files: + dest_basename = os.path.basename(dest_file) + dest_normalized = self._normalize_filename(dest_basename) + + # If this file would be skipped (exists and source has it) + if dest_normalized in source_files_set: + # Check if metadata already exists + if dest_basename not in self.meta_manager.meta: + # Generate metadata from the existing file + try: + img 
= sitk.ReadImage(dest_file) + meta = SeriesMetadata.from_sitk_image(img, dest_basename) + self.meta_manager.update(meta, allow_and_overwrite_existed=False) + except Exception as e: + print(f"Warning: Could not generate metadata for {dest_file}: {e}") + def _build_dataset_json(self, items: list[tuple[str, str, str, str]]): """Build dataset.json entries from processed items.""" for img_input, img_output, lbl_input, lbl_output in items: diff --git a/itkit/process/itk_convert_torchio.py b/itkit/process/itk_convert_torchio.py index 9fbcef5..0acd37c 100644 --- a/itkit/process/itk_convert_torchio.py +++ b/itkit/process/itk_convert_torchio.py @@ -15,6 +15,7 @@ import csv import os from multiprocessing import Pool +from pathlib import Path from typing import Any import SimpleITK as sitk @@ -199,6 +200,14 @@ def process(self, desc: str | None = None): # Create output directories os.makedirs(self.dest_folder, exist_ok=True) + # Load existing destination metadata to preserve metadata for skipped files + dest_meta_path = Path(self.dest_folder) / "meta.json" + if dest_meta_path.exists(): + self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) + + # Generate metadata for files that already exist and will be skipped + self._generate_metadata_for_existing_files() + desc = desc or self.task_description if self.mp: with Pool(self.workers) as pool: @@ -224,6 +233,47 @@ def process(self, desc: str | None = None): # Save metadata self.save_meta(os.path.join(self.dest_folder, "meta.json")) + def _generate_metadata_for_existing_files(self): + """Generate metadata for files that already exist in destination folder.""" + if not os.path.exists(self.dest_folder): + return + + # Find all existing files in destination + existing_files = [] + for f in os.listdir(self.dest_folder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + existing_files.append(os.path.join(self.dest_folder, f)) + + # Get source files for comparison + if not os.path.exists(self.source_folder): + 
return + + # Build source file set from image and label folders + source_files_set = set() + for subfolder in ['image', 'label']: + source_subfolder = os.path.join(self.source_folder, subfolder) + if os.path.exists(source_subfolder): + for f in os.listdir(source_subfolder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + source_files_set.add(self._normalize_filename(f)) + + # Generate metadata for files that exist and would be skipped + for dest_file in existing_files: + dest_basename = os.path.basename(dest_file) + dest_normalized = self._normalize_filename(dest_basename) + + # If this file would be skipped (exists and source has it) + if dest_normalized in source_files_set: + # Check if metadata already exists + if dest_basename not in self.meta_manager.meta: + # Generate metadata from the existing file + try: + img = sitk.ReadImage(dest_file) + meta = SeriesMetadata.from_sitk_image(img, dest_basename) + self.meta_manager.update(meta, allow_and_overwrite_existed=False) + except Exception as e: + print(f"Warning: Could not generate metadata for {dest_file}: {e}") + def _create_subjects_csv(self): """Create subjects.csv manifest file.""" csv_path = os.path.join(self.dest_folder, "subjects.csv") diff --git a/tests/test_itk_extract.py b/tests/test_itk_extract.py index 6f48bfc..4929083 100644 --- a/tests/test_itk_extract.py +++ b/tests/test_itk_extract.py @@ -364,3 +364,45 @@ def test_main_config_save_error_handling(self, temp_dir): with patch.object(sys, 'argv', test_args): # This should complete successfully despite any config save issues main() + + def test_metadata_preservation_on_skip(self, temp_dir): + """Test that metadata is preserved when files are skipped in ExtractProcessor.""" + source_folder = os.path.join(temp_dir, "source") + dest_folder = os.path.join(temp_dir, "dest") + os.makedirs(source_folder) + + # Create source files with labels to extract + label_mapping = {1: 0, 2: 1, 3: 2} + for i in range(3): + image = create_sample_image([0, 1, 2, 3], 
shape=(10, 10, 10)) + sitk.WriteImage(image, os.path.join(source_folder, f"test{i}.mha")) + + # First pass: Process all files + processor1 = ExtractProcessor(source_folder, dest_folder, label_mapping) + processor1.process() + + # Check metadata from first pass + from itkit.process.metadata_models import MetadataManager + first_meta_path = os.path.join(dest_folder, 'meta.json') + assert os.path.exists(first_meta_path), "meta.json should be created after first pass" + + first_manager = MetadataManager(meta_file_path=first_meta_path) + first_metadata_count = len(first_manager.meta) + first_files = set(first_manager.meta.keys()) + + # Second pass: Process again (all files should be skipped) + processor2 = ExtractProcessor(source_folder, dest_folder, label_mapping) + processor2.process() + + # Check that metadata is preserved after second pass + second_manager = MetadataManager(meta_file_path=first_meta_path) + second_metadata_count = len(second_manager.meta) + second_files = set(second_manager.meta.keys()) + + # Verify metadata count is the same + assert second_metadata_count == first_metadata_count, \ + f"Metadata count should be preserved: first={first_metadata_count}, second={second_metadata_count}" + + # Verify all files from first pass are still in metadata + assert first_files == second_files, \ + f"Files in metadata should be the same: missing={first_files - second_files}, extra={second_files - first_files}" diff --git a/tests/test_itk_orient.py b/tests/test_itk_orient.py index 2232b3e..73701cb 100644 --- a/tests/test_itk_orient.py +++ b/tests/test_itk_orient.py @@ -269,3 +269,129 @@ def test_main_dataset_mode_missing_structure(self, capsys): assert "Error: Dataset mode requires 'image' and 'label' subfolders" in captured.out finally: sys.argv = original_argv + + def test_metadata_preservation_on_skip(self): + """Test that metadata is preserved when files are skipped in OrientProcessor.""" + with tempfile.TemporaryDirectory() as tmpdir: + src_dir = 
os.path.join(tmpdir, 'src') + dst_dir = os.path.join(tmpdir, 'dst') + os.makedirs(src_dir) + + # Create source files + for i in range(3): + create_test_image(os.path.join(src_dir, f'test{i}.mha')) + + # First pass: Process all files + processor1 = OrientProcessor(src_dir, dst_dir, 'LPI', field='image', mp=False) + processor1.process() + + # Save metadata from first pass + first_meta_path = os.path.join(dst_dir, 'meta.json') + assert os.path.exists(first_meta_path), "meta.json should be created after first pass" + + from itkit.process.metadata_models import MetadataManager + first_manager = MetadataManager(meta_file_path=first_meta_path) + first_metadata_count = len(first_manager.meta) + first_files = set(first_manager.meta.keys()) + + # Second pass: Process again (all files should be skipped) + processor2 = OrientProcessor(src_dir, dst_dir, 'LPI', field='image', mp=False) + processor2.process() + + # Check that metadata is preserved after second pass + second_manager = MetadataManager(meta_file_path=first_meta_path) + second_metadata_count = len(second_manager.meta) + second_files = set(second_manager.meta.keys()) + + # Verify metadata count is the same + assert second_metadata_count == first_metadata_count, \ + f"Metadata count should be preserved: first={first_metadata_count}, second={second_metadata_count}" + + # Verify all files from first pass are still in metadata + assert first_files == second_files, \ + f"Files in metadata should be the same: missing={first_files - second_files}, extra={second_files - first_files}" + + def test_partial_processing_metadata_preservation(self): + """Test metadata preservation when only some files are processed in OrientProcessor.""" + with tempfile.TemporaryDirectory() as tmpdir: + src_dir = os.path.join(tmpdir, 'src') + dst_dir = os.path.join(tmpdir, 'dst') + os.makedirs(src_dir) + os.makedirs(dst_dir) + + # Create 5 source files + for i in range(5): + create_test_image(os.path.join(src_dir, f'test{i}.mha')) + + # Pre-create 
first 2 files in destination + for i in range(2): + create_test_image(os.path.join(dst_dir, f'test{i}.mha')) + + # Create initial metadata for first 2 files + from itkit.process.metadata_models import MetadataManager, SeriesMetadata + initial_manager = MetadataManager() + for i in range(2): + img = sitk.ReadImage(os.path.join(dst_dir, f'test{i}.mha')) + meta = SeriesMetadata.from_sitk_image(img, f'test{i}.mha') + initial_manager.update(meta) + initial_manager.save(os.path.join(dst_dir, 'meta.json')) + + # Process all files (first 2 should be skipped) + processor = OrientProcessor(src_dir, dst_dir, 'LPI', field='image', mp=False) + processor.process() + + # Check final metadata + final_manager = MetadataManager(meta_file_path=os.path.join(dst_dir, 'meta.json')) + final_files = set(final_manager.meta.keys()) + expected_files = {f'test{i}.mha' for i in range(5)} + + # Verify all files are in final metadata + assert final_files == expected_files, \ + f"All files should be in final metadata: missing={expected_files - final_files}, extra={final_files - expected_files}" + + def test_dataset_metadata_preservation_on_skip(self): + """Test that metadata is preserved when files are skipped in DatasetOrientProcessor.""" + with tempfile.TemporaryDirectory() as tmpdir: + src_dir = os.path.join(tmpdir, 'src') + dst_dir = os.path.join(tmpdir, 'dst') + + # Create source structure + img_dir = os.path.join(src_dir, 'image') + lbl_dir = os.path.join(src_dir, 'label') + os.makedirs(img_dir) + os.makedirs(lbl_dir) + + # Create matching image and label files + for i in range(3): + create_test_image(os.path.join(img_dir, f'case{i:02d}.mha')) + create_test_image(os.path.join(lbl_dir, f'case{i:02d}.mha')) + + # First pass: Process all files + processor1 = DatasetOrientProcessor(src_dir, dst_dir, 'LPI', mp=False) + processor1.process() + + # Check metadata from first pass + first_meta_path = os.path.join(dst_dir, 'meta.json') + assert os.path.exists(first_meta_path), "meta.json should be 
created after first pass" + + from itkit.process.metadata_models import MetadataManager + first_manager = MetadataManager(meta_file_path=first_meta_path) + first_metadata_count = len(first_manager.meta) + first_files = set(first_manager.meta.keys()) + + # Second pass: Process again (all files should be skipped) + processor2 = DatasetOrientProcessor(src_dir, dst_dir, 'LPI', mp=False) + processor2.process() + + # Check that metadata is preserved after second pass + second_manager = MetadataManager(meta_file_path=first_meta_path) + second_metadata_count = len(second_manager.meta) + second_files = set(second_manager.meta.keys()) + + # Verify metadata count is the same + assert second_metadata_count == first_metadata_count, \ + f"Metadata count should be preserved: first={first_metadata_count}, second={second_metadata_count}" + + # Verify all files from first pass are still in metadata + assert first_files == second_files, \ + f"Files in metadata should be the same: missing={first_files - second_files}, extra={second_files - first_files}" From 9f0eb656df0e1fca3497fcdc60d2452ec7589ef8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 7 Jan 2026 14:47:44 +0000 Subject: [PATCH 5/8] Refactor metadata generation to reduce code duplication - Added _generate_metadata_for_folder() helper method in BaseITKProcessor - Reduced code duplication across 5 modules (SingleFolderProcessor, DatasetProcessor, FormatConverter, MonaiConverter, TorchIOConverter) - Net reduction of 62 lines of duplicated code - Improved maintainability and abstraction layer separation Co-authored-by: MGAMZ <57469116+MGAMZ@users.noreply.github.com> --- itkit/process/base_processor.py | 112 ++++++++++++++------------- itkit/process/itk_convert_format.py | 40 ++-------- itkit/process/itk_convert_monai.py | 32 ++------ itkit/process/itk_convert_torchio.py | 32 ++------ 4 files changed, 77 insertions(+), 139 deletions(-) diff --git 
a/itkit/process/base_processor.py b/itkit/process/base_processor.py index f66da77..c4d0bcc 100644 --- a/itkit/process/base_processor.py +++ b/itkit/process/base_processor.py @@ -186,6 +186,54 @@ def generate_metadata_for_existing_files(self): Subclasses should override this if they skip existing files. """ pass + + def _generate_metadata_for_folder(self, dest_folder: str, source_folder: str, + source_files_set: set[str] | None = None) -> None: + """ + Helper method to generate metadata for existing files in a folder. + + This method encapsulates the common logic for regenerating metadata from + existing files that will be skipped during processing. + + Args: + dest_folder: Destination folder containing existing files + source_folder: Source folder to compare against (optional if source_files_set provided) + source_files_set: Pre-computed set of normalized source file names (optional) + """ + if not os.path.exists(dest_folder): + return + + # Find all existing files in destination + existing_files = [] + for f in os.listdir(dest_folder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + existing_files.append(os.path.join(dest_folder, f)) + + # Build source files set if not provided + if source_files_set is None: + if not os.path.exists(source_folder): + return + source_files_set = set() + for f in os.listdir(source_folder): + if f.endswith(self.SUPPORTED_EXTENSIONS): + source_files_set.add(self._normalize_filename(f)) + + # Generate metadata for files that exist and would be skipped + for dest_file in existing_files: + dest_basename = os.path.basename(dest_file) + dest_normalized = self._normalize_filename(dest_basename) + + # If this file would be skipped (exists and source has it) + if dest_normalized in source_files_set: + # Check if metadata already exists + if dest_basename not in self.meta_manager.meta: + # Generate metadata from the existing file + try: + img = sitk.ReadImage(dest_file) + meta = SeriesMetadata.from_sitk_image(img, dest_basename) + 
self.meta_manager.update(meta, allow_and_overwrite_existed=False) + except Exception as e: + print(f"Warning: Could not generate metadata for {dest_file}: {e}") def _normalize_filename(self, filepath: str) -> str: base = os.path.splitext(filepath)[0] @@ -248,25 +296,11 @@ def generate_metadata_for_existing_files(self): if self.dest_folder is None or not os.path.exists(self.dest_folder): return - # Find all existing files in destination - existing_files = self.find_files_flat(self.dest_folder) - - # For each existing file, check if it will be skipped during processing - source_files_set = {os.path.basename(f) for f in self.find_files_flat(self.source_folder)} - - for dest_file in existing_files: - dest_basename = os.path.basename(dest_file) - # If this file would be skipped (because it exists and source has it) - if dest_basename in source_files_set: - # Check if metadata already exists - if dest_basename not in self.meta_manager.meta: - # Generate metadata from the existing file - try: - img = sitk.ReadImage(dest_file) - meta = SeriesMetadata.from_sitk_image(img, dest_basename) - self.meta_manager.update(meta, allow_and_overwrite_existed=False) - except Exception as e: - print(f"Warning: Could not generate metadata for {dest_file}: {e}") + # Use the helper method from base class + self._generate_metadata_for_folder( + dest_folder=self.dest_folder, + source_folder=self.source_folder + ) def process(self, desc: str | None = None): # Load existing destination metadata to preserve metadata for skipped files @@ -415,44 +449,16 @@ def generate_metadata_for_existing_files(self): if self.dest_folder is None: return - # Check image and label folders + # Check image and label folders using the helper method for subfolder in ['image', 'label']: dest_subfolder = os.path.join(self.dest_folder, subfolder) - if not os.path.exists(dest_subfolder): - continue - - # Find all existing files - existing_files = [] - for f in os.listdir(dest_subfolder): - if 
f.endswith(self.SUPPORTED_EXTENSIONS): - existing_files.append(os.path.join(dest_subfolder, f)) - - # Get source files for comparison source_subfolder = os.path.join(self.source_folder, subfolder) - if not os.path.exists(source_subfolder): - continue - - source_files_set = set() - for f in os.listdir(source_subfolder): - if f.endswith(self.SUPPORTED_EXTENSIONS): - source_files_set.add(self._normalize_filename(f)) - # Generate metadata for files that exist and would be skipped - for dest_file in existing_files: - dest_basename = os.path.basename(dest_file) - dest_normalized = self._normalize_filename(dest_basename) - - # If this file would be skipped (exists and source has it) - if dest_normalized in source_files_set: - # Check if metadata already exists - if dest_basename not in self.meta_manager.meta: - # Generate metadata from the existing file - try: - img = sitk.ReadImage(dest_file) - meta = SeriesMetadata.from_sitk_image(img, dest_basename) - self.meta_manager.update(meta, allow_and_overwrite_existed=False) - except Exception as e: - print(f"Warning: Could not generate metadata for {dest_file}: {e}") + if os.path.exists(dest_subfolder) and os.path.exists(source_subfolder): + self._generate_metadata_for_folder( + dest_folder=dest_subfolder, + source_folder=source_subfolder + ) def process(self, desc: str | None = None): # Load existing destination metadata to preserve metadata for skipped files diff --git a/itkit/process/itk_convert_format.py b/itkit/process/itk_convert_format.py index 0320a8b..743c263 100644 --- a/itkit/process/itk_convert_format.py +++ b/itkit/process/itk_convert_format.py @@ -275,44 +275,16 @@ def process(self, desc: str | None = None): def _generate_metadata_for_existing_files(self): """Generate metadata for files that already exist in destination folder.""" - # Check image and label folders + # Check image and label folders using the helper method from base class for subfolder in ['image', 'label']: dest_subfolder = 
os.path.join(self.dest_folder, subfolder) - if not os.path.exists(dest_subfolder): - continue - - # Find all existing files - existing_files = [] - for f in os.listdir(dest_subfolder): - if f.endswith(self.SUPPORTED_EXTENSIONS): - existing_files.append(os.path.join(dest_subfolder, f)) - - # Get source files for comparison source_subfolder = os.path.join(self.source_folder, subfolder) - if not os.path.exists(source_subfolder): - continue - source_files_set = set() - for f in os.listdir(source_subfolder): - if f.endswith(self.SUPPORTED_EXTENSIONS): - source_files_set.add(self._normalize_filename(f)) - - # Generate metadata for files that exist and would be skipped - for dest_file in existing_files: - dest_basename = os.path.basename(dest_file) - dest_normalized = self._normalize_filename(dest_basename) - - # If this file would be skipped (exists and source has it) - if dest_normalized in source_files_set: - # Check if metadata already exists - if dest_basename not in self.meta_manager.meta: - # Generate metadata from the existing file - try: - img = sitk.ReadImage(dest_file) - meta = SeriesMetadata.from_sitk_image(img, dest_basename) - self.meta_manager.update(meta, allow_and_overwrite_existed=False) - except Exception as e: - print(f"Warning: Could not generate metadata for {dest_file}: {e}") + if os.path.exists(dest_subfolder) and os.path.exists(source_subfolder): + self._generate_metadata_for_folder( + dest_folder=dest_subfolder, + source_folder=source_subfolder + ) def convert_format( diff --git a/itkit/process/itk_convert_monai.py b/itkit/process/itk_convert_monai.py index 126e894..0b3c9d3 100644 --- a/itkit/process/itk_convert_monai.py +++ b/itkit/process/itk_convert_monai.py @@ -352,16 +352,6 @@ def _generate_metadata_for_existing_files(self): if not os.path.exists(self.dest_folder): return - # Find all existing files in destination - existing_files = [] - for f in os.listdir(self.dest_folder): - if f.endswith(self.SUPPORTED_EXTENSIONS): - 
existing_files.append(os.path.join(self.dest_folder, f)) - - # Get source files for comparison - if not os.path.exists(self.source_folder): - return - # Build source file set from image and label folders source_files_set = set() for subfolder in ['image', 'label']: @@ -371,22 +361,12 @@ def _generate_metadata_for_existing_files(self): if f.endswith(self.SUPPORTED_EXTENSIONS): source_files_set.add(self._normalize_filename(f)) - # Generate metadata for files that exist and would be skipped - for dest_file in existing_files: - dest_basename = os.path.basename(dest_file) - dest_normalized = self._normalize_filename(dest_basename) - - # If this file would be skipped (exists and source has it) - if dest_normalized in source_files_set: - # Check if metadata already exists - if dest_basename not in self.meta_manager.meta: - # Generate metadata from the existing file - try: - img = sitk.ReadImage(dest_file) - meta = SeriesMetadata.from_sitk_image(img, dest_basename) - self.meta_manager.update(meta, allow_and_overwrite_existed=False) - except Exception as e: - print(f"Warning: Could not generate metadata for {dest_file}: {e}") + # Use the helper method from base class with pre-computed source files set + self._generate_metadata_for_folder( + dest_folder=self.dest_folder, + source_folder=None, # Not needed since we provide source_files_set + source_files_set=source_files_set + ) def _build_dataset_json(self, items: list[tuple[str, str, str, str]]): """Build dataset.json entries from processed items.""" diff --git a/itkit/process/itk_convert_torchio.py b/itkit/process/itk_convert_torchio.py index 0acd37c..cef28a5 100644 --- a/itkit/process/itk_convert_torchio.py +++ b/itkit/process/itk_convert_torchio.py @@ -238,16 +238,6 @@ def _generate_metadata_for_existing_files(self): if not os.path.exists(self.dest_folder): return - # Find all existing files in destination - existing_files = [] - for f in os.listdir(self.dest_folder): - if f.endswith(self.SUPPORTED_EXTENSIONS): - 
existing_files.append(os.path.join(self.dest_folder, f)) - - # Get source files for comparison - if not os.path.exists(self.source_folder): - return - # Build source file set from image and label folders source_files_set = set() for subfolder in ['image', 'label']: @@ -257,22 +247,12 @@ def _generate_metadata_for_existing_files(self): if f.endswith(self.SUPPORTED_EXTENSIONS): source_files_set.add(self._normalize_filename(f)) - # Generate metadata for files that exist and would be skipped - for dest_file in existing_files: - dest_basename = os.path.basename(dest_file) - dest_normalized = self._normalize_filename(dest_basename) - - # If this file would be skipped (exists and source has it) - if dest_normalized in source_files_set: - # Check if metadata already exists - if dest_basename not in self.meta_manager.meta: - # Generate metadata from the existing file - try: - img = sitk.ReadImage(dest_file) - meta = SeriesMetadata.from_sitk_image(img, dest_basename) - self.meta_manager.update(meta, allow_and_overwrite_existed=False) - except Exception as e: - print(f"Warning: Could not generate metadata for {dest_file}: {e}") + # Use the helper method from base class with pre-computed source files set + self._generate_metadata_for_folder( + dest_folder=self.dest_folder, + source_folder=None, # Not needed since we provide source_files_set + source_files_set=source_files_set + ) def _create_subjects_csv(self): """Create subjects.csv manifest file.""" From e346276bd5985c15fbe91895d4ac718f5e152dcb Mon Sep 17 00:00:00 2001 From: mgam <312065559@qq.com> Date: Fri, 9 Jan 2026 22:17:50 +0800 Subject: [PATCH 6/8] lint fix --- itkit/process/base_processor.py | 36 ++++++++++++---------- itkit/process/itk_convert_format.py | 4 +-- itkit/process/itk_convert_monai.py | 6 ++-- itkit/process/itk_convert_torchio.py | 6 ++-- itkit/process/metadata_models.py | 4 +-- tests/test_itk_extract.py | 16 +++++----- tests/test_itk_orient.py | 46 ++++++++++++++-------------- tests/test_itk_resample.py 
| 40 ++++++++++++------------ 8 files changed, 80 insertions(+), 78 deletions(-) diff --git a/itkit/process/base_processor.py b/itkit/process/base_processor.py index c4d0bcc..6d4efe9 100644 --- a/itkit/process/base_processor.py +++ b/itkit/process/base_processor.py @@ -179,22 +179,22 @@ def process_one(self, args) -> list[SeriesMetadata] | SeriesMetadata | Any | Non def generate_metadata_for_existing_files(self): """ Generate metadata for files that already exist in the destination folder. - + This method should be called before processing to ensure that metadata is generated for files that will be skipped during processing. - + Subclasses should override this if they skip existing files. """ pass - - def _generate_metadata_for_folder(self, dest_folder: str, source_folder: str, + + def _generate_metadata_for_folder(self, dest_folder: str, source_folder: str | None, source_files_set: set[str] | None = None) -> None: """ Helper method to generate metadata for existing files in a folder. - + This method encapsulates the common logic for regenerating metadata from existing files that will be skipped during processing. 
- + Args: dest_folder: Destination folder containing existing files source_folder: Source folder to compare against (optional if source_files_set provided) @@ -202,27 +202,29 @@ def _generate_metadata_for_folder(self, dest_folder: str, source_folder: str, """ if not os.path.exists(dest_folder): return - + # Find all existing files in destination existing_files = [] for f in os.listdir(dest_folder): if f.endswith(self.SUPPORTED_EXTENSIONS): existing_files.append(os.path.join(dest_folder, f)) - + # Build source files set if not provided if source_files_set is None: + if source_folder is None: + raise ValueError("Either source_folder or source_files_set must be provided.") if not os.path.exists(source_folder): return source_files_set = set() for f in os.listdir(source_folder): if f.endswith(self.SUPPORTED_EXTENSIONS): source_files_set.add(self._normalize_filename(f)) - + # Generate metadata for files that exist and would be skipped for dest_file in existing_files: dest_basename = os.path.basename(dest_file) dest_normalized = self._normalize_filename(dest_basename) - + # If this file would be skipped (exists and source has it) if dest_normalized in source_files_set: # Check if metadata already exists @@ -295,7 +297,7 @@ def generate_metadata_for_existing_files(self): """Generate metadata for files that already exist in destination folder.""" if self.dest_folder is None or not os.path.exists(self.dest_folder): return - + # Use the helper method from base class self._generate_metadata_for_folder( dest_folder=self.dest_folder, @@ -308,10 +310,10 @@ def process(self, desc: str | None = None): dest_meta_path = Path(self.dest_folder) / "meta.json" if dest_meta_path.exists(): self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) - + # Generate metadata for files that already exist and will be skipped self.generate_metadata_for_existing_files() - + super().process(desc) if self.dest_folder is not None: os.makedirs(self.dest_folder, exist_ok=True) 
@@ -448,12 +450,12 @@ def generate_metadata_for_existing_files(self): """Generate metadata for files that already exist in destination folder.""" if self.dest_folder is None: return - + # Check image and label folders using the helper method for subfolder in ['image', 'label']: dest_subfolder = os.path.join(self.dest_folder, subfolder) source_subfolder = os.path.join(self.source_folder, subfolder) - + if os.path.exists(dest_subfolder) and os.path.exists(source_subfolder): self._generate_metadata_for_folder( dest_folder=dest_subfolder, @@ -466,10 +468,10 @@ def process(self, desc: str | None = None): dest_meta_path = Path(self.dest_folder) / "meta.json" if dest_meta_path.exists(): self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) - + # Generate metadata for files that already exist and will be skipped self.generate_metadata_for_existing_files() - + super().process(desc) if self.dest_folder is not None: self.save_meta(Path(self.dest_folder) / "meta.json") diff --git a/itkit/process/itk_convert_format.py b/itkit/process/itk_convert_format.py index 743c263..ac13c3a 100644 --- a/itkit/process/itk_convert_format.py +++ b/itkit/process/itk_convert_format.py @@ -242,7 +242,7 @@ def process(self, desc: str | None = None): dest_meta_path = Path(self.dest_folder) / "meta.json" if dest_meta_path.exists(): self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) - + # Generate metadata for files that already exist and will be skipped self._generate_metadata_for_existing_files() @@ -279,7 +279,7 @@ def _generate_metadata_for_existing_files(self): for subfolder in ['image', 'label']: dest_subfolder = os.path.join(self.dest_folder, subfolder) source_subfolder = os.path.join(self.source_folder, subfolder) - + if os.path.exists(dest_subfolder) and os.path.exists(source_subfolder): self._generate_metadata_for_folder( dest_folder=dest_subfolder, diff --git a/itkit/process/itk_convert_monai.py 
b/itkit/process/itk_convert_monai.py index 0b3c9d3..dd9ca26 100644 --- a/itkit/process/itk_convert_monai.py +++ b/itkit/process/itk_convert_monai.py @@ -310,7 +310,7 @@ def process(self, desc: str | None = None): dest_meta_path = Path(self.dest_folder) / "meta.json" if dest_meta_path.exists(): self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) - + # Generate metadata for files that already exist and will be skipped self._generate_metadata_for_existing_files() @@ -351,7 +351,7 @@ def _generate_metadata_for_existing_files(self): """Generate metadata for files that already exist in destination folder.""" if not os.path.exists(self.dest_folder): return - + # Build source file set from image and label folders source_files_set = set() for subfolder in ['image', 'label']: @@ -360,7 +360,7 @@ def _generate_metadata_for_existing_files(self): for f in os.listdir(source_subfolder): if f.endswith(self.SUPPORTED_EXTENSIONS): source_files_set.add(self._normalize_filename(f)) - + # Use the helper method from base class with pre-computed source files set self._generate_metadata_for_folder( dest_folder=self.dest_folder, diff --git a/itkit/process/itk_convert_torchio.py b/itkit/process/itk_convert_torchio.py index cef28a5..a7cbd01 100644 --- a/itkit/process/itk_convert_torchio.py +++ b/itkit/process/itk_convert_torchio.py @@ -204,7 +204,7 @@ def process(self, desc: str | None = None): dest_meta_path = Path(self.dest_folder) / "meta.json" if dest_meta_path.exists(): self.meta_manager.load_and_merge(dest_meta_path, allow_and_overwrite_existed=False) - + # Generate metadata for files that already exist and will be skipped self._generate_metadata_for_existing_files() @@ -237,7 +237,7 @@ def _generate_metadata_for_existing_files(self): """Generate metadata for files that already exist in destination folder.""" if not os.path.exists(self.dest_folder): return - + # Build source file set from image and label folders source_files_set = set() for subfolder in 
['image', 'label']: @@ -246,7 +246,7 @@ def _generate_metadata_for_existing_files(self): for f in os.listdir(source_subfolder): if f.endswith(self.SUPPORTED_EXTENSIONS): source_files_set.add(self._normalize_filename(f)) - + # Use the helper method from base class with pre-computed source files set self._generate_metadata_for_folder( dest_folder=self.dest_folder, diff --git a/itkit/process/metadata_models.py b/itkit/process/metadata_models.py index 2bbfab7..e116056 100644 --- a/itkit/process/metadata_models.py +++ b/itkit/process/metadata_models.py @@ -118,7 +118,7 @@ def update(self, image_meta:SeriesMetadata, allow_and_overwrite_existed:bool=Tru def load_and_merge(self, meta_file_path: str | Path, allow_and_overwrite_existed: bool = False): """Load metadata from a file and merge it with existing metadata. - + Args: meta_file_path: Path to the metadata file to load allow_and_overwrite_existed: If True, overwrite existing metadata with loaded data. @@ -126,7 +126,7 @@ def load_and_merge(self, meta_file_path: str | Path, allow_and_overwrite_existed """ if not Path(meta_file_path).exists(): return - + try: data = json.loads(Path(meta_file_path).read_text()) for name, meta_dict in data.items(): diff --git a/tests/test_itk_extract.py b/tests/test_itk_extract.py index 4929083..5e8d21f 100644 --- a/tests/test_itk_extract.py +++ b/tests/test_itk_extract.py @@ -370,39 +370,39 @@ def test_metadata_preservation_on_skip(self, temp_dir): source_folder = os.path.join(temp_dir, "source") dest_folder = os.path.join(temp_dir, "dest") os.makedirs(source_folder) - + # Create source files with labels to extract label_mapping = {1: 0, 2: 1, 3: 2} for i in range(3): image = create_sample_image([0, 1, 2, 3], shape=(10, 10, 10)) sitk.WriteImage(image, os.path.join(source_folder, f"test{i}.mha")) - + # First pass: Process all files processor1 = ExtractProcessor(source_folder, dest_folder, label_mapping) processor1.process() - + # Check metadata from first pass from 
itkit.process.metadata_models import MetadataManager first_meta_path = os.path.join(dest_folder, 'meta.json') assert os.path.exists(first_meta_path), "meta.json should be created after first pass" - + first_manager = MetadataManager(meta_file_path=first_meta_path) first_metadata_count = len(first_manager.meta) first_files = set(first_manager.meta.keys()) - + # Second pass: Process again (all files should be skipped) processor2 = ExtractProcessor(source_folder, dest_folder, label_mapping) processor2.process() - + # Check that metadata is preserved after second pass second_manager = MetadataManager(meta_file_path=first_meta_path) second_metadata_count = len(second_manager.meta) second_files = set(second_manager.meta.keys()) - + # Verify metadata count is the same assert second_metadata_count == first_metadata_count, \ f"Metadata count should be preserved: first={first_metadata_count}, second={second_metadata_count}" - + # Verify all files from first pass are still in metadata assert first_files == second_files, \ f"Files in metadata should be the same: missing={first_files - second_files}, extra={second_files - first_files}" diff --git a/tests/test_itk_orient.py b/tests/test_itk_orient.py index 73701cb..b561111 100644 --- a/tests/test_itk_orient.py +++ b/tests/test_itk_orient.py @@ -276,37 +276,37 @@ def test_metadata_preservation_on_skip(self): src_dir = os.path.join(tmpdir, 'src') dst_dir = os.path.join(tmpdir, 'dst') os.makedirs(src_dir) - + # Create source files for i in range(3): create_test_image(os.path.join(src_dir, f'test{i}.mha')) - + # First pass: Process all files processor1 = OrientProcessor(src_dir, dst_dir, 'LPI', field='image', mp=False) processor1.process() - + # Save metadata from first pass first_meta_path = os.path.join(dst_dir, 'meta.json') assert os.path.exists(first_meta_path), "meta.json should be created after first pass" - + from itkit.process.metadata_models import MetadataManager first_manager = 
MetadataManager(meta_file_path=first_meta_path) first_metadata_count = len(first_manager.meta) first_files = set(first_manager.meta.keys()) - + # Second pass: Process again (all files should be skipped) processor2 = OrientProcessor(src_dir, dst_dir, 'LPI', field='image', mp=False) processor2.process() - + # Check that metadata is preserved after second pass second_manager = MetadataManager(meta_file_path=first_meta_path) second_metadata_count = len(second_manager.meta) second_files = set(second_manager.meta.keys()) - + # Verify metadata count is the same assert second_metadata_count == first_metadata_count, \ f"Metadata count should be preserved: first={first_metadata_count}, second={second_metadata_count}" - + # Verify all files from first pass are still in metadata assert first_files == second_files, \ f"Files in metadata should be the same: missing={first_files - second_files}, extra={second_files - first_files}" @@ -318,15 +318,15 @@ def test_partial_processing_metadata_preservation(self): dst_dir = os.path.join(tmpdir, 'dst') os.makedirs(src_dir) os.makedirs(dst_dir) - + # Create 5 source files for i in range(5): create_test_image(os.path.join(src_dir, f'test{i}.mha')) - + # Pre-create first 2 files in destination for i in range(2): create_test_image(os.path.join(dst_dir, f'test{i}.mha')) - + # Create initial metadata for first 2 files from itkit.process.metadata_models import MetadataManager, SeriesMetadata initial_manager = MetadataManager() @@ -335,16 +335,16 @@ def test_partial_processing_metadata_preservation(self): meta = SeriesMetadata.from_sitk_image(img, f'test{i}.mha') initial_manager.update(meta) initial_manager.save(os.path.join(dst_dir, 'meta.json')) - + # Process all files (first 2 should be skipped) processor = OrientProcessor(src_dir, dst_dir, 'LPI', field='image', mp=False) processor.process() - + # Check final metadata final_manager = MetadataManager(meta_file_path=os.path.join(dst_dir, 'meta.json')) final_files = 
set(final_manager.meta.keys()) expected_files = {f'test{i}.mha' for i in range(5)} - + # Verify all files are in final metadata assert final_files == expected_files, \ f"All files should be in final metadata: missing={expected_files - final_files}, extra={final_files - expected_files}" @@ -354,44 +354,44 @@ def test_dataset_metadata_preservation_on_skip(self): with tempfile.TemporaryDirectory() as tmpdir: src_dir = os.path.join(tmpdir, 'src') dst_dir = os.path.join(tmpdir, 'dst') - + # Create source structure img_dir = os.path.join(src_dir, 'image') lbl_dir = os.path.join(src_dir, 'label') os.makedirs(img_dir) os.makedirs(lbl_dir) - + # Create matching image and label files for i in range(3): create_test_image(os.path.join(img_dir, f'case{i:02d}.mha')) create_test_image(os.path.join(lbl_dir, f'case{i:02d}.mha')) - + # First pass: Process all files processor1 = DatasetOrientProcessor(src_dir, dst_dir, 'LPI', mp=False) processor1.process() - + # Check metadata from first pass first_meta_path = os.path.join(dst_dir, 'meta.json') assert os.path.exists(first_meta_path), "meta.json should be created after first pass" - + from itkit.process.metadata_models import MetadataManager first_manager = MetadataManager(meta_file_path=first_meta_path) first_metadata_count = len(first_manager.meta) first_files = set(first_manager.meta.keys()) - + # Second pass: Process again (all files should be skipped) processor2 = DatasetOrientProcessor(src_dir, dst_dir, 'LPI', mp=False) processor2.process() - + # Check that metadata is preserved after second pass second_manager = MetadataManager(meta_file_path=first_meta_path) second_metadata_count = len(second_manager.meta) second_files = set(second_manager.meta.keys()) - + # Verify metadata count is the same assert second_metadata_count == first_metadata_count, \ f"Metadata count should be preserved: first={first_metadata_count}, second={second_metadata_count}" - + # Verify all files from first pass are still in metadata assert first_files == 
second_files, \ f"Files in metadata should be the same: missing={first_files - second_files}, extra={second_files - first_files}" diff --git a/tests/test_itk_resample.py b/tests/test_itk_resample.py index 80ba183..a1acce8 100644 --- a/tests/test_itk_resample.py +++ b/tests/test_itk_resample.py @@ -203,15 +203,15 @@ def test_full_io_processing_label_field(self, shared_temp_data, tmp_path): def test_metadata_preservation_on_skip(self, shared_temp_data, tmp_path): """Test that metadata is preserved when files are skipped due to already existing. - + This tests the fix for the issue where skipped files disappear from meta.json. """ dest_folder = tmp_path / "dst" dest_folder.mkdir() - + # Randomly generate target spacing target_spacing = [np.random.uniform(0.5, 3.0) for _ in range(3)] - + # First pass: Process all files processor1 = itk_resample.SingleResampleProcessor( source_folder=str(shared_temp_data / "image"), @@ -221,17 +221,17 @@ def test_metadata_preservation_on_skip(self, shared_temp_data, tmp_path): field="image" ) processor1.process() - + # Save metadata from first pass first_meta_path = dest_folder / "meta.json" processor1.save_meta(first_meta_path) first_manager = MetadataManager(meta_file_path=first_meta_path) first_metadata_count = len(first_manager.meta) - + # Get list of files processed in first pass first_pass_files = set(first_manager.meta.keys()) assert first_metadata_count > 0, "First pass should process some files" - + # Second pass: Process again (all files should be skipped) processor2 = itk_resample.SingleResampleProcessor( source_folder=str(shared_temp_data / "image"), @@ -241,21 +241,21 @@ def test_metadata_preservation_on_skip(self, shared_temp_data, tmp_path): field="image" ) processor2.process() - + # Check that metadata is preserved after second pass second_meta_path = dest_folder / "meta.json" second_manager = MetadataManager(meta_file_path=second_meta_path) second_metadata_count = len(second_manager.meta) - + # Verify metadata count is 
the same assert second_metadata_count == first_metadata_count, \ f"Metadata count should be preserved: first={first_metadata_count}, second={second_metadata_count}" - + # Verify all files from first pass are still in metadata second_pass_files = set(second_manager.meta.keys()) assert first_pass_files == second_pass_files, \ f"Files in metadata should be the same: missing={first_pass_files - second_pass_files}, extra={second_pass_files - first_pass_files}" - + # Verify metadata values are the same for name in first_pass_files: assert first_manager.meta[name] == second_manager.meta[name], \ @@ -263,19 +263,19 @@ def test_metadata_preservation_on_skip(self, shared_temp_data, tmp_path): def test_partial_processing_metadata_preservation(self, shared_temp_data, tmp_path): """Test metadata preservation when only some files are processed. - + Simulates the scenario where some files already exist and are skipped. """ dest_folder = tmp_path / "dst" dest_folder.mkdir() - + # Get source files source_files = list((shared_temp_data / "image").glob("*.mha")) assert len(source_files) >= 3, "Need at least 3 source files for this test" - + # Randomly generate target spacing target_spacing = [np.random.uniform(0.5, 3.0) for _ in range(3)] - + # First pass: Process only the first 2 files manually first_batch_files = source_files[:2] for src_file in first_batch_files: @@ -283,7 +283,7 @@ def test_partial_processing_metadata_preservation(self, shared_temp_data, tmp_pa # Simple processing without actual resampling for speed dest_file = dest_folder / src_file.name sitk.WriteImage(img, str(dest_file), useCompression=True) - + # Create metadata for first batch first_manager = MetadataManager() for src_file in first_batch_files: @@ -292,9 +292,9 @@ def test_partial_processing_metadata_preservation(self, shared_temp_data, tmp_pa meta = itk_resample.SeriesMetadata.from_sitk_image(img, dest_file.name) first_manager.update(meta) first_manager.save(dest_folder / "meta.json") - + first_file_names 
= {f.name for f in first_batch_files} + + # Second pass: Process all files (first 2 should be skipped) processor = itk_resample.SingleResampleProcessor( source_folder=str(shared_temp_data / "image"), @@ -304,15 +304,15 @@ def test_partial_processing_metadata_preservation(self, shared_temp_data, tmp_pa field="image" ) processor.process() - + # Check final metadata final_manager = MetadataManager(meta_file_path=dest_folder / "meta.json") final_file_names = set(final_manager.meta.keys()) - + # Verify first batch files are still in metadata assert first_file_names.issubset(final_file_names), \ f"First batch files should be preserved in metadata: missing={first_file_names - final_file_names}" - + # Verify all source files are in final metadata source_file_names = {f.name for f in source_files} assert source_file_names == final_file_names, \ From 3e03f1328e0460a17400e747ce8777585f82933c Mon Sep 17 00:00:00 2001 From: mgam <312065559@qq.com> Date: Fri, 9 Jan 2026 22:23:17 +0800 Subject: [PATCH 7/8] Remove `additional_dependencies` from `pyright` pre-commit hook.
--- .pre-commit-config.yaml | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 647dee8..ac32855 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,15 +50,12 @@ repos: hooks: - id: pyright args: ["--threads", "24"] - additional_dependencies: [ - colorama, - tqdm, - SimpleITK, - numpy, - onedl-mmsegmentation, - deprecated, - psutil, - tabulate, - torchio, - ] + # additional_dependencies: [ + # colorama, + # tqdm, + # numpy, + # deprecated, + # psutil, + # tabulate, + # ] pass_filenames: false From c9666a8ea7ff2efde9d93d5fb4f1ccd43b584aec Mon Sep 17 00:00:00 2001 From: mgam <312065559@qq.com> Date: Fri, 9 Jan 2026 22:48:04 +0800 Subject: [PATCH 8/8] [doc] improve doc structure --- .pre-commit-config.yaml | 8 - docs/contributing.md | 2 +- docs/faq.md | 388 --------------------------------------- docs/index.md | 19 +- docs/itk_aug.md | 37 ++++ docs/itk_check.md | 43 +++++ docs/itk_convert.md | 116 ++++++++++++ docs/itk_extract.md | 36 ++++ docs/itk_orient.md | 38 ++++ docs/itk_patch.md | 43 +++++ docs/itk_resample.md | 48 +++++ docs/preprocessing.md | 390 +--------------------------------------- mkdocs.yml | 12 +- 13 files changed, 385 insertions(+), 795 deletions(-) delete mode 100644 docs/faq.md create mode 100644 docs/itk_aug.md create mode 100644 docs/itk_check.md create mode 100644 docs/itk_convert.md create mode 100644 docs/itk_extract.md create mode 100644 docs/itk_orient.md create mode 100644 docs/itk_patch.md create mode 100644 docs/itk_resample.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ac32855..6284a5f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,12 +50,4 @@ repos: hooks: - id: pyright args: ["--threads", "24"] - # additional_dependencies: [ - # colorama, - # tqdm, - # numpy, - # deprecated, - # psutil, - # tabulate, - # ] pass_filenames: false diff --git a/docs/contributing.md 
b/docs/contributing.md index 4f6b83f..1eb5c0a 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -245,7 +245,7 @@ To add a new preprocessing command: 1. Create script in `itkit/process/` 2. Add entry point in `pyproject.toml` -3. Document in `docs/preprocessing.md` +3. Create documentation in `docs/itk_*.md` and update `mkdocs.yml` 4. Add tests in `tests/` 5. Support common flags (`--mp`, `--help`) diff --git a/docs/faq.md b/docs/faq.md deleted file mode 100644 index f0aaa86..0000000 --- a/docs/faq.md +++ /dev/null @@ -1,388 +0,0 @@ -# FAQ and Troubleshooting - -Frequently asked questions and common issues with ITKIT. - -## Installation Issues - -### Q: I get import errors after installation - -**A:** Try reinstalling ITKIT with force-reinstall: - -```bash -pip install itkit --force-reinstall -``` - -If the issue persists, check that all dependencies are installed: - -```bash -pip install -r requirements.txt -``` - -### Q: SimpleITK installation fails - -**A:** SimpleITK requires specific system libraries. Try: - -```bash -# Update pip first -pip install --upgrade pip - -# Install SimpleITK -pip install SimpleITK -``` - -On Linux, you may need system packages: - -```bash -sudo apt-get install python3-dev -``` - -### Q: PyQt6 GUI won't start - -**A:** Ensure you installed the GUI dependencies: - -```bash -pip install "itkit[gui]" -``` - -If running on a server without display: - -```bash -export QT_QPA_PLATFORM=offscreen -itkit-app -``` - -### Q: Version conflicts with other packages - -**A:** Use a virtual environment to isolate ITKIT: - -```bash -python -m venv itkit_env -source itkit_env/bin/activate # On Windows: itkit_env\Scripts\activate -pip install itkit -``` - -## Usage Issues - -### Q: itk_check reports mismatched spacing but images look correct - -**A:** Medical images can have very small spacing differences due to floating-point precision. 
Use tolerance in your checks: - -```bash -# Instead of exact values, use ranges -itk_check check /data --min-spacing 0.9 0.9 0.9 --max-spacing 1.1 1.1 1.1 -``` - -### Q: itk_resample produces incorrect output - -**A:** Check: - -1. **Coordinate order:** ITKIT uses Z, Y, X order -2. **Field type:** Use `dataset` for both images and labels, or specify `image`/`label` appropriately -3. **Spacing values:** Ensure they're in millimeters - -Correct usage: - -```bash -itk_resample dataset /src /dst --spacing 1.0 1.0 1.0 # Z Y X order -``` - -### Q: Patches extracted with itk_patch are all background - -**A:** Adjust the foreground ratio threshold: - -```bash -itk_patch /src /dst \ - --patch-size 96 96 96 \ - --patch-stride 48 48 48 \ - --minimum-foreground-ratio 0.01 # Lower threshold -``` - -Or keep some empty patches: - -```bash -itk_patch /src /dst \ - --patch-size 96 96 96 \ - --patch-stride 48 48 48 \ - --keep-empty-label-prob 0.2 -``` - -### Q: Multiprocessing (--mp) doesn't speed things up - -**A:** Multiprocessing overhead can exceed benefits for small datasets. Use it only when: - -- Dataset has many samples (>50) -- Individual files are large -- I/O is not the bottleneck - -Control number of workers: - -```bash -itk_resample dataset /src /dst --spacing 1.0 1.0 1.0 --mp --workers 4 -``` - -### Q: GUI DPI is too small/large - -**A:** Set the Qt scale factor: - -```bash -# Double size -QT_SCALE_FACTOR=2 itkit-app - -# Half size -QT_SCALE_FACTOR=0.5 itkit-app -``` - -## Dataset Issues - -### Q: My dataset structure doesn't match ITKIT format - -**A:** You need to reorganize your data. 
ITKIT requires: - -```plaintext -dataset/ -├── image/ -│ └── files -└── label/ - └── files -``` - -Use symbolic links if you don't want to copy: - -```bash -mkdir -p dataset/image dataset/label -ln -s /original/images/* dataset/image/ -ln -s /original/labels/* dataset/label/ -``` - -Or use `itk_check` in symlink mode: - -```bash -itk_check symlink /original/mixed --output /dataset/organized -``` - -### Q: Image and label have different sizes - -**A:** This indicates preprocessing issues. Ensure: - -1. Labels were created from the same source images -2. Both underwent the same preprocessing -3. Both have matching metadata - -To fix, resample both to the same space: - -```bash -itk_resample dataset /src /dst --spacing 1.0 1.0 1.0 -``` - -### Q: Conversion to MONAI/TorchIO format fails - -**A:** Verify: - -1. Input follows ITKIT format (image/ and label/ folders) -2. File names match between image/ and label/ -3. You have write permissions in output directory - -Debug by converting a single sample manually: - -```python -from itkit.io import sitk_toolkit -import SimpleITK as sitk - -image = sitk.ReadImage("dataset/image/case001.mha") -sitk.WriteImage(image, "test_output.nii.gz") -``` - -## Framework Integration Issues - -### Q: OpenMMLab imports fail - -**A:** Install the OneDL redistributions: - -```bash -pip install "itkit[advanced]" -``` - -### Q: MMEngine experiments won't start - -**A:** Check that required variables are set: - -```python -# In your experiment script -mm_workdir = "/path/to/workdir" -mm_testdir = "/path/to/testdir" -mm_configdir = "/path/to/configs" -``` - -And verify config directory structure: - -```plaintext -configs/ -└── 0.1.MyExperiment/ - ├── mgam.py - └── model.py -``` - -### Q: MONAI transforms not working with ITKIT datasets - -**A:** Ensure you're using MONAI-compatible dataset class: - -```python -from itkit.dataset import MONAI_PatchedDataset # Not ITKITBaseSegDataset - -dataset = MONAI_PatchedDataset( - root_dir="/data/patches", - 
transform=monai_transforms -) -``` - -### Q: PyTorch Lightning trainer fails - -**A:** Install MONAI (required for Lightning extensions): - -```bash -pip install --no-deps monai -``` - -## Performance Issues - -### Q: Processing is very slow - -**A:** Try these optimizations: - -1. **Use multiprocessing:** - - ```bash - itk_resample dataset /src /dst --spacing 1.0 1.0 1.0 --mp --workers 8 - ``` - -2. **Use faster file formats:** - - `.mha` is faster than `.nii.gz` (no compression overhead) - - Avoid `.mhd` with large `.raw` files on network storage - -3. **Reduce I/O:** - - Work on local disk, not network storage - - Use SSD instead of HDD - -4. **Batch operations:** - - Process entire directories instead of individual files - -### Q: Out of memory errors - -**A:** Solutions: - -1. **Extract patches instead of loading full volumes:** - - ```bash - itk_patch /data /patches --patch-size 96 96 96 - ``` - -2. **Reduce batch size in training** - -3. **Use gradient checkpointing** in model training - -4. **Process files sequentially** (don't use --mp) - -## Model Training Issues - -### Q: Model training crashes with CUDA out of memory - -**A:** Reduce memory usage: - -```python -# Smaller batch size -batch_size = 1 - -# Smaller patch size -patch_size = (64, 64, 64) # Instead of (128, 128, 128) - -# Mixed precision training -use_amp = True - -# Gradient checkpointing -model.enable_gradient_checkpointing() -``` - -### Q: Validation metrics are NaN or inf - -**A:** Check: - -1. **Label values:** Should be integers 0, 1, 2, ... (not one-hot) -2. **Normalization:** Images should be normalized appropriately -3. **Loss function:** Ensure it matches your task -4. **Learning rate:** May be too high - -### Q: Model converges but predictions are all background - -**A:** This indicates class imbalance. Solutions: - -1. **Use weighted loss:** - - ```python - loss = FocalLoss(alpha=0.25, gamma=2.0) - ``` - -2. 
**Filter patches during extraction:** - - ```bash - itk_patch /data /patches \ - --minimum-foreground-ratio 0.1 \ - --keep-empty-label-prob 0.1 - ``` - -3. **Adjust class weights** in loss function - -## File Format Issues - -### Q: Cannot read .dcm files - -**A:** DICOM files need special handling: - -```python -from itkit.io import dcm_toolkit - -# Read DICOM series (not individual files) -image = dcm_toolkit.read_dicom_series("/path/to/dicom/folder") -sitk.WriteImage(image, "output.mha") -``` - -### Q: .nii.gz files are huge - -**A:** NIfTI compression varies. To reduce size: - -```bash -# Convert to .mha (often smaller) -itk_convert format mha /data/nifti /data/mha - -# Or use higher compression -import gzip -# Compress with maximum compression level -``` - -### Q: File extensions don't match content - -**A:** Use ITKIT's conversion to standardize: - -```bash -itk_convert format mha /data/mixed /data/standardized -``` - -## Getting More Help - -If your issue isn't covered here: - -1. **Check documentation:** - - [Installation Guide](installation.md) - - [Quick Start](quickstart.md) - - [Preprocessing Guide](preprocessing.md) - -2. **Search existing issues:** - - [GitHub Issues](https://github.com/MGAMZ/ITKIT/issues) - -3. **Ask for help:** - - Open a new issue with detailed description - - Include error messages and minimal reproducible example - - Contact: [312065559@qq.com](mailto:312065559@qq.com) - -4. **Report bugs:** - - Follow the [Contributing Guide](contributing.md) - - Provide system information (OS, Python version, ITKIT version) diff --git a/docs/index.md b/docs/index.md index 39da04d..26819f7 100644 --- a/docs/index.md +++ b/docs/index.md @@ -12,15 +12,16 @@ Welcome to the ITKIT documentation! 
ITKIT is a user-friendly toolkit built on `S - **[Quick Start](quickstart.md)** - Get started with basic usage and examples - **[Dataset Structure](dataset_structure.md)** - Understand the required dataset format -### Core Features +### Processing Tools -- **[Preprocessing Tools](preprocessing.md)** - Comprehensive guide to ITK preprocessing commands - - Image checking and validation - - Resampling and orientation - - Patch extraction - - Data augmentation - - Label extraction - - Format conversion +- **[Overview](preprocessing.md)** - General notes and best practices +- **[itk_check](itk_check.md)** - Image checking and validation +- **[itk_resample](itk_resample.md)** - Resampling to target spacing/size +- **[itk_orient](itk_orient.md)** - Image re-orientation +- **[itk_patch](itk_patch.md)** - Patch extraction +- **[itk_aug](itk_aug.md)** - Data augmentation +- **[itk_extract](itk_extract.md)** - Label extraction +- **[itk_convert](itk_convert.md)** - Format conversion ### Advanced Topics @@ -75,7 +76,7 @@ Welcome to the ITKIT documentation! ITKIT is a user-friendly toolkit built on `S **Ready to preprocess data?** -- See [Preprocessing Tools](preprocessing.md) for command documentation +- See [Processing Tools](preprocessing.md) for command documentation and overview. - Use the GUI: `pip install "itkit[gui]"` then `itkit-app` **Building models?** diff --git a/docs/itk_aug.md b/docs/itk_aug.md new file mode 100644 index 0000000..b19eff1 --- /dev/null +++ b/docs/itk_aug.md @@ -0,0 +1,37 @@ +# itk_aug + +Perform data augmentation on ITK image files. 
+ +## Usage + +```bash +itk_aug <img_folder> <lbl_folder> [options] +``` + +## Parameters + +- `img_folder`: Folder with source image `.mha` files +- `lbl_folder`: Folder with source label `.mha` files +- `-oimg, --out-img-folder OUT_IMG`: Optional folder to save augmented images +- `-olbl, --out-lbl-folder OUT_LBL`: Optional folder to save augmented labels +- `-n, --num N`: Number of augmented samples to generate per source sample +- `--mp`: Enable multiprocessing +- `--random-rot Z Y X`: Max random rotation degrees for Z Y X axes (three ints, order Z, Y, X) + +## Notes + +- Only files present in both `img_folder` and `lbl_folder` are processed +- Augmented files are written only if corresponding output folders are provided +- Currently supports: **RandomRotate3D** + +## Examples + +```bash +# Generate 5 augmented samples per input with random rotation +itk_aug /data/images /data/labels \ + -oimg /data/aug_images \ + -olbl /data/aug_labels \ + -n 5 \ + --random-rot 15 15 15 \ + --mp +``` diff --git a/docs/itk_check.md b/docs/itk_check.md new file mode 100644 index 0000000..b0c0b8e --- /dev/null +++ b/docs/itk_check.md @@ -0,0 +1,43 @@ +# itk_check + +Check ITK image-label sample pairs to verify they meet size and spacing requirements.
+ +## Usage + +```bash +itk_check [options] +``` + +## Modes + +- **check**: Validate image/label pairs against size/spacing rules and report non-conforming samples (no file changes) +- **delete**: Remove image and label files for samples that fail validation +- **copy**: Copy valid image/label pairs to the specified output directory +- **symlink**: Create symbolic links for valid image/label pairs in the output directory + +## Parameters + +- `sample_folder`: Root folder containing `image/` and `label/` subfolders +- `-o, --output OUT`: Output directory (required for `copy` and `symlink` modes) +- `--min-size Z Y X`: Minimum size per dimension (three integers; -1 = ignore) +- `--max-size Z Y X`: Maximum size per dimension (three integers; -1 = ignore) +- `--min-spacing Z Y X`: Minimum spacing per dimension (three floats; -1 = ignore) +- `--max-spacing Z Y X`: Maximum spacing per dimension (three floats; -1 = ignore) +- `--same-spacing A B`: Two dimensions (X|Y|Z) that must have equal spacing +- `--same-size A B`: Two dimensions (X|Y|Z) that must have equal size +- `--mp`: Enable multiprocessing + +## Examples + +```bash +# Check dataset without modifications +itk_check check /data/dataset --min-size 32 32 32 + +# Copy valid samples to new location +itk_check copy /data/dataset --output /data/valid_dataset \ + --min-spacing 0.5 0.5 0.5 \ + --max-spacing 2.0 2.0 2.0 + +# Check that X and Y spacing are equal +itk_check check /data/dataset --same-spacing X Y +``` diff --git a/docs/itk_convert.md b/docs/itk_convert.md new file mode 100644 index 0000000..579cb07 --- /dev/null +++ b/docs/itk_convert.md @@ -0,0 +1,116 @@ +# itk_convert + +Convert ITKIT datasets between different formats and frameworks. + +## Subcommands + +- **format**: Convert medical image file formats +- **monai**: Convert to MONAI Decathlon format +- **torchio**: Convert to TorchIO format + +## Format Conversion + +Convert medical image files between different formats while preserving metadata. 
+ +### Usage + +```bash +itk_convert format [options] +``` + +### Supported Formats + +- `mha`: MetaImage (single file) +- `mhd`: MetaImage Header (with separate .raw file) +- `nii.gz`: Compressed NIfTI +- `nii`: NIfTI (uncompressed) +- `nrrd`: Nearly Raw Raster Data + +### Parameters + +- `target_format`: Target file format +- `source_folder`: Path to ITKIT dataset +- `dest_folder`: Path to output dataset +- `--mp`: Enable multiprocessing +- `--workers N`: Number of worker processes + +### Examples + +```bash +# Convert MHA to compressed NIfTI +itk_convert format nii.gz /data/mha_dataset /data/nifti_dataset + +# Convert to NRRD with multiprocessing +itk_convert format nrrd /data/input /data/output --mp --workers 8 + +# Convert MHD to MHA +itk_convert format mha /data/mhd_dataset /data/mha_dataset +``` + +## MONAI Conversion + +Convert ITKIT dataset to MONAI Decathlon format. + +### Usage + +```bash +itk_convert monai [options] +``` + +### Parameters + +- `source_folder`: Path to ITKIT dataset +- `dest_folder`: Path to output dataset in MONAI format +- `--name`: Dataset name for the manifest file (default: `ITKITDataset`) +- `--description`: Dataset description for the manifest file +- `--modality`: Primary imaging modality (default: `CT`) +- `--split`: Which split to treat the data as: `train` | `val` | `test` | `all` (default: `train`) +- `--labels`: Label names in order (e.g., `background liver tumor`). Index 0 is background +- `--mp`: Enable multiprocessing +- `--workers N`: Number of worker processes + +### Output + +- Converted files in `.nii.gz` format +- `dataset.json` manifest file +- `meta.json` ITKIT-style metadata + +### Examples + +```bash +itk_convert monai /data/itkit_dataset /data/monai_dataset \ + --name MyDataset \ + --modality CT \ + --labels background liver tumor \ + --mp +``` + +## TorchIO Conversion + +Convert ITKIT dataset to TorchIO format. 
+ +### Usage + +```bash +itk_convert torchio [options] +``` + +### Parameters + +- `source_folder`: Path to ITKIT dataset +- `dest_folder`: Path to output dataset in TorchIO format +- `--no-csv`: Skip creating `subjects.csv` manifest file +- `--mp`: Enable multiprocessing +- `--workers N`: Number of worker processes + +### Output + +- Converted files in `.nii.gz` format +- `subjects.csv` manifest file (unless `--no-csv` is specified) +- `meta.json` ITKIT-style metadata + +### Examples + +```bash +itk_convert torchio /data/itkit_dataset /data/torchio_dataset --mp +``` diff --git a/docs/itk_extract.md b/docs/itk_extract.md new file mode 100644 index 0000000..5fb1eee --- /dev/null +++ b/docs/itk_extract.md @@ -0,0 +1,36 @@ +# itk_extract + +Extract specified classes from ITK semantic segmentation maps. + +## Usage + +```bash +itk_extract [options] +``` + +## Parameters + +- `source_folder`: Folder containing source images +- `dest_folder`: Destination folder to save extracted label files (created if missing) +- `mappings`: One or more label mappings in `"source:target"` format (e.g., `"1:0"` `"5:1"`) +- `-r, --recursive`: Recursively process subdirectories and preserve relative paths +- `--mp`: Enable multiprocessing +- `--workers N`: Number of worker processes for multiprocessing + +## Output + +- Remapped label files written to `dest_folder` (extensions normalized to `.mha`) +- `extract_meta.json`: Per-sample metadata +- `extract_configs.json`: Configuration used + +## Examples + +```bash +# Extract liver (label 1) and tumor (label 5), renumber to 0 and 1 +itk_extract /data/labels /data/extracted "1:0" "5:1" --mp + +# Extract specific organs from multi-organ segmentation +itk_extract /data/multi_organ /data/liver_kidney \ + "1:1" "2:2" \ + --recursive --mp +``` diff --git a/docs/itk_orient.md b/docs/itk_orient.md new file mode 100644 index 0000000..dabb9c4 --- /dev/null +++ b/docs/itk_orient.md @@ -0,0 +1,38 @@ +# itk_orient + +Orient ITK image-label sample pairs to 
a specified orientation. + +## Usage + +```bash +itk_orient <src_dir> <dst_dir> <orient> [options] +``` + +## Parameters + +- `src_dir`: Source directory containing `.mha` files (recursive scan) +- `dst_dir`: Destination directory (preserves relative directory structure; must differ from `src_dir`) +- `orient`: Target orientation string for SimpleITK.DICOMOrient (e.g., `LPI`, `RAS`) +- `--mp`: Use multiprocessing to convert files in parallel + +## Common Orientations + +- **LPI**: Left-Posterior-Inferior (common in medical imaging) +- **RAS**: Right-Anterior-Superior (neuroimaging standard) +- **LPS**: Left-Posterior-Superior + +## Notes + +- Skips files already present in `dst_dir` +- Preserves folder layout +- Writes converted `.mha` files to `dst_dir` + +## Examples + +```bash +# Orient to LPI +itk_orient /data/source /data/oriented LPI --mp + +# Orient to RAS (neuroimaging standard) +itk_orient /data/source /data/ras_oriented RAS +``` diff --git a/docs/itk_patch.md b/docs/itk_patch.md new file mode 100644 index 0000000..9dba34c --- /dev/null +++ b/docs/itk_patch.md @@ -0,0 +1,43 @@ +# itk_patch + +Extract patches from ITK image-label sample pairs for training.
+ +## Usage + +```bash +itk_patch <src_folder> <dst_folder> --patch-size SIZE --patch-stride STRIDE [options] +``` + +## Parameters + +- `src_folder`: Source root containing `image/` and `label/` subfolders +- `dst_folder`: Destination root to save patches +- `--patch-size`: Patch size as single int or three ints (Z Y X) +- `--patch-stride`: Patch stride as single int or three ints (Z Y X) +- `--minimum-foreground-ratio`: Minimum label foreground ratio to keep a patch (float, default 0.0) +- `--keep-empty-label-prob`: Probability to keep patches with only background (0.0-1.0) +- `--still-save-when-no-label`: If set and label missing, save patches regardless +- `--mp`: Use multiprocessing to process cases in parallel + +## Output + +- Patches saved under `dst_folder/<case_name>/` with image and label patch files +- `crop_meta.json`: Summary of extraction and available annotations + +## Examples + +```bash +# Extract 96x96x96 patches with 48-voxel stride +itk_patch /data/dataset /data/patches \ + --patch-size 96 96 96 \ + --patch-stride 48 48 48 \ + --mp + +# Extract patches with foreground filtering +itk_patch /data/dataset /data/patches \ + --patch-size 128 128 128 \ + --patch-stride 64 64 64 \ + --minimum-foreground-ratio 0.1 \ + --keep-empty-label-prob 0.2 \ + --mp +``` diff --git a/docs/itk_resample.md b/docs/itk_resample.md new file mode 100644 index 0000000..af2b068 --- /dev/null +++ b/docs/itk_resample.md @@ -0,0 +1,48 @@ +# itk_resample + +Resample ITK image-label sample pairs to a target spacing or size.
+ +## Usage + +```bash +itk_resample [options] +``` + +## Field Types + +- **image**: For image data (uses linear interpolation, preserves data type) +- **label**: For label/segmentation data (uses nearest neighbor interpolation) +- **dataset**: Processes both `image/` and `label/` subfolders with appropriate settings + +## Parameters + +- `source_folder`: Folder containing source image files +- `dest_folder`: Destination folder for resampled files (created if missing) +- `--spacing Z Y X`: Target spacing per dimension (ZYX order). Use -1 to ignore a dimension +- `--size Z Y X`: Target size per dimension (ZYX order). Use -1 to ignore a dimension +- `--target-folder PATH`: Folder of reference images (mutually exclusive with `--spacing/--size`) +- `-r, --recursive`: Recursively process subdirectories, preserving layout +- `--mp`: Enable multiprocessing +- `--workers N`: Number of worker processes for multiprocessing + +## Output + +- Resampled files in `dest_folder` +- `resample_configs.json`: Configuration used for resampling +- `meta.json`: Metadata for the resampled dataset + +## Examples + +```bash +# Resample entire dataset to 1.0mm isotropic spacing +itk_resample dataset /data/source /data/resampled \ + --spacing 1.0 1.0 1.0 --mp + +# Resample only in-plane, keep Z spacing +itk_resample image /data/source /data/resampled \ + --spacing -1 0.5 0.5 + +# Resample to match reference dataset +itk_resample dataset /data/source /data/resampled \ + --target-folder /data/reference --mp +``` diff --git a/docs/preprocessing.md b/docs/preprocessing.md index d37843d..6599ca4 100644 --- a/docs/preprocessing.md +++ b/docs/preprocessing.md @@ -5,396 +5,14 @@ ITKIT provides comprehensive command-line tools for medical image preprocessing. 
## General Notes - **Coordinate Order**: All dimension arguments use **Z, Y, X** order (Z→0, Y→1, X→2) -- **Help**: Use `--help` with any command to see detailed usage information -- **Multiprocessing**: Most commands support `--mp` flag for parallel processing +- **Help**: Use `--help` with any command to see detailed usage information +- **Multiprocessing**: Most commands support `--mp` flag for parallel processing - **Progress**: Commands display progress bars using tqdm -## itk_check - -Check ITK image-label sample pairs to verify they meet size and spacing requirements. - -### Usage - -```bash -itk_check [options] -``` - -### Modes - -- **check**: Validate image/label pairs against size/spacing rules and report non-conforming samples (no file changes) -- **delete**: Remove image and label files for samples that fail validation -- **copy**: Copy valid image/label pairs to the specified output directory -- **symlink**: Create symbolic links for valid image/label pairs in the output directory - -### Parameters - -- `sample_folder`: Root folder containing `image/` and `label/` subfolders -- `-o, --output OUT`: Output directory (required for `copy` and `symlink` modes) -- `--min-size Z Y X`: Minimum size per dimension (three integers; -1 = ignore) -- `--max-size Z Y X`: Maximum size per dimension (three integers; -1 = ignore) -- `--min-spacing Z Y X`: Minimum spacing per dimension (three floats; -1 = ignore) -- `--max-spacing Z Y X`: Maximum spacing per dimension (three floats; -1 = ignore) -- `--same-spacing A B`: Two dimensions (X|Y|Z) that must have equal spacing -- `--same-size A B`: Two dimensions (X|Y|Z) that must have equal size -- `--mp`: Enable multiprocessing - -### Examples - -```bash -# Check dataset without modifications -itk_check check /data/dataset --min-size 32 32 32 - -# Copy valid samples to new location -itk_check copy /data/dataset --output /data/valid_dataset \ - --min-spacing 0.5 0.5 0.5 \ - --max-spacing 2.0 2.0 2.0 - -# Check that X and Y
spacing are equal -itk_check check /data/dataset --same-spacing X Y -``` - ---- - -## itk_resample - -Resample ITK image-label sample pairs to a target spacing or size. - -### Usage - -```bash -itk_resample [options] -``` - -### Field Types - -- **image**: For image data (uses linear interpolation, preserves data type) -- **label**: For label/segmentation data (uses nearest neighbor interpolation) -- **dataset**: Processes both `image/` and `label/` subfolders with appropriate settings - -### Parameters - -- `source_folder`: Folder containing source image files -- `dest_folder`: Destination folder for resampled files (created if missing) -- `--spacing Z Y X`: Target spacing per dimension (ZYX order). Use -1 to ignore a dimension -- `--size Z Y X`: Target size per dimension (ZYX order). Use -1 to ignore a dimension -- `--target-folder PATH`: Folder of reference images (mutually exclusive with `--spacing/--size`) -- `-r, --recursive`: Recursively process subdirectories, preserving layout -- `--mp`: Enable multiprocessing -- `--workers N`: Number of worker processes for multiprocessing - -### Output - -- Resampled files in `dest_folder` -- `resample_configs.json`: Configuration used for resampling -- `meta.json`: Metadata for the resampled dataset - -### Examples - -```bash -# Resample entire dataset to 1.0mm isotropic spacing -itk_resample dataset /data/source /data/resampled \ - --spacing 1.0 1.0 1.0 --mp - -# Resample only in-plane, keep Z spacing -itk_resample image /data/source /data/resampled \ - --spacing -1 0.5 0.5 - -# Resample to match reference dataset -itk_resample dataset /data/source /data/resampled \ - --target-folder /data/reference --mp -``` - ---- - -## itk_orient - -Orient ITK image-label sample pairs to a specified orientation. 
- -### Usage - -```bash -itk_orient [options] -``` - -### Parameters - -- `src_dir`: Source directory containing `.mha` files (recursive scan) -- `dst_dir`: Destination directory (preserves relative directory structure; must differ from `src_dir`) -- `orient`: Target orientation string for SimpleITK.DICOMOrient (e.g., `LPI`, `RAS`) -- `--mp`: Use multiprocessing to convert files in parallel - -### Common Orientations - -- **LPI**: Left-Posterior-Inferior (common in medical imaging) -- **RAS**: Right-Anterior-Superior (neuroimaging standard) -- **LPS**: Left-Posterior-Superior - -### Notes - -- Skips files already present in `dst_dir` -- Preserves folder layout -- Writes converted `.mha` files to `dst_dir` - -### Examples - -```bash -# Orient to LPI -itk_orient /data/source /data/oriented LPI --mp - -# Orient to RAS (neuroimaging standard) -itk_orient /data/source /data/ras_oriented RAS -``` - ---- - -## itk_patch - -Extract patches from ITK image-label sample pairs for training. - -### Usage - -```bash -itk_patch --patch-size SIZE --patch-stride STRIDE [options] -``` - -### Parameters - -- `src_folder`: Source root containing `image/` and `label/` subfolders -- `dst_folder`: Destination root to save patches -- `--patch-size`: Patch size as single int or three ints (Z Y X) -- `--patch-stride`: Patch stride as single int or three ints (Z Y X) -- `--minimum-foreground-ratio`: Minimum label foreground ratio to keep a patch (float, default 0.0) -- `--keep-empty-label-prob`: Probability to keep patches with only background (0.0-1.0) -- `--still-save-when-no-label`: If set and label missing, save patches regardless -- `--mp`: Use multiprocessing to process cases in parallel - -### Output - -- Patches saved under `dst_folder//` with image and label patch files -- `crop_meta.json`: Summary of extraction and available annotations - -### Examples - -```bash -# Extract 96x96x96 patches with 48-voxel stride -itk_patch /data/dataset /data/patches \ - --patch-size 96 96 96 \ - 
--patch-stride 48 48 48 \ - --mp - -# Extract patches with foreground filtering -itk_patch /data/dataset /data/patches \ - --patch-size 128 128 128 \ - --patch-stride 64 64 64 \ - --minimum-foreground-ratio 0.1 \ - --keep-empty-label-prob 0.2 \ - --mp -``` - ---- - -## itk_aug - -Perform data augmentation on ITK image files. - -### Usage - -```bash -itk_aug [options] -``` - -### Parameters - -- `img_folder`: Folder with source image `.mha` files -- `lbl_folder`: Folder with source label `.mha` files -- `-oimg, --out-img-folder OUT_IMG`: Optional folder to save augmented images -- `-olbl, --out-lbl-folder OUT_LBL`: Optional folder to save augmented labels -- `-n, --num N`: Number of augmented samples to generate per source sample -- `--mp`: Enable multiprocessing -- `--random-rot Z Y X`: Max random rotation degrees for Z Y X axes (three ints, order Z, Y, X) - -### Notes - -- Only files present in both `img_folder` and `lbl_folder` are processed -- Augmented files are written only if corresponding output folders are provided -- Currently supports: **RandomRotate3D** - -### Examples - -```bash -# Generate 5 augmented samples per input with random rotation -itk_aug /data/images /data/labels \ - -oimg /data/aug_images \ - -olbl /data/aug_labels \ - -n 5 \ - --random-rot 15 15 15 \ - --mp -``` - ---- - -## itk_extract - -Extract specified classes from ITK semantic segmentation maps. 
- -### Usage - -```bash -itk_extract [options] -``` - -### Parameters - -- `source_folder`: Folder containing source images -- `dest_folder`: Destination folder to save extracted label files (created if missing) -- `mappings`: One or more label mappings in `"source:target"` format (e.g., `"1:0"` `"5:1"`) -- `-r, --recursive`: Recursively process subdirectories and preserve relative paths -- `--mp`: Enable multiprocessing -- `--workers N`: Number of worker processes for multiprocessing - -### Output - -- Remapped label files written to `dest_folder` (extensions normalized to `.mha`) -- `extract_meta.json`: Per-sample metadata -- `extract_configs.json`: Configuration used - -### Examples - -```bash -# Extract liver (label 1) and tumor (label 5), renumber to 0 and 1 -itk_extract /data/labels /data/extracted "1:0" "5:1" --mp - -# Extract specific organs from multi-organ segmentation -itk_extract /data/multi_organ /data/liver_kidney \ - "1:1" "2:2" \ - --recursive --mp -``` - ---- - -## itk_convert - -Convert ITKIT datasets between different formats and frameworks. - -### Subcommands - -- **format**: Convert medical image file formats -- **monai**: Convert to MONAI Decathlon format -- **torchio**: Convert to TorchIO format - -### Format Conversion - -Convert medical image files between different formats while preserving metadata. 
- -#### Usage - -```bash -itk_convert format [options] -``` - -#### Supported Formats - -- `mha`: MetaImage (single file) -- `mhd`: MetaImage Header (with separate .raw file) -- `nii.gz`: Compressed NIfTI -- `nii`: NIfTI (uncompressed) -- `nrrd`: Nearly Raw Raster Data - -#### Parameters - -- `target_format`: Target file format -- `source_folder`: Path to ITKIT dataset -- `dest_folder`: Path to output dataset -- `--mp`: Enable multiprocessing -- `--workers N`: Number of worker processes - -#### Examples - -```bash -# Convert MHA to compressed NIfTI -itk_convert format nii.gz /data/mha_dataset /data/nifti_dataset - -# Convert to NRRD with multiprocessing -itk_convert format nrrd /data/input /data/output --mp --workers 8 - -# Convert MHD to MHA -itk_convert format mha /data/mhd_dataset /data/mha_dataset -``` - -### MONAI Conversion - -Convert ITKIT dataset to MONAI Decathlon format. - -#### Usage - -```bash -itk_convert monai [options] -``` - -#### Parameters - -- `source_folder`: Path to ITKIT dataset -- `dest_folder`: Path to output dataset in MONAI format -- `--name`: Dataset name for the manifest file (default: `ITKITDataset`) -- `--description`: Dataset description for the manifest file -- `--modality`: Primary imaging modality (default: `CT`) -- `--split`: Which split to treat the data as: `train` | `val` | `test` | `all` (default: `train`) -- `--labels`: Label names in order (e.g., `background liver tumor`). Index 0 is background -- `--mp`: Enable multiprocessing -- `--workers N`: Number of worker processes - -#### Output - -- Converted files in `.nii.gz` format -- `dataset.json` manifest file -- `meta.json` ITKIT-style metadata - -#### Examples - -```bash -itk_convert monai /data/itkit_dataset /data/monai_dataset \ - --name MyDataset \ - --modality CT \ - --labels background liver tumor \ - --mp -``` - -### TorchIO Conversion - -Convert ITKIT dataset to TorchIO format. 
- -#### Usage - -```bash -itk_convert torchio [options] -``` - -#### Parameters - -- `source_folder`: Path to ITKIT dataset -- `dest_folder`: Path to output dataset in TorchIO format -- `--no-csv`: Skip creating `subjects.csv` manifest file -- `--mp`: Enable multiprocessing -- `--workers N`: Number of worker processes - -#### Output - -- Converted files in `.nii.gz` format -- `subjects.csv` manifest file (unless `--no-csv` is specified) -- `meta.json` ITKIT-style metadata - -#### Examples - -```bash -itk_convert torchio /data/itkit_dataset /data/torchio_dataset --mp -``` - ---- - ## Best Practices -1. **Use multiprocessing**: Add `--mp` flag for large datasets to speed up processing -2. **Check before processing**: Always run `itk_check` before other operations +1. **Use multiprocessing**: Add `--mp` flag for large datasets to speed up processing +2. **Check before processing**: Always run `itk_check` before other operations 3. **Preserve originals**: Work on copies of your data, never modify originals 4. **Pipeline operations**: Chain commands to create preprocessing pipelines 5. **Validate outputs**: Check a few samples manually after each processing step diff --git a/mkdocs.yml b/mkdocs.yml index 5253252..e0c38a0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -17,15 +17,21 @@ nav: - Installation: installation.md - Quick Start: quickstart.md - Dataset Structure: dataset_structure.md - - Core Features: - - Preprocessing Tools: preprocessing.md + - Processing Tools: + - Overview: preprocessing.md + - itk_check: itk_check.md + - itk_resample: itk_resample.md + - itk_orient: itk_orient.md + - itk_patch: itk_patch.md + - itk_aug: itk_aug.md + - itk_extract: itk_extract.md + - itk_convert: itk_convert.md - Advanced Topics: - Framework Integration: framework_integration.md - Neural Network Models: models.md - Supported Datasets: datasets.md - Community: - Contributing: contributing.md - - FAQ & Troubleshooting: faq.md plugins: - search