From 2d969ba50f7ef3ce90d1082334b64fe46fca5d4b Mon Sep 17 00:00:00 2001 From: mbsantiago Date: Mon, 22 Jun 2026 20:57:45 -0600 Subject: [PATCH] fix: skip unreadable audio files in batch processing --- src/batdetect2/inference/clips.py | 12 ++++++++- tests/test_cli/test_process.py | 41 +++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/batdetect2/inference/clips.py b/src/batdetect2/inference/clips.py index 0f486d2c..9a1177b9 100644 --- a/src/batdetect2/inference/clips.py +++ b/src/batdetect2/inference/clips.py @@ -2,7 +2,9 @@ from uuid import uuid5 import numpy as np +from loguru import logger from soundevent import data +from soundfile import LibsndfileError def get_clips_from_files( @@ -16,7 +18,15 @@ def get_clips_from_files( clips: List[data.Clip] = [] for path in paths: - recording = data.Recording.from_file(path, compute_hash=compute_hash) + try: + recording = data.Recording.from_file( + path, + compute_hash=compute_hash, + ) + except LibsndfileError as e: + logger.warning(f"Skipping unreadable audio file {path}: {e}") + continue + clips.extend( get_recording_clips( recording, diff --git a/tests/test_cli/test_process.py b/tests/test_cli/test_process.py index 99bb88ee..612163ff 100644 --- a/tests/test_cli/test_process.py +++ b/tests/test_cli/test_process.py @@ -88,3 +88,44 @@ def test_cli_process_directory_merges_clip_outputs_per_recording( ) assert actual_annotations == expected_annotations + + +def test_cli_process_directory_skips_corrupted_files( + tmp_path: Path, + contrib_dir: Path, +) -> None: + recording_path = contrib_dir / "jeff37" / "0166_20240531_223911.wav" + + source_folder = tmp_path / "audio" + source_folder.mkdir() + shutil.copy2( + recording_path, + source_folder / "example_audio.wav", + ) + + corrupted_file = source_folder / "corrupted.wav" + corrupted_file.write_text("corrupted") + + destination_folder = tmp_path / "results" + destination_folder.mkdir() + + result = CliRunner().invoke( + cli, + args=[ + "process", + "directory", + str(source_folder), + str(destination_folder), + "--detection-threshold", + "0.3", + ], + ) + + assert result.exit_code == 0 + assert destination_folder.exists() + + output_json = destination_folder / "example_audio.wav.json" + assert output_json.exists() + + corrupted_file_json = destination_folder / "corrupted.wav.json" + assert not corrupted_file_json.exists()