Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
a2b9e6f
add populated_duration property
Gautzilla Jan 26, 2026
25dffa4
add populated_ratio property
Gautzilla Jan 26, 2026
95c4216
add sum function start Timedelta(0)
Gautzilla Jan 26, 2026
8f83452
add populated duration and ratio tests
Gautzilla Jan 26, 2026
2f08f3a
add SpectroData.populated_duration test
Gautzilla Jan 26, 2026
25e4495
add SpectroData.populated_duration test
Gautzilla Jan 26, 2026
e1e62bc
add SpectroData.populated_duration test case with no ad nor file
Gautzilla Jan 26, 2026
1d7ce2b
Move Dummy* core API test classes to new test helper module
Gautzilla Jan 26, 2026
c881f6c
Merge branch 'main' into data-fill-rate
Gautzilla Jan 27, 2026
f148c78
add BaseDataset.remove_empty_data() method
Gautzilla Jan 27, 2026
c4870ce
add BaseDataset.remove_empty_data tests
Gautzilla Jan 27, 2026
52f0e30
add BaseDataset.remove_empty_data() threshold error tests
Gautzilla Jan 27, 2026
3fe889f
add remove_empty_data() examples in doc
Gautzilla Jan 27, 2026
54f7bbc
Merge branch 'main' into data-fill-rate
Gautzilla Jan 27, 2026
4f1236d
Merge branch 'main' into data-fill-rate
Gautzilla Jan 28, 2026
7155c50
Merge branch 'main' into data-fill-rate
Gautzilla Feb 4, 2026
b496447
make BaseDataset.remove_empty_data() return the removed data
Gautzilla Feb 23, 2026
caf4bfd
add test case for returned removed data
Gautzilla Feb 23, 2026
e4c478b
Merge branch 'main' into data-fill-rate
Gautzilla Feb 23, 2026
8de65fc
add removed_data parsing in core multiple spectrograms doc
Gautzilla Feb 23, 2026
2a42394
add removed_data parsing in public multiple spectrograms doc
Gautzilla Feb 23, 2026
dce706c
add removed_data parsing in core multiple reshapes doc
Gautzilla Feb 23, 2026
197613e
add removed_data parsing in public multiple reshapes doc
Gautzilla Feb 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/coreapi_usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ field:
.. code-block:: python

# Filtering the ads data to remove data without audio (e.g. between files)
ads.data = [ad for ad in ads.data if not ad.is_empty]
ads.remove_empty_data(threshold=0.)

# Resampling/Exporting only the first audio data
ad = ads.data[0]
Expand Down
14 changes: 11 additions & 3 deletions docs/source/example_multiple_spectrograms_core.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,19 @@
"source": [
"print(f\"{' BEFORE FILTERING ':#^60}\")\n",
"print(\n",
" f\"{'Nb of Empty data:':<30}{str(len([ad for ad in audio_dataset.data if ad.is_empty])):>30}\\n\"\n",
" f\"{'Nb of Empty data:':<30}{sum(1 for ad in audio_dataset.data if ad.is_empty):>30}\\n\"\n",
")\n",
"\n",
"# Remove the empty data by using the default AudioDataset constructor:\n",
"audio_dataset = AudioDataset([ad for ad in audio_dataset.data if not ad.is_empty])"
"# Remove the empty data:\n",
"removed_data = audio_dataset.remove_empty_data(threshold=0.0)\n",
"\n",
"# We can take a look at which data has been removed:\n",
"print(f\"{' REMOVED DATA ':#^60}\")\n",
"print(f\"{'Begin':<20}{'Duration':^20}{'Fill rate':>20}\")\n",
"for data in removed_data:\n",
" print(\n",
" f\"{data.begin.strftime('%H:%M:%S'):<20}{str(data.duration):^20}{str(data.populated_ratio) + ' %':>20}\"\n",
" )"
]
},
{
Expand Down
58 changes: 12 additions & 46 deletions docs/source/example_multiple_spectrograms_public.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "dc7ebca70b3b5da",
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-13T11:25:15.629114Z",
"start_time": "2025-11-13T11:25:15.535616Z"
},
"tags": [
"remove-cell"
]
Expand Down Expand Up @@ -50,50 +46,12 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "bb002105fc9632e8",
"metadata": {
"ExecuteTime": {
"end_time": "2025-11-13T11:25:21.374824Z",
"start_time": "2025-11-13T11:25:18.067636Z"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\t2025-11-13 12:25:20,650\n",
"Building the dataset...\n",
"\n",
"\t2025-11-13 12:25:20,652\n",
"Analyzing original audio files...\n",
"\n",
"\t2025-11-13 12:25:20,680\n",
"Organizing dataset folder...\n",
"\n"
]
},
{
"ename": "PermissionError",
"evalue": "[WinError 32] Le processus ne peut pas accéder au fichier car ce fichier est utilisé par un autre processus: '_static\\\\sample_audio\\\\sample_220925_223530.wav' -> '_static\\\\sample_audio\\\\data\\\\audio\\\\original\\\\sample_220925_223530.wav'",
"output_type": "error",
"traceback": [
"\u001B[31m---------------------------------------------------------------------------\u001B[39m",
"\u001B[31mPermissionError\u001B[39m Traceback (most recent call last)",
"\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[2]\u001B[39m\u001B[32m, line 14\u001B[39m\n\u001B[32m 6\u001B[39m \u001B[38;5;28;01mfrom\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34;01mosekit\u001B[39;00m\u001B[34;01m.\u001B[39;00m\u001B[34;01mcore_api\u001B[39;00m\u001B[34;01m.\u001B[39;00m\u001B[34;01minstrument\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;28;01mimport\u001B[39;00m Instrument\n\u001B[32m 8\u001B[39m dataset = Dataset(\n\u001B[32m 9\u001B[39m folder=audio_folder,\n\u001B[32m 10\u001B[39m strptime_format=\u001B[33m\"\u001B[39m\u001B[33m%\u001B[39m\u001B[33my\u001B[39m\u001B[33m%\u001B[39m\u001B[33mm\u001B[39m\u001B[38;5;132;01m%d\u001B[39;00m\u001B[33m_\u001B[39m\u001B[33m%\u001B[39m\u001B[33mH\u001B[39m\u001B[33m%\u001B[39m\u001B[33mM\u001B[39m\u001B[33m%\u001B[39m\u001B[33mS\u001B[39m\u001B[33m\"\u001B[39m,\n\u001B[32m 11\u001B[39m instrument=Instrument(end_to_end_db=\u001B[32m150.0\u001B[39m),\n\u001B[32m 12\u001B[39m )\n\u001B[32m---> \u001B[39m\u001B[32m14\u001B[39m \u001B[43mdataset\u001B[49m\u001B[43m.\u001B[49m\u001B[43mbuild\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~\\Documents\\GitHub\\OSEkit\\src\\osekit\\public_api\\dataset.py:156\u001B[39m, in \u001B[36mDataset.build\u001B[39m\u001B[34m(self)\u001B[39m\n\u001B[32m 144\u001B[39m \u001B[38;5;28mself\u001B[39m.logger.info(\u001B[33m\"\u001B[39m\u001B[33mOrganizing dataset folder...\u001B[39m\u001B[33m\"\u001B[39m)\n\u001B[32m 145\u001B[39m move_tree(\n\u001B[32m 146\u001B[39m source=\u001B[38;5;28mself\u001B[39m.folder,\n\u001B[32m 147\u001B[39m destination=\u001B[38;5;28mself\u001B[39m.folder / \u001B[33m\"\u001B[39m\u001B[33mother\u001B[39m\u001B[33m\"\u001B[39m,\n\u001B[32m (...)\u001B[39m\u001B[32m 154\u001B[39m | {\u001B[38;5;28mself\u001B[39m.folder / \u001B[33m\"\u001B[39m\u001B[33mlog\u001B[39m\u001B[33m\"\u001B[39m},\n\u001B[32m 155\u001B[39m )\n\u001B[32m--> \u001B[39m\u001B[32m156\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_sort_dataset\u001B[49m\u001B[43m(\u001B[49m\u001B[43mads\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 157\u001B[39m ads.write_json(ads.folder)\n\u001B[32m 158\u001B[39m \u001B[38;5;28mself\u001B[39m.write_json()\n",
"\u001B[36mFile \u001B[39m\u001B[32m~\\Documents\\GitHub\\OSEkit\\src\\osekit\\public_api\\dataset.py:513\u001B[39m, in \u001B[36mDataset._sort_dataset\u001B[39m\u001B[34m(self, dataset)\u001B[39m\n\u001B[32m 511\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34m_sort_dataset\u001B[39m(\u001B[38;5;28mself\u001B[39m, dataset: \u001B[38;5;28mtype\u001B[39m[DatasetChild]) -> \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[32m 512\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mtype\u001B[39m(dataset) \u001B[38;5;129;01mis\u001B[39;00m AudioDataset:\n\u001B[32m--> \u001B[39m\u001B[32m513\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_sort_audio_dataset\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdataset\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 514\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m\n\u001B[32m 515\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mtype\u001B[39m(dataset) \u001B[38;5;129;01mis\u001B[39;00m SpectroDataset | LTASDataset:\n",
"\u001B[36mFile \u001B[39m\u001B[32m~\\Documents\\GitHub\\OSEkit\\src\\osekit\\public_api\\dataset.py:520\u001B[39m, in \u001B[36mDataset._sort_audio_dataset\u001B[39m\u001B[34m(self, dataset)\u001B[39m\n\u001B[32m 519\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34m_sort_audio_dataset\u001B[39m(\u001B[38;5;28mself\u001B[39m, dataset: AudioDataset) -> \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[32m--> \u001B[39m\u001B[32m520\u001B[39m \u001B[43mdataset\u001B[49m\u001B[43m.\u001B[49m\u001B[43mmove_files\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43m_get_audio_dataset_subpath\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdataset\u001B[49m\u001B[43m)\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~\\Documents\\GitHub\\OSEkit\\src\\osekit\\core_api\\base_dataset.py:152\u001B[39m, in \u001B[36mBaseDataset.move_files\u001B[39m\u001B[34m(self, folder)\u001B[39m\n\u001B[32m 143\u001B[39m \u001B[38;5;250m\u001B[39m\u001B[33;03m\"\"\"Move the dataset files to the destination folder.\u001B[39;00m\n\u001B[32m 144\u001B[39m \n\u001B[32m 145\u001B[39m \u001B[33;03mParameters\u001B[39;00m\n\u001B[32m (...)\u001B[39m\u001B[32m 149\u001B[39m \n\u001B[32m 150\u001B[39m \u001B[33;03m\"\"\"\u001B[39;00m\n\u001B[32m 151\u001B[39m \u001B[38;5;28;01mfor\u001B[39;00m file \u001B[38;5;129;01min\u001B[39;00m tqdm(\u001B[38;5;28mself\u001B[39m.files, disable=os.environ.get(\u001B[33m\"\u001B[39m\u001B[33mDISABLE_TQDM\u001B[39m\u001B[33m\"\u001B[39m, \u001B[33m\"\u001B[39m\u001B[33m\"\u001B[39m)):\n\u001B[32m--> \u001B[39m\u001B[32m152\u001B[39m \u001B[43mfile\u001B[49m\u001B[43m.\u001B[49m\u001B[43mmove\u001B[49m\u001B[43m(\u001B[49m\u001B[43mfolder\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 153\u001B[39m \u001B[38;5;28mself\u001B[39m._folder = folder\n",
"\u001B[36mFile \u001B[39m\u001B[32m~\\Documents\\GitHub\\OSEkit\\src\\osekit\\core_api\\audio_file.py:128\u001B[39m, in \u001B[36mAudioFile.move\u001B[39m\u001B[34m(self, folder)\u001B[39m\n\u001B[32m 119\u001B[39m \u001B[38;5;250m\u001B[39m\u001B[33;03m\"\"\"Move the file to the target folder.\u001B[39;00m\n\u001B[32m 120\u001B[39m \n\u001B[32m 121\u001B[39m \u001B[33;03mParameters\u001B[39;00m\n\u001B[32m (...)\u001B[39m\u001B[32m 125\u001B[39m \n\u001B[32m 126\u001B[39m \u001B[33;03m\"\"\"\u001B[39;00m\n\u001B[32m 127\u001B[39m afm.close()\n\u001B[32m--> \u001B[39m\u001B[32m128\u001B[39m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[43m.\u001B[49m\u001B[43mmove\u001B[49m\u001B[43m(\u001B[49m\u001B[43mfolder\u001B[49m\u001B[43m)\u001B[49m\n",
"\u001B[36mFile \u001B[39m\u001B[32m~\\Documents\\GitHub\\OSEkit\\src\\osekit\\core_api\\base_file.py:171\u001B[39m, in \u001B[36mBaseFile.move\u001B[39m\u001B[34m(self, folder)\u001B[39m\n\u001B[32m 169\u001B[39m destination_path = folder / \u001B[38;5;28mself\u001B[39m.path.name\n\u001B[32m 170\u001B[39m folder.mkdir(exist_ok=\u001B[38;5;28;01mTrue\u001B[39;00m, parents=\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[32m--> \u001B[39m\u001B[32m171\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mpath\u001B[49m\u001B[43m.\u001B[49m\u001B[43mrename\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdestination_path\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 172\u001B[39m \u001B[38;5;28mself\u001B[39m.path = destination_path\n",
"\u001B[36mFile \u001B[39m\u001B[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.13.3-windows-x86_64-none\\Lib\\pathlib\\_local.py:767\u001B[39m, in \u001B[36mPath.rename\u001B[39m\u001B[34m(self, target)\u001B[39m\n\u001B[32m 757\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34mrename\u001B[39m(\u001B[38;5;28mself\u001B[39m, target):\n\u001B[32m 758\u001B[39m \u001B[38;5;250m \u001B[39m\u001B[33;03m\"\"\"\u001B[39;00m\n\u001B[32m 759\u001B[39m \u001B[33;03m Rename this path to the target path.\u001B[39;00m\n\u001B[32m 760\u001B[39m \n\u001B[32m (...)\u001B[39m\u001B[32m 765\u001B[39m \u001B[33;03m Returns the new Path instance pointing to the target path.\u001B[39;00m\n\u001B[32m 766\u001B[39m \u001B[33;03m \"\"\"\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m767\u001B[39m \u001B[43mos\u001B[49m\u001B[43m.\u001B[49m\u001B[43mrename\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mtarget\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m 768\u001B[39m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m.with_segments(target)\n",
"\u001B[31mPermissionError\u001B[39m: [WinError 32] Le processus ne peut pas accéder au fichier car ce fichier est utilisé par un autre processus: '_static\\\\sample_audio\\\\sample_220925_223530.wav' -> '_static\\\\sample_audio\\\\data\\\\audio\\\\original\\\\sample_220925_223530.wav'"
]
}
],
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
Expand Down Expand Up @@ -225,7 +183,15 @@
"audio_dataset = dataset.get_analysis_audiodataset(analysis=analysis)\n",
"\n",
"# Filter the returned AudioDataset\n",
"audio_dataset.data = [ad for ad in audio_dataset.data if not ad.is_empty]"
"removed_data = audio_dataset.remove_empty_data(threshold=0.0)\n",
"\n",
"# We can take a look at which data has been removed:\n",
"print(f\"{' REMOVED DATA ':#^60}\")\n",
"print(f\"{'Begin':<20}{'Duration':^20}{'Fill rate':>20}\")\n",
"for data in removed_data:\n",
" print(\n",
" f\"{data.begin.strftime('%H:%M:%S'):<20}{str(data.duration):^20}{str(data.populated_ratio) + ' %':>20}\"\n",
" )"
]
},
{
Expand Down
12 changes: 10 additions & 2 deletions docs/source/example_reshaping_multiple_files_core.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,16 @@
" f\"{'Nb of Empty data:':<30}{str(len([ad for ad in audio_dataset.data if ad.is_empty])):>30}\\n\"\n",
")\n",
"\n",
"# Remove the empty data by using the default AudioDataset constructor:\n",
"audio_dataset = AudioDataset([ad for ad in audio_dataset.data if not ad.is_empty])"
"# Remove the empty data:\n",
"removed_data = audio_dataset.remove_empty_data(threshold=0.0)\n",
"\n",
"# We can take a look at which data has been removed:\n",
"print(f\"{' REMOVED DATA ':#^60}\")\n",
"print(f\"{'Begin':<20}{'Duration':^20}{'Fill rate':>20}\")\n",
"for data in removed_data:\n",
" print(\n",
" f\"{data.begin.strftime('%H:%M:%S'):<20}{str(data.duration):^20}{str(data.populated_ratio) + ' %':>20}\"\n",
" )"
]
},
{
Expand Down
10 changes: 9 additions & 1 deletion docs/source/example_reshaping_multiple_files_public.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,15 @@
"audio_dataset = dataset.get_analysis_audiodataset(analysis=analysis)\n",
"\n",
"# Filter the returned AudioDataset\n",
"audio_dataset.data = [ad for ad in audio_dataset.data if not ad.is_empty]"
"removed_data = audio_dataset.remove_empty_data(threshold=0.0)\n",
"\n",
"# We can take a look at which data has been removed:\n",
"print(f\"{' REMOVED DATA ':#^60}\")\n",
"print(f\"{'Begin':<20}{'Duration':^20}{'Fill rate':>20}\")\n",
"for data in removed_data:\n",
" print(\n",
" f\"{data.begin.strftime('%H:%M:%S'):<20}{str(data.duration):^20}{str(data.populated_ratio) + ' %':>20}\"\n",
" )"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion docs/source/publicapi_usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ The returned ``AudioDataset`` can be edited at will and passed as a parameter la
ads = dataset.get_analysis_audiodataset(analysis=analysis)

# Filtering out the AudioData that are not linked to any audio file:
ads.data = [ad for ad in ads.data if not ad.is_empty]
ads.remove_empty_data(threshold=0.)

The returned ``SpectroDataset`` can be used e.g. to plot sample spectrograms prior to the analysis:

Expand Down
17 changes: 16 additions & 1 deletion src/osekit/core_api/base_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from typing import Self, TypeVar

import numpy as np
from pandas import Timestamp, date_range
from pandas import Timedelta, Timestamp, date_range

from osekit.config import (
DPDEFAULT,
Expand Down Expand Up @@ -129,6 +129,21 @@ def end(self, value: Timestamp) -> None:
for item in self.items:
item.end = min(item.end, value)

@property
def populated_duration(self) -> Timedelta:
    """Cumulated duration of the items that are not empty.

    Items flagged as empty (``item.is_empty``) are skipped.
    If no item is populated, a zero ``Timedelta`` is returned.
    """
    total = Timedelta(0)
    for part in self.items:
        if part.is_empty:
            continue
        total = total + part.duration
    return Timedelta(total)

@property
def populated_ratio(self) -> float:
    """Fraction of the data duration covered by non-empty items.

    The value is ``populated_duration / duration``: a plain ratio,
    not a percentage scaled to 100.

    Returns
    -------
    float:
        The populated ratio of the data.
        ``0.`` is returned for data that has a zero total duration.

    """
    total_duration = self.duration
    if total_duration == Timedelta(0):
        # Zero-length data has nothing populated: report it as empty
        # rather than dividing by a zero duration.
        return 0.0
    return self.populated_duration / total_duration

def get_value(self) -> np.ndarray:
"""Get the concatenated values from all Items."""
return np.concatenate([item.get_value() for item in self.items])
Expand Down
27 changes: 27 additions & 0 deletions src/osekit/core_api/base_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,33 @@ def data_duration(self) -> Timedelta:
]
return max(set(data_durations), key=data_durations.count)

def remove_empty_data(self, threshold: float = 0.0) -> list[TData]:
    """Drop the data whose populated ratio does not exceed ``threshold``.

    A data is kept only if its ``populated_ratio`` is strictly greater
    than ``threshold``. The other data are taken out of ``self.data``
    and handed back to the caller, in their original order.

    Parameters
    ----------
    threshold: float
        Populated ratio at or below which a data is removed.
        Must be in the ``[0.,1.]`` interval.

    Returns
    -------
    list[TData]:
        The Data objects that were removed from the dataset.

    Raises
    ------
    ValueError
        If ``threshold`` is not in the ``[0.,1.]`` interval.

    """
    threshold_is_valid = 0.0 <= threshold <= 1.0
    if not threshold_is_valid:
        msg = f"Threshold should be between 0 and 1. Got {threshold}"
        raise ValueError(msg)

    retained = []
    discarded = []
    for candidate in self.data:
        if candidate.populated_ratio > threshold:
            retained.append(candidate)
        else:
            discarded.append(candidate)

    self.data = retained
    return discarded

def write(
self,
folder: Path,
Expand Down
14 changes: 14 additions & 0 deletions src/osekit/core_api/spectro_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import numpy as np
import pandas as pd
from matplotlib.dates import date2num
from pandas import Timedelta
from scipy.signal import ShortTimeFFT, welch

from osekit.config import (
Expand Down Expand Up @@ -248,6 +249,19 @@ def v_lim(self, v_lim: tuple[float, float] | None) -> None:
v_lim = (-120.0, 0.0) if self.db_type == "FS" else (0.0, 170.0)
self._v_lim = v_lim

@property
def populated_duration(self) -> Timedelta:
    """Populated duration of the spectro data.

    Overrides ``BaseData.populated_duration``:

    * with associated files, the parent implementation is used;
    * without files, the populated duration of the associated
      ``audio_data`` is returned;
    * without files nor ``audio_data``, a zero ``Timedelta`` is returned.
    """
    if not self.files:
        linked_audio = self.audio_data
        return linked_audio.populated_duration if linked_audio else Timedelta(0)
    return super().populated_duration

def get_value(self) -> np.ndarray:
"""Return the Sx matrix of the spectrogram.

Expand Down
Empty file added tests/helpers/__init__.py
Empty file.
106 changes: 106 additions & 0 deletions tests/helpers/dummy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import typing
from pathlib import Path
from typing import Self

import numpy as np
from pandas import Timestamp

from osekit.core_api.base_data import BaseData, TFile
from osekit.core_api.base_dataset import BaseDataset, TData
from osekit.core_api.base_file import BaseFile
from osekit.core_api.base_item import BaseItem


class DummyFile(BaseFile):
    """Bare-bones ``BaseFile`` subclass used as a stand-in in tests."""

    # A single empty extension is declared for the dummy files.
    supported_extensions: typing.ClassVar = [""]

    def read(self, start: Timestamp, stop: Timestamp) -> np.ndarray:
        """Stub reader: performs no read and returns ``None``."""


class DummyItem(BaseItem[DummyFile]):
    """``BaseItem`` specialization bound to ``DummyFile``, for tests."""


class DummyData(BaseData[DummyItem, DummyFile]):
    """``BaseData`` specialization built on the dummy item/file classes.

    The I/O methods are no-ops and the factory hooks simply forward to
    the dummy classes.
    """

    item_cls = DummyItem

    def write(self, folder: Path, *, link: bool = False) -> None:
        """No-op: nothing is written to ``folder``."""

    def link(self, folder: Path) -> None:
        """No-op: nothing is linked to ``folder``."""

    def _make_split_data(
        self,
        files: list[DummyFile],
        begin: Timestamp,
        end: Timestamp,
        **kwargs,  # noqa: ANN003
    ) -> Self:
        """Build the ``DummyData`` covering ``begin`` to ``end`` from ``files``."""
        split_part = DummyData.from_files(
            files=files,
            begin=begin,
            end=end,
            **kwargs,
        )
        return split_part

    @classmethod
    def _make_file(cls, path: Path, begin: Timestamp) -> DummyFile:
        """Instantiate a ``DummyFile`` from its path and begin timestamp."""
        dummy_file = DummyFile(path=path, begin=begin)
        return dummy_file

    @classmethod
    def _make_item(
        cls,
        file: TFile | None = None,
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
    ) -> DummyItem:
        """Instantiate a ``DummyItem`` from the given file and boundaries."""
        dummy_item = DummyItem(file=file, begin=begin, end=end)
        return dummy_item

    @classmethod
    def _from_base_dict(
        cls,
        dictionary: dict,
        files: list[TFile],
        begin: Timestamp,
        end: Timestamp,
        **kwargs,  # noqa: ANN003
    ) -> Self:
        """Rebuild the data from ``files``, ``begin`` and ``end`` only.

        ``dictionary`` and the extra keyword arguments are not used.
        """
        rebuilt = cls.from_files(files=files, begin=begin, end=end)
        return rebuilt

    @classmethod
    def from_files(
        cls,
        files: list[DummyFile],
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
        name: str | None = None,
        **kwargs,  # noqa: ANN003
    ) -> Self:
        """Forward every argument to the parent ``from_files``."""
        forwarded = {"files": files, "begin": begin, "end": end, "name": name}
        return super().from_files(**forwarded, **kwargs)


class DummyDataset(BaseDataset[DummyData, DummyFile]):
    """``BaseDataset`` specialization holding ``DummyData``, for tests."""

    file_cls = DummyFile

    @classmethod
    def _data_from_dict(cls, dictionary: dict) -> list[TData]:
        """Deserialize one ``DummyData`` per value of ``dictionary``."""
        deserialized = []
        for serialized_data in dictionary.values():
            deserialized.append(DummyData.from_dict(serialized_data))
        return deserialized

    @classmethod
    def _data_from_files(
        cls,
        files: list[DummyFile],
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
        name: str | None = None,
        **kwargs,  # noqa: ANN003
    ) -> TData:
        """Build a ``DummyData`` from ``files``.

        The extra keyword arguments are accepted but not forwarded.
        """
        dummy_data = DummyData.from_files(
            files=files,
            begin=begin,
            end=end,
            name=name,
        )
        return dummy_data
Loading