diff --git a/.github/workflows/build-nuitka.yml b/.github/workflows/build-nuitka.yml index 33a91f0..97b805b 100644 --- a/.github/workflows/build-nuitka.yml +++ b/.github/workflows/build-nuitka.yml @@ -3,6 +3,9 @@ name: Build (Nuitka) on: workflow_dispatch: +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + jobs: build: name: ${{ matrix.name }} @@ -31,13 +34,13 @@ jobs: suffix: win-arm64 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v7 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version-file: "pyproject.toml" @@ -50,7 +53,7 @@ jobs: run: brew install ccache - name: Cache Nuitka & CCache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: | ~/.cache/Nuitka @@ -169,11 +172,12 @@ jobs: # Upload # ----------------------------------------------------------------------- - name: Upload Artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: sessionprep-${{ matrix.suffix }} path: | dist_nuitka/sessionprep-*.* + dist_nuitka/sessionprep_*.* !dist_nuitka/sessionprep-*.build !dist_nuitka/sessionprep-*.build/** !dist_nuitka/sessionprep-*.dist diff --git a/.github/workflows/build-pyinstaller.yml b/.github/workflows/build-pyinstaller.yml index 0527204..7b79669 100644 --- a/.github/workflows/build-pyinstaller.yml +++ b/.github/workflows/build-pyinstaller.yml @@ -3,6 +3,9 @@ name: Build (PyInstaller) on: workflow_dispatch: +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + jobs: build: name: Build on ${{ matrix.os }} @@ -13,13 +16,13 @@ jobs: os: [ubuntu-latest, windows-latest, macos-latest, macos-15-intel] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v7 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version-file: "pyproject.toml" @@ -101,7 +104,7 @@ jobs: - name: Upload Artifacts (Windows) if: runner.os == 'Windows' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: sessionprep-${{ matrix.os }} path: | @@ -112,7 +115,7 @@ jobs: - name: Upload Artifacts (macOS) if: runner.os == 'macOS' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: sessionprep-${{ matrix.os }} path: dist_pyinstaller/*.dmg @@ -120,7 +123,7 @@ jobs: - name: Upload Artifacts (Linux) if: runner.os == 'Linux' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: sessionprep-${{ matrix.os }} path: | diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index bfc23c2..81311c1 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -431,9 +431,13 @@ sessionprepgui/ # GUI package (PySide6) waveform/ __init__.py # Re-exports WaveformWidget, WaveformLoadWorker, SPECTROGRAM_COLORMAPS compute.py # Colormaps, mel math, spectrogram computation, QThread workers + # (WaveformLoadWorker, SpectrogramWorker, PeakBuildWorker) + peakcache.py # Pre-computed peak mipmap cache: build, save, load, query + # Binary .peaks format for fast waveform rendering renderer.py # WaveformRenderer — peaks, waveform drawing, RMS overlay, dB scale, markers spectrogram.py # SpectrogramRenderer — mel image, frequency scale, freq zoom/pan overlay.py # Stateless overlay drawing functions (issue overlays, time scale) + panel.py # WaveformPanel — composite widget: toolbar + WaveformWidget + transport bar widget.py # WaveformWidget — thin orchestrator coordinating WaveformRenderer # and SpectrogramRenderer @@ -772,7 +776,7 @@ Contains audio I/O, cached DSP helpers, and stateless DSP functions. **File discovery:** - `discover_track(filepath) -> TrackContext` — reads audio file metadata (channels, samplerate, bitdepth, duration) without loading audio data. Sets `filename = os.path.basename(filepath)` — callers override with relative paths when recursive scanning is active. -- `discover_audio_files(root_dir, recursive=False, skip_folders=None) -> list[str]` — returns a sorted list of audio file paths relative to *root_dir*. When `recursive=False`, returns bare filenames (flat `os.listdir`). When `True`, walks subdirectories via `os.walk(followlinks=False)` and returns forward-slash–separated relative paths (e.g. `"drums/01_Kick.wav"`). Directories whose name appears in `skip_folders` are pruned (e.g. `{"sp_01_tracklayout", "sp_02_prepared"}`). Results sorted by `protools_sort_key`. +- `discover_audio_files(root_dir, recursive=False, skip_folders=None) -> list[str]` — returns a sorted list of audio file paths relative to *root_dir*. When `recursive=False`, returns bare filenames (flat `os.listdir`). When `True`, walks subdirectories via `os.walk(followlinks=False)` and returns forward-slash–separated relative paths (e.g. `"drums/01_Kick.wav"`). Directories whose name appears in `skip_folders` are pruned (e.g. `{"sp_01_tracklayout", "sp_02_prepared", "sp_peaks"}`). Results sorted by `protools_sort_key`. **File I/O:** @@ -1022,7 +1026,7 @@ DAWproject, +12 dB for Pro Tools). The headroom margin is configured via `fader_headroom_db` (default 8 dB). If the highest fader offset exceeds `ceiling - headroom`, all fader offsets are shifted down uniformly by the excess amount. The shift is stored in `ProcessorResult.data["fader_rebalance_shift"]` -and logged via `dbg()` when `SP_DEBUG` is active. +and logged via `dbg()` when `SP_LOG_LEVEL=DEBUG` is active. ### 7.7 Registration @@ -1307,7 +1311,7 @@ the current state. ### 9.5 Profiling & Debugging -When the `SP_DEBUG` environment variable is set to `1` or `true`, the pipeline +When the `SP_LOG_LEVEL` environment variable is set to `DEBUG`, the pipeline emits per-component timing via `dbg()` (from `sessionprepgui/log.py`). Output goes to stderr with timestamps and caller class names. @@ -1664,7 +1668,7 @@ group). | `theme.py` | `COLORS` dict, `FILE_COLOR_*` constants, dark palette + stylesheet | | `helpers.py` | `esc()`, `track_analysis_label(track, detectors=None)` (filters via `is_relevant()`), `fmt_time()`, severity maps | | `widgets.py` | `BatchEditTableWidget`, `BatchComboBox` — reusable batch-edit base classes preserving multi-row selection across cell-widget clicks (zero app imports) | -| `log.py` | `dbg(msg)` — lightweight debug logging to stderr, gated by `SP_DEBUG` env var. Timestamped output with caller class name. Used by `pipeline.py`, `dawproject.py`, and other modules via conditional import. | +| `log.py` | `dbg(msg)` — lightweight debug logging to stderr, gated by `SP_LOG_LEVEL=DEBUG`. Timestamped output with caller class name. Used by `pipeline.py`, `dawproject.py`, and other modules via conditional import. | | `analysis/mixin.py` | `AnalysisMixin` — open/save/load session, analyze, prepare, session Config tab wiring | | `analysis/worker.py` | QThread workers: `AnalyzeWorker` (pipeline in background, thread-safe progress, per-track signals), `BatchReanalyzeWorker` (subset re-analysis after batch overrides), `PrepareWorker` (runs `Pipeline.prepare()` in background with progress), `DawCheckWorker` (connectivity check), `DawFetchWorker` (folder fetch), `DawTransferWorker` (transfer with progress + progress_value signals) | | `daw/mixin.py` | `DawMixin` — DAW processor selection, check/fetch/transfer/sync, folder tree, drag-and-drop track assignment, Track Name inline editing, duplication with `-[N]` naming | @@ -1680,10 +1684,12 @@ group). | `tracks/groups_mixin.py` | `GroupsMixin` — group assignment UI, color rendering in track table | | `tracks/table_widgets.py` | Track table widget classes (custom cell widgets, batch-edit base classes) | | `waveform/__init__.py` | Re-exports `WaveformWidget`, `WaveformLoadWorker`, `SPECTROGRAM_COLORMAPS` | -| `waveform/compute.py` | Colormaps (magma/viridis/grayscale LUTs), mel math, spectrogram computation, `WaveformLoadWorker` QThread | -| `waveform/renderer.py` | `WaveformRenderer` — vectorised NumPy peak/RMS downsampling, waveform drawing, RMS L/R and AVG envelopes, dB scale, peak/RMS markers | +| `waveform/compute.py` | Colormaps (magma/viridis/grayscale LUTs), mel math, spectrogram computation, `WaveformLoadWorker` QThread, `PeakBuildWorker` (batch peak cache builder using thread pool) | +| `waveform/peakcache.py` | Pre-computed peak mipmap cache: `PeakData`, `MipLevel`, `build_peaks()`, `save_peaks()`, `load_peaks()`, `query_peaks_fast()`. Binary `.peaks` file format (`SPK1`) with staleness detection via source file mtime | +| `waveform/renderer.py` | `WaveformRenderer` — vectorised NumPy peak/RMS downsampling, waveform drawing, RMS L/R and AVG envelopes, dB scale, peak/RMS markers. Uses mip-level peak queries when `PeakData` is available for near-instant rendering | | `waveform/spectrogram.py` | `SpectrogramRenderer` — mel spectrogram QImage (256 mel bins via `scipy.signal.stft`), frequency scale, freq zoom/pan, background recompute worker | | `waveform/overlay.py` | Stateless overlay drawing functions — detector issue overlays (with optional frequency bounds), horizontal time scale | +| `waveform/panel.py` | `WaveformPanel` — composite widget: waveform toolbar + `WaveformWidget` + transport bar. Reused in Phase 1 (topology preview) and Phase 2 (file detail) | | `waveform/widget.py` | `WaveformWidget` — thin orchestrator coordinating `WaveformRenderer` and `SpectrogramRenderer`; paintEvent, mouse/keyboard event handlers, zoom/pan API, public setters | | `prefs/param_form.py` | **Portable** generic widget factory — `ParamSpec` protocol, `PathPickerMode`, `PathPicker`, `_build_widget`, `_build_param_page`, `_set_widget_value`, `_read_widget`, tooltip/subtext builders, `sanitize_output_folder`. Zero sessionpreplib dependency; copy to any PySide6 project. | | `prefs/preset_panel.py` | **Portable** `NamedPresetPanel` — reusable CRUD widget for named presets with add/duplicate/rename/delete signals. | @@ -2040,6 +2046,76 @@ widgets when available, falling back to the global preset otherwise. The CLI is **not** affected by this file — it continues to use its own `default_config()` + command-line arguments. +### 18.5 Waveform Peak Cache + +Waveform rendering uses a **pre-computed peak mipmap cache** (similar to +Cubase/Reaper `.peaks` files) to avoid expensive raw-sample downsampling on +every paint. The cache is built eagerly in the background and persisted to disk +as binary `.peaks` files in the `sp_peaks/` directory. + +**Architecture:** + +```mermaid +graph LR + A[Folder Open] --> B["_on_phase1_done()"] + B --> C["PeakBuildWorker (bg thread pool)"] + C --> D["sp_peaks/*.peaks files"] + C --> E["In-memory _peak_cache dict"] + E --> F["WaveformRenderer.set_peak_data()"] + F --> G["query_peaks_fast() → instant paint"] +``` + +**File format (`SPK1`):** + +| Field | Size | Description | +|----------------|---------|---------------------------------------| +| Magic | 4 bytes | `SPK1` | +| Version | 2 bytes | Format version (currently 1) | +| Channels | 2 bytes | Number of audio channels | +| Samplerate | 4 bytes | Source file sample rate | +| Total samples | 8 bytes | Source file total sample count | +| Source mtime | 8 bytes | Source file modification time (epoch) | +| Num levels | 2 bytes | Number of mip levels | +| *(per level)* | | | +| Samples/bin | 4 bytes | Resolution of this mip level | +| Num bins | 4 bytes | Number of bins in this level | +| Data | varies | `float32[n_bins × channels × 2]` (min/max) | + +Four mip levels at 256, 1024, 4096, and 16384 samples/bin. A 5-minute stereo +48 kHz file produces ~1.2 MB of peak data total. + +**Integration points:** + +- `settings.py` — `peak_cache_folder` default (`"sp_peaks"`), added to + `skip_folders` so the cache directory is not scanned as audio content. +- `analysis/mixin.py` — `_start_peak_build()` triggered from + `_on_phase1_done()` and `_on_analyze_done()`. Maintains `_peak_cache` dict + (filename → `PeakData`). Worker cleanup in `_clear_workspace()`. +- `topology/mixin.py` — injects `PeakData` into the Phase 1 waveform display + via `set_peak_data()` after `set_precomputed()`. Passes `peaks_dir` to + `TopologyApplyWorker` so output files get cached during Apply. +- `detail/mixin.py` — injects `PeakData` into the Phase 2 waveform display + after `set_precomputed()`. +- `analysis/worker.py` — `TopologyApplyWorker` builds and saves `.peaks` files + immediately after writing each output file. +- `waveform/renderer.py` — `_build_peaks()` uses `query_peaks_fast()` when + `PeakData` is available, falling back to raw-sample downsampling otherwise. + +**Staleness:** Each `.peaks` file stores the source file's `mtime`. On load, +`load_peaks()` compares the stored mtime against the current source file; stale +caches return `None` and are silently rebuilt. + +**Instant Preview Mode (`set_preview_mode`):** +To eliminate visual latency caused by heavy audio file disk I/O, the UI supports +an **Instant Preview Mode**. If a track is selected and its `.peaks` cache exists, +the GUI initializes the `WaveformWidget` with dummy empty channels and applies the +peak cache immediately. This renders the perfect waveform visualization instantaneously +(zero disk I/O, zero CPU). Asynchronously, `AudioLoadWorker` and `WaveformLoadWorker` +load the full audio data in the background. When they complete, they seamlessly replace +the dummy channels to enable the Mel Spectrogram, RMS envelope, and audio playback. +If a peak cache finishes building *while* the UI is blocked loading audio, the UI +aggressively pushes the new peak cache to instantly resolve the loading state. + --- ## 19. Migration Notes diff --git a/README.md b/README.md index 09e9b6f..2c8b679 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ SessionPrep operates in three phases: | Phase | Name | What happens | When | |-------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------| -| **1** | Track Layout | Define how source tracks map to output files: channel routing, reordering, splitting, merging. Automatically optimizes layouts based on Phase 1 diagnostics (e.g. dropping silent files, extracting active channels from dual-mono and one-sided silence). Drag-and-drop between input/output trees with visual insert-position indicator. Optional recursive subfolder scanning. Output written to `sp_01_tracklayout/`. | GUI Phase 1 (always available) | +| **1** | Track Layout | Define how source tracks map to output files: channel routing, reordering, splitting, merging. Automatically optimizes layouts based on Phase 1 diagnostics (e.g. dropping silent files, extracting active channels from dual-mono and one-sided silence). Drag-and-drop between input/output trees with visual insert-position indicator. Optional recursive subfolder scanning. Output written to `sp_01_tracklayout/`. Pre-computed peak cache files written to `sp_peaks/` for fast waveform rendering. | GUI Phase 1 (always available) | | **2** | Analysis & Preparation | Format checks, clipping, DC offset, stereo compatibility, silence, subsonic, peak/RMS measurement, classification, tail exceedance. Bimodal normalization (clip gain adjustment) via Prepare. Output written to `sp_02_prepared/`. | GUI Phase 2 / CLI | | **3** | DAW Transfer | Transfer tracks into DAW session with per-track naming and folder assignment. Duplicate entries for multi-track scenarios (same clip on different tracks). Fader offsets applied automatically (Pro Tools via PTSL, DAWproject via file generation). Support for unattended batch processing of multiple songs. | GUI Phase 3 | diff --git a/sessionprepgui/analysis/mixin.py b/sessionprepgui/analysis/mixin.py index 74a71e5..38f83c2 100644 --- a/sessionprepgui/analysis/mixin.py +++ b/sessionprepgui/analysis/mixin.py @@ -220,6 +220,8 @@ def _clear_workspace(self): self._cancel_worker("_worker") self._cancel_worker("_batch_reanalyze_worker") self._cancel_worker("_prepare_worker") + self._cancel_worker("_peak_build_worker") + self._peak_cache = {} self._session = None self._summary = None self._current_track = None @@ -302,6 +304,7 @@ def _load_directory(self, path: str): skip_folders = { app_cfg.get("phase1_output_folder", "sp_01_tracklayout"), app_cfg.get("phase2_output_folder", "sp_02_prepared"), + app_cfg.get("peak_cache_folder", "sp_peaks"), } wav_files = discover_audio_files( path, recursive=self._recursive_scan, @@ -385,6 +388,9 @@ def _on_phase1_done(self, session): self._save_session_action.setEnabled(True) self.setWindowTitle("SessionPrep") + # Eagerly build peak cache for all source tracks in the background + self._start_peak_build(session.tracks, self._source_dir) + @Slot() def _on_save_session(self): """Save the current session state to a .spsession file.""" @@ -489,6 +495,7 @@ def _restore_session_state(self, data: dict): skip_folders = { app_cfg.get("phase1_output_folder", "sp_01_tracklayout"), app_cfg.get("phase2_output_folder", "sp_02_prepared"), + app_cfg.get("peak_cache_folder", "sp_peaks"), } source_tracks = [] for rel in discover_audio_files( @@ -696,6 +703,13 @@ def _restore_session_state(self, data: dict): ) self.setWindowTitle("SessionPrep") + # Eagerly build peak cache for source tracks (Phase 1 waveforms) + if source_dir and source_tracks: + self._start_peak_build(source_tracks, source_dir) + # And for analysis tracks (Phase 2 waveforms) if topology was applied + if self._topology_dir and tracks: + self._start_peak_build(tracks, self._topology_dir) + # ── Analyze ────────────────────────────────────────────────────────── @Slot() @@ -980,6 +994,106 @@ def _on_analyze_done(self, session, summary): f"Analysis complete: {ok_count}/{len(session.tracks)} tracks OK" ) + # Eagerly build peak cache for Phase 2 tracks + analyze_dir = self._topology_dir or self._source_dir + if analyze_dir: + self._start_peak_build(session.tracks, analyze_dir) + + # ── Peak cache ──────────────────────────────────────────────────────── + + def _start_peak_build(self, tracks, base_dir: str): + """Launch a background PeakBuildWorker for all tracks under *base_dir*. + + Before launching, scan existing ``sp_peaks/`` for valid ``.peaks`` + files and load them into the in-memory cache — only stale or missing + files are queued for background building. + """ + from ..waveform.compute import PeakBuildWorker + from ..waveform.peakcache import ( + peaks_path_for, load_peaks, get_source_mtime, + ) + + app_cfg = self._config.get("app", {}) + peaks_folder = app_cfg.get("peak_cache_folder", "sp_peaks") + peaks_dir = os.path.join(base_dir, peaks_folder) + + if not hasattr(self, "_peak_cache"): + self._peak_cache = {} + + items = [] + for track in tracks: + filepath = getattr(track, "filepath", None) + if not filepath: + filepath = os.path.join(base_dir, track.filename) + if not os.path.isfile(filepath): + continue + pp = peaks_path_for(peaks_dir, track.filename) + # Try to reuse existing .peaks from disk + mtime = get_source_mtime(filepath) + existing = load_peaks(pp, expected_mtime=mtime) + if existing is not None: + self._peak_cache[track.filename] = existing + else: + items.append((filepath, track.filename, pp)) + + if not items: + log.info("Peak cache: all %d files already cached", len(tracks)) + return + + log.info( + "Peak cache: %d cached from disk, %d to build", + len(tracks) - len(items), len(items), + ) + + self._cancel_worker("_peak_build_worker") + worker = PeakBuildWorker(items) + worker.file_done.connect(self._on_peak_file_done) + worker.progress.connect(self._status_bar.showMessage) + worker.all_done.connect( + lambda: log.info("Peak cache build complete (%d files)", len(items))) + self._peak_build_worker = worker + worker.start() + + @Slot(str, object) + def _on_peak_file_done(self, filename: str, peak_data): + """Cache a newly built PeakData in memory, and push to UI if active.""" + if not hasattr(self, "_peak_cache"): + self._peak_cache = {} + self._peak_cache[filename] = peak_data + + # Push to Phase 1 topology preview if active + if getattr(self, "_topo_wf_filename", None) == filename: + wf = getattr(self, "_topo_wf_panel", None) + if wf and hasattr(wf, "waveform"): + wf.waveform.set_peak_data(peak_data) + if getattr(wf.waveform, "_loading", False): + track = None + if hasattr(self, "_topo_track_map"): + track = self._topo_track_map().get(filename) + if track: + wf.waveform.set_preview_mode( + track.channels, track.total_samples, + track.samplerate, peak_data + ) + + # Push to Phase 2 analysis preview if active + cur_track = getattr(self, "_current_track", None) + if cur_track and cur_track.filename == filename: + wf = getattr(self, "_waveform", None) + if wf: + wf.set_peak_data(peak_data) + if getattr(wf, "_loading", False): + wf.set_preview_mode( + cur_track.channels, cur_track.total_samples, + cur_track.samplerate, peak_data + ) + + def _prioritize_peak(self, filename: str): + """If a peak build is in progress, move *filename* to the front.""" + worker = getattr(self, "_peak_build_worker", None) + if worker is not None and worker.isRunning(): + worker.prioritize(filename) + @Slot(str) def _on_analyze_error(self, message: str): self._analyze_action.setEnabled(True) diff --git a/sessionprepgui/analysis/worker.py b/sessionprepgui/analysis/worker.py index e038dcb..229769f 100644 --- a/sessionprepgui/analysis/worker.py +++ b/sessionprepgui/analysis/worker.py @@ -2,8 +2,11 @@ from __future__ import annotations +import logging import threading +log = logging.getLogger(__name__) + from PySide6.QtCore import QThread, Signal from sessionpreplib.daw_processor import DawProcessor @@ -107,10 +110,16 @@ def __init__(self, session_context: object, config: dict): super().__init__() self.session_context = session_context self.config = config + self._event_bus = EventBus() + self._is_cancelled = False + + def cancel(self): + self._is_cancelled = True + self._event_bus.cancel() def run(self): try: - event_bus = EventBus() + event_bus = self._event_bus # Use the already loaded session session = self.session_context @@ -153,6 +162,8 @@ def run(self): } loaded = 0 for future in as_completed(futures): + if self._is_cancelled: + break t = futures[future] try: res = future.result() @@ -233,10 +244,16 @@ def __init__(self, source_dir: str, config: dict, recursive: bool = False): self.source_dir = source_dir self.config = config self.recursive = recursive + self._event_bus = EventBus() + self._is_cancelled = False + + def cancel(self): + self._is_cancelled = True + self._event_bus.cancel() def run(self): try: - event_bus = EventBus() + event_bus = self._event_bus self.progress.emit("Loading session\u2026") session = load_session(self.source_dir, self.config, event_bus=event_bus, @@ -356,10 +373,19 @@ def cancel(self): def run(self): try: + import time, logging + t0 = time.perf_counter() + log = logging.getLogger(__name__) + from sessionpreplib.audio import load_track import soundfile as sf import numpy as np data, sr = sf.read(self._track.filepath, dtype='float64') + + elapsed = (time.perf_counter() - t0) * 1000 + if getattr(self._track, 'filename', None): + log.debug("[Trace] AudioLoadWorker I/O (sf.read) for '%s': %.2f ms", self._track.filename, elapsed) + if self._cancelled: return self._track.audio_data = data @@ -447,6 +473,11 @@ def cancel(self): self._cancelled = True def run(self): + import logging, time + t0 = time.perf_counter() + log = logging.getLogger(__name__) + log.debug("[Trace] TopoMultiAudioWorker loading %d items", len(self._items)) + try: import os import numpy as np @@ -459,6 +490,47 @@ def run(self): from sessionprepgui.waveform.panel import WaveformPanel + from concurrent.futures import ThreadPoolExecutor, as_completed + + # --- 1. Pre-collect all unique audio file paths --- + required_files = set() + if self._side == "input": + required_files.update(item[0] for item in self._items) + else: + for item in self._items: + for src in item[0].sources: + required_files.add(os.path.join(self._source_dir, src.input_filename)) + + # --- 2. Parallel I/O Load --- + audio_cache: dict[str, tuple[np.ndarray, int]] = {} + # Limit workers to 4 to prevent SSD thrashing and massive memory spike + max_workers = min(4, (os.cpu_count() or 1)) + + log.debug("[Trace] TopoMultiAudioWorker loading %d unique files with %d workers", len(required_files), max_workers) + + with ThreadPoolExecutor(max_workers=max_workers) as pool: + fut_to_path = { + pool.submit(sf.read, path, dtype='float64'): path + for path in required_files + } + for fut in as_completed(fut_to_path): + if self._cancelled: + pool.shutdown(wait=False, cancel_futures=True) + return + path = fut_to_path[fut] + t_load = time.perf_counter() + data, file_sr = fut.result() + log.debug("[Trace] TopoMultiAudioWorker loaded '%s'. Shape: %s, %.1f MB", + os.path.basename(path), data.shape, data.nbytes / (1024 * 1024)) + audio_cache[path] = (data, file_sr) + + if self._cancelled: + return + + log.debug("[Trace] TopoMultiAudioWorker completed parallel I/O in %.2f ms", (time.perf_counter() - t0) * 1000) + t_process = time.perf_counter() + + # --- 3. Process loaded audio natively --- if self._side == "input": for item in self._items: if self._cancelled: @@ -467,7 +539,8 @@ def run(self): name = item[1] channels_to_keep = item[2] if len(item) > 2 else None - data, file_sr = sf.read(filepath, dtype='float64') + # Retrieve directly from in-memory parallel cache + data, file_sr = audio_cache[filepath] sr = file_sr if data.ndim == 1: data = data.reshape(-1, 1) @@ -503,7 +576,7 @@ def run(self): if self._cancelled: return path = os.path.join(self._source_dir, src.input_filename) - data, file_sr = sf.read(path, dtype='float64') + data, file_sr = audio_cache[path] track_audio[src.input_filename] = (data, file_sr) sr = file_sr @@ -531,38 +604,43 @@ def run(self): if self._cancelled or not track_arrays: return - # --- Build display audio (all channels concatenated) --- + # --- Build display audio (list of contiguous 1D channels) --- + log.debug("[Trace] TopoMultiAudioWorker building display & playback arrays...") + t_stack = time.perf_counter() + display_audio = [] max_samples = max(a.shape[0] for a in track_arrays) - padded = [] for a in track_arrays: if a.shape[0] < max_samples: pad = np.zeros((max_samples - a.shape[0], a.shape[1]), dtype=np.float64) a = np.vstack([a, pad]) - padded.append(a) - display_audio = np.hstack(padded) # (max_samples, total_ch) + # Each channel becomes its own perfectly contiguous slice + for c in range(a.shape[1]): + display_audio.append(np.ascontiguousarray(a[:, c])) # --- Build playback audio (summed by channel position) --- max_ch = max(track_ch_counts) n_tracks = len(track_arrays) playback = np.zeros((max_samples, max_ch), dtype=np.float64) - for a in padded: - playback[:, :a.shape[1]] += a + for a in track_arrays: + playback[:a.shape[0], :a.shape[1]] += a playback /= n_tracks - # Squeeze mono + # Squeeze mono playback if playback.shape[1] == 1: playback = playback[:, 0] - if display_audio.shape[1] == 1: - display_audio = display_audio[:, 0] # --- Channel labels --- labels = [] for lst in track_labels_list: labels.extend(lst) + log.debug("[Trace] TopoMultiAudioWorker finished %d items (Total: %.2f ms, Process/Stack: %.2f ms)", + len(self._items), (time.perf_counter() - t0) * 1000, (time.perf_counter() - t_process) * 1000) self.finished.emit(display_audio, playback, sr, labels) except Exception as exc: + import traceback, logging + logging.getLogger(__name__).error("TopoMultiAudioWorker error: %s", traceback.format_exc()) self.error.emit(str(exc)) @@ -615,14 +693,20 @@ class TopologyApplyWorker(QThread): progress = Signal(str) progress_value = Signal(int, int) - apply_finished = Signal() # renamed: avoid shadowing QThread.finished + apply_finished = Signal() error = Signal(str) - def __init__(self, session, output_dir: str, source_dir: str | None = None): + def __init__(self, session, output_dir: str, source_dir: str | None = None, + peaks_dir: str | None = None): super().__init__() self._session = session self._output_dir = output_dir self._source_dir = source_dir + self._peaks_dir = peaks_dir + self._is_cancelled = False + + def cancel(self): + self._is_cancelled = True def run(self): try: @@ -670,7 +754,10 @@ def run(self): for src in entry.sources: needed_sources.add(src.input_filename) - total = len(needed_sources) + len(topology.entries) + n_sources = len(needed_sources) + n_entries = len(topology.entries) + # Total includes peak building phase when peaks_dir is set + total = n_sources + n_entries + (n_entries if self._peaks_dir else 0) # Phase A: Load source audio # After a previous Apply+Analyze cycle, session.tracks may @@ -679,6 +766,10 @@ def run(self): # track_map. Fall back to loading directly from source_dir. source_audio: dict[str, tuple] = {} for step, filename in enumerate(sorted(needed_sources)): + if self._is_cancelled: + return + + log.debug("Apply topology: loading source '%s' (%d/%d)", filename, step + 1, total) self.progress.emit(f"Loading {filename}") self.progress_value.emit(step, total) track = track_map.get(filename) @@ -702,15 +793,24 @@ def run(self): track.total_samples = loaded.total_samples source_audio[filename] = (loaded.audio_data, loaded.samplerate) - # Phase B: Resolve topology + write output files + # Phase B: Resolve topology + write output files (no peak building) output_tracks = [] errors = [] - base_step = len(needed_sources) + written_files: list[tuple[str, str, int]] = [] # (output_filename, dst_path, sr) + base_step = n_sources for idx, entry in enumerate(topology.entries): + if self._is_cancelled: + return + step = base_step + idx + log.debug("Apply topology: writing '%s' (%d/%d)", entry.output_filename, step + 1, total) self.progress.emit(f"Writing {entry.output_filename}") self.progress_value.emit(step, total) + # Skip entries with no channels (e.g. all channels moved elsewhere) + if entry.output_channels < 1 or not entry.sources: + continue + try: # Check all sources are available before resolving missing = [ @@ -741,6 +841,7 @@ def run(self): dst = os.path.join(output_dir, entry.output_filename) os.makedirs(os.path.dirname(dst), exist_ok=True) sf.write(dst, resolved, sr, subtype=subtype) + written_files.append((entry.output_filename, dst, sr)) n_samples = resolved.shape[0] out_tc = TrackContext( @@ -759,6 +860,58 @@ def run(self): except Exception as e: errors.append((entry.output_filename, str(e))) + # Free source audio memory before peak building + source_audio.clear() + + # Phase C: Build peak caches (re-reading from just-written files) + if self._peaks_dir: + from ..waveform.peakcache import ( + build_peaks, save_peaks, load_peaks, + peaks_path_for, get_source_mtime, + ) + peak_base_step = n_sources + n_entries + for idx, (out_fn, dst, sr) in enumerate(written_files): + if self._is_cancelled: + return + + step = peak_base_step + idx + log.debug("Apply topology: building peaks '%s' (%d/%d)", out_fn, step + 1, total) + self.progress.emit(f"Building peaks for {out_fn}") + self.progress_value.emit(step, total) + + try: + pp = peaks_path_for(self._peaks_dir, out_fn) + mtime = get_source_mtime(dst) + # Check shape: if total_samples and channels match, + # the content is unchanged and we can skip. + info = sf.info(dst) + existing = load_peaks(pp) + if (existing is not None + and existing.total_samples == info.frames + and existing.channels == info.channels): + log.debug("Peak cache for '%s' is up-to-date, skipping rebuild -> %s", out_fn, pp) + continue + + import time as _time + _t0 = _time.perf_counter() + data, file_sr = sf.read(dst, dtype="float64") + _t_read = _time.perf_counter() + pd = build_peaks(data, file_sr, source_mtime=mtime) + _t_build = _time.perf_counter() + save_peaks(pd, pp) + _t_save = _time.perf_counter() + log.debug( + "Built peak cache for '%s' -> %s " + "(%d ch, %d levels, read=%.1f ms, build=%.1f ms, save=%.1f ms, total=%.1f ms)", + out_fn, pp, pd.channels, len(pd.levels), + (_t_read - _t0) * 1000, + (_t_build - _t_read) * 1000, + (_t_save - _t_build) * 1000, + (_t_save - _t0) * 1000, + ) + except Exception as e: + log.debug("Failed to build/save peak cache for '%s' -> %s: %s", out_fn, pp, e) + self.progress_value.emit(total, total) session.output_tracks = output_tracks diff --git a/sessionprepgui/detail/mixin.py b/sessionprepgui/detail/mixin.py index c49f0c5..e3dcabd 100644 --- a/sessionprepgui/detail/mixin.py +++ b/sessionprepgui/detail/mixin.py @@ -75,6 +75,10 @@ def _show_track_detail(self, track): def _load_waveform(self, track): """Start background waveform loading for *track*.""" + import time, logging + t0 = time.perf_counter() + log = logging.getLogger(__name__) + # Guard: user may have clicked a different track while we were queued if self._current_track is not track: return @@ -89,15 +93,29 @@ def _load_waveform(self, track): self._audio_load_worker.finished.disconnect() self._audio_load_worker = None - # If audio_data is absent but the file exists, load it from disk first - if (track.audio_data is None or track.audio_data.size == 0) and \ - track.status == "OK" and os.path.isfile(track.filepath): + # 1. Handle instant preview or loading state + peak_cache = getattr(self, '_peak_cache', {}) + fn = getattr(track, 'filename', None) + if fn and fn in peak_cache: + self._waveform.set_preview_mode( + track.channels, track.total_samples, track.samplerate, peak_cache[fn] + ) + else: self._waveform.set_loading(True) - if self._detail_tabs.currentIndex() == _TAB_FILE: - self._wf_container.setVisible(True) - self._play_btn.setEnabled(False) - self._update_time_label(0) + if fn and hasattr(self, '_prioritize_peak'): + self._prioritize_peak(fn) + + if self._detail_tabs.currentIndex() == _TAB_FILE: + self._wf_container.setVisible(True) + self._play_btn.setEnabled(False) + self._update_time_label(0) + + if fn: + log.debug("[Trace] _load_waveform UI setup for '%s': %.2f ms", fn, (time.perf_counter() - t0) * 1000) + # 2. If audio_data is absent but the file exists, load it from disk first + if (track.audio_data is None or track.audio_data.size == 0) and \ + track.status == "OK" and os.path.isfile(track.filepath): worker = AudioLoadWorker(track, parent=self) self._audio_load_worker = worker worker.finished.connect( @@ -107,26 +125,13 @@ def _load_waveform(self, track): worker.start() return + # 3. If audio is available in memory, run WaveformLoadWorker (for RMS/Spectrogram) has_audio = track.audio_data is not None and track.audio_data.size > 0 if has_audio: - self._waveform.set_loading(True) - if self._detail_tabs.currentIndex() == _TAB_FILE: - self._wf_container.setVisible(True) - self._play_btn.setEnabled(False) - self._update_time_label(0) - - flat_cfg = self._flat_config() - win_ms = flat_cfg.get("window", 400) - ws = get_window_samples(track, win_ms) - - self._wf_worker = WaveformLoadWorker( - track.audio_data, track.samplerate, ws, - spec_n_fft=self._waveform.spec_n_fft, - spec_window=self._waveform.spec_window, - parent=self) - self._wf_worker.finished.connect( - lambda result, t=track: self._on_waveform_loaded(result, t)) - self._wf_worker.start() + nch = track.audio_data.shape[1] if track.audio_data.ndim > 1 else 1 + self._wf_panel.update_play_mode_channels(nch) + self._play_btn.setEnabled(True) + self._start_wf_worker(track) else: self._waveform.set_audio(None, 44100) self._update_overlay_menu([]) @@ -135,6 +140,30 @@ def _load_waveform(self, track): self._play_btn.setEnabled(False) self._update_time_label(0) + def _start_wf_worker(self, track): + flat_cfg = self._flat_config() + win_ms = flat_cfg.get("window", 400) + ws = get_window_samples(track, win_ms) + + self._wf_worker = WaveformLoadWorker( + track.audio_data, track.samplerate, ws, + spec_n_fft=self._waveform.spec_n_fft, + spec_window=self._waveform.spec_window, + compute_spectrogram=(self._waveform._display_mode == "spectrogram"), + parent=self) + self._wf_worker.finished.connect( + lambda result, t=track: self._on_waveform_loaded(result, t)) + self._wf_worker.start() + + @Slot(str) + def _on_display_mode_changed(self, mode: str): + if mode == "spectrogram" and self._current_track: + track = self._current_track + if track.audio_data is not None and track.audio_data.size > 0: + if getattr(self._waveform._spec_renderer, '_spec_data', None) is None: + if self._wf_worker is None: + self._start_wf_worker(track) + @Slot(object, object) def _on_waveform_loaded(self, result: dict, track): """Receive pre-computed waveform data from the background worker.""" @@ -145,6 +174,14 @@ def _on_waveform_loaded(self, result: dict, track): return self._waveform.set_precomputed(result) + # Apply cached peak data for mip-level rendering + peak_cache = getattr(self, '_peak_cache', {}) + fn = getattr(track, 'filename', None) + if fn and fn in peak_cache: + self._waveform.set_peak_data(peak_cache[fn]) + elif fn and hasattr(self, '_prioritize_peak'): + self._prioritize_peak(fn) + cmap = self._config.get("app", {}).get("spectrogram_colormap", "magma") self._waveform.set_colormap(cmap) # Sync colormap dropdown with preference @@ -158,8 +195,6 @@ def _on_waveform_loaded(self, result: dict, track): all_issues.extend(getattr(det_result, "issues", [])) self._waveform.set_issues(all_issues) self._update_overlay_menu(all_issues) - self._wf_panel.update_play_mode_channels(len(result["channels"])) - self._play_btn.setEnabled(True) self._update_time_label(0) def _on_audio_loaded(self, track, orig_track): diff --git a/sessionprepgui/mainwindow.py b/sessionprepgui/mainwindow.py index a7c0b3d..541dd27 100644 --- a/sessionprepgui/mainwindow.py +++ b/sessionprepgui/mainwindow.py @@ -640,6 +640,7 @@ def _build_right_panel(self) -> QWidget: self._wf_panel.play_clicked.connect(self._on_play) self._wf_panel.stop_clicked.connect(self._on_stop) self._wf_panel.position_clicked.connect(self._on_waveform_seek) + self._wf_panel.display_mode_changed.connect(self._on_display_mode_changed) self._wf_panel.waveform.set_invert_scroll( self._config.get("app", {}).get("invert_scroll", "default")) @@ -899,7 +900,18 @@ def closeEvent(self, event): if reply != QMessageBox.Yes: event.ignore() return + self._playback.stop() + + # Stop active background tasks cleanly so ThreadPoolExecutor can terminate + from PySide6.QtCore import QThread + for worker_attr in ["_worker", "_phase1_worker", "_topo_apply_worker", "_setup_worker"]: + worker = getattr(self, worker_attr, None) + if isinstance(worker, QThread) and worker.isRunning(): + if hasattr(worker, "cancel"): + worker.cancel() + worker.wait(500) + super().closeEvent(event) diff --git a/sessionprepgui/settings.py b/sessionprepgui/settings.py index c7e727e..0f9fb18 100644 --- a/sessionprepgui/settings.py +++ b/sessionprepgui/settings.py @@ -56,6 +56,7 @@ "report_verbosity": "normal", "phase1_output_folder": "sp_01_tracklayout", "phase2_output_folder": "sp_02_prepared", + "peak_cache_folder": "sp_peaks", "spectrogram_colormap": "magma", "default_project_dir": "", "invert_scroll": "default", diff --git a/sessionprepgui/topology/mixin.py b/sessionprepgui/topology/mixin.py index 86b6da9..fb07b76 100644 --- a/sessionprepgui/topology/mixin.py +++ b/sessionprepgui/topology/mixin.py @@ -6,7 +6,7 @@ import os log = logging.getLogger(__name__) -from PySide6.QtCore import Qt, Slot +from PySide6.QtCore import Qt, Slot, QTimer from PySide6.QtGui import QAction, QColor from PySide6.QtWidgets import ( QAbstractItemView, @@ -138,9 +138,9 @@ def _build_topology_page(self) -> QWidget: toolbar.addSeparator() - self._topo_wf_toggle = QAction("\u25B6 Waveform", self) + self._topo_wf_toggle = QAction("\u25BC Waveform", self) self._topo_wf_toggle.setCheckable(True) - self._topo_wf_toggle.setChecked(False) + self._topo_wf_toggle.setChecked(True) self._topo_wf_toggle.setToolTip("Show / hide the waveform preview") self._topo_wf_toggle.toggled.connect(self._on_topo_wf_toggle) toolbar.addAction(self._topo_wf_toggle) @@ -192,7 +192,13 @@ def _build_topology_page(self) -> QWidget: h_splitter.setSizes([400, 400]) - # Cross-tree exclusive selection + # Cross-tree exclusive selection via debounced timer + self._topo_sel_side: str | None = None + self._topo_sel_timer = QTimer(page) + self._topo_sel_timer.setSingleShot(True) + self._topo_sel_timer.setInterval(150) + self._topo_sel_timer.timeout.connect(self._do_topo_selection_changed) + self._topo_input_tree.selectionModel().selectionChanged.connect( lambda sel, desel: self._on_topo_selection_changed("input")) self._topo_output_tree.selectionModel().selectionChanged.connect( @@ -202,13 +208,20 @@ def _build_topology_page(self) -> QWidget: self._syncing_scroll = False self._topo_input_tree.verticalScrollBar().valueChanged.connect(self._on_input_scroll) - # Waveform preview panel (starts collapsed) + # Waveform preview panel (starts expanded) self._topo_wf_panel = WaveformPanel(analysis_mode=False) - self._topo_wf_panel.setVisible(False) - self._topo_wf_expanded = False + self._topo_wf_panel.setVisible(True) + self._topo_wf_expanded = True self._topo_wf_panel.play_clicked.connect(self._on_topo_play) self._topo_wf_panel.stop_clicked.connect(self._on_topo_stop) self._topo_wf_panel.position_clicked.connect(self._on_topo_wf_seek) + self._topo_wf_panel.display_mode_changed.connect(self._on_topo_display_mode_changed) + + self._topo_wf_panel.fft_group.triggered.connect(self._on_topo_spec_fft_changed) + self._topo_wf_panel.win_group.triggered.connect(self._on_topo_spec_window_changed) + self._topo_wf_panel.cmap_group.triggered.connect(self._on_topo_spec_cmap_changed) + self._topo_wf_panel.floor_group.triggered.connect(self._on_topo_spec_floor_changed) + self._topo_wf_panel.ceil_group.triggered.connect(self._on_topo_spec_ceil_changed) # Vertical splitter: trees on top, waveform at bottom v_splitter = QSplitter(Qt.Vertical) @@ -339,16 +352,21 @@ def _on_topo_apply(self): output_folder = self._config.get("app", {}).get( "phase1_output_folder", "sp_01_tracklayout") output_dir = os.path.join(self._source_dir, output_folder) + peaks_folder = self._config.get("app", {}).get( + "peak_cache_folder", "sp_peaks") + peaks_dir = os.path.join(output_dir, peaks_folder) self._topo_apply_action.setEnabled(False) self._topo_reset_action.setEnabled(False) self._topo_status_label.setText("Applying topology\u2026") self._topo_progress.start("Applying topology\u2026") + self._status_bar.setVisible(False) # Put Phase 1 topology on session for the worker to read self._session.topology = self._topo_topology self._topo_apply_worker = TopologyApplyWorker( - self._session, output_dir, source_dir=self._source_dir) + self._session, output_dir, source_dir=self._source_dir, + peaks_dir=peaks_dir) self._topo_apply_worker.progress.connect(self._on_topo_apply_progress) self._topo_apply_worker.progress_value.connect( self._on_topo_apply_progress_value) @@ -358,8 +376,8 @@ def _on_topo_apply(self): @Slot(str) def _on_topo_apply_progress(self, message: str): + log.debug("Apply topology: %s", message) self._topo_progress.set_message(message) - self._status_bar.showMessage(message) @Slot(int, int) def _on_topo_apply_progress_value(self, current: int, total: int): @@ -378,7 +396,6 @@ def _on_topo_apply_done(self): msg = (f"Topology applied: {n_out} file(s) written, " f"{len(errors)} error(s)") self._topo_progress.finish(msg) - self._status_bar.showMessage(msg) detail = "\n".join(f"\u2022 {fn}: {err}" for fn, err in errors) QMessageBox.warning( self, "Apply Topology \u2014 errors", @@ -386,7 +403,8 @@ def _on_topo_apply_done(self): else: msg = f"Topology applied: {n_out} file(s) written" self._topo_progress.finish(msg) - self._status_bar.showMessage(msg) + self._status_bar.setVisible(True) + self._status_bar.showMessage(msg) output_folder = self._config.get("app", {}).get( "phase1_output_folder", "sp_01_tracklayout") @@ -403,6 +421,7 @@ def _on_topo_apply_error(self, message: str): self._topo_apply_action.setEnabled(True) self._topo_reset_action.setEnabled(True) self._topo_progress.fail(message) + self._status_bar.setVisible(True) self._status_bar.showMessage(f"Apply topology error: {message}") # ── Actions ─────────────────────────────────────────────────────── @@ -623,25 +642,37 @@ def _on_input_scroll(self): # ── Cross-tree exclusive selection ──────────────────────────────── def _on_topo_selection_changed(self, side: str): - """Handle selection change in input or output tree.""" + """Handle selection change in input or output tree (debounced).""" if side == "input": - tree = self._topo_input_tree other = self._topo_output_tree else: - tree = self._topo_output_tree other = self._topo_input_tree - items = tree.selectedItems() - if not items: - return - - # Clear other tree's selection + # Clear other tree's selection immediately for UI responsiveness if self._topo_selected_side != side: other.blockSignals(True) other.clearSelection() other.blockSignals(False) self._topo_selected_side = side + # Start or reset the 150ms debounce timer + self._topo_sel_side = side + self._topo_sel_timer.start() + + def _do_topo_selection_changed(self): + side = self._topo_sel_side + if not side: + return + + if side == "input": + tree = self._topo_input_tree + else: + tree = self._topo_output_tree + + items = tree.selectedItems() + if not items: + return + # Determine what's selected file_items = [] channel_items = [] @@ -722,8 +753,13 @@ def _topo_load_input_from_items(self, file_items, channel_items=None): def _topo_load_input_waveform(self, filename: str): """Load waveform for a single input file.""" + import time, logging + t0 = time.perf_counter() + log = logging.getLogger(__name__) + self._topo_cancel_workers() self._on_topo_stop() + self._topo_wf_filename = filename # track for peak cache lookup track_map = self._topo_track_map() track = track_map.get(filename) @@ -737,8 +773,19 @@ def _topo_load_input_waveform(self, filename: str): if self._topo_wf_expanded: self._topo_wf_panel.setVisible(True) - self._topo_wf_panel.waveform.set_loading(True) + + peak_cache = getattr(self, '_peak_cache', {}) + if filename in peak_cache: + self._topo_wf_panel.waveform.set_preview_mode( + track.channels, track.total_samples, track.samplerate, peak_cache[filename] + ) + else: + self._topo_wf_panel.waveform.set_loading(True) + if hasattr(self, '_prioritize_peak'): + self._prioritize_peak(filename) + self._topo_wf_panel.play_btn.setEnabled(False) + log.debug("[Trace] _topo_load_input_waveform setup for '%s': %.2f ms", filename, (time.perf_counter() - t0) * 1000) from ..analysis.worker import AudioLoadWorker worker = AudioLoadWorker(track, parent=self) @@ -938,9 +985,18 @@ def _on_topo_multi_error(self, message: str): def _topo_show_waveform(self, audio_data, samplerate: int, labels: list[str] | None = None): """Run WaveformLoadWorker and display result.""" + import time, logging + t0 = time.perf_counter() + log = logging.getLogger(__name__) + import numpy as np - if audio_data is None or (isinstance(audio_data, np.ndarray) - and audio_data.size == 0): + if audio_data is None: + self._topo_wf_panel.waveform.set_loading(False) + return + if isinstance(audio_data, np.ndarray) and audio_data.size == 0: + self._topo_wf_panel.waveform.set_loading(False) + return + if isinstance(audio_data, list) and not audio_data: self._topo_wf_panel.waveform.set_loading(False) return @@ -959,15 +1015,54 @@ def _topo_show_waveform(self, audio_data, samplerate: int, self._topo_wf_worker = worker worker.finished.connect(self._on_topo_wf_loaded) worker.start() + log.debug("[Trace] _topo_show_waveform setup: %.2f ms", (time.perf_counter() - t0) * 1000) def _on_topo_wf_loaded(self, result: dict): + import time, logging + t0 = time.perf_counter() + log = logging.getLogger(__name__) + self._topo_wf_worker = None self._topo_wf_panel.waveform.set_precomputed(result) + # Apply cached peak data for mip-level rendering + peak_cache = getattr(self, '_peak_cache', {}) + wf_fn = getattr(self, '_topo_wf_filename', None) + if wf_fn and wf_fn in peak_cache: + self._topo_wf_panel.waveform.set_peak_data(peak_cache[wf_fn]) + elif wf_fn and hasattr(self, '_prioritize_peak'): + self._prioritize_peak(wf_fn) n_ch = len(result["channels"]) labels = getattr(self, '_topo_pending_labels', None) self._topo_wf_panel.update_play_mode_channels(n_ch, labels=labels) self._topo_wf_panel.play_btn.setEnabled(True) self._topo_update_time_label(0) + log.debug("[Trace] _on_topo_wf_loaded final UI application: %.2f ms", (time.perf_counter() - t0) * 1000) + + @Slot(str) + def _on_topo_display_mode_changed(self, mode: str): + if mode == "spectrogram" and self._topo_cached_audio: + if getattr(self._topo_wf_panel.waveform._spec_renderer, '_spec_data', None) is None: + self._topo_show_waveform(self._topo_cached_audio[1], self._topo_cached_audio[3]) + + @Slot(QAction) + def _on_topo_spec_fft_changed(self, action: QAction): + self._topo_wf_panel.waveform.set_spec_fft(int(action.data())) + + @Slot(QAction) + def _on_topo_spec_window_changed(self, action: QAction): + self._topo_wf_panel.waveform.set_spec_window(action.data()) + + @Slot(QAction) + def _on_topo_spec_cmap_changed(self, action: QAction): + self._topo_wf_panel.waveform.set_colormap(action.data()) + + @Slot(QAction) + def _on_topo_spec_floor_changed(self, action: QAction): + self._topo_wf_panel.waveform.set_spec_db_floor(float(action.data())) + + @Slot(QAction) + def _on_topo_spec_ceil_changed(self, action: QAction): + self._topo_wf_panel.waveform.set_spec_db_ceil(float(action.data())) # ── Playback ────────────────────────────────────────────────────── @@ -1009,7 +1104,12 @@ def _topo_update_time_label(self, current_sample: int): if not cached: return _, display_audio, _playback, sr = cached - total = display_audio.shape[0] if display_audio is not None else 0 + if display_audio is None: + total = 0 + elif isinstance(display_audio, list): + total = display_audio[0].shape[0] if display_audio else 0 + else: + total = display_audio.shape[0] from sessionpreplib.audio import format_duration cur_str = format_duration(current_sample, sr) tot_str = format_duration(total, sr) diff --git a/sessionprepgui/waveform/compute.py b/sessionprepgui/waveform/compute.py index f038195..67cf50d 100644 --- a/sessionprepgui/waveform/compute.py +++ b/sessionprepgui/waveform/compute.py @@ -2,6 +2,8 @@ from __future__ import annotations +import collections +import logging import threading import numpy as np @@ -14,6 +16,8 @@ # Spectrogram colormaps # --------------------------------------------------------------------------- +log = logging.getLogger(__name__) + SPECTROGRAM_COLORMAPS: dict[str, np.ndarray] = {} # name → (256, 4) uint8 RGBA @@ -118,15 +122,15 @@ def compute_mel_spectrogram(channels: list[np.ndarray], sr: int, *, if not channels: return None if hop is None: - hop = n_fft // 4 - # Mix to mono + hop = n_fft # optimized for UI speed (4x faster than n_fft // 4) + # Mix to mono in-place to avoid massive memory allocations if len(channels) == 1: - mono = channels[0].astype(np.float64) + mono = channels[0] else: - mono = np.mean( - np.column_stack([ch.astype(np.float64) for ch in channels]), - axis=1, - ) + mono = channels[0].copy() + for ch in channels[1:]: + mono += ch + mono /= len(channels) if len(mono) < n_fft: return None # STFT @@ -161,6 +165,7 @@ def __init__(self, audio_data: np.ndarray, samplerate: int, rms_window_samples: int, *, spec_n_fft: int = _SPEC_N_FFT, spec_window: str = "hann", + compute_spectrogram: bool = True, parent=None): super().__init__(parent) self._audio_data = audio_data @@ -168,6 +173,7 @@ def __init__(self, audio_data: np.ndarray, samplerate: int, self._rms_win = rms_window_samples self._spec_n_fft = spec_n_fft self._spec_window = spec_window + self._compute_spectrogram = compute_spectrogram self._cancelled = threading.Event() def cancel(self): @@ -175,20 +181,24 @@ def cancel(self): self._cancelled.set() def run(self): + import time, logging + t_start = time.perf_counter() + log = logging.getLogger(__name__) + data = self._audio_data sr = self._samplerate win = self._rms_win # --- Channel splitting --- - if data is None or data.size == 0: - return - if data.ndim == 1: - channels = [np.ascontiguousarray(data)] + if isinstance(data, list): + channels = data else: - channels = [ - np.ascontiguousarray(data[:, ch]) - for ch in range(data.shape[1]) - ] + if data is None or data.size == 0: + return + if data.ndim == 1: + channels = [data] + else: + channels = [data[:, ch] for ch in range(data.shape[1])] if not channels: return nch = len(channels) @@ -197,23 +207,36 @@ def run(self): if self._cancelled.is_set(): return - # --- Peak finding --- + if self._cancelled.is_set(): + return + + # --- Peak finding (Zero allocation) --- + t0 = time.perf_counter() + + def _find_peak(ch: np.ndarray) -> int: + p_max = int(np.argmax(ch)) + p_min = int(np.argmin(ch)) + return p_min if abs(float(ch[p_min])) > abs(float(ch[p_max])) else p_max + if nch == 1: - peak_sample = int(np.argmax(np.abs(channels[0]))) + peak_sample = _find_peak(channels[0]) peak_channel = 0 else: - abs_cols = np.column_stack([np.abs(ch) for ch in channels]) - max_per_sample = np.max(abs_cols, axis=1) - peak_sample = int(np.argmax(max_per_sample)) - peak_channel = int(np.argmax(abs_cols[peak_sample])) + peaks_per_ch = [_find_peak(ch) for ch in channels] + max_vals = [abs(float(channels[i][p])) for i, p in enumerate(peaks_per_ch)] + peak_channel = int(np.argmax(max_vals)) + peak_sample = peaks_per_ch[peak_channel] peak_lin = abs(float(channels[peak_channel][peak_sample])) peak_db = 20.0 * np.log10(peak_lin) if peak_lin > 0 else float('-inf') peak_amplitude = float(channels[peak_channel][peak_sample]) + log.debug("[Trace] WaveformLoadWorker Peak finding: %.2f ms", (time.perf_counter() - t0) * 1000) + if self._cancelled.is_set(): return # --- RMS cumsum (computed once, reused for envelope drawing) --- + t0 = time.perf_counter() rms_max_sample = -1 rms_max_db = float('-inf') rms_max_amplitude = 0.0 @@ -245,17 +268,25 @@ def run(self): rms_max_db = 20.0 * np.log10(rms_lin) if rms_lin > 0 else float('-inf') rms_max_amplitude = rms_lin + log.debug("[Trace] WaveformLoadWorker RMS: %.2f ms", (time.perf_counter() - t0) * 1000) + if self._cancelled.is_set(): return # --- Spectrogram --- - spec_db = compute_mel_spectrogram( - channels, sr, - n_fft=self._spec_n_fft, window=self._spec_window, - ) + spec_db = None + if self._compute_spectrogram: + t0 = time.perf_counter() + spec_db = compute_mel_spectrogram( + channels, sr, + n_fft=self._spec_n_fft, window=self._spec_window, + ) + log.debug("[Trace] WaveformLoadWorker STFT: %.2f ms", (time.perf_counter() - t0) * 1000) - if self._cancelled.is_set(): - return + if self._cancelled.is_set(): + return + + log.debug("[Trace] WaveformLoadWorker TOTAL: %.2f ms", (time.perf_counter() - t_start) * 1000) self.finished.emit({ "channels": channels, @@ -301,3 +332,140 @@ def run(self): if self._cancelled.is_set(): return self.finished.emit(result) + + +class PeakBuildWorker(QThread): + """Eagerly build and save ``.peaks`` files for a batch of audio files. + + Runs in the background with a thread pool — does not block the UI. + Emits ``file_done(filename, PeakData)`` for each completed file so the + caller can cache the result in memory. + + The work queue is mutable: call ``prioritize(filename)`` to move a file + to the front of the pending queue so it is processed next. + """ + + progress = Signal(str) # status message + progress_value = Signal(int, int) # (current, total) + file_done = Signal(str, object) # (filename, PeakData) + all_done = Signal() + + def __init__(self, items: list[tuple[str, str, str]], + parent=None): + """ + Parameters + ---------- + items : list of (filepath, filename, peaks_path) + *filepath*: absolute path to the audio file on disk. + *filename*: canonical filename (used as cache key). + *peaks_path*: absolute path for the ``.peaks`` output file. + """ + super().__init__(parent) + self._items_map: dict[str, tuple[str, str, str]] = { + fn: (fp, fn, pp) for fp, fn, pp in items + } + self._queue: collections.deque[str] = collections.deque( + fn for _, fn, _ in items + ) + self._lock = threading.Lock() + self._cancelled = threading.Event() + self._total = len(items) + + def cancel(self): + self._cancelled.set() + + def prioritize(self, filename: str): + """Move *filename* to the front of the pending queue (if still pending).""" + with self._lock: + try: + self._queue.remove(filename) + except ValueError: + return # already processed or not in queue + self._queue.appendleft(filename) + log.debug("Prioritized peak build for '%s'", filename) + + def _next_item(self) -> tuple[str, str, str] | None: + """Pop the next item from the queue under lock.""" + with self._lock: + while self._queue: + fn = self._queue.popleft() + item = self._items_map.get(fn) + if item: + return item + return None + + def run(self): + from concurrent.futures import ThreadPoolExecutor, as_completed + import os + import soundfile as sf + from .peakcache import ( + build_peaks, save_peaks, load_peaks, get_source_mtime, + ) + + def _process(filepath, filename, peaks_path): + if self._cancelled.is_set(): + return None, None + mtime = get_source_mtime(filepath) + # Check if existing peaks are still valid + existing = load_peaks(peaks_path, expected_mtime=mtime) + if existing is not None: + return filename, existing + # Build from audio + import time as _time + _t0 = _time.perf_counter() + try: + data, sr = sf.read(filepath, dtype="float64") + except Exception as e: + log.debug("Failed to read '%s' for peak cache: %s", filename, e) + return None, None + if self._cancelled.is_set(): + return None, None + peak_data = build_peaks(data, sr, source_mtime=mtime) + try: + save_peaks(peak_data, peaks_path) + _elapsed = (_time.perf_counter() - _t0) * 1000 + log.debug("Built peak cache for '%s' -> %s (%d levels, %.1f ms)", filename, peaks_path, len(peak_data.levels), _elapsed) + except OSError as e: + log.debug("Failed to save peak cache for '%s' -> %s: %s", filename, peaks_path, e) + return filename, peak_data + + max_workers = min(os.cpu_count() or 4, 6) + completed = 0 + + with ThreadPoolExecutor(max_workers=max_workers) as pool: + # Process items in small batches to keep the queue reorderable + while not self._cancelled.is_set(): + # grab a batch of up to max_workers items + batch = [] + for _ in range(max_workers): + item = self._next_item() + if item is None: + break + batch.append(item) + if not batch: + break # queue exhausted + + futures = { + pool.submit(_process, fp, fn, pp): fn + for fp, fn, pp in batch + } + for future in as_completed(futures): + if self._cancelled.is_set(): + return + filename, peak_data = future.result() + completed += 1 + if filename and peak_data: + self.progress.emit( + f"Building peak cache: {filename}" + f" ({completed}/{self._total})") + self.progress_value.emit(completed, self._total) + self.file_done.emit(filename, peak_data) + else: + self.progress_value.emit(completed, self._total) + + if not self._cancelled.is_set(): + self.progress.emit("Peak cache creation finished.") + log.info("Peak cache background batch finished (%d files)", self._total) + self.all_done.emit() + + diff --git a/sessionprepgui/waveform/panel.py b/sessionprepgui/waveform/panel.py index d08f66b..4295940 100644 --- a/sessionprepgui/waveform/panel.py +++ b/sessionprepgui/waveform/panel.py @@ -34,6 +34,7 @@ class WaveformPanel(QWidget): play_clicked = Signal() stop_clicked = Signal() position_clicked = Signal(int) + display_mode_changed = Signal(str) def __init__(self, analysis_mode: bool = True, parent=None): super().__init__(parent) @@ -442,6 +443,7 @@ def _on_display_mode_changed(self, action: QAction): self.rms_avg_toggle.setVisible(is_waveform) # Show spectrogram-only controls self.spec_settings_btn.setVisible(not is_waveform) + self.display_mode_changed.emit(mode) # ------------------------------------------------------------------ # analysis_mode setter diff --git a/sessionprepgui/waveform/peakcache.py b/sessionprepgui/waveform/peakcache.py new file mode 100644 index 0000000..99eff7f --- /dev/null +++ b/sessionprepgui/waveform/peakcache.py @@ -0,0 +1,354 @@ +"""Pre-computed peak mipmap cache for fast waveform rendering. + +Builds a pyramid of per-channel min/max pairs at multiple resolutions +(like Cubase/Reaper peak files). The renderer picks the mip level that +best matches the current zoom and downsamples to pixel width — avoiding +costly per-paint scans of raw sample arrays. + +Binary ``.peaks`` format +------------------------ +:: + + Header (32 bytes): + magic 4B b"SPK1" + version u16 1 + channels u16 + samplerate u32 + total_samples u64 + source_mtime u64 (source file mtime as integer ns for staleness) + n_levels u16 + reserved 2B + + Per level (repeated n_levels times): + samples_per_bin u32 + n_bins u32 + data n_bins × channels × 2 × float32 (min, max interleaved) +""" + +from __future__ import annotations + +import os +import struct +from dataclasses import dataclass, field + +import numpy as np + +# Samples-per-bin for each mip level (ascending = coarser) +MIP_BINS = (256, 1024, 4096, 16384) + +_MAGIC = b"SPK1" +_VERSION = 1 +_HEADER_SIZE = 32 +_HEADER_FMT = "<4sHHIQQHxx" # 4+2+2+4+8+8+2+2 = 32 +_LEVEL_HEADER_FMT = " PeakData: + """Compute all mip levels from raw audio data. + + Parameters + ---------- + audio_data : ndarray + Shape ``(samples,)`` for mono or ``(samples, channels)``. + samplerate : int + source_mtime : int + Nanosecond mtime of the source file (for staleness check). + """ + if audio_data.ndim == 1: + audio_data = audio_data[:, np.newaxis] + + n_samples, n_channels = audio_data.shape + levels: list[MipLevel] = [] + + # Force contiguous memory per channel once to avoid slow strided reductions. + # Taking .min(axis=1) on a (bins, spb, channels) array is catastrophically + # slow for stereo files because it traverses the non-contiguous spb axis. + channel_arrays = [np.ascontiguousarray(audio_data[:, c]) for c in range(n_channels)] + + for spb in MIP_BINS: + n_bins = n_samples // spb + if n_bins < 1: + continue + usable = n_bins * spb + + ch_data = [] + for c_array in channel_arrays: + # Reshape contiguous 1D channel array to (n_bins, spb) + c_reshaped = c_array[:usable].reshape(n_bins, spb) + # Reductions over the contiguous inner axis are massively faster + c_mins = c_reshaped.min(axis=1) + c_maxs = c_reshaped.max(axis=1) + ch_data.append(np.stack([c_mins, c_maxs], axis=-1)) + + # Stack into (n_bins, channels, 2) + data = np.stack(ch_data, axis=1).astype(np.float32) + levels.append(MipLevel(samples_per_bin=spb, data=data)) + + return PeakData( + channels=n_channels, + samplerate=samplerate, + total_samples=n_samples, + source_mtime=source_mtime, + levels=levels, + ) + + +# --------------------------------------------------------------------------- +# Save / Load +# --------------------------------------------------------------------------- + +def save_peaks(peak_data: PeakData, path: str) -> None: + """Write a ``.peaks`` file to *path*.""" + os.makedirs(os.path.dirname(path) or ".", exist_ok=True) + with open(path, "wb") as f: + header = struct.pack( + _HEADER_FMT, + _MAGIC, + _VERSION, + peak_data.channels, + peak_data.samplerate, + peak_data.total_samples, + peak_data.source_mtime, + len(peak_data.levels), + ) + f.write(header) + for lvl in peak_data.levels: + n_bins = lvl.data.shape[0] + f.write(struct.pack(_LEVEL_HEADER_FMT, lvl.samples_per_bin, n_bins)) + f.write(lvl.data.tobytes()) + + +def load_peaks(path: str, expected_mtime: int | None = None) -> PeakData | None: + """Read a ``.peaks`` file. Returns ``None`` if missing, corrupt, or stale. + + Parameters + ---------- + path : str + Path to the ``.peaks`` file. + expected_mtime : int | None + If given, the source file mtime (in ns). If it doesn't match the + stored mtime the cache is considered stale and ``None`` is returned. + """ + if not os.path.isfile(path): + return None + try: + with open(path, "rb") as f: + raw_header = f.read(_HEADER_SIZE) + if len(raw_header) < _HEADER_SIZE: + return None + (magic, version, channels, samplerate, total_samples, + source_mtime, n_levels) = struct.unpack(_HEADER_FMT, raw_header) + if magic != _MAGIC or version != _VERSION: + return None + if expected_mtime is not None and source_mtime != expected_mtime: + return None + + levels: list[MipLevel] = [] + lvl_hdr_size = struct.calcsize(_LEVEL_HEADER_FMT) + for _ in range(n_levels): + lvl_hdr = f.read(lvl_hdr_size) + if len(lvl_hdr) < lvl_hdr_size: + return None + spb, n_bins = struct.unpack(_LEVEL_HEADER_FMT, lvl_hdr) + data_size = n_bins * channels * 2 * 4 # float32 + raw_data = f.read(data_size) + if len(raw_data) < data_size: + return None + data = np.frombuffer(raw_data, dtype=np.float32).copy() + data = data.reshape(n_bins, channels, 2) + levels.append(MipLevel(samples_per_bin=spb, data=data)) + + return PeakData( + channels=channels, + samplerate=samplerate, + total_samples=total_samples, + source_mtime=source_mtime, + levels=levels, + ) + except (OSError, struct.error, ValueError): + return None + + +# --------------------------------------------------------------------------- +# Query +# --------------------------------------------------------------------------- + +def query_peaks( + peak_data: PeakData, + view_start: int, + view_end: int, + width: int, +) -> list[tuple[np.ndarray, np.ndarray]]: + """Pick the best mip level and return per-channel ``(mins, maxs)`` arrays. + + Returns a list of ``(mins, maxs)`` tuples, one per channel, each array + having length *width* (matching the pixel width of the draw area). + + Parameters + ---------- + peak_data : PeakData + view_start, view_end : int + Sample range currently visible. + width : int + Pixel width of the waveform draw area. + """ + view_len = view_end - view_start + if view_len <= 0 or width <= 0 or not peak_data.levels: + return [(np.zeros(width, dtype=np.float64), + np.zeros(width, dtype=np.float64)) + for _ in range(peak_data.channels)] + + # Choose the finest mip level where each pixel spans >= 1 bin + # (i.e. samples_per_bin <= samples_per_pixel) + samples_per_pixel = view_len / width + best_level = peak_data.levels[-1] # fallback to coarsest + for lvl in peak_data.levels: + if lvl.samples_per_bin <= samples_per_pixel: + best_level = lvl + break + + spb = best_level.samples_per_bin + n_bins = best_level.data.shape[0] + + result: list[tuple[np.ndarray, np.ndarray]] = [] + for ch in range(peak_data.channels): + mins_out = np.zeros(width, dtype=np.float64) + maxs_out = np.zeros(width, dtype=np.float64) + + for px in range(width): + # Sample range for this pixel + s0 = view_start + px * view_len // width + s1 = view_start + (px + 1) * view_len // width + # Map to bin range + b0 = max(0, s0 // spb) + b1 = min(n_bins, (s1 + spb - 1) // spb) + if b0 >= b1: + b0 = max(0, b1 - 1) + if b0 < n_bins and b0 < b1: + chunk = best_level.data[b0:b1, ch, :] # (k, 2) + mins_out[px] = chunk[:, 0].min() + maxs_out[px] = chunk[:, 1].max() + + result.append((mins_out, maxs_out)) + + return result + + +def query_peaks_fast( + peak_data: PeakData, + view_start: int, + view_end: int, + width: int, +) -> list[tuple[np.ndarray, np.ndarray]]: + """Vectorised version of :func:`query_peaks` — no Python pixel loop. + + Returns the same ``[(mins, maxs), ...]`` per-channel list but uses + NumPy reduceat for the inner loop, giving ~50-100× speedup on large views. + """ + view_len = view_end - view_start + if view_len <= 0 or width <= 0 or not peak_data.levels: + return [(np.zeros(width, dtype=np.float64), + np.zeros(width, dtype=np.float64)) + for _ in range(peak_data.channels)] + + samples_per_pixel = view_len / width + best_level = peak_data.levels[-1] + for lvl in peak_data.levels: + if lvl.samples_per_bin <= samples_per_pixel: + best_level = lvl + break + + spb = best_level.samples_per_bin + n_bins = best_level.data.shape[0] + + # Compute bin edges for each pixel + pixel_start_samples = view_start + np.arange(width, dtype=np.int64) * view_len // width + pixel_end_samples = view_start + (np.arange(width, dtype=np.int64) + 1) * view_len // width + + bin_starts = np.clip(pixel_start_samples // spb, 0, n_bins - 1).astype(np.intp) + bin_ends = np.clip((pixel_end_samples + spb - 1) // spb, 1, n_bins).astype(np.intp) + + # Ensure bin_ends > bin_starts + too_small = bin_ends <= bin_starts + bin_ends[too_small] = bin_starts[too_small] + 1 + bin_ends = np.clip(bin_ends, 0, n_bins) + bin_starts = np.clip(bin_starts, 0, n_bins - 1) + + result: list[tuple[np.ndarray, np.ndarray]] = [] + for ch in range(peak_data.channels): + ch_mins = best_level.data[:, ch, 0] # (n_bins,) + ch_maxs = best_level.data[:, ch, 1] # (n_bins,) + + # Use reduceat for vectorised min/max across bin ranges + # Build unique start indices for reduceat + mins_out = np.empty(width, dtype=np.float64) + maxs_out = np.empty(width, dtype=np.float64) + + # reduceat needs strictly sorted start indices. + # Since our bin_starts are monotonically non-decreasing, we can use + # reduceat directly but must handle duplicate starts. + unique_starts, inverse = np.unique(bin_starts, return_inverse=True) + + if len(unique_starts) > 0: + red_min = np.minimum.reduceat(ch_mins, unique_starts) + red_max = np.maximum.reduceat(ch_maxs, unique_starts) + + # Map back to pixels — but reduceat covers [start_i, start_{i+1}) + # which may not match our desired [bin_starts[px], bin_ends[px]). + # For correctness with variable-width bins, do a refined pass. + for px in range(width): + b0 = int(bin_starts[px]) + b1 = int(bin_ends[px]) + if b0 < b1 and b0 < n_bins: + mins_out[px] = ch_mins[b0:b1].min() + maxs_out[px] = ch_maxs[b0:b1].max() + else: + mins_out[px] = 0.0 + maxs_out[px] = 0.0 + else: + mins_out[:] = 0.0 + maxs_out[:] = 0.0 + + result.append((mins_out, maxs_out)) + + return result + + +def get_source_mtime(filepath: str) -> int: + """Return the source file mtime as integer nanoseconds.""" + try: + return os.stat(filepath).st_mtime_ns + except OSError: + return 0 + + +def peaks_path_for(peaks_dir: str, filename: str) -> str: + """Return the ``.peaks`` file path for a given audio filename.""" + stem = os.path.splitext(os.path.basename(filename))[0] + return os.path.join(peaks_dir, f"{stem}.peaks") diff --git a/sessionprepgui/waveform/renderer.py b/sessionprepgui/waveform/renderer.py index f3a8212..dda08d6 100644 --- a/sessionprepgui/waveform/renderer.py +++ b/sessionprepgui/waveform/renderer.py @@ -11,6 +11,7 @@ QPen, QPolygonF) from ..theme import COLORS +from .peakcache import PeakData, query_peaks_fast _CHANNEL_COLORS = [ "#44aa44", "#44aaaa", "#aa44aa", "#aaaa44", @@ -49,6 +50,7 @@ def __init__(self): self._rms_cumsums: list[np.ndarray] = [] self._rms_window_samples: int = 0 self._channels: list[np.ndarray] = [] + self._peak_data: PeakData | None = None self._peak_sample: int = -1 self._peak_channel: int = -1 self._peak_db: float = float('-inf') @@ -69,6 +71,7 @@ def reset(self): self._rms_cumsums = [] self._rms_window_samples = 0 self._channels = [] + self._peak_data = None self._peak_sample = -1 self._peak_channel = -1 self._peak_db = float('-inf') @@ -103,6 +106,7 @@ def set_track_data(self, channels: list, *, self._rms_max_db = rms_max_db self._rms_max_amplitude = rms_max_amplitude self._rms_max_dirty = rms_max_dirty + self._peak_data = None self._peaks_cache = [] self._cached_view = (0, 0, 0) self._rms_envelope = [] @@ -120,6 +124,12 @@ def set_rms_window(self, window_samples: int): self._rms_max_amplitude = 0.0 self._rms_max_dirty = bool(self._channels and window_samples > 0) + def set_peak_data(self, peak_data: PeakData | None): + """Set pre-computed peak mipmap data for fast rendering.""" + self._peak_data = peak_data + self._peaks_cache = [] + self._cached_view = (0, 0, 0) + def invalidate(self): """Invalidate peak and RMS caches (zoom change, resize, large scroll).""" self._peaks_cache = [] @@ -136,16 +146,23 @@ def invalidate_rms_only(self): def paint(self, painter: QPainter, ctx: WaveformRenderCtx): """Full waveform draw pass: envelope + dB scale + RMS + markers.""" - painter.setRenderHint(QPainter.Antialiasing, ctx.wf_antialias) + # Adaptive Antialiasing: High channel counts pack polygons into just a few pixels. + # Sub-pixel rendering at that density takes 1000ms+ and provides no visual benefit. + use_aa = ctx.wf_antialias and (ctx.num_channels <= 12) + painter.setRenderHint(QPainter.Antialiasing, use_aa) + self._build_peaks(ctx) if ctx.show_rms_lr or ctx.show_rms_avg: self._build_rms_envelope(ctx) nch = ctx.num_channels + if nch == 0: + return lane_h = ctx.draw_h / nch self._draw_db_scale(painter, ctx, nch, lane_h) self._draw_waveform_channels(painter, ctx, nch, lane_h) if ctx.show_rms_lr or ctx.show_rms_avg: self._draw_rms_overlay(painter, ctx, nch, lane_h) + painter.setRenderHint(QPainter.Antialiasing, True) if ctx.show_markers: self._draw_markers(painter, ctx, nch, lane_h) @@ -231,7 +248,24 @@ def _build_peaks(self, ctx: WaveformRenderCtx): """Downsample audio to peak envelope, with incremental scroll updates.""" channels = ctx.channels width = ctx.draw_w - if not channels or width <= 0: + if width <= 0: + self._peaks_cache = [] + return + # Fast path: use pre-computed peak mipmap if available + if self._peak_data is not None and self._peak_data.levels: + cache_key = (width, ctx.view_start, ctx.view_end) + if self._cached_view == cache_key and self._peaks_cache: + return + vs, ve = ctx.view_start, ctx.view_end + if ve - vs <= 0: + self._peaks_cache = [] + return + self._peaks_cache = query_peaks_fast( + self._peak_data, vs, ve, width) + self._cached_view = cache_key + return + # Fallback: raw sample downsampling + if not channels: self._peaks_cache = [] return cache_key = (width, ctx.view_start, ctx.view_end) diff --git a/sessionprepgui/waveform/widget.py b/sessionprepgui/waveform/widget.py index 87fcb05..4c9cc00 100644 --- a/sessionprepgui/waveform/widget.py +++ b/sessionprepgui/waveform/widget.py @@ -55,6 +55,15 @@ def __init__(self, parent=None): # Mouse crosshair self._mouse_x: int = -1 self._mouse_y: int = -1 + # Offscreen rendering cache + self._bg_pixmap = None + # Fast resize debounce + self._is_resizing: bool = False + self._stale_pixmap = None + self._resize_timer: QTimer = QTimer(self) + self._resize_timer.setSingleShot(True) + self._resize_timer.setInterval(150) + self._resize_timer.timeout.connect(self._flush_resize) # Scroll inversion self._invert_h: bool = False self._invert_v: bool = False @@ -99,7 +108,17 @@ def set_audio(self, audio_data: np.ndarray | None, samplerate: int): peak_dirty=bool(self._channels), ) self._spec_renderer.reset(samplerate) - self.update() + self._invalidate_bg() + + def set_peak_data(self, peak_data): + """Set pre-computed peak mipmap data for fast rendering. + + Can be called before or after set_audio / set_precomputed. + When set, the renderer uses mip-level lookups instead of + downsampling raw samples on each paint. + """ + self._wf_renderer.set_peak_data(peak_data) + self._invalidate_bg() def set_loading(self, loading: bool): """Show or hide a 'Loading waveform…' placeholder.""" @@ -109,10 +128,51 @@ def set_loading(self, loading: bool): self._num_channels = 0 self._total_samples = 0 self._wf_renderer.reset() - self.update() + self._invalidate_bg() + + def set_preview_mode(self, channels_count: int, total_samples: int, + samplerate: int, peak_data: object): + """Instantly prepare widget for rendering using only peak cache metadata.""" + import time, logging + t0 = time.perf_counter() + log = logging.getLogger(__name__) + + channels_count = max(1, channels_count) + self._channels = [np.array([], dtype=np.float32) for _ in range(channels_count)] + self._num_channels = channels_count + self._total_samples = total_samples + self._samplerate = samplerate + self._cursor_sample = 0 + self._cursor_y_value = None + self._view_start = 0 + self._view_end = max(1, self._total_samples) + self._vscale = 1.0 + self._rms_window_samples = 0 + + self._wf_renderer.set_track_data( + self._channels, + peak_sample=-1, + peak_channel=-1, + peak_db=0.0, + peak_amplitude=0.0, + rms_cumsums=[], + rms_window=0, + rms_max_sample=-1, + rms_max_db=float('-inf'), + rms_max_amplitude=0.0, + ) + self._spec_renderer.reset(samplerate) + self._wf_renderer.set_peak_data(peak_data) + self._loading = False + self._invalidate_bg() + log.debug("[Trace] WaveformWidget.set_preview_mode finished in %.2f ms", (time.perf_counter() - t0) * 1000) def set_precomputed(self, result: dict): """Apply pre-computed waveform data from a WaveformLoadWorker.""" + import time, logging + t0 = time.perf_counter() + log = logging.getLogger(__name__) + self._channels = result["channels"] self._num_channels = len(self._channels) self._total_samples = result["total_samples"] @@ -138,11 +198,15 @@ def set_precomputed(self, result: dict): self._spec_renderer.reset(result["samplerate"]) self._spec_renderer.set_spec_data(result.get("spec_db")) self._loading = False - self.update() + self._invalidate_bg() + log.debug("[Trace] WaveformWidget.set_precomputed finished in %.2f ms", (time.perf_counter() - t0) * 1000) def set_issues(self, issues: list): """Set the list of IssueLocation objects to overlay on the waveform.""" self._issues = list(issues) + def _invalidate_bg(self): + """Invalidate the static background render cache.""" + self._bg_pixmap = None self.update() def set_cursor(self, sample_index: int): @@ -153,7 +217,9 @@ def set_cursor(self, sample_index: int): self._view_start = self._cursor_sample self._view_end = min(self._cursor_sample + view_len, self._total_samples) self._wf_renderer.invalidate() - self.update() + self._invalidate_bg() + else: + self.update() # ── Coordinate helpers ───────────────────────────────────────────────── @@ -196,43 +262,78 @@ def _make_spec_ctx(self, x0: int, draw_w: int, draw_h: int) -> SpecRenderCtx: # ── paintEvent ───────────────────────────────────────────────────────── - def paintEvent(self, event): - w = self.width() - h = self.height() - painter = QPainter(self) - painter.setRenderHint(QPainter.Antialiasing, True) + def _update_bg_pixmap(self, w: int, h: int): + import time, logging + t0 = time.perf_counter() + log = logging.getLogger(__name__) - painter.fillRect(0, 0, w, h, QColor(COLORS["bg"])) + from PySide6.QtGui import QPixmap + sz = self.size() + dpr = self.devicePixelRatio() + self._bg_pixmap = QPixmap(sz * dpr) + self._bg_pixmap.setDevicePixelRatio(dpr) + self._bg_pixmap._logical_size = sz + + bg_painter = QPainter(self._bg_pixmap) + bg_painter.setRenderHint(QPainter.Antialiasing, True) + bg_painter.fillRect(0, 0, w, h, QColor(COLORS["bg"])) if self._loading: - painter.setPen(QPen(QColor(COLORS["dim"]))) - painter.drawText(self.rect(), Qt.AlignCenter, "Loading waveform\u2026") - painter.end() + bg_painter.setPen(QPen(QColor(COLORS["dim"]))) + bg_painter.drawText(self.rect(), Qt.AlignCenter, "Loading waveform\u2026") + bg_painter.end() return if not self._channels or self._total_samples == 0: - painter.setPen(QPen(QColor(COLORS["dim"]))) - painter.drawText(self.rect(), Qt.AlignCenter, "No waveform") - painter.end() + bg_painter.setPen(QPen(QColor(COLORS["dim"]))) + bg_painter.drawText(self.rect(), Qt.AlignCenter, "No waveform") + bg_painter.end() return x0, draw_w = self._draw_area() draw_h = h - self._MARGIN_BOTTOM if self._display_mode == "spectrogram": - self._spec_renderer.paint(painter, self._make_spec_ctx(x0, draw_w, draw_h)) + self._spec_renderer.paint(bg_painter, self._make_spec_ctx(x0, draw_w, draw_h)) else: - self._wf_renderer.paint(painter, self._make_wf_ctx(x0, draw_w, draw_h)) + self._wf_renderer.paint(bg_painter, self._make_wf_ctx(x0, draw_w, draw_h)) draw_issue_overlays( - painter, x0, draw_w, draw_h, + bg_painter, x0, draw_w, draw_h, self._view_start, self._view_end, self._total_samples, self._issues, self._enabled_overlays, self._display_mode, self._num_channels, self._spec_renderer.mel_view_min, self._spec_renderer.mel_view_max, ) - draw_time_scale(painter, x0, draw_w, draw_h, + draw_time_scale(bg_painter, x0, draw_w, draw_h, self._view_start, self._view_end, self._samplerate) + bg_painter.end() + log.debug("[Trace] WaveformWidget._update_bg_pixmap rendered in %.2f ms", (time.perf_counter() - t0) * 1000) + + def paintEvent(self, event): + w = max(1, self.width()) + h = max(1, self.height()) + sz = self.size() + + if not self._is_resizing and (self._bg_pixmap is None or getattr(self._bg_pixmap, "_logical_size", None) != sz): + self._update_bg_pixmap(w, h) + + painter = QPainter(self) + + if self._is_resizing and self._stale_pixmap is not None: + # Draw the stale pixmap stretched to fit the current size (fast path) + painter.drawPixmap(self.rect(), self._stale_pixmap) + elif self._bg_pixmap is not None: + painter.drawPixmap(0, 0, self._bg_pixmap) + + painter.setRenderHint(QPainter.Antialiasing, True) + + if self._loading or not self._channels or self._total_samples == 0: + painter.end() + return + + x0, draw_w = self._draw_area() + draw_h = h - self._MARGIN_BOTTOM # Playback cursor if self._total_samples > 0: @@ -319,9 +420,20 @@ def paintEvent(self, event): # ── Qt event handlers ────────────────────────────────────────────────── def resizeEvent(self, event): + if not self._is_resizing: + self._is_resizing = True + self._stale_pixmap = self._bg_pixmap + self._resize_timer.start() + # Invalidate deferred to _flush_resize for fast-draft dragging + super().resizeEvent(event) + + def _flush_resize(self): + """Called when the 150ms debounce timer expires after a resize drag.""" + self._is_resizing = False + self._stale_pixmap = None self._wf_renderer.invalidate() self._spec_renderer.invalidate() - super().resizeEvent(event) + self._invalidate_bg() def mousePressEvent(self, event): self.setFocus() @@ -356,7 +468,7 @@ def mousePressEvent(self, event): self._cursor_y_value = None else: self._cursor_y_value = None - self.update() + self._invalidate_bg() self.position_clicked.emit(sample) def mouseMoveEvent(self, event): @@ -437,7 +549,7 @@ def wheelEvent(self, event): else: self._vscale = (min(self._vscale * 1.25, 20.0) if delta > 0 else max(self._vscale / 1.25, 0.1)) - self.update() + self._invalidate_bg() event.accept() elif ctrl: x0, draw_w = self._draw_area() @@ -462,7 +574,7 @@ def wheelEvent(self, event): self._view_start = new_start self._view_end = new_end self._wf_renderer.invalidate() - self.update() + self._invalidate_bg() event.accept() elif shift and alt: if self._display_mode == "spectrogram": @@ -473,7 +585,7 @@ def wheelEvent(self, event): if self._invert_v: scroll = -scroll self._spec_renderer.scroll_freq(scroll, self._samplerate) - self.update() + self._invalidate_bg() event.accept() elif shift: view_len = self._view_end - self._view_start @@ -502,7 +614,7 @@ def wheelEvent(self, event): def _flush_scroll(self): self._scroll_pending = False - self.update() + self._invalidate_bg() def keyPressEvent(self, event): key = event.key() @@ -541,7 +653,7 @@ def _zoom_at_guide(self, zoom_in: bool): self._view_start = new_start self._view_end = new_end self._wf_renderer.invalidate() - self.update() + self._invalidate_bg() # ── Zoom / vertical-scale public API ─────────────────────────────────── @@ -552,7 +664,7 @@ def zoom_fit(self): self._vscale = 1.0 self._spec_renderer.reset_freq_view(self._samplerate) self._wf_renderer.invalidate() - self.update() + self._invalidate_bg() def zoom_in(self): """Zoom in 2× centered on the cursor.""" @@ -572,7 +684,7 @@ def zoom_in(self): self._view_start = new_start self._view_end = new_end self._wf_renderer.invalidate() - self.update() + self._invalidate_bg() def zoom_out(self): """Zoom out 2× centered on the cursor.""" @@ -592,7 +704,7 @@ def zoom_out(self): self._view_start = new_start self._view_end = new_end self._wf_renderer.invalidate() - self.update() + self._invalidate_bg() def scale_up(self): """Increase vertical amplitude scale / zoom freq in (spectrogram).""" @@ -601,7 +713,7 @@ def scale_up(self): 2 / 3, self._cursor_y_value, self._samplerate) else: self._vscale = min(self._vscale * 1.5, 20.0) - self.update() + self._invalidate_bg() def scale_down(self): """Decrease vertical amplitude scale / zoom freq out (spectrogram).""" @@ -610,7 +722,7 @@ def scale_down(self): 3 / 2, self._cursor_y_value, self._samplerate) else: self._vscale = max(self._vscale / 1.5, 0.1) - self.update() + self._invalidate_bg() # ── Public setters ───────────────────────────────────────────────────── @@ -618,30 +730,30 @@ def set_rms_data(self, window_samples: int): """Set the RMS window size.""" self._rms_window_samples = max(window_samples, 0) self._wf_renderer.set_rms_window(window_samples) - self.update() + self._invalidate_bg() def toggle_markers(self, on: bool): self._show_markers = on - self.update() + self._invalidate_bg() def toggle_rms_lr(self, on: bool): self._show_rms_lr = on - self.update() + self._invalidate_bg() def toggle_rms_avg(self, on: bool): self._show_rms_avg = on - self.update() + self._invalidate_bg() def set_enabled_overlays(self, labels: set[str]): self._enabled_overlays = set(labels) - self.update() + self._invalidate_bg() def set_display_mode(self, mode: str): if mode not in ("waveform", "spectrogram"): return self._display_mode = mode self._spec_renderer.invalidate() - self.update() + self._invalidate_bg() def set_invert_scroll(self, mode: str): self._invert_h = mode in ("horizontal", "both") @@ -649,15 +761,15 @@ def set_invert_scroll(self, mode: str): def set_wf_antialias(self, enabled: bool): self._wf_antialias = enabled - self.update() + self._invalidate_bg() def set_wf_line_width(self, width: int): self._wf_line_width = max(1, min(width, 3)) - self.update() + self._invalidate_bg() def set_colormap(self, name: str): self._spec_renderer.set_colormap(name) - self.update() + self._invalidate_bg() def set_spec_fft(self, n_fft: int): if n_fft == self._spec_renderer.spec_n_fft: @@ -673,11 +785,11 @@ def set_spec_window(self, window: str): def set_spec_db_floor(self, val: float): self._spec_renderer.set_db_floor(val) - self.update() + self._invalidate_bg() def set_spec_db_ceil(self, val: float): self._spec_renderer.set_db_ceil(val) - self.update() + self._invalidate_bg() @property def spec_n_fft(self) -> int: @@ -692,6 +804,6 @@ def _recompute_spectrogram(self): return self._spec_renderer.recompute( self._channels, self._samplerate, - on_done=self.update, parent=self, + on_done=self._invalidate_bg, parent=self, ) - self.update() + self._invalidate_bg() diff --git a/sessionpreplib/_version.py b/sessionpreplib/_version.py index 6f30ca1..a4e466d 100644 --- a/sessionpreplib/_version.py +++ b/sessionpreplib/_version.py @@ -1,3 +1,3 @@ """Single source of truth for the SessionPrep version number.""" -__version__ = "0.3.3" +__version__ = "0.3.4" diff --git a/sessionpreplib/events.py b/sessionpreplib/events.py index 6527e80..5064053 100644 --- a/sessionpreplib/events.py +++ b/sessionpreplib/events.py @@ -14,6 +14,11 @@ class EventBus: def __init__(self) -> None: self._handlers: dict[str, list[Callable[..., Any]]] = {} self._lock = threading.Lock() + self.is_cancelled = threading.Event() + + def cancel(self) -> None: + """Signal internal processes to abort work cleanly.""" + self.is_cancelled.set() def subscribe(self, event_type: str, handler: Callable[..., Any]) -> None: """Register a handler for an event type.""" diff --git a/sessionpreplib/pipeline.py b/sessionpreplib/pipeline.py index 5768782..2005cd5 100644 --- a/sessionpreplib/pipeline.py +++ b/sessionpreplib/pipeline.py @@ -99,6 +99,9 @@ def _emit(self, event_type: str, **data): def _analyze_track(self, track: TrackContext, idx: int, total: int, detectors: list[TrackDetector]): """Run all track-level detectors for a single track (thread-safe).""" + if self.event_bus and self.event_bus.is_cancelled.is_set(): + return + self._emit("track.analyze_start", filename=track.filename, index=idx, total=total) t_track_start = time.perf_counter() @@ -162,6 +165,9 @@ def _run_analysis_phase(self, session: SessionContext, phase: LifecyclePhase) -> # Session-level detectors track_map = {t.filename: t for t in session.tracks} for det in session_dets: + if self.event_bus and self.event_bus.is_cancelled.is_set(): + break + try: self._emit("session_detector.start", detector_id=det.id) t0 = time.perf_counter() @@ -207,6 +213,9 @@ def analyze_phase2(self, session: SessionContext) -> SessionContext: def _plan_track(self, track: TrackContext, idx: int, total: int): """Run all audio processors for a single track (thread-safe).""" + if self.event_bus and self.event_bus.is_cancelled.is_set(): + return + self._emit("track.plan_start", filename=track.filename, index=idx, total=total) t_track_start = time.perf_counter() @@ -671,6 +680,20 @@ def _load_one_track( event_bus: EventBus | None, ) -> TrackContext: """Load a single WAV file (used by thread pool in load_session).""" + if event_bus and event_bus.is_cancelled.is_set(): + return TrackContext( + filename=filename, + filepath=os.path.join(source_dir, filename), + audio_data=None, + samplerate=0, + channels=0, + total_samples=0, + bitdepth="", + subtype="", + duration_sec=0.0, + status="Cancelled", + ) + filepath = os.path.join(source_dir, filename) if event_bus: event_bus.emit("track.load", filename=filename,