diff --git a/.github/workflows/build-nuitka.yml b/.github/workflows/build-nuitka.yml
index 33a91f0..97b805b 100644
--- a/.github/workflows/build-nuitka.yml
+++ b/.github/workflows/build-nuitka.yml
@@ -3,6 +3,9 @@ name: Build (Nuitka)
 on:
   workflow_dispatch:
 
+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
   build:
     name: ${{ matrix.name }}
@@ -31,13 +34,13 @@ jobs:
             suffix: win-arm64
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v7
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version-file: "pyproject.toml"
 
@@ -50,7 +53,7 @@ jobs:
         run: brew install ccache
 
       - name: Cache Nuitka & CCache
-        uses: actions/cache@v4
+        uses: actions/cache@v5
         with:
           path: |
             ~/.cache/Nuitka
@@ -169,11 +172,12 @@ jobs:
       # Upload
       # -----------------------------------------------------------------------
       - name: Upload Artifacts
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v6
         with:
           name: sessionprep-${{ matrix.suffix }}
           path: |
             dist_nuitka/sessionprep-*.*
+            dist_nuitka/sessionprep_*.*
             !dist_nuitka/sessionprep-*.build
             !dist_nuitka/sessionprep-*.build/**
             !dist_nuitka/sessionprep-*.dist
diff --git a/.github/workflows/build-pyinstaller.yml b/.github/workflows/build-pyinstaller.yml
index 0527204..7b79669 100644
--- a/.github/workflows/build-pyinstaller.yml
+++ b/.github/workflows/build-pyinstaller.yml
@@ -3,6 +3,9 @@ name: Build (PyInstaller)
 on:
   workflow_dispatch:
 
+env:
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
 jobs:
   build:
     name: Build on ${{ matrix.os }}
@@ -13,13 +16,13 @@ jobs:
         os: [ubuntu-latest, windows-latest, macos-latest, macos-15-intel]
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v7
 
       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version-file: "pyproject.toml"
 
@@ -101,7 +104,7 @@ jobs:
 
       - name: Upload Artifacts (Windows)
         if: runner.os == 'Windows'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v6
         with:
           name: sessionprep-${{ matrix.os }}
           path: |
@@ -112,7 +115,7 @@ jobs:
 
       - name: Upload Artifacts (macOS)
         if: runner.os == 'macOS'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v6
         with:
           name: sessionprep-${{ matrix.os }}
           path: dist_pyinstaller/*.dmg
@@ -120,7 +123,7 @@ jobs:
 
       - name: Upload Artifacts (Linux)
         if: runner.os == 'Linux'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v6
         with:
           name: sessionprep-${{ matrix.os }}
           path: |
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index bfc23c2..81311c1 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -431,9 +431,13 @@ sessionprepgui/                  # GUI package (PySide6)
     waveform/
         __init__.py              # Re-exports WaveformWidget, WaveformLoadWorker, SPECTROGRAM_COLORMAPS
         compute.py               # Colormaps, mel math, spectrogram computation, QThread workers
+                                 #   (WaveformLoadWorker, SpectrogramWorker, PeakBuildWorker)
+        peakcache.py             # Pre-computed peak mipmap cache: build, save, load, query
+                                 #   Binary .peaks format for fast waveform rendering
         renderer.py              # WaveformRenderer — peaks, waveform drawing, RMS overlay, dB scale, markers
         spectrogram.py           # SpectrogramRenderer — mel image, frequency scale, freq zoom/pan
         overlay.py               # Stateless overlay drawing functions (issue overlays, time scale)
+        panel.py                 # WaveformPanel — composite widget: toolbar + WaveformWidget + transport bar
         widget.py                # WaveformWidget — thin orchestrator coordinating WaveformRenderer
                                  #   and SpectrogramRenderer
 
@@ -772,7 +776,7 @@ Contains audio I/O, cached DSP helpers, and stateless DSP functions.
 **File discovery:**
 
 - `discover_track(filepath) -> TrackContext` — reads audio file metadata (channels, samplerate, bitdepth, duration) without loading audio data. Sets `filename = os.path.basename(filepath)` — callers override with relative paths when recursive scanning is active.
-- `discover_audio_files(root_dir, recursive=False, skip_folders=None) -> list[str]` — returns a sorted list of audio file paths relative to *root_dir*. When `recursive=False`, returns bare filenames (flat `os.listdir`). When `True`, walks subdirectories via `os.walk(followlinks=False)` and returns forward-slash–separated relative paths (e.g. `"drums/01_Kick.wav"`). Directories whose name appears in `skip_folders` are pruned (e.g. `{"sp_01_tracklayout", "sp_02_prepared"}`). Results sorted by `protools_sort_key`.
+- `discover_audio_files(root_dir, recursive=False, skip_folders=None) -> list[str]` — returns a sorted list of audio file paths relative to *root_dir*. When `recursive=False`, returns bare filenames (flat `os.listdir`). When `True`, walks subdirectories via `os.walk(followlinks=False)` and returns forward-slash–separated relative paths (e.g. `"drums/01_Kick.wav"`). Directories whose name appears in `skip_folders` are pruned (e.g. `{"sp_01_tracklayout", "sp_02_prepared", "sp_peaks"}`). Results sorted by `protools_sort_key`.
 
 **File I/O:**
 
@@ -1022,7 +1026,7 @@ DAWproject, +12 dB for Pro Tools). The headroom margin is configured via
 `fader_headroom_db` (default 8 dB). If the highest fader offset exceeds
 `ceiling - headroom`, all fader offsets are shifted down uniformly by the
 excess amount. The shift is stored in `ProcessorResult.data["fader_rebalance_shift"]`
-and logged via `dbg()` when `SP_DEBUG` is active.
+and logged via `dbg()` when `SP_LOG_LEVEL=DEBUG` is set.
 
 ### 7.7 Registration
 
@@ -1307,7 +1311,7 @@ the current state.
 
 ### 9.5 Profiling & Debugging
 
-When the `SP_DEBUG` environment variable is set to `1` or `true`, the pipeline
+When the `SP_LOG_LEVEL` environment variable is set to `DEBUG`, the pipeline
 emits per-component timing via `dbg()` (from `sessionprepgui/log.py`).
 Output goes to stderr with timestamps and caller class names.
 
@@ -1664,7 +1668,7 @@ group).
 | `theme.py`                | `COLORS` dict, `FILE_COLOR_*` constants, dark palette + stylesheet                                                                                                                                                                                                                                                                                                                            |
 | `helpers.py`              | `esc()`, `track_analysis_label(track, detectors=None)` (filters via `is_relevant()`), `fmt_time()`, severity maps                                                                                                                                                                                                                                                                             |
 | `widgets.py`              | `BatchEditTableWidget`, `BatchComboBox` — reusable batch-edit base classes preserving multi-row selection across cell-widget clicks (zero app imports)                                                                                                                                                                                                                                        |
-| `log.py`                  | `dbg(msg)` — lightweight debug logging to stderr, gated by `SP_DEBUG` env var. Timestamped output with caller class name. Used by `pipeline.py`, `dawproject.py`, and other modules via conditional import.                                                                                                                                                                                   |
+| `log.py`                  | `dbg(msg)` — lightweight debug logging to stderr, gated by `SP_LOG_LEVEL=DEBUG`. Timestamped output with caller class name. Used by `pipeline.py`, `dawproject.py`, and other modules via conditional import.                                                                                                                                                                                   |
 | `analysis/mixin.py`       | `AnalysisMixin` — open/save/load session, analyze, prepare, session Config tab wiring                                                                                                                                                                                                                                                                                                         |
 | `analysis/worker.py`      | QThread workers: `AnalyzeWorker` (pipeline in background, thread-safe progress, per-track signals), `BatchReanalyzeWorker` (subset re-analysis after batch overrides), `PrepareWorker` (runs `Pipeline.prepare()` in background with progress), `DawCheckWorker` (connectivity check), `DawFetchWorker` (folder fetch), `DawTransferWorker` (transfer with progress + progress_value signals) |
 | `daw/mixin.py`            | `DawMixin` — DAW processor selection, check/fetch/transfer/sync, folder tree, drag-and-drop track assignment, Track Name inline editing, duplication with `-[N]` naming                                                                                                                                                                                                                       |
@@ -1680,10 +1684,12 @@ group).
 | `tracks/groups_mixin.py`  | `GroupsMixin` — group assignment UI, color rendering in track table                                                                                                                                                                                                                                                                                                                           |
 | `tracks/table_widgets.py` | Track table widget classes (custom cell widgets, batch-edit base classes)                                                                                                                                                                                                                                                                                                                     |
 | `waveform/__init__.py`    | Re-exports `WaveformWidget`, `WaveformLoadWorker`, `SPECTROGRAM_COLORMAPS`                                                                                                                                                                                                                                                                                                                    |
-| `waveform/compute.py`     | Colormaps (magma/viridis/grayscale LUTs), mel math, spectrogram computation, `WaveformLoadWorker` QThread                                                                                                                                                                                                                                                                                     |
-| `waveform/renderer.py`    | `WaveformRenderer` — vectorised NumPy peak/RMS downsampling, waveform drawing, RMS L/R and AVG envelopes, dB scale, peak/RMS markers                                                                                                                                                                                                                                                          |
+| `waveform/compute.py`     | Colormaps (magma/viridis/grayscale LUTs), mel math, spectrogram computation, `WaveformLoadWorker` QThread, `PeakBuildWorker` (batch peak cache builder using thread pool)                                                                                                                                                                                                                      |
+| `waveform/peakcache.py`   | Pre-computed peak mipmap cache: `PeakData`, `MipLevel`, `build_peaks()`, `save_peaks()`, `load_peaks()`, `query_peaks_fast()`. Binary `.peaks` file format (`SPK1`) with staleness detection via source file mtime                                                                                                                                                                            |
+| `waveform/renderer.py`    | `WaveformRenderer` — vectorised NumPy peak/RMS downsampling, waveform drawing, RMS L/R and AVG envelopes, dB scale, peak/RMS markers. Uses mip-level peak queries when `PeakData` is available for near-instant rendering                                                                                                                                                                      |
 | `waveform/spectrogram.py` | `SpectrogramRenderer` — mel spectrogram QImage (256 mel bins via `scipy.signal.stft`), frequency scale, freq zoom/pan, background recompute worker                                                                                                                                                                                                                                            |
 | `waveform/overlay.py`     | Stateless overlay drawing functions — detector issue overlays (with optional frequency bounds), horizontal time scale                                                                                                                                                                                                                                                                         |
+| `waveform/panel.py`       | `WaveformPanel` — composite widget: waveform toolbar + `WaveformWidget` + transport bar. Reused in Phase 1 (topology preview) and Phase 2 (file detail)                                                                                                                                                                                                                                       |
 | `waveform/widget.py`      | `WaveformWidget` — thin orchestrator coordinating `WaveformRenderer` and `SpectrogramRenderer`; paintEvent, mouse/keyboard event handlers, zoom/pan API, public setters                                                                                                                                                                                                                       |
 | `prefs/param_form.py`     | **Portable** generic widget factory — `ParamSpec` protocol, `PathPickerMode`, `PathPicker`, `_build_widget`, `_build_param_page`, `_set_widget_value`, `_read_widget`, tooltip/subtext builders, `sanitize_output_folder`.  Zero sessionpreplib dependency; copy to any PySide6 project.                                                                                                      |
 | `prefs/preset_panel.py`   | **Portable** `NamedPresetPanel` — reusable CRUD widget for named presets with add/duplicate/rename/delete signals.                                                                                                                                                                                                                                                                            |
@@ -2040,6 +2046,76 @@ widgets when available, falling back to the global preset otherwise.
 The CLI is **not** affected by this file — it continues to use its own
 `default_config()` + command-line arguments.
 
+### 18.5 Waveform Peak Cache
+
+Waveform rendering uses a **pre-computed peak mipmap cache** (similar to
+Cubase/Reaper `.peaks` files) to avoid expensive raw-sample downsampling on
+every paint. The cache is built eagerly in the background and persisted to disk
+as binary `.peaks` files in the `sp_peaks/` directory.
+
+**Architecture:**
+
+```mermaid
+graph LR
+    A[Folder Open] --> B["_on_phase1_done()"]
+    B --> C["PeakBuildWorker (bg thread pool)"]
+    C --> D["sp_peaks/*.peaks files"]
+    C --> E["In-memory _peak_cache dict"]
+    E --> F["WaveformRenderer.set_peak_data()"]
+    F --> G["query_peaks_fast() → instant paint"]
+```
+
+**File format (`SPK1`):**
+
+| Field          | Size    | Description                           |
+|----------------|---------|---------------------------------------|
+| Magic          | 4 bytes | `SPK1`                                |
+| Version        | 2 bytes | Format version (currently 1)          |
+| Channels       | 2 bytes | Number of audio channels              |
+| Samplerate     | 4 bytes | Source file sample rate               |
+| Total samples  | 8 bytes | Source file total sample count        |
+| Source mtime   | 8 bytes | Source file modification time (epoch) |
+| Num levels     | 2 bytes | Number of mip levels                  |
+| *(per level)*  |         |                                       |
+| Samples/bin    | 4 bytes | Resolution of this mip level          |
+| Num bins       | 4 bytes | Number of bins in this level          |
+| Data           | varies  | `float32[n_bins × channels × 2]` (min/max) |
+
+The cache uses four mip levels, at 256, 1024, 4096, and 16384 samples/bin. A
+5-minute stereo 48 kHz file produces ~1.2 MB of peak data in total.
+
+**Integration points:**
+
+- `settings.py` — `peak_cache_folder` default (`"sp_peaks"`), added to
+  `skip_folders` so the cache directory is not scanned as audio content.
+- `analysis/mixin.py` — `_start_peak_build()` is triggered from `_on_phase1_done()`,
+  `_restore_session_state()`, and `_on_analyze_done()`. Maintains the `_peak_cache`
+  dict (filename → `PeakData`). Worker cancelled and cache reset in `_clear_workspace()`.
+- `topology/mixin.py` — injects `PeakData` into the Phase 1 waveform display
+  via `set_peak_data()` after `set_precomputed()`. Passes `peaks_dir` to
+  `TopologyApplyWorker` so output files get cached during Apply.
+- `detail/mixin.py` — injects `PeakData` into the Phase 2 waveform display
+  after `set_precomputed()`.
+- `analysis/worker.py` — `TopologyApplyWorker` builds and saves `.peaks` files
+  immediately after writing each output file.
+- `waveform/renderer.py` — `_build_peaks()` uses `query_peaks_fast()` when
+  `PeakData` is available, falling back to raw-sample downsampling otherwise.
+
+**Staleness:** Each `.peaks` file stores the source file's `mtime`. On load,
+`load_peaks()` compares the stored mtime against the current source file; for a
+stale cache it returns `None`, and the file is queued for a background rebuild.
+
+**Instant Preview Mode (`set_preview_mode`):**
+To hide the latency of reading large audio files from disk, the UI supports an
+**Instant Preview Mode**. If a track is selected and its peak cache is available,
+the GUI initializes the `WaveformWidget` with empty placeholder channels and applies
+the peak cache immediately, so the waveform is drawn without waiting for the audio
+file to be read. Meanwhile, `AudioLoadWorker` and `WaveformLoadWorker` load the full
+audio data in the background. When they complete, the real samples replace the
+placeholder channels, enabling the Mel Spectrogram, RMS envelope, and audio playback.
+If a peak cache finishes building *while* the waveform is still in its loading state,
+the UI pushes the new peak data to the widget right away to end the loading state.
+
 ---
 
 ## 19. Migration Notes
diff --git a/README.md b/README.md
index 09e9b6f..2c8b679 100644
--- a/README.md
+++ b/README.md
@@ -150,7 +150,7 @@ SessionPrep operates in three phases:
 
 | Phase | Name                   | What happens                                                                                                                                                                                                                                                                                                                                                                                                              | When                           |
 |-------|------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------|
-| **1** | Track Layout           | Define how source tracks map to output files: channel routing, reordering, splitting, merging. Automatically optimizes layouts based on Phase 1 diagnostics (e.g. dropping silent files, extracting active channels from dual-mono and one-sided silence). Drag-and-drop between input/output trees with visual insert-position indicator. Optional recursive subfolder scanning. Output written to `sp_01_tracklayout/`. | GUI Phase 1 (always available) |
+| **1** | Track Layout           | Define how source tracks map to output files: channel routing, reordering, splitting, merging. Automatically optimizes layouts based on Phase 1 diagnostics (e.g. dropping silent files, extracting active channels from dual-mono and one-sided silence). Drag-and-drop between input/output trees with visual insert-position indicator. Optional recursive subfolder scanning. Output written to `sp_01_tracklayout/`. Pre-computed peak cache files written to `sp_peaks/` for fast waveform rendering. | GUI Phase 1 (always available) |
 | **2** | Analysis & Preparation | Format checks, clipping, DC offset, stereo compatibility, silence, subsonic, peak/RMS measurement, classification, tail exceedance. Bimodal normalization (clip gain adjustment) via Prepare. Output written to `sp_02_prepared/`.                                                                                                                                                                                        | GUI Phase 2 / CLI              |
 | **3** | DAW Transfer           | Transfer tracks into DAW session with per-track naming and folder assignment. Duplicate entries for multi-track scenarios (same clip on different tracks). Fader offsets applied automatically (Pro Tools via PTSL, DAWproject via file generation). Support for unattended batch processing of multiple songs.                                                                                                           | GUI Phase 3                    |
 
diff --git a/sessionprepgui/analysis/mixin.py b/sessionprepgui/analysis/mixin.py
index 74a71e5..38f83c2 100644
--- a/sessionprepgui/analysis/mixin.py
+++ b/sessionprepgui/analysis/mixin.py
@@ -220,6 +220,8 @@ def _clear_workspace(self):
         self._cancel_worker("_worker")
         self._cancel_worker("_batch_reanalyze_worker")
         self._cancel_worker("_prepare_worker")
+        self._cancel_worker("_peak_build_worker")
+        self._peak_cache = {}
         self._session = None
         self._summary = None
         self._current_track = None
@@ -302,6 +304,7 @@ def _load_directory(self, path: str):
         skip_folders = {
             app_cfg.get("phase1_output_folder", "sp_01_tracklayout"),
             app_cfg.get("phase2_output_folder", "sp_02_prepared"),
+            app_cfg.get("peak_cache_folder", "sp_peaks"),
         }
         wav_files = discover_audio_files(
             path, recursive=self._recursive_scan,
@@ -385,6 +388,9 @@ def _on_phase1_done(self, session):
         self._save_session_action.setEnabled(True)
         self.setWindowTitle("SessionPrep")
 
+        # Eagerly build peak cache for all source tracks in the background
+        self._start_peak_build(session.tracks, self._source_dir)
+
     @Slot()
     def _on_save_session(self):
         """Save the current session state to a .spsession file."""
@@ -489,6 +495,7 @@ def _restore_session_state(self, data: dict):
         skip_folders = {
             app_cfg.get("phase1_output_folder", "sp_01_tracklayout"),
             app_cfg.get("phase2_output_folder", "sp_02_prepared"),
+            app_cfg.get("peak_cache_folder", "sp_peaks"),
         }
         source_tracks = []
         for rel in discover_audio_files(
@@ -696,6 +703,13 @@ def _restore_session_state(self, data: dict):
         )
         self.setWindowTitle("SessionPrep")
 
+        # Eagerly build peak cache for source tracks (Phase 1 waveforms)
+        if source_dir and source_tracks:
+            self._start_peak_build(source_tracks, source_dir)
+        # And for analysis tracks (Phase 2 waveforms) if topology was applied
+        if self._topology_dir and tracks:
+            self._start_peak_build(tracks, self._topology_dir)
+
     # ── Analyze ──────────────────────────────────────────────────────────
 
     @Slot()
@@ -980,6 +994,106 @@ def _on_analyze_done(self, session, summary):
             f"Analysis complete: {ok_count}/{len(session.tracks)} tracks OK"
         )
 
+        # Eagerly build peak cache for Phase 2 tracks
+        analyze_dir = self._topology_dir or self._source_dir
+        if analyze_dir:
+            self._start_peak_build(session.tracks, analyze_dir)
+
+    # ── Peak cache ────────────────────────────────────────────────────────
+
+    def _start_peak_build(self, tracks, base_dir: str):
+        """Load cached peaks for *tracks*; build the rest in the background.
+
+        Valid ``.peaks`` files in the cache folder under *base_dir* are loaded
+        into ``self._peak_cache``; stale or missing ones are queued on a new
+        ``PeakBuildWorker``. Tracks whose audio file is absent are skipped.
+        """
+        from ..waveform.compute import PeakBuildWorker
+        from ..waveform.peakcache import (
+            peaks_path_for, load_peaks, get_source_mtime,
+        )
+
+        app_cfg = self._config.get("app", {})
+        peaks_folder = app_cfg.get("peak_cache_folder", "sp_peaks")
+        peaks_dir = os.path.join(base_dir, peaks_folder)
+
+        if not hasattr(self, "_peak_cache"):
+            self._peak_cache = {}
+
+        items = []
+        cached = 0  # tracks actually served from an on-disk .peaks file
+        for track in tracks:
+            filepath = getattr(track, "filepath", None)
+            if not filepath:
+                filepath = os.path.join(base_dir, track.filename)
+            if not os.path.isfile(filepath):
+                continue
+            pp = peaks_path_for(peaks_dir, track.filename)
+            # Try to reuse existing .peaks from disk
+            mtime = get_source_mtime(filepath)
+            existing = load_peaks(pp, expected_mtime=mtime)
+            if existing is not None:
+                self._peak_cache[track.filename] = existing
+                cached += 1
+            else:
+                items.append((filepath, track.filename, pp))
+
+        if not items:
+            log.info("Peak cache: all %d files already cached", cached)
+            return
+
+        log.info("Peak cache: %d cached from disk, %d to build", cached, len(items))
+        # NOTE(review): this cancels any build still running (e.g. back-to-back
+        # calls from _restore_session_state) — confirm its pending files are not lost.
+        self._cancel_worker("_peak_build_worker")
+        worker = PeakBuildWorker(items)
+        worker.file_done.connect(self._on_peak_file_done)
+        worker.progress.connect(self._status_bar.showMessage)
+        worker.all_done.connect(
+            lambda: log.info("Peak cache build complete (%d files)", len(items)))
+        self._peak_build_worker = worker
+        worker.start()
+
+    @Slot(str, object)
+    def _on_peak_file_done(self, filename: str, peak_data):
+        """Store freshly built peaks for *filename* and refresh any view showing it."""
+        if not hasattr(self, "_peak_cache"):
+            self._peak_cache = {}
+        self._peak_cache[filename] = peak_data
+
+        # Phase 1: topology preview, only when it is currently showing this file
+        if getattr(self, "_topo_wf_filename", None) == filename:
+            wf = getattr(self, "_topo_wf_panel", None)
+            if wf and hasattr(wf, "waveform"):
+                wf.waveform.set_peak_data(peak_data)
+                if getattr(wf.waveform, "_loading", False):  # widget still in loading state
+                    track = None
+                    if hasattr(self, "_topo_track_map"):
+                        track = self._topo_track_map().get(filename)
+                    if track:
+                        wf.waveform.set_preview_mode(
+                            track.channels, track.total_samples,
+                            track.samplerate, peak_data
+                        )
+
+        # Phase 2: analysis waveform, only when the current track is this file
+        cur_track = getattr(self, "_current_track", None)
+        if cur_track and cur_track.filename == filename:
+            wf = getattr(self, "_waveform", None)
+            if wf:
+                wf.set_peak_data(peak_data)
+                if getattr(wf, "_loading", False):  # widget still in loading state
+                    wf.set_preview_mode(
+                        cur_track.channels, cur_track.total_samples,
+                        cur_track.samplerate, peak_data
+                    )
+
+    def _prioritize_peak(self, filename: str):
+        """Forward *filename* to a running peak build worker's ``prioritize()``; else no-op."""
+        worker = getattr(self, "_peak_build_worker", None)
+        if worker is not None and worker.isRunning():
+            worker.prioritize(filename)
+
     @Slot(str)
     def _on_analyze_error(self, message: str):
         self._analyze_action.setEnabled(True)
diff --git a/sessionprepgui/analysis/worker.py b/sessionprepgui/analysis/worker.py
index e038dcb..229769f 100644
--- a/sessionprepgui/analysis/worker.py
+++ b/sessionprepgui/analysis/worker.py
@@ -2,8 +2,11 @@
 
 from __future__ import annotations
 
+import logging
 import threading
 
+log = logging.getLogger(__name__)
+
 from PySide6.QtCore import QThread, Signal
 
 from sessionpreplib.daw_processor import DawProcessor
@@ -107,10 +110,16 @@ def __init__(self, session_context: object, config: dict):
         super().__init__()
         self.session_context = session_context
         self.config = config
+        self._event_bus = EventBus()
+        self._is_cancelled = False
+
+    def cancel(self):
+        self._is_cancelled = True  # polled by run() while collecting finished futures
+        self._event_bus.cancel()  # same EventBus instance that run() uses
 
     def run(self):
         try:
-            event_bus = EventBus()
+            event_bus = self._event_bus
 
             # Use the already loaded session
             session = self.session_context
@@ -153,6 +162,8 @@ def run(self):
                     }
                     loaded = 0
                     for future in as_completed(futures):
+                        if self._is_cancelled:
+                            break
                         t = futures[future]
                         try:
                             res = future.result()
@@ -233,10 +244,16 @@ def __init__(self, source_dir: str, config: dict, recursive: bool = False):
         self.source_dir = source_dir
         self.config = config
         self.recursive = recursive
+        self._event_bus = EventBus()
+        self._is_cancelled = False
+
+    def cancel(self):
+        self._is_cancelled = True
+        self._event_bus.cancel()  # same EventBus instance run() hands to load_session()
 
     def run(self):
         try:
-            event_bus = EventBus()
+            event_bus = self._event_bus
 
             self.progress.emit("Loading session\u2026")
             session = load_session(self.source_dir, self.config, event_bus=event_bus,
@@ -356,10 +373,19 @@ def cancel(self):
 
     def run(self):
         try:
+            import time, logging
+            t0 = time.perf_counter()
+            log = logging.getLogger(__name__)
+
             from sessionpreplib.audio import load_track
             import soundfile as sf
             import numpy as np
             data, sr = sf.read(self._track.filepath, dtype='float64')
+            
+            elapsed = (time.perf_counter() - t0) * 1000
+            if getattr(self._track, 'filename', None):
+                log.debug("[Trace] AudioLoadWorker I/O (sf.read) for '%s': %.2f ms", self._track.filename, elapsed)
+
             if self._cancelled:
                 return
             self._track.audio_data = data
@@ -447,6 +473,11 @@ def cancel(self):
         self._cancelled = True
 
     def run(self):
+        import logging, time
+        t0 = time.perf_counter()
+        log = logging.getLogger(__name__)
+        log.debug("[Trace] TopoMultiAudioWorker loading %d items", len(self._items))
+        
         try:
             import os
             import numpy as np
@@ -459,6 +490,47 @@ def run(self):
 
             from sessionprepgui.waveform.panel import WaveformPanel
 
+            from concurrent.futures import ThreadPoolExecutor, as_completed
+            
+            # --- 1. Pre-collect all unique audio file paths ---
+            required_files = set()
+            if self._side == "input":
+                required_files.update(item[0] for item in self._items)
+            else:
+                for item in self._items:
+                    for src in item[0].sources:
+                        required_files.add(os.path.join(self._source_dir, src.input_filename))
+                        
+            # --- 2. Parallel I/O Load ---
+            audio_cache: dict[str, tuple[np.ndarray, int]] = {}
+            # Limit workers to 4 to prevent SSD thrashing and massive memory spike
+            max_workers = min(4, (os.cpu_count() or 1))
+            
+            log.debug("[Trace] TopoMultiAudioWorker loading %d unique files with %d workers", len(required_files), max_workers)
+            
+            with ThreadPoolExecutor(max_workers=max_workers) as pool:
+                fut_to_path = {
+                    pool.submit(sf.read, path, dtype='float64'): path
+                    for path in required_files
+                }
+                for fut in as_completed(fut_to_path):
+                    if self._cancelled:
+                        pool.shutdown(wait=False, cancel_futures=True)  # drop reads not yet started
+                        return
+                    path = fut_to_path[fut]
+                    # result() re-raises a failed read; run()'s outer except reports it
+                    data, file_sr = fut.result()
+                    log.debug("[Trace] TopoMultiAudioWorker loaded '%s'. Shape: %s, %.1f MB",
+                              os.path.basename(path), data.shape, data.nbytes / (1024 * 1024))
+                    audio_cache[path] = (data, file_sr)
+
+            if self._cancelled:
+                return
+                
+            log.debug("[Trace] TopoMultiAudioWorker completed parallel I/O in %.2f ms", (time.perf_counter() - t0) * 1000)
+            t_process = time.perf_counter()
+
+            # --- 3. Process loaded audio natively ---
             if self._side == "input":
                 for item in self._items:
                     if self._cancelled:
@@ -467,7 +539,8 @@ def run(self):
                     name = item[1]
                     channels_to_keep = item[2] if len(item) > 2 else None
 
-                    data, file_sr = sf.read(filepath, dtype='float64')
+                    # Retrieve directly from in-memory parallel cache
+                    data, file_sr = audio_cache[filepath]
                     sr = file_sr
                     if data.ndim == 1:
                         data = data.reshape(-1, 1)
@@ -503,7 +576,7 @@ def run(self):
                         if self._cancelled:
                             return
                         path = os.path.join(self._source_dir, src.input_filename)
-                        data, file_sr = sf.read(path, dtype='float64')
+                        data, file_sr = audio_cache[path]
                         track_audio[src.input_filename] = (data, file_sr)
                         sr = file_sr
                     
@@ -531,38 +604,43 @@ def run(self):
             if self._cancelled or not track_arrays:
                 return
 
-            # --- Build display audio (all channels concatenated) ---
+            # --- Build display audio (list of contiguous 1D channels) ---
+            log.debug("[Trace] TopoMultiAudioWorker building display & playback arrays...")
+            t_stack = time.perf_counter()
+            display_audio = []
             max_samples = max(a.shape[0] for a in track_arrays)
-            padded = []
             for a in track_arrays:
                 if a.shape[0] < max_samples:
                     pad = np.zeros((max_samples - a.shape[0], a.shape[1]),
                                    dtype=np.float64)
                     a = np.vstack([a, pad])
-                padded.append(a)
-            display_audio = np.hstack(padded)  # (max_samples, total_ch)
+                # Each channel becomes its own perfectly contiguous slice
+                for c in range(a.shape[1]):
+                    display_audio.append(np.ascontiguousarray(a[:, c]))
 
             # --- Build playback audio (summed by channel position) ---
             max_ch = max(track_ch_counts)
             n_tracks = len(track_arrays)
             playback = np.zeros((max_samples, max_ch), dtype=np.float64)
-            for a in padded:
-                playback[:, :a.shape[1]] += a
+            for a in track_arrays:
+                playback[:a.shape[0], :a.shape[1]] += a
             playback /= n_tracks
 
-            # Squeeze mono
+            # Squeeze mono playback
             if playback.shape[1] == 1:
                 playback = playback[:, 0]
-            if display_audio.shape[1] == 1:
-                display_audio = display_audio[:, 0]
 
             # --- Channel labels ---
             labels = []
             for lst in track_labels_list:
                 labels.extend(lst)
 
+            log.debug("[Trace] TopoMultiAudioWorker finished %d items (Total: %.2f ms, Process/Stack: %.2f ms)", 
+                      len(self._items), (time.perf_counter() - t0) * 1000, (time.perf_counter() - t_process) * 1000)
             self.finished.emit(display_audio, playback, sr, labels)
         except Exception as exc:
+            import traceback, logging
+            logging.getLogger(__name__).error("TopoMultiAudioWorker error: %s", traceback.format_exc())
             self.error.emit(str(exc))
 
 
@@ -615,14 +693,20 @@ class TopologyApplyWorker(QThread):
 
     progress = Signal(str)
     progress_value = Signal(int, int)
-    apply_finished = Signal()           # renamed: avoid shadowing QThread.finished
+    apply_finished = Signal()
     error = Signal(str)
 
-    def __init__(self, session, output_dir: str, source_dir: str | None = None):
+    def __init__(self, session, output_dir: str, source_dir: str | None = None,
+                 peaks_dir: str | None = None):
         super().__init__()
         self._session = session
         self._output_dir = output_dir
         self._source_dir = source_dir
+        self._peaks_dir = peaks_dir
+        self._is_cancelled = False
+
+    def cancel(self):
+        self._is_cancelled = True
 
     def run(self):
         try:
@@ -670,7 +754,10 @@ def run(self):
                 for src in entry.sources:
                     needed_sources.add(src.input_filename)
 
-            total = len(needed_sources) + len(topology.entries)
+            n_sources = len(needed_sources)
+            n_entries = len(topology.entries)
+            # Total includes peak building phase when peaks_dir is set
+            total = n_sources + n_entries + (n_entries if self._peaks_dir else 0)
 
             # Phase A: Load source audio
             # After a previous Apply+Analyze cycle, session.tracks may
@@ -679,6 +766,10 @@ def run(self):
             # track_map.  Fall back to loading directly from source_dir.
             source_audio: dict[str, tuple] = {}
             for step, filename in enumerate(sorted(needed_sources)):
+                if self._is_cancelled:
+                    return
+
+                log.debug("Apply topology: loading source '%s' (%d/%d)", filename, step + 1, total)
                 self.progress.emit(f"Loading {filename}")
                 self.progress_value.emit(step, total)
                 track = track_map.get(filename)
@@ -702,15 +793,24 @@ def run(self):
                     track.total_samples = loaded.total_samples
                 source_audio[filename] = (loaded.audio_data, loaded.samplerate)
 
-            # Phase B: Resolve topology + write output files
+            # Phase B: Resolve topology + write output files (no peak building)
             output_tracks = []
             errors = []
-            base_step = len(needed_sources)
+            written_files: list[tuple[str, str, int]] = []  # (output_filename, dst_path, sr)
+            base_step = n_sources
             for idx, entry in enumerate(topology.entries):
+                if self._is_cancelled:
+                    return
+
                 step = base_step + idx
+                log.debug("Apply topology: writing '%s' (%d/%d)", entry.output_filename, step + 1, total)
                 self.progress.emit(f"Writing {entry.output_filename}")
                 self.progress_value.emit(step, total)
 
+                # Skip entries with no channels (e.g. all channels moved elsewhere)
+                if entry.output_channels < 1 or not entry.sources:
+                    continue
+
                 try:
                     # Check all sources are available before resolving
                     missing = [
@@ -741,6 +841,7 @@ def run(self):
                     dst = os.path.join(output_dir, entry.output_filename)
                     os.makedirs(os.path.dirname(dst), exist_ok=True)
                     sf.write(dst, resolved, sr, subtype=subtype)
+                    written_files.append((entry.output_filename, dst, sr))
 
                     n_samples = resolved.shape[0]
                     out_tc = TrackContext(
@@ -759,6 +860,58 @@ def run(self):
                 except Exception as e:
                     errors.append((entry.output_filename, str(e)))
 
+            # Free source audio memory before peak building
+            source_audio.clear()
+
+            # Phase C: Build peak caches (re-reading from just-written files)
+            if self._peaks_dir:
+                from ..waveform.peakcache import (
+                    build_peaks, save_peaks, load_peaks,
+                    peaks_path_for, get_source_mtime,
+                )
+                peak_base_step = n_sources + n_entries
+                for idx, (out_fn, dst, sr) in enumerate(written_files):
+                    if self._is_cancelled:
+                        return
+
+                    step = peak_base_step + idx
+                    log.debug("Apply topology: building peaks '%s' (%d/%d)", out_fn, step + 1, total)
+                    self.progress.emit(f"Building peaks for {out_fn}")
+                    self.progress_value.emit(step, total)
+
+                    try:
+                        pp = peaks_path_for(self._peaks_dir, out_fn)
+                        mtime = get_source_mtime(dst)
+                        # Heuristic: skip the rebuild when cached frame/channel counts match.
+                        # NOTE(review): equal shape does not prove equal content - confirm intent.
+                        info = sf.info(dst)
+                        existing = load_peaks(pp)
+                        if (existing is not None
+                                and existing.total_samples == info.frames
+                                and existing.channels == info.channels):
+                            log.debug("Peak cache for '%s' is up-to-date, skipping rebuild -> %s", out_fn, pp)
+                            continue
+
+                        import time as _time
+                        _t0 = _time.perf_counter()
+                        data, file_sr = sf.read(dst, dtype="float64")
+                        _t_read = _time.perf_counter()
+                        pd = build_peaks(data, file_sr, source_mtime=mtime)
+                        _t_build = _time.perf_counter()
+                        save_peaks(pd, pp)
+                        _t_save = _time.perf_counter()
+                        log.debug(
+                            "Built peak cache for '%s' -> %s "
+                            "(%d ch, %d levels, read=%.1f ms, build=%.1f ms, save=%.1f ms, total=%.1f ms)",
+                            out_fn, pp, pd.channels, len(pd.levels),
+                            (_t_read - _t0) * 1000,
+                            (_t_build - _t_read) * 1000,
+                            (_t_save - _t_build) * 1000,
+                            (_t_save - _t0) * 1000,
+                        )
+                    except Exception as e:  # best-effort: a peak-cache failure must not fail the apply
+                        log.warning("Failed to build/save peak cache for '%s' in %s: %s", out_fn, self._peaks_dir, e)
+
             self.progress_value.emit(total, total)
 
             session.output_tracks = output_tracks
diff --git a/sessionprepgui/detail/mixin.py b/sessionprepgui/detail/mixin.py
index c49f0c5..e3dcabd 100644
--- a/sessionprepgui/detail/mixin.py
+++ b/sessionprepgui/detail/mixin.py
@@ -75,6 +75,10 @@ def _show_track_detail(self, track):
 
     def _load_waveform(self, track):
         """Start background waveform loading for *track*."""
+        import time, logging
+        t0 = time.perf_counter()
+        log = logging.getLogger(__name__)
+
         # Guard: user may have clicked a different track while we were queued
         if self._current_track is not track:
             return
@@ -89,15 +93,29 @@ def _load_waveform(self, track):
             self._audio_load_worker.finished.disconnect()
             self._audio_load_worker = None
 
-        # If audio_data is absent but the file exists, load it from disk first
-        if (track.audio_data is None or track.audio_data.size == 0) and \
-                track.status == "OK" and os.path.isfile(track.filepath):
+        # 1. Handle instant preview or loading state
+        peak_cache = getattr(self, '_peak_cache', {})
+        fn = getattr(track, 'filename', None)
+        if fn and fn in peak_cache:
+            self._waveform.set_preview_mode(
+                track.channels, track.total_samples, track.samplerate, peak_cache[fn]
+            )
+        else:
             self._waveform.set_loading(True)
-            if self._detail_tabs.currentIndex() == _TAB_FILE:
-                self._wf_container.setVisible(True)
-            self._play_btn.setEnabled(False)
-            self._update_time_label(0)
+            if fn and hasattr(self, '_prioritize_peak'):
+                self._prioritize_peak(fn)
+
+        if self._detail_tabs.currentIndex() == _TAB_FILE:
+            self._wf_container.setVisible(True)
+        self._play_btn.setEnabled(False)
+        self._update_time_label(0)
+
+        if fn:
+            log.debug("[Trace] _load_waveform UI setup for '%s': %.2f ms", fn, (time.perf_counter() - t0) * 1000)
 
+        # 2. If audio_data is absent but the file exists, load it from disk first
+        if (track.audio_data is None or track.audio_data.size == 0) and \
+                track.status == "OK" and os.path.isfile(track.filepath):
             worker = AudioLoadWorker(track, parent=self)
             self._audio_load_worker = worker
             worker.finished.connect(
@@ -107,26 +125,13 @@ def _load_waveform(self, track):
             worker.start()
             return
 
+        # 3. If audio is available in memory, run WaveformLoadWorker (for RMS/Spectrogram)
         has_audio = track.audio_data is not None and track.audio_data.size > 0
         if has_audio:
-            self._waveform.set_loading(True)
-            if self._detail_tabs.currentIndex() == _TAB_FILE:
-                self._wf_container.setVisible(True)
-            self._play_btn.setEnabled(False)
-            self._update_time_label(0)
-
-            flat_cfg = self._flat_config()
-            win_ms = flat_cfg.get("window", 400)
-            ws = get_window_samples(track, win_ms)
-
-            self._wf_worker = WaveformLoadWorker(
-                track.audio_data, track.samplerate, ws,
-                spec_n_fft=self._waveform.spec_n_fft,
-                spec_window=self._waveform.spec_window,
-                parent=self)
-            self._wf_worker.finished.connect(
-                lambda result, t=track: self._on_waveform_loaded(result, t))
-            self._wf_worker.start()
+            nch = track.audio_data.shape[1] if track.audio_data.ndim > 1 else 1
+            self._wf_panel.update_play_mode_channels(nch)
+            self._play_btn.setEnabled(True)
+            self._start_wf_worker(track)
         else:
             self._waveform.set_audio(None, 44100)
             self._update_overlay_menu([])
@@ -135,6 +140,30 @@ def _load_waveform(self, track):
             self._play_btn.setEnabled(False)
             self._update_time_label(0)
 
+    def _start_wf_worker(self, track):
+        flat_cfg = self._flat_config()
+        win_ms = flat_cfg.get("window", 400)
+        ws = get_window_samples(track, win_ms)
+
+        self._wf_worker = WaveformLoadWorker(
+            track.audio_data, track.samplerate, ws,
+            spec_n_fft=self._waveform.spec_n_fft,
+            spec_window=self._waveform.spec_window,
+            compute_spectrogram=(self._waveform._display_mode == "spectrogram"),
+            parent=self)
+        self._wf_worker.finished.connect(
+            lambda result, t=track: self._on_waveform_loaded(result, t))
+        self._wf_worker.start()
+
+    @Slot(str)
+    def _on_display_mode_changed(self, mode: str):
+        if mode == "spectrogram" and self._current_track:
+            track = self._current_track
+            if track.audio_data is not None and track.audio_data.size > 0:
+                if getattr(self._waveform._spec_renderer, '_spec_data', None) is None:
+                    if self._wf_worker is None:
+                        self._start_wf_worker(track)
+
     @Slot(object, object)
     def _on_waveform_loaded(self, result: dict, track):
         """Receive pre-computed waveform data from the background worker."""
@@ -145,6 +174,14 @@ def _on_waveform_loaded(self, result: dict, track):
             return
 
         self._waveform.set_precomputed(result)
+        # Apply cached peak data for mip-level rendering
+        peak_cache = getattr(self, '_peak_cache', {})
+        fn = getattr(track, 'filename', None)
+        if fn and fn in peak_cache:
+            self._waveform.set_peak_data(peak_cache[fn])
+        elif fn and hasattr(self, '_prioritize_peak'):
+            self._prioritize_peak(fn)
+
         cmap = self._config.get("app", {}).get("spectrogram_colormap", "magma")
         self._waveform.set_colormap(cmap)
         # Sync colormap dropdown with preference
@@ -158,8 +195,6 @@ def _on_waveform_loaded(self, result: dict, track):
             all_issues.extend(getattr(det_result, "issues", []))
         self._waveform.set_issues(all_issues)
         self._update_overlay_menu(all_issues)
-        self._wf_panel.update_play_mode_channels(len(result["channels"]))
-        self._play_btn.setEnabled(True)
         self._update_time_label(0)
 
     def _on_audio_loaded(self, track, orig_track):
diff --git a/sessionprepgui/mainwindow.py b/sessionprepgui/mainwindow.py
index a7c0b3d..541dd27 100644
--- a/sessionprepgui/mainwindow.py
+++ b/sessionprepgui/mainwindow.py
@@ -640,6 +640,7 @@ def _build_right_panel(self) -> QWidget:
         self._wf_panel.play_clicked.connect(self._on_play)
         self._wf_panel.stop_clicked.connect(self._on_stop)
         self._wf_panel.position_clicked.connect(self._on_waveform_seek)
+        self._wf_panel.display_mode_changed.connect(self._on_display_mode_changed)
         self._wf_panel.waveform.set_invert_scroll(
             self._config.get("app", {}).get("invert_scroll", "default"))
 
@@ -899,7 +900,18 @@ def closeEvent(self, event):
             if reply != QMessageBox.Yes:
                 event.ignore()
                 return
+
         self._playback.stop()
+
+        # Stop active background tasks cleanly so ThreadPoolExecutor can terminate
+        from PySide6.QtCore import QThread
+        for worker_attr in ["_worker", "_phase1_worker", "_topo_apply_worker", "_setup_worker"]:
+            worker = getattr(self, worker_attr, None)
+            if isinstance(worker, QThread) and worker.isRunning():
+                if hasattr(worker, "cancel"):
+                    worker.cancel()
+                worker.wait(500)
+
         super().closeEvent(event)
 
 
diff --git a/sessionprepgui/settings.py b/sessionprepgui/settings.py
index c7e727e..0f9fb18 100644
--- a/sessionprepgui/settings.py
+++ b/sessionprepgui/settings.py
@@ -56,6 +56,7 @@
     "report_verbosity": "normal",
     "phase1_output_folder": "sp_01_tracklayout",
     "phase2_output_folder": "sp_02_prepared",
+    "peak_cache_folder": "sp_peaks",
     "spectrogram_colormap": "magma",
     "default_project_dir": "",
     "invert_scroll": "default",
diff --git a/sessionprepgui/topology/mixin.py b/sessionprepgui/topology/mixin.py
index 86b6da9..fb07b76 100644
--- a/sessionprepgui/topology/mixin.py
+++ b/sessionprepgui/topology/mixin.py
@@ -6,7 +6,7 @@
 import os
 
 log = logging.getLogger(__name__)
-from PySide6.QtCore import Qt, Slot
+from PySide6.QtCore import Qt, Slot, QTimer
 from PySide6.QtGui import QAction, QColor
 from PySide6.QtWidgets import (
     QAbstractItemView,
@@ -138,9 +138,9 @@ def _build_topology_page(self) -> QWidget:
 
         toolbar.addSeparator()
 
-        self._topo_wf_toggle = QAction("\u25B6 Waveform", self)
+        self._topo_wf_toggle = QAction("\u25BC Waveform", self)
         self._topo_wf_toggle.setCheckable(True)
-        self._topo_wf_toggle.setChecked(False)
+        self._topo_wf_toggle.setChecked(True)
         self._topo_wf_toggle.setToolTip("Show / hide the waveform preview")
         self._topo_wf_toggle.toggled.connect(self._on_topo_wf_toggle)
         toolbar.addAction(self._topo_wf_toggle)
@@ -192,7 +192,13 @@ def _build_topology_page(self) -> QWidget:
 
         h_splitter.setSizes([400, 400])
 
-        # Cross-tree exclusive selection
+        # Cross-tree exclusive selection via debounced timer
+        self._topo_sel_side: str | None = None
+        self._topo_sel_timer = QTimer(page)
+        self._topo_sel_timer.setSingleShot(True)
+        self._topo_sel_timer.setInterval(150)
+        self._topo_sel_timer.timeout.connect(self._do_topo_selection_changed)
+
         self._topo_input_tree.selectionModel().selectionChanged.connect(
             lambda sel, desel: self._on_topo_selection_changed("input"))
         self._topo_output_tree.selectionModel().selectionChanged.connect(
@@ -202,13 +208,20 @@ def _build_topology_page(self) -> QWidget:
         self._syncing_scroll = False
         self._topo_input_tree.verticalScrollBar().valueChanged.connect(self._on_input_scroll)
 
-        # Waveform preview panel (starts collapsed)
+        # Waveform preview panel (starts expanded)
         self._topo_wf_panel = WaveformPanel(analysis_mode=False)
-        self._topo_wf_panel.setVisible(False)
-        self._topo_wf_expanded = False
+        self._topo_wf_panel.setVisible(True)
+        self._topo_wf_expanded = True
         self._topo_wf_panel.play_clicked.connect(self._on_topo_play)
         self._topo_wf_panel.stop_clicked.connect(self._on_topo_stop)
         self._topo_wf_panel.position_clicked.connect(self._on_topo_wf_seek)
+        self._topo_wf_panel.display_mode_changed.connect(self._on_topo_display_mode_changed)
+
+        self._topo_wf_panel.fft_group.triggered.connect(self._on_topo_spec_fft_changed)
+        self._topo_wf_panel.win_group.triggered.connect(self._on_topo_spec_window_changed)
+        self._topo_wf_panel.cmap_group.triggered.connect(self._on_topo_spec_cmap_changed)
+        self._topo_wf_panel.floor_group.triggered.connect(self._on_topo_spec_floor_changed)
+        self._topo_wf_panel.ceil_group.triggered.connect(self._on_topo_spec_ceil_changed)
 
         # Vertical splitter: trees on top, waveform at bottom
         v_splitter = QSplitter(Qt.Vertical)
@@ -339,16 +352,21 @@ def _on_topo_apply(self):
         output_folder = self._config.get("app", {}).get(
             "phase1_output_folder", "sp_01_tracklayout")
         output_dir = os.path.join(self._source_dir, output_folder)
+        peaks_folder = self._config.get("app", {}).get(
+            "peak_cache_folder", "sp_peaks")
+        peaks_dir = os.path.join(output_dir, peaks_folder)
 
         self._topo_apply_action.setEnabled(False)
         self._topo_reset_action.setEnabled(False)
         self._topo_status_label.setText("Applying topology\u2026")
         self._topo_progress.start("Applying topology\u2026")
+        self._status_bar.setVisible(False)
 
         # Put Phase 1 topology on session for the worker to read
         self._session.topology = self._topo_topology
         self._topo_apply_worker = TopologyApplyWorker(
-            self._session, output_dir, source_dir=self._source_dir)
+            self._session, output_dir, source_dir=self._source_dir,
+            peaks_dir=peaks_dir)
         self._topo_apply_worker.progress.connect(self._on_topo_apply_progress)
         self._topo_apply_worker.progress_value.connect(
             self._on_topo_apply_progress_value)
@@ -358,8 +376,8 @@ def _on_topo_apply(self):
 
     @Slot(str)
     def _on_topo_apply_progress(self, message: str):
+        log.debug("Apply topology: %s", message)
         self._topo_progress.set_message(message)
-        self._status_bar.showMessage(message)
 
     @Slot(int, int)
     def _on_topo_apply_progress_value(self, current: int, total: int):
@@ -378,7 +396,6 @@ def _on_topo_apply_done(self):
             msg = (f"Topology applied: {n_out} file(s) written, "
                    f"{len(errors)} error(s)")
             self._topo_progress.finish(msg)
-            self._status_bar.showMessage(msg)
             detail = "\n".join(f"\u2022 {fn}: {err}" for fn, err in errors)
             QMessageBox.warning(
                 self, "Apply Topology \u2014 errors",
@@ -386,7 +403,8 @@ def _on_topo_apply_done(self):
         else:
             msg = f"Topology applied: {n_out} file(s) written"
             self._topo_progress.finish(msg)
-            self._status_bar.showMessage(msg)
+        self._status_bar.setVisible(True)
+        self._status_bar.showMessage(msg)
 
         output_folder = self._config.get("app", {}).get(
             "phase1_output_folder", "sp_01_tracklayout")
@@ -403,6 +421,7 @@ def _on_topo_apply_error(self, message: str):
         self._topo_apply_action.setEnabled(True)
         self._topo_reset_action.setEnabled(True)
         self._topo_progress.fail(message)
+        self._status_bar.setVisible(True)
         self._status_bar.showMessage(f"Apply topology error: {message}")
 
     # ── Actions ───────────────────────────────────────────────────────
@@ -623,25 +642,37 @@ def _on_input_scroll(self):
     # ── Cross-tree exclusive selection ────────────────────────────────
 
     def _on_topo_selection_changed(self, side: str):
-        """Handle selection change in input or output tree."""
+        """Handle selection change in input or output tree (debounced)."""
         if side == "input":
-            tree = self._topo_input_tree
             other = self._topo_output_tree
         else:
-            tree = self._topo_output_tree
             other = self._topo_input_tree
 
-        items = tree.selectedItems()
-        if not items:
-            return
-
-        # Clear other tree's selection
+        # Clear other tree's selection immediately for UI responsiveness
         if self._topo_selected_side != side:
             other.blockSignals(True)
             other.clearSelection()
             other.blockSignals(False)
         self._topo_selected_side = side
 
+        # Start or reset the 150ms debounce timer
+        self._topo_sel_side = side
+        self._topo_sel_timer.start()
+
+    def _do_topo_selection_changed(self):
+        side = self._topo_sel_side
+        if not side:
+            return
+
+        if side == "input":
+            tree = self._topo_input_tree
+        else:
+            tree = self._topo_output_tree
+
+        items = tree.selectedItems()
+        if not items:
+            return
+
         # Determine what's selected
         file_items = []
         channel_items = []
@@ -722,8 +753,13 @@ def _topo_load_input_from_items(self, file_items, channel_items=None):
 
     def _topo_load_input_waveform(self, filename: str):
         """Load waveform for a single input file."""
+        import time, logging
+        t0 = time.perf_counter()
+        log = logging.getLogger(__name__)
+
         self._topo_cancel_workers()
         self._on_topo_stop()
+        self._topo_wf_filename = filename  # track for peak cache lookup
 
         track_map = self._topo_track_map()
         track = track_map.get(filename)
@@ -737,8 +773,19 @@ def _topo_load_input_waveform(self, filename: str):
 
         if self._topo_wf_expanded:
             self._topo_wf_panel.setVisible(True)
-        self._topo_wf_panel.waveform.set_loading(True)
+
+        peak_cache = getattr(self, '_peak_cache', {})
+        if filename in peak_cache:
+            self._topo_wf_panel.waveform.set_preview_mode(
+                track.channels, track.total_samples, track.samplerate, peak_cache[filename]
+            )
+        else:
+            self._topo_wf_panel.waveform.set_loading(True)
+            if hasattr(self, '_prioritize_peak'):
+                self._prioritize_peak(filename)
+
         self._topo_wf_panel.play_btn.setEnabled(False)
+        log.debug("[Trace] _topo_load_input_waveform setup for '%s': %.2f ms", filename, (time.perf_counter() - t0) * 1000)
 
         from ..analysis.worker import AudioLoadWorker
         worker = AudioLoadWorker(track, parent=self)
@@ -938,9 +985,18 @@ def _on_topo_multi_error(self, message: str):
     def _topo_show_waveform(self, audio_data, samplerate: int,
                             labels: list[str] | None = None):
         """Run WaveformLoadWorker and display result."""
+        import time, logging
+        t0 = time.perf_counter()
+        log = logging.getLogger(__name__)
+
         import numpy as np
-        if audio_data is None or (isinstance(audio_data, np.ndarray)
-                                  and audio_data.size == 0):
+        if audio_data is None:
+            self._topo_wf_panel.waveform.set_loading(False)
+            return
+        if isinstance(audio_data, np.ndarray) and audio_data.size == 0:
+            self._topo_wf_panel.waveform.set_loading(False)
+            return
+        if isinstance(audio_data, list) and not audio_data:
             self._topo_wf_panel.waveform.set_loading(False)
             return
 
@@ -959,15 +1015,54 @@ def _topo_show_waveform(self, audio_data, samplerate: int,
         self._topo_wf_worker = worker
         worker.finished.connect(self._on_topo_wf_loaded)
         worker.start()
+        log.debug("[Trace] _topo_show_waveform setup: %.2f ms", (time.perf_counter() - t0) * 1000)
 
     def _on_topo_wf_loaded(self, result: dict):
+        import time, logging
+        t0 = time.perf_counter()  # start of the timing reported by the trace log below
+        log = logging.getLogger(__name__)
+
         self._topo_wf_worker = None
         self._topo_wf_panel.waveform.set_precomputed(result)
+        # Attach the cached peak mipmap for this file if one is already available
+        peak_cache = getattr(self, '_peak_cache', {})  # empty dict when the attribute is absent
+        wf_fn = getattr(self, '_topo_wf_filename', None)  # None when no filename was recorded
+        if wf_fn and wf_fn in peak_cache:
+            self._topo_wf_panel.waveform.set_peak_data(peak_cache[wf_fn])
+        elif wf_fn and hasattr(self, '_prioritize_peak'):  # not cached yet and the hook exists
+            self._prioritize_peak(wf_fn)  # presumably moves this file up the peak build queue; verify
         n_ch = len(result["channels"])
         labels = getattr(self, '_topo_pending_labels', None)
         self._topo_wf_panel.update_play_mode_channels(n_ch, labels=labels)
         self._topo_wf_panel.play_btn.setEnabled(True)
         self._topo_update_time_label(0)
+        log.debug("[Trace] _on_topo_wf_loaded final UI application: %.2f ms", (time.perf_counter() - t0) * 1000)
+
+    @Slot(str)
+    def _on_topo_display_mode_changed(self, mode: str):
+        if mode == "spectrogram" and self._topo_cached_audio:  # nothing to do without cached audio
+            if getattr(self._topo_wf_panel.waveform._spec_renderer, '_spec_data', None) is None:  # no spectrogram data yet
+                self._topo_show_waveform(self._topo_cached_audio[1], self._topo_cached_audio[3])  # presumably (display audio, samplerate); verify tuple layout
+
+    @Slot(QAction)
+    def _on_topo_spec_fft_changed(self, action: QAction):
+        self._topo_wf_panel.waveform.set_spec_fft(int(action.data()))  # the action's data is forwarded as an int FFT size
+
+    @Slot(QAction)
+    def _on_topo_spec_window_changed(self, action: QAction):
+        self._topo_wf_panel.waveform.set_spec_window(action.data())  # the action's data is forwarded unchanged
+
+    @Slot(QAction)
+    def _on_topo_spec_cmap_changed(self, action: QAction):
+        self._topo_wf_panel.waveform.set_colormap(action.data())  # the action's data is forwarded unchanged
+
+    @Slot(QAction)
+    def _on_topo_spec_floor_changed(self, action: QAction):
+        self._topo_wf_panel.waveform.set_spec_db_floor(float(action.data()))  # the action's data is forwarded as a float dB floor
+
+    @Slot(QAction)
+    def _on_topo_spec_ceil_changed(self, action: QAction):
+        self._topo_wf_panel.waveform.set_spec_db_ceil(float(action.data()))  # the action's data is forwarded as a float dB ceiling
 
     # ── Playback ──────────────────────────────────────────────────────
 
@@ -1009,7 +1104,12 @@ def _topo_update_time_label(self, current_sample: int):
         if not cached:
             return
         _, display_audio, _playback, sr = cached
-        total = display_audio.shape[0] if display_audio is not None else 0
+        if display_audio is None:
+            total = 0
+        elif isinstance(display_audio, list):
+            total = display_audio[0].shape[0] if display_audio else 0
+        else:
+            total = display_audio.shape[0]
         from sessionpreplib.audio import format_duration
         cur_str = format_duration(current_sample, sr)
         tot_str = format_duration(total, sr)
diff --git a/sessionprepgui/waveform/compute.py b/sessionprepgui/waveform/compute.py
index f038195..67cf50d 100644
--- a/sessionprepgui/waveform/compute.py
+++ b/sessionprepgui/waveform/compute.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+import collections
+import logging
 import threading
 
 import numpy as np
@@ -14,6 +16,8 @@
 # Spectrogram colormaps
 # ---------------------------------------------------------------------------
 
+log = logging.getLogger(__name__)
+
 SPECTROGRAM_COLORMAPS: dict[str, np.ndarray] = {}  # name → (256, 4) uint8 RGBA
 
 
@@ -118,15 +122,15 @@ def compute_mel_spectrogram(channels: list[np.ndarray], sr: int, *,
     if not channels:
         return None
     if hop is None:
-        hop = n_fft // 4
-    # Mix to mono
+        hop = n_fft  # optimized for UI speed (4x faster than n_fft // 4)
+    # Mix to mono in-place to avoid massive memory allocations
     if len(channels) == 1:
-        mono = channels[0].astype(np.float64)
+        mono = channels[0]
     else:
-        mono = np.mean(
-            np.column_stack([ch.astype(np.float64) for ch in channels]),
-            axis=1,
-        )
+        mono = channels[0].copy()
+        for ch in channels[1:]:
+            mono += ch
+        mono /= len(channels)
     if len(mono) < n_fft:
         return None
     # STFT
@@ -161,6 +165,7 @@ def __init__(self, audio_data: np.ndarray, samplerate: int,
                  rms_window_samples: int, *,
                  spec_n_fft: int = _SPEC_N_FFT,
                  spec_window: str = "hann",
+                 compute_spectrogram: bool = True,
                  parent=None):
         super().__init__(parent)
         self._audio_data = audio_data
@@ -168,6 +173,7 @@ def __init__(self, audio_data: np.ndarray, samplerate: int,
         self._rms_win = rms_window_samples
         self._spec_n_fft = spec_n_fft
         self._spec_window = spec_window
+        self._compute_spectrogram = compute_spectrogram
         self._cancelled = threading.Event()
 
     def cancel(self):
@@ -175,20 +181,24 @@ def cancel(self):
         self._cancelled.set()
 
     def run(self):
+        import time, logging
+        t_start = time.perf_counter()
+        log = logging.getLogger(__name__)
+
         data = self._audio_data
         sr = self._samplerate
         win = self._rms_win
 
         # --- Channel splitting ---
-        if data is None or data.size == 0:
-            return
-        if data.ndim == 1:
-            channels = [np.ascontiguousarray(data)]
+        if isinstance(data, list):
+            channels = data
         else:
-            channels = [
-                np.ascontiguousarray(data[:, ch])
-                for ch in range(data.shape[1])
-            ]
+            if data is None or data.size == 0:
+                return
+            if data.ndim == 1:
+                channels = [data]
+            else:
+                channels = [data[:, ch] for ch in range(data.shape[1])]
         if not channels:
             return
         nch = len(channels)
@@ -197,23 +207,36 @@ def run(self):
         if self._cancelled.is_set():
             return
 
-        # --- Peak finding ---
+        if self._cancelled.is_set():
+            return
+
+        # --- Peak finding (Zero allocation) ---
+        t0 = time.perf_counter()
+        
+        def _find_peak(ch: np.ndarray) -> int:
+            p_max = int(np.argmax(ch))
+            p_min = int(np.argmin(ch))
+            return p_min if abs(float(ch[p_min])) > abs(float(ch[p_max])) else p_max
+
         if nch == 1:
-            peak_sample = int(np.argmax(np.abs(channels[0])))
+            peak_sample = _find_peak(channels[0])
             peak_channel = 0
         else:
-            abs_cols = np.column_stack([np.abs(ch) for ch in channels])
-            max_per_sample = np.max(abs_cols, axis=1)
-            peak_sample = int(np.argmax(max_per_sample))
-            peak_channel = int(np.argmax(abs_cols[peak_sample]))
+            peaks_per_ch = [_find_peak(ch) for ch in channels]
+            max_vals = [abs(float(channels[i][p])) for i, p in enumerate(peaks_per_ch)]
+            peak_channel = int(np.argmax(max_vals))
+            peak_sample = peaks_per_ch[peak_channel]
         peak_lin = abs(float(channels[peak_channel][peak_sample]))
         peak_db = 20.0 * np.log10(peak_lin) if peak_lin > 0 else float('-inf')
         peak_amplitude = float(channels[peak_channel][peak_sample])
 
+        log.debug("[Trace] WaveformLoadWorker Peak finding: %.2f ms", (time.perf_counter() - t0) * 1000)
+
         if self._cancelled.is_set():
             return
 
         # --- RMS cumsum (computed once, reused for envelope drawing) ---
+        t0 = time.perf_counter()
         rms_max_sample = -1
         rms_max_db = float('-inf')
         rms_max_amplitude = 0.0
@@ -245,17 +268,25 @@ def run(self):
                 rms_max_db = 20.0 * np.log10(rms_lin) if rms_lin > 0 else float('-inf')
                 rms_max_amplitude = rms_lin
 
+        log.debug("[Trace] WaveformLoadWorker RMS: %.2f ms", (time.perf_counter() - t0) * 1000)
+
         if self._cancelled.is_set():
             return
 
         # --- Spectrogram ---
-        spec_db = compute_mel_spectrogram(
-            channels, sr,
-            n_fft=self._spec_n_fft, window=self._spec_window,
-        )
+        spec_db = None
+        if self._compute_spectrogram:
+            t0 = time.perf_counter()
+            spec_db = compute_mel_spectrogram(
+                channels, sr,
+                n_fft=self._spec_n_fft, window=self._spec_window,
+            )
+            log.debug("[Trace] WaveformLoadWorker STFT: %.2f ms", (time.perf_counter() - t0) * 1000)
 
-        if self._cancelled.is_set():
-            return
+            if self._cancelled.is_set():
+                return
+
+        log.debug("[Trace] WaveformLoadWorker TOTAL: %.2f ms", (time.perf_counter() - t_start) * 1000)
 
         self.finished.emit({
             "channels": channels,
@@ -301,3 +332,140 @@ def run(self):
         if self._cancelled.is_set():
             return
         self.finished.emit(result)
+
+
+class PeakBuildWorker(QThread):
+    """Build (or reload) ``.peaks`` mipmap files for a batch of audio files.
+
+    Runs in the background with a thread pool — does not block the UI.
+    Emits ``file_done(filename, PeakData)`` for each file whose peaks were
+    loaded or built so the caller can cache the result in memory.
+
+    The work queue is mutable: call ``prioritize(filename)`` to move a file
+    to the front of the pending queue so it is picked up by the next batch.
+    """
+
+    progress = Signal(str)                 # status message
+    progress_value = Signal(int, int)      # (current, total)
+    file_done = Signal(str, object)        # (filename, PeakData)
+    all_done = Signal()
+
+    def __init__(self, items: list[tuple[str, str, str]],
+                 parent=None):
+        """
+        Parameters
+        ----------
+        items : list of (filepath, filename, peaks_path)
+            *filepath*: absolute path to the audio file on disk.
+            *filename*: canonical filename (used as cache key).
+            *peaks_path*: absolute path for the ``.peaks`` output file.
+            A *filename* listed twice is processed once (last entry wins).
+        """
+        super().__init__(parent)
+        self._items_map: dict[str, tuple[str, str, str]] = {
+            fn: (fp, fn, pp) for fp, fn, pp in items
+        }
+        # Queue the de-duplicated keys so the same file is never built twice.
+        self._queue: collections.deque[str] = collections.deque(self._items_map)
+        self._lock = threading.Lock()
+        self._cancelled = threading.Event()
+        self._total = len(self._items_map)
+
+    def cancel(self):
+        """Request cancellation; the flag is checked between files and batches."""
+        self._cancelled.set()
+
+    def prioritize(self, filename: str):
+        """Move *filename* to the front of the pending queue (if still pending)."""
+        with self._lock:
+            try:
+                self._queue.remove(filename)
+            except ValueError:
+                return  # already processed or not in queue
+            self._queue.appendleft(filename)
+            log.debug("Prioritized peak build for '%s'", filename)
+
+    def _next_item(self) -> tuple[str, str, str] | None:
+        """Pop the next item from the queue under lock."""
+        with self._lock:
+            while self._queue:
+                fn = self._queue.popleft()
+                item = self._items_map.get(fn)
+                if item:
+                    return item
+        return None
+
+    def run(self):
+        """Drain the queue in batches, emitting progress and per-file results."""
+        from concurrent.futures import ThreadPoolExecutor, as_completed
+        import os
+        import time
+        import soundfile as sf
+        from .peakcache import build_peaks, save_peaks, load_peaks, get_source_mtime
+
+        def _build(filepath, filename, peaks_path):
+            """Return still-valid cached peaks or build them; may raise."""
+            mtime = get_source_mtime(filepath)
+            # Reuse the on-disk cache when it still matches the source mtime.
+            existing = load_peaks(peaks_path, expected_mtime=mtime)
+            if existing is not None:
+                return filename, existing
+            t0 = time.perf_counter()
+            data, sr = sf.read(filepath, dtype="float64")
+            if self._cancelled.is_set():
+                return None, None
+            peak_data = build_peaks(data, sr, source_mtime=mtime)
+            try:
+                save_peaks(peak_data, peaks_path)
+                elapsed = (time.perf_counter() - t0) * 1000
+                log.debug("Built peak cache for '%s' -> %s (%d levels, %.1f ms)", filename, peaks_path, len(peak_data.levels), elapsed)
+            except Exception as e:  # in-memory peaks stay usable if saving fails
+                log.debug("Failed to save peak cache for '%s' -> %s: %s", filename, peaks_path, e)
+            return filename, peak_data
+
+        def _process(filepath, filename, peaks_path):
+            """Pool task that never raises, so one bad file cannot abort run()."""
+            if self._cancelled.is_set():
+                return None, None
+            try:
+                return _build(filepath, filename, peaks_path)
+            except Exception as e:
+                log.debug("Failed to build peak cache for '%s': %s", filename, e)
+                return None, None
+
+        max_workers = min(os.cpu_count() or 4, 6)
+        completed = 0
+
+        with ThreadPoolExecutor(max_workers=max_workers) as pool:
+            # Process items in small batches to keep the queue reorderable
+            while not self._cancelled.is_set():
+                batch = []
+                for _ in range(max_workers):
+                    item = self._next_item()
+                    if item is None:
+                        break
+                    batch.append(item)
+                if not batch:
+                    break  # queue exhausted
+
+                futures = [pool.submit(_process, *item) for item in batch]
+                for future in as_completed(futures):
+                    if self._cancelled.is_set():
+                        return
+                    filename, peak_data = future.result()
+                    completed += 1
+                    if filename is not None and peak_data is not None:
+                        self.progress.emit(
+                            f"Building peak cache: {filename}"
+                            f"  ({completed}/{self._total})")
+                        self.progress_value.emit(completed, self._total)
+                        self.file_done.emit(filename, peak_data)
+                    else:
+                        self.progress_value.emit(completed, self._total)
+
+        if not self._cancelled.is_set():
+            self.progress.emit("Peak cache creation finished.")
+            log.info("Peak cache background batch finished (%d files)", self._total)
+            self.all_done.emit()
+
+
diff --git a/sessionprepgui/waveform/panel.py b/sessionprepgui/waveform/panel.py
index d08f66b..4295940 100644
--- a/sessionprepgui/waveform/panel.py
+++ b/sessionprepgui/waveform/panel.py
@@ -34,6 +34,7 @@ class WaveformPanel(QWidget):
     play_clicked = Signal()
     stop_clicked = Signal()
     position_clicked = Signal(int)
+    display_mode_changed = Signal(str)
 
     def __init__(self, analysis_mode: bool = True, parent=None):
         super().__init__(parent)
@@ -442,6 +443,7 @@ def _on_display_mode_changed(self, action: QAction):
             self.rms_avg_toggle.setVisible(is_waveform)
         # Show spectrogram-only controls
         self.spec_settings_btn.setVisible(not is_waveform)
+        self.display_mode_changed.emit(mode)
 
     # ------------------------------------------------------------------
     # analysis_mode setter
diff --git a/sessionprepgui/waveform/peakcache.py b/sessionprepgui/waveform/peakcache.py
new file mode 100644
index 0000000..99eff7f
--- /dev/null
+++ b/sessionprepgui/waveform/peakcache.py
@@ -0,0 +1,354 @@
+"""Pre-computed peak mipmap cache for fast waveform rendering.
+
+Builds a pyramid of per-channel min/max pairs at multiple resolutions
+(like Cubase/Reaper peak files).  The renderer picks the mip level that
+best matches the current zoom and downsamples to pixel width — avoiding
+costly per-paint scans of raw sample arrays.
+
+Binary ``.peaks`` format
+------------------------
+::
+
+    Header (32 bytes):
+      magic         4B   b"SPK1"
+      version       u16  1
+      channels      u16
+      samplerate    u32
+      total_samples u64
+      source_mtime  u64  (source file mtime as integer ns for staleness)
+      n_levels      u16
+      reserved      2B
+
+    Per level (repeated n_levels times):
+      samples_per_bin  u32
+      n_bins           u32
+      data             n_bins × channels × 2 × float32  (min, max interleaved)
+"""
+
+from __future__ import annotations
+
+import os
+import struct
+from dataclasses import dataclass, field
+
+import numpy as np
+
+# Samples-per-bin for each mip level (ascending = coarser)
+MIP_BINS = (256, 1024, 4096, 16384)
+
+_MAGIC = b"SPK1"
+_VERSION = 1
+_HEADER_SIZE = 32
+_HEADER_FMT = "<4sHHIQQHxx"  # 4+2+2+4+8+8+2+2 = 32
+_LEVEL_HEADER_FMT = "<II"     # samples_per_bin(4) + n_bins(4) = 8
+
+
+@dataclass
+class MipLevel:
+    """One resolution level: per-channel min/max of every ``samples_per_bin``-sample bin."""
+    samples_per_bin: int  # number of source samples summarised by one bin
+    # Shape: (n_bins, channels, 2) — last dim is [min, max]
+    data: np.ndarray  # float32 (as produced by build_peaks / load_peaks)
+
+
+@dataclass
+class PeakData:
+    """Complete peak mipmap for one audio file (the in-memory form of a ``.peaks`` file)."""
+    channels: int  # number of audio channels
+    samplerate: int
+    total_samples: int  # samples per channel in the source audio
+    source_mtime: int  # nanosecond mtime of the source file
+    levels: list[MipLevel] = field(default_factory=list)  # build_peaks fills this in MIP_BINS order
+
+
+# ---------------------------------------------------------------------------
+# Build
+# ---------------------------------------------------------------------------
+
+def build_peaks(
+    audio_data: np.ndarray,
+    samplerate: int,
+    source_mtime: int = 0,
+) -> PeakData:
+    """Compute all mip levels from raw audio data.
+
+    A level is built for every bin size in ``MIP_BINS`` that fits into the
+    audio at least once.  Samples left over after the last full bin go into
+    one extra, shorter bin so a peak at the very end of the file is kept.
+
+    Parameters
+    ----------
+    audio_data : ndarray
+        Shape ``(samples,)`` for mono or ``(samples, channels)``.
+    samplerate : int
+    source_mtime : int
+        Nanosecond mtime of the source file (for staleness check).
+
+    Returns
+    -------
+    PeakData
+        Levels ordered as in ``MIP_BINS``; each level's ``data`` is float32
+        of shape ``(n_bins, channels, 2)`` holding ``[min, max]`` per bin.
+    """
+    if audio_data.ndim == 1:
+        audio_data = audio_data[:, np.newaxis]
+
+    n_samples, n_channels = audio_data.shape
+    levels: list[MipLevel] = []
+    # One contiguous 1-D array per channel: reducing the inner axis of an
+    # (n_bins, spb) view is much faster than a strided multi-channel reduction.
+    channel_arrays = [np.ascontiguousarray(audio_data[:, c]) for c in range(n_channels)]
+
+    for spb in MIP_BINS:
+        n_full = n_samples // spb
+        if n_full < 1:
+            continue  # audio is shorter than one bin at this resolution
+        usable = n_full * spb
+        has_tail = usable < n_samples
+        data = np.empty((n_full + has_tail, n_channels, 2), dtype=np.float32)
+        for c, c_array in enumerate(channel_arrays):
+            c_reshaped = c_array[:usable].reshape(n_full, spb)
+            data[:n_full, c, 0] = c_reshaped.min(axis=1)
+            data[:n_full, c, 1] = c_reshaped.max(axis=1)
+            if has_tail:  # trailing partial bin covers samples [usable, n_samples)
+                data[n_full, c, 0] = c_array[usable:].min()
+                data[n_full, c, 1] = c_array[usable:].max()
+        levels.append(MipLevel(samples_per_bin=spb, data=data))
+
+    return PeakData(channels=n_channels, samplerate=samplerate, total_samples=n_samples,
+                    source_mtime=source_mtime, levels=levels)
+
+
+# ---------------------------------------------------------------------------
+# Save / Load
+# ---------------------------------------------------------------------------
+
+def save_peaks(peak_data: PeakData, path: str) -> None:
+    """Write *peak_data* to *path* in the binary ``.peaks`` format.
+
+    Missing parent directories are created.  Level data is coerced to float32
+    so the bytes on disk always have the size ``load_peaks`` expects.
+    """
+    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+    header = struct.pack(  # packed before opening: a bad field leaves no truncated file
+        _HEADER_FMT, _MAGIC, _VERSION, peak_data.channels, peak_data.samplerate,
+        peak_data.total_samples, peak_data.source_mtime, len(peak_data.levels),
+    )
+    with open(path, "wb") as f:
+        f.write(header)
+        for lvl in peak_data.levels:
+            # tobytes() emits C order: bins, then channels, then (min, max).
+            payload = np.asarray(lvl.data, dtype=np.float32)
+            n_bins = payload.shape[0]
+            f.write(struct.pack(_LEVEL_HEADER_FMT, lvl.samples_per_bin, n_bins))
+            f.write(payload.tobytes())
+
+
+def load_peaks(path: str, expected_mtime: int | None = None) -> PeakData | None:
+    """Read a ``.peaks`` file.  Returns ``None`` if missing, corrupt, or stale.
+
+    Parameters
+    ----------
+    path : str
+        Path to the ``.peaks`` file.
+    expected_mtime : int | None
+        If given, the source file mtime (in ns).  If it doesn't match the
+        stored mtime the cache is considered stale and ``None`` is returned.
+
+    Returns
+    -------
+    PeakData | None
+        The decoded mipmap, or ``None`` when the file cannot be trusted: bad
+        magic/version, stale mtime, truncated data, or a header declaring zero
+        channels, a zero bin size or zero bins (which would break the queries).
+    """
+    if not os.path.isfile(path):
+        return None
+    try:
+        with open(path, "rb") as f:
+            raw_header = f.read(_HEADER_SIZE)
+            if len(raw_header) < _HEADER_SIZE:
+                return None
+            (magic, version, channels, samplerate, total_samples,
+             source_mtime, n_levels) = struct.unpack(_HEADER_FMT, raw_header)
+            if magic != _MAGIC or version != _VERSION or channels < 1:
+                return None
+            if expected_mtime is not None and source_mtime != expected_mtime:
+                return None
+            levels: list[MipLevel] = []
+            lvl_hdr_size = struct.calcsize(_LEVEL_HEADER_FMT)
+            for _ in range(n_levels):
+                lvl_hdr = f.read(lvl_hdr_size)
+                if len(lvl_hdr) < lvl_hdr_size:
+                    return None
+                spb, n_bins = struct.unpack(_LEVEL_HEADER_FMT, lvl_hdr)
+                if spb < 1 or n_bins < 1:
+                    return None  # corrupt level header
+                data_size = n_bins * channels * 2 * 4  # float32
+                raw_data = f.read(data_size)
+                if len(raw_data) < data_size:
+                    return None
+                data = np.frombuffer(raw_data, dtype=np.float32).copy()
+                levels.append(MipLevel(spb, data.reshape(n_bins, channels, 2)))
+            return PeakData(channels, samplerate, total_samples, source_mtime, levels)
+    except (OSError, struct.error, ValueError, MemoryError, OverflowError):
+        return None  # unreadable, malformed, or absurdly sized: treat as corrupt
+
+
+# ---------------------------------------------------------------------------
+# Query
+# ---------------------------------------------------------------------------
+
+def query_peaks(
+    peak_data: PeakData,
+    view_start: int,
+    view_end: int,
+    width: int,
+) -> list[tuple[np.ndarray, np.ndarray]]:
+    """Pick the best mip level and return per-channel ``(mins, maxs)`` arrays.
+
+    Returns a list of ``(mins, maxs)`` tuples, one per channel, each array
+    having length *width* (matching the pixel width of the draw area).
+    Pixels that map to no bin are left at 0.
+
+    Parameters
+    ----------
+    peak_data : PeakData
+    view_start, view_end : int
+        Sample range currently visible.
+    width : int
+        Pixel width of the waveform draw area.
+    """
+    view_len = view_end - view_start
+    if view_len <= 0 or width <= 0 or not peak_data.levels:
+        n_px = max(width, 0)  # np.zeros rejects negative sizes
+        return [(np.zeros(n_px, dtype=np.float64),
+                 np.zeros(n_px, dtype=np.float64))
+                for _ in range(peak_data.channels)]
+
+    # Use the coarsest level whose bins are no wider than one pixel: that keeps
+    # full pixel resolution while reducing the fewest bins.  When zoomed in
+    # past the finest level, fall back to the finest one available.
+    samples_per_pixel = view_len / width
+    fitting = [lvl for lvl in peak_data.levels
+               if lvl.samples_per_bin <= samples_per_pixel]
+    if fitting:
+        best_level = max(fitting, key=lambda lvl: lvl.samples_per_bin)
+    else:
+        best_level = min(peak_data.levels, key=lambda lvl: lvl.samples_per_bin)
+
+    spb = best_level.samples_per_bin
+    n_bins = best_level.data.shape[0]
+
+    result: list[tuple[np.ndarray, np.ndarray]] = []
+    for ch in range(peak_data.channels):
+        mins_out = np.zeros(width, dtype=np.float64)
+        maxs_out = np.zeros(width, dtype=np.float64)
+        for px in range(width):
+            # Sample range covered by this pixel, then the bins overlapping it.
+            s0 = view_start + px * view_len // width
+            s1 = view_start + (px + 1) * view_len // width
+            b0 = max(0, s0 // spb)
+            b1 = min(n_bins, (s1 + spb - 1) // spb)
+            if b0 >= b1:
+                b0 = max(0, b1 - 1)
+            if b0 < n_bins and b0 < b1:
+                chunk = best_level.data[b0:b1, ch, :]  # (k, 2)
+                mins_out[px] = chunk[:, 0].min()
+                maxs_out[px] = chunk[:, 1].max()
+        result.append((mins_out, maxs_out))
+    return result
+
+
+def query_peaks_fast(
+    peak_data: PeakData,
+    view_start: int,
+    view_end: int,
+    width: int,
+) -> list[tuple[np.ndarray, np.ndarray]]:
+    """Vectorised per-pixel min/max lookup — no Python pixel loop.
+
+    Picks the mip level that best matches the zoom, maps every pixel column
+    to a half-open range of bins and reduces all ranges with one
+    ``ufunc.reduceat`` call per channel and per statistic (min, max), so the
+    cost no longer grows with a Python-level loop over *width*.
+
+    Unlike :func:`query_peaks`, bin ranges are clamped into the level, so a
+    pixel lying outside the audio shows the nearest bin instead of 0.
+
+    Parameters
+    ----------
+    peak_data : PeakData
+    view_start, view_end : int
+        Sample range currently visible.
+    width : int
+        Pixel width of the waveform draw area.
+
+    Returns
+    -------
+    list of (mins, maxs)
+        One tuple per channel; both arrays are float64 of length *width*.
+        All zeros when the view is empty, *width* is not positive, or there
+        are no usable levels.
+    """
+    view_len = view_end - view_start
+    usable = [lvl for lvl in peak_data.levels if lvl.data.shape[0] > 0]
+    if view_len <= 0 or width <= 0 or not usable:
+        n_px = max(width, 0)  # np.zeros rejects negative sizes
+        return [(np.zeros(n_px, dtype=np.float64),
+                 np.zeros(n_px, dtype=np.float64))
+                for _ in range(peak_data.channels)]
+
+    # Use the coarsest level whose bins are no wider than one pixel: full
+    # pixel resolution with the fewest bins to reduce.  When zoomed in past
+    # the finest level, fall back to the finest one available.
+    samples_per_pixel = view_len / width
+    fitting = [lvl for lvl in usable
+               if lvl.samples_per_bin <= samples_per_pixel]
+    if fitting:
+        best_level = max(fitting, key=lambda lvl: lvl.samples_per_bin)
+    else:
+        best_level = min(usable, key=lambda lvl: lvl.samples_per_bin)
+
+    spb = best_level.samples_per_bin
+    n_bins = best_level.data.shape[0]
+
+    # Sample index of every pixel edge: pixel px covers [edges[px], edges[px+1]).
+    edges = view_start + np.arange(width + 1, dtype=np.int64) * view_len // width
+    # First bin touched by each pixel, and one past the last bin (ceil division),
+    # clamped so that 0 <= start < end <= n_bins always holds.
+    bin_starts = np.clip(edges[:-1] // spb, 0, n_bins - 1)
+    bin_ends = np.clip((edges[1:] + spb - 1) // spb, bin_starts + 1, n_bins)
+
+    # reduceat reduces a[idx[i]:idx[i+1]] for each i, so interleaving
+    # (start, end) pairs puts the wanted per-pixel reductions at the even
+    # output positions; the odd ones (end .. next start) are discarded.
+    idx = np.empty(2 * width, dtype=np.intp)
+    idx[0::2] = bin_starts
+    idx[1::2] = bin_ends
+
+    result: list[tuple[np.ndarray, np.ndarray]] = []
+    for ch in range(peak_data.channels):
+        # One sentinel element makes index n_bins legal for reduceat.  It is
+        # never part of an even-position range, whose end is exclusive.
+        ch_mins = np.append(best_level.data[:, ch, 0], np.inf)
+        ch_maxs = np.append(best_level.data[:, ch, 1], -np.inf)
+        mins_out = np.minimum.reduceat(ch_mins, idx)[0::2].astype(np.float64)
+        maxs_out = np.maximum.reduceat(ch_maxs, idx)[0::2].astype(np.float64)
+        result.append((mins_out, maxs_out))
+
+    return result
+
+
+def get_source_mtime(filepath: str) -> int:
+    """Return *filepath*'s mtime as integer nanoseconds, or 0 when it cannot be stat'ed."""
+    try:
+        return os.stat(filepath).st_mtime_ns
+    except OSError:  # missing or inaccessible file
+        return 0
+
+
+def peaks_path_for(peaks_dir: str, filename: str) -> str:
+    """Return ``<peaks_dir>/<stem>.peaks``, *stem* being *filename* minus directory and extension."""
+    stem, _ext = os.path.splitext(os.path.basename(filename))
+    return os.path.join(peaks_dir, stem + ".peaks")
diff --git a/sessionprepgui/waveform/renderer.py b/sessionprepgui/waveform/renderer.py
index f3a8212..dda08d6 100644
--- a/sessionprepgui/waveform/renderer.py
+++ b/sessionprepgui/waveform/renderer.py
@@ -11,6 +11,7 @@
                            QPen, QPolygonF)
 
 from ..theme import COLORS
+from .peakcache import PeakData, query_peaks_fast
 
 _CHANNEL_COLORS = [
     "#44aa44", "#44aaaa", "#aa44aa", "#aaaa44",
@@ -49,6 +50,7 @@ def __init__(self):
         self._rms_cumsums: list[np.ndarray] = []
         self._rms_window_samples: int = 0
         self._channels: list[np.ndarray] = []
+        self._peak_data: PeakData | None = None
         self._peak_sample: int = -1
         self._peak_channel: int = -1
         self._peak_db: float = float('-inf')
@@ -69,6 +71,7 @@ def reset(self):
         self._rms_cumsums = []
         self._rms_window_samples = 0
         self._channels = []
+        self._peak_data = None
         self._peak_sample = -1
         self._peak_channel = -1
         self._peak_db = float('-inf')
@@ -103,6 +106,7 @@ def set_track_data(self, channels: list, *,
         self._rms_max_db = rms_max_db
         self._rms_max_amplitude = rms_max_amplitude
         self._rms_max_dirty = rms_max_dirty
+        self._peak_data = None
         self._peaks_cache = []
         self._cached_view = (0, 0, 0)
         self._rms_envelope = []
@@ -120,6 +124,12 @@ def set_rms_window(self, window_samples: int):
         self._rms_max_amplitude = 0.0
         self._rms_max_dirty = bool(self._channels and window_samples > 0)
 
+    def set_peak_data(self, peak_data: PeakData | None):
+        """Install the peak mipmap used for fast rendering (``None`` removes it)."""
+        # Reset the per-view cache as well so stale pixel peaks are never reused.
+        self._peaks_cache, self._cached_view = [], (0, 0, 0)
+        self._peak_data = peak_data
+
     def invalidate(self):
         """Invalidate peak and RMS caches (zoom change, resize, large scroll)."""
         self._peaks_cache = []
@@ -136,16 +146,23 @@ def invalidate_rms_only(self):
 
     def paint(self, painter: QPainter, ctx: WaveformRenderCtx):
         """Full waveform draw pass: envelope + dB scale + RMS + markers."""
-        painter.setRenderHint(QPainter.Antialiasing, ctx.wf_antialias)
+        # Adaptive Antialiasing: High channel counts pack polygons into just a few pixels. 
+        # Sub-pixel rendering at that density takes 1000ms+ and provides no visual benefit.
+        use_aa = ctx.wf_antialias and (ctx.num_channels <= 12)
+        painter.setRenderHint(QPainter.Antialiasing, use_aa)
+        
         self._build_peaks(ctx)
         if ctx.show_rms_lr or ctx.show_rms_avg:
             self._build_rms_envelope(ctx)
         nch = ctx.num_channels
+        if nch == 0:
+            return
         lane_h = ctx.draw_h / nch
         self._draw_db_scale(painter, ctx, nch, lane_h)
         self._draw_waveform_channels(painter, ctx, nch, lane_h)
         if ctx.show_rms_lr or ctx.show_rms_avg:
             self._draw_rms_overlay(painter, ctx, nch, lane_h)
+            
         painter.setRenderHint(QPainter.Antialiasing, True)
         if ctx.show_markers:
             self._draw_markers(painter, ctx, nch, lane_h)
@@ -231,7 +248,24 @@ def _build_peaks(self, ctx: WaveformRenderCtx):
         """Downsample audio to peak envelope, with incremental scroll updates."""
         channels = ctx.channels
         width = ctx.draw_w
-        if not channels or width <= 0:
+        if width <= 0:
+            self._peaks_cache = []
+            return
+        # Fast path: use pre-computed peak mipmap if available
+        if self._peak_data is not None and self._peak_data.levels:
+            cache_key = (width, ctx.view_start, ctx.view_end)
+            if self._cached_view == cache_key and self._peaks_cache:
+                return
+            vs, ve = ctx.view_start, ctx.view_end
+            if ve - vs <= 0:
+                self._peaks_cache = []
+                return
+            self._peaks_cache = query_peaks_fast(
+                self._peak_data, vs, ve, width)
+            self._cached_view = cache_key
+            return
+        # Fallback: raw sample downsampling
+        if not channels:
             self._peaks_cache = []
             return
         cache_key = (width, ctx.view_start, ctx.view_end)
diff --git a/sessionprepgui/waveform/widget.py b/sessionprepgui/waveform/widget.py
index 87fcb05..4c9cc00 100644
--- a/sessionprepgui/waveform/widget.py
+++ b/sessionprepgui/waveform/widget.py
@@ -55,6 +55,15 @@ def __init__(self, parent=None):
         # Mouse crosshair
         self._mouse_x: int = -1
         self._mouse_y: int = -1
+        # Offscreen rendering cache
+        self._bg_pixmap = None
+        # Fast resize debounce
+        self._is_resizing: bool = False
+        self._stale_pixmap = None
+        self._resize_timer: QTimer = QTimer(self)
+        self._resize_timer.setSingleShot(True)
+        self._resize_timer.setInterval(150)
+        self._resize_timer.timeout.connect(self._flush_resize)
         # Scroll inversion
         self._invert_h: bool = False
         self._invert_v: bool = False
@@ -99,7 +108,17 @@ def set_audio(self, audio_data: np.ndarray | None, samplerate: int):
             peak_dirty=bool(self._channels),
         )
         self._spec_renderer.reset(samplerate)
-        self.update()
+        self._invalidate_bg()
+
+    def set_peak_data(self, peak_data):
+        """Set pre-computed peak mipmap data for fast rendering.
+
+        Can be called before or after set_audio / set_precomputed.
+        When set, the renderer uses mip-level lookups instead of
+        downsampling raw samples on each paint.
+        """
+        self._wf_renderer.set_peak_data(peak_data)
+        self._invalidate_bg()
 
     def set_loading(self, loading: bool):
         """Show or hide a 'Loading waveform…' placeholder."""
@@ -109,10 +128,51 @@ def set_loading(self, loading: bool):
             self._num_channels = 0
             self._total_samples = 0
             self._wf_renderer.reset()
-        self.update()
+        self._invalidate_bg()
+
+    def set_preview_mode(self, channels_count: int, total_samples: int,
+                         samplerate: int, peak_data: object):
+        """Instantly prepare widget for rendering using only peak cache metadata."""
+        import time, logging
+        t0 = time.perf_counter()
+        log = logging.getLogger(__name__)
+
+        channels_count = max(1, channels_count)
+        self._channels = [np.array([], dtype=np.float32) for _ in range(channels_count)]
+        self._num_channels = channels_count
+        self._total_samples = total_samples
+        self._samplerate = samplerate
+        self._cursor_sample = 0
+        self._cursor_y_value = None
+        self._view_start = 0
+        self._view_end = max(1, self._total_samples)
+        self._vscale = 1.0
+        self._rms_window_samples = 0
+
+        self._wf_renderer.set_track_data(
+            self._channels,
+            peak_sample=-1,
+            peak_channel=-1,
+            peak_db=0.0,
+            peak_amplitude=0.0,
+            rms_cumsums=[],
+            rms_window=0,
+            rms_max_sample=-1,
+            rms_max_db=float('-inf'),
+            rms_max_amplitude=0.0,
+        )
+        self._spec_renderer.reset(samplerate)
+        self._wf_renderer.set_peak_data(peak_data)
+        self._loading = False
+        self._invalidate_bg()
+        log.debug("[Trace] WaveformWidget.set_preview_mode finished in %.2f ms", (time.perf_counter() - t0) * 1000)
 
     def set_precomputed(self, result: dict):
         """Apply pre-computed waveform data from a WaveformLoadWorker."""
+        import time, logging
+        t0 = time.perf_counter()
+        log = logging.getLogger(__name__)
+
         self._channels = result["channels"]
         self._num_channels = len(self._channels)
         self._total_samples = result["total_samples"]
@@ -138,11 +198,17 @@ def set_precomputed(self, result: dict):
         self._spec_renderer.reset(result["samplerate"])
         self._spec_renderer.set_spec_data(result.get("spec_db"))
         self._loading = False
-        self.update()
+        self._invalidate_bg()
+        log.debug("[Trace] WaveformWidget.set_precomputed finished in %.2f ms", (time.perf_counter() - t0) * 1000)
 
     def set_issues(self, issues: list):
         """Set the list of IssueLocation objects to overlay on the waveform."""
         self._issues = list(issues)
+        self._invalidate_bg()
+
+    def _invalidate_bg(self):
+        """Drop the cached background pixmap and schedule a repaint that rebuilds it."""
+        self._bg_pixmap = None
         self.update()
 
     def set_cursor(self, sample_index: int):
@@ -153,7 +217,9 @@ def set_cursor(self, sample_index: int):
             self._view_start = self._cursor_sample
             self._view_end = min(self._cursor_sample + view_len, self._total_samples)
             self._wf_renderer.invalidate()
-        self.update()
+            self._invalidate_bg()
+        else:
+            self.update()
 
     # ── Coordinate helpers ─────────────────────────────────────────────────
 
@@ -196,43 +262,78 @@ def _make_spec_ctx(self, x0: int, draw_w: int, draw_h: int) -> SpecRenderCtx:
 
     # ── paintEvent ─────────────────────────────────────────────────────────
 
-    def paintEvent(self, event):
-        w = self.width()
-        h = self.height()
-        painter = QPainter(self)
-        painter.setRenderHint(QPainter.Antialiasing, True)
+    def _update_bg_pixmap(self, w: int, h: int):
+        import time, logging
+        t0 = time.perf_counter()
+        log = logging.getLogger(__name__)
 
-        painter.fillRect(0, 0, w, h, QColor(COLORS["bg"]))
+        from PySide6.QtGui import QPixmap
+        sz = self.size()
+        dpr = self.devicePixelRatio()
+        self._bg_pixmap = QPixmap(sz * dpr)
+        self._bg_pixmap.setDevicePixelRatio(dpr)
+        self._bg_pixmap._logical_size = sz
+
+        bg_painter = QPainter(self._bg_pixmap)
+        bg_painter.setRenderHint(QPainter.Antialiasing, True)
+        bg_painter.fillRect(0, 0, w, h, QColor(COLORS["bg"]))
 
         if self._loading:
-            painter.setPen(QPen(QColor(COLORS["dim"])))
-            painter.drawText(self.rect(), Qt.AlignCenter, "Loading waveform\u2026")
-            painter.end()
+            bg_painter.setPen(QPen(QColor(COLORS["dim"])))
+            bg_painter.drawText(self.rect(), Qt.AlignCenter, "Loading waveform\u2026")
+            bg_painter.end()
             return
 
         if not self._channels or self._total_samples == 0:
-            painter.setPen(QPen(QColor(COLORS["dim"])))
-            painter.drawText(self.rect(), Qt.AlignCenter, "No waveform")
-            painter.end()
+            bg_painter.setPen(QPen(QColor(COLORS["dim"])))
+            bg_painter.drawText(self.rect(), Qt.AlignCenter, "No waveform")
+            bg_painter.end()
             return
 
         x0, draw_w = self._draw_area()
         draw_h = h - self._MARGIN_BOTTOM
 
         if self._display_mode == "spectrogram":
-            self._spec_renderer.paint(painter, self._make_spec_ctx(x0, draw_w, draw_h))
+            self._spec_renderer.paint(bg_painter, self._make_spec_ctx(x0, draw_w, draw_h))
         else:
-            self._wf_renderer.paint(painter, self._make_wf_ctx(x0, draw_w, draw_h))
+            self._wf_renderer.paint(bg_painter, self._make_wf_ctx(x0, draw_w, draw_h))
 
         draw_issue_overlays(
-            painter, x0, draw_w, draw_h,
+            bg_painter, x0, draw_w, draw_h,
             self._view_start, self._view_end, self._total_samples,
             self._issues, self._enabled_overlays,
             self._display_mode, self._num_channels,
             self._spec_renderer.mel_view_min, self._spec_renderer.mel_view_max,
         )
-        draw_time_scale(painter, x0, draw_w, draw_h,
+        draw_time_scale(bg_painter, x0, draw_w, draw_h,
                         self._view_start, self._view_end, self._samplerate)
+        bg_painter.end()
+        log.debug("[Trace] WaveformWidget._update_bg_pixmap rendered in %.2f ms", (time.perf_counter() - t0) * 1000)
+
+    def paintEvent(self, event):
+        w = max(1, self.width())
+        h = max(1, self.height())
+        sz = self.size()
+
+        # Defer re-rendering only when a stale snapshot exists to draw instead.
+        can_defer = self._is_resizing and self._stale_pixmap is not None
+        if not can_defer and (self._bg_pixmap is None or getattr(self._bg_pixmap, "_logical_size", None) != sz):
+            self._update_bg_pixmap(w, h)
+        painter = QPainter(self)
+        if can_defer:
+            # Draw the stale pixmap stretched to fit the current size (fast path)
+            painter.drawPixmap(self.rect(), self._stale_pixmap)
+        elif self._bg_pixmap is not None:
+            painter.drawPixmap(0, 0, self._bg_pixmap)
+
+        painter.setRenderHint(QPainter.Antialiasing, True)
+
+        if self._loading or not self._channels or self._total_samples == 0:
+            painter.end()
+            return
+
+        x0, draw_w = self._draw_area()
+        draw_h = h - self._MARGIN_BOTTOM
 
         # Playback cursor
         if self._total_samples > 0:
@@ -319,9 +420,20 @@ def paintEvent(self, event):
     # ── Qt event handlers ──────────────────────────────────────────────────
 
     def resizeEvent(self, event):
+        if not self._is_resizing:
+            self._is_resizing = True
+            self._stale_pixmap = self._bg_pixmap
+        self._resize_timer.start()
+        # Invalidate deferred to _flush_resize for fast-draft dragging
+        super().resizeEvent(event)
+
+    def _flush_resize(self):
+        """Called when the 150ms debounce timer expires after a resize drag."""
+        self._is_resizing = False
+        self._stale_pixmap = None
         self._wf_renderer.invalidate()
         self._spec_renderer.invalidate()
-        super().resizeEvent(event)
+        self._invalidate_bg()
 
     def mousePressEvent(self, event):
         self.setFocus()
@@ -356,7 +468,7 @@ def mousePressEvent(self, event):
                         self._cursor_y_value = None
                 else:
                     self._cursor_y_value = None
-            self.update()
+            self._invalidate_bg()
             self.position_clicked.emit(sample)
 
     def mouseMoveEvent(self, event):
@@ -437,7 +549,7 @@ def wheelEvent(self, event):
             else:
                 self._vscale = (min(self._vscale * 1.25, 20.0) if delta > 0
                                 else max(self._vscale / 1.25, 0.1))
-            self.update()
+            self._invalidate_bg()
             event.accept()
         elif ctrl:
             x0, draw_w = self._draw_area()
@@ -462,7 +574,7 @@ def wheelEvent(self, event):
             self._view_start = new_start
             self._view_end = new_end
             self._wf_renderer.invalidate()
-            self.update()
+            self._invalidate_bg()
             event.accept()
         elif shift and alt:
             if self._display_mode == "spectrogram":
@@ -473,7 +585,7 @@ def wheelEvent(self, event):
                 if self._invert_v:
                     scroll = -scroll
                 self._spec_renderer.scroll_freq(scroll, self._samplerate)
-                self.update()
+                self._invalidate_bg()
             event.accept()
         elif shift:
             view_len = self._view_end - self._view_start
@@ -502,7 +614,7 @@ def wheelEvent(self, event):
 
     def _flush_scroll(self):
         self._scroll_pending = False
-        self.update()
+        self._invalidate_bg()
 
     def keyPressEvent(self, event):
         key = event.key()
@@ -541,7 +653,7 @@ def _zoom_at_guide(self, zoom_in: bool):
         self._view_start = new_start
         self._view_end = new_end
         self._wf_renderer.invalidate()
-        self.update()
+        self._invalidate_bg()
 
     # ── Zoom / vertical-scale public API ───────────────────────────────────
 
@@ -552,7 +664,7 @@ def zoom_fit(self):
         self._vscale = 1.0
         self._spec_renderer.reset_freq_view(self._samplerate)
         self._wf_renderer.invalidate()
-        self.update()
+        self._invalidate_bg()
 
     def zoom_in(self):
         """Zoom in 2× centered on the cursor."""
@@ -572,7 +684,7 @@ def zoom_in(self):
         self._view_start = new_start
         self._view_end = new_end
         self._wf_renderer.invalidate()
-        self.update()
+        self._invalidate_bg()
 
     def zoom_out(self):
         """Zoom out 2× centered on the cursor."""
@@ -592,7 +704,7 @@ def zoom_out(self):
         self._view_start = new_start
         self._view_end = new_end
         self._wf_renderer.invalidate()
-        self.update()
+        self._invalidate_bg()
 
     def scale_up(self):
         """Increase vertical amplitude scale / zoom freq in (spectrogram)."""
@@ -601,7 +713,7 @@ def scale_up(self):
                 2 / 3, self._cursor_y_value, self._samplerate)
         else:
             self._vscale = min(self._vscale * 1.5, 20.0)
-        self.update()
+        self._invalidate_bg()
 
     def scale_down(self):
         """Decrease vertical amplitude scale / zoom freq out (spectrogram)."""
@@ -610,7 +722,7 @@ def scale_down(self):
                 3 / 2, self._cursor_y_value, self._samplerate)
         else:
             self._vscale = max(self._vscale / 1.5, 0.1)
-        self.update()
+        self._invalidate_bg()
 
     # ── Public setters ─────────────────────────────────────────────────────
 
@@ -618,30 +730,30 @@ def set_rms_data(self, window_samples: int):
         """Set the RMS window size."""
         self._rms_window_samples = max(window_samples, 0)
         self._wf_renderer.set_rms_window(window_samples)
-        self.update()
+        self._invalidate_bg()
 
     def toggle_markers(self, on: bool):
         self._show_markers = on
-        self.update()
+        self._invalidate_bg()
 
     def toggle_rms_lr(self, on: bool):
         self._show_rms_lr = on
-        self.update()
+        self._invalidate_bg()
 
     def toggle_rms_avg(self, on: bool):
         self._show_rms_avg = on
-        self.update()
+        self._invalidate_bg()
 
     def set_enabled_overlays(self, labels: set[str]):
         self._enabled_overlays = set(labels)
-        self.update()
+        self._invalidate_bg()
 
     def set_display_mode(self, mode: str):
         if mode not in ("waveform", "spectrogram"):
             return
         self._display_mode = mode
         self._spec_renderer.invalidate()
-        self.update()
+        self._invalidate_bg()
 
     def set_invert_scroll(self, mode: str):
         self._invert_h = mode in ("horizontal", "both")
@@ -649,15 +761,15 @@ def set_invert_scroll(self, mode: str):
 
     def set_wf_antialias(self, enabled: bool):
         self._wf_antialias = enabled
-        self.update()
+        self._invalidate_bg()
 
     def set_wf_line_width(self, width: int):
         self._wf_line_width = max(1, min(width, 3))
-        self.update()
+        self._invalidate_bg()
 
     def set_colormap(self, name: str):
         self._spec_renderer.set_colormap(name)
-        self.update()
+        self._invalidate_bg()
 
     def set_spec_fft(self, n_fft: int):
         if n_fft == self._spec_renderer.spec_n_fft:
@@ -673,11 +785,11 @@ def set_spec_window(self, window: str):
 
     def set_spec_db_floor(self, val: float):
         self._spec_renderer.set_db_floor(val)
-        self.update()
+        self._invalidate_bg()
 
     def set_spec_db_ceil(self, val: float):
         self._spec_renderer.set_db_ceil(val)
-        self.update()
+        self._invalidate_bg()
 
     @property
     def spec_n_fft(self) -> int:
@@ -692,6 +804,6 @@ def _recompute_spectrogram(self):
             return
         self._spec_renderer.recompute(
             self._channels, self._samplerate,
-            on_done=self.update, parent=self,
+            on_done=self._invalidate_bg, parent=self,
         )
-        self.update()
+        self._invalidate_bg()
diff --git a/sessionpreplib/_version.py b/sessionpreplib/_version.py
index 6f30ca1..a4e466d 100644
--- a/sessionpreplib/_version.py
+++ b/sessionpreplib/_version.py
@@ -1,3 +1,3 @@
 """Single source of truth for the SessionPrep version number."""
 
-__version__ = "0.3.3"
+__version__ = "0.3.4"
diff --git a/sessionpreplib/events.py b/sessionpreplib/events.py
index 6527e80..5064053 100644
--- a/sessionpreplib/events.py
+++ b/sessionpreplib/events.py
@@ -14,6 +14,11 @@ class EventBus:
     def __init__(self) -> None:
         self._handlers: dict[str, list[Callable[..., Any]]] = {}
         self._lock = threading.Lock()
+        self.is_cancelled = threading.Event()
+
+    def cancel(self) -> None:
+        """Signal internal processes to abort work cleanly."""
+        self.is_cancelled.set()
 
     def subscribe(self, event_type: str, handler: Callable[..., Any]) -> None:
         """Register a handler for an event type."""
diff --git a/sessionpreplib/pipeline.py b/sessionpreplib/pipeline.py
index 5768782..2005cd5 100644
--- a/sessionpreplib/pipeline.py
+++ b/sessionpreplib/pipeline.py
@@ -99,6 +99,9 @@ def _emit(self, event_type: str, **data):
 
     def _analyze_track(self, track: TrackContext, idx: int, total: int, detectors: list[TrackDetector]):
         """Run all track-level detectors for a single track (thread-safe)."""
+        if self.event_bus and self.event_bus.is_cancelled.is_set():
+            return
+
         self._emit("track.analyze_start", filename=track.filename,
                    index=idx, total=total)
         t_track_start = time.perf_counter()
@@ -162,6 +165,9 @@ def _run_analysis_phase(self, session: SessionContext, phase: LifecyclePhase) ->
         # Session-level detectors
         track_map = {t.filename: t for t in session.tracks}
         for det in session_dets:
+            if self.event_bus and self.event_bus.is_cancelled.is_set():
+                break
+
             try:
                 self._emit("session_detector.start", detector_id=det.id)
                 t0 = time.perf_counter()
@@ -207,6 +213,9 @@ def analyze_phase2(self, session: SessionContext) -> SessionContext:
 
     def _plan_track(self, track: TrackContext, idx: int, total: int):
         """Run all audio processors for a single track (thread-safe)."""
+        if self.event_bus and self.event_bus.is_cancelled.is_set():
+            return
+
         self._emit("track.plan_start", filename=track.filename,
                    index=idx, total=total)
         t_track_start = time.perf_counter()
@@ -671,6 +680,20 @@ def _load_one_track(
     event_bus: EventBus | None,
 ) -> TrackContext:
     """Load a single WAV file (used by thread pool in load_session)."""
+    if event_bus and event_bus.is_cancelled.is_set():
+        return TrackContext(
+            filename=filename,
+            filepath=os.path.join(source_dir, filename),
+            audio_data=None,
+            samplerate=0,
+            channels=0,
+            total_samples=0,
+            bitdepth="",
+            subtype="",
+            duration_sec=0.0,
+            status="Cancelled",
+        )
+
     filepath = os.path.join(source_dir, filename)
     if event_bus:
         event_bus.emit("track.load", filename=filename,