livekit · bnovik0v · Apr 21, 2026 · Jun 4, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -27,7 +27,7 @@ jobs:
         run: uv python install ${{ matrix.python-version }}
 
       - name: Install dependencies
-        run: uv sync --extra train --extra export
+        run: uv sync --extra cpu --extra train --extra export
 
       - name: Run tests
         run: uv run pytest --tb=short -q
diff --git a/README.md b/README.md
@@ -118,16 +118,20 @@ sudo apt install portaudio19-dev
 
 **Installation:**
 
+livekit-wakeword needs an ONNX Runtime backend, installed via the `cpu` extra
+(CPU-only) or the `gpu` extra (CUDA — see [GPU acceleration](#gpu-acceleration)).
+Pick exactly one; they share the `onnxruntime` import path and cannot coexist.
+
 ```bash
-pip install livekit-wakeword
+pip install "livekit-wakeword[cpu]"
 # or
-uv add livekit-wakeword
+uv add "livekit-wakeword[cpu]"
 ```
 
-For microphone listening, install with the `listener` extra:
+For microphone listening, add the `listener` extra:
 
 ```bash
-pip install livekit-wakeword[listener]
+pip install "livekit-wakeword[cpu,listener]"
 ```
 
 **Basic inference:**
@@ -240,16 +244,18 @@ brew install espeak-ng ffmpeg portaudio
 sudo apt install espeak-ng libsndfile1 ffmpeg sox portaudio19-dev
 ```
 
+Add the `cpu` backend extra (or `gpu` — see [GPU acceleration](#gpu-acceleration)) alongside the training extras.
+
 **Installation (with pip):**
 
 ```bash
-pip install livekit-wakeword[train,eval,export]
+pip install "livekit-wakeword[cpu,train,eval,export]"
 ```
 
 **Installation (with uv):**
 
 ```bash
-uv tool install livekit-wakeword[train,eval,export]
+uv tool install "livekit-wakeword[cpu,train,eval,export]"
 ```
 
 **Installation (from source):**
@@ -258,12 +264,29 @@ uv tool install livekit-wakeword[train,eval,export]
 # Install uv (if you don't have it)
 curl -LsSf https://astral.sh/uv/install.sh | sh
 
-# Clone and install
+# Clone and install (pick the cpu OR gpu backend — they're mutually exclusive)
 git clone https://github.com/livekit/livekit-wakeword
 cd livekit-wakeword
-uv sync --all-extras
+uv sync --extra cpu --extra train --extra eval --extra export
+```
+
+<a id="gpu-acceleration"></a>
+**GPU acceleration (training, eval, and inference):**
+
+The `cpu` extra is CPU-only — on a GPU pod this makes feature extraction the pipeline bottleneck (a production augmentation run takes ~14 h on CPU vs ~13 min on an RTX 3090). Swap in the `gpu` extra to unlock the CUDA Execution Provider:
+
+```bash
+# pip
+pip install "livekit-wakeword[gpu,train,eval,export]"
+
+# uv (from source)
+uv sync --extra gpu --extra train --extra eval --extra export
 ```
 
+Provider selection is then automatic (CUDA if available, CPU otherwise). To override it, set the `LIVEKIT_WAKEWORD_ORT_PROVIDERS` environment variable to a comma-separated list of provider names, e.g. `LIVEKIT_WAKEWORD_ORT_PROVIDERS=CPUExecutionProvider` (useful for reproducibility or for opting into CoreML / DirectML / ROCm). `onnxruntime-gpu` requires matching CUDA and cuDNN versions — see the [ONNX Runtime GPU compatibility matrix](https://onnxruntime.ai/docs/install/#cuda-and-cudnn).
+
+> **Switching an existing install** between `cpu` and `gpu`: the `onnxruntime` and `onnxruntime-gpu` wheels share the same `onnxruntime` import path and cannot coexist, so remove the old one first (`pip uninstall -y onnxruntime onnxruntime-gpu`, then reinstall with the desired extra). With `uv`, `[tool.uv].conflicts` enforces this automatically when you `uv sync` a different backend extra from this repo.
+
 **Download models and data:**
 
 ```bash
@@ -339,7 +362,7 @@ tts_backend: voxcpm
 And install `livekit-wakeword` with the `voxcpm` optional dependency:
 
 ```bash
-pip install livekit-wakeword[train,eval,export,voxcpm]
+pip install "livekit-wakeword[cpu,train,eval,export,voxcpm]"
 ```
 
 > [!WARNING]

diff --git a/pyproject.toml b/pyproject.toml
@@ -25,10 +25,18 @@ classifiers = [
 
 dependencies = [
     "numpy>=1.24",
-    "onnxruntime>=1.17",
 ]
 
 [project.optional-dependencies]
+# ONNX Runtime backend — pick exactly one. Kept out of base deps because
+# `onnxruntime` and `onnxruntime-gpu` share the `onnxruntime` import path and
+# clobber each other on disk if both are installed (see [tool.uv] conflicts).
+cpu = [
+    "onnxruntime>=1.20",
+]
+gpu = [
+    "onnxruntime-gpu>=1.20",
+]
 listener = [
     "pyaudio>=0.2.14",
 ]
@@ -61,7 +69,6 @@ eval = [
 ]
 export = [
     "onnx>=1.15",
-    "onnxruntime>=1.17",
     "onnxscript>=0.6.2",
 ]
 [project.urls]
@@ -95,6 +102,12 @@ include = [
     "src/livekit/wakeword/resources/*.onnx",
 ]
 
+[tool.uv]
+# cpu/gpu both provide the onnxruntime import path — never resolve them together.
+# NOTE: this is honored when developing in this repo (`uv sync --extra gpu`), but
+# does NOT propagate to downstream consumers of the published wheel.
+conflicts = [[{ extra = "cpu" }, { extra = "gpu" }]]
+
 [dependency-groups]
 dev = ["pytest>=8.0", "pytest-cov>=4.0", "pytest-asyncio>=0.23", "ruff>=0.4", "mypy>=1.8"]
 

diff --git a/src/livekit/wakeword/__init__.py b/src/livekit/wakeword/__init__.py
@@ -5,8 +5,9 @@
 
 __version__ = "0.1.0"
 
-# Training / CLI imports are lazy-loaded so that the core inference API
-# works with only numpy + onnxruntime (no torch, pydantic, etc.).
+# Training / CLI imports are lazy-loaded so that the core inference API works
+# with only numpy + an ONNX Runtime backend (the cpu/gpu extra; no torch,
+# pydantic, etc.).
 _LAZY_IMPORTS: dict[str, tuple[str, str]] = {
     "WakeWordConfig": (".config", "WakeWordConfig"),
     "load_config": (".config", "load_config"),

diff --git a/src/livekit/wakeword/_ort_providers.py b/src/livekit/wakeword/_ort_providers.py
@@ -0,0 +1,77 @@
+"""Runtime selection of ONNX Runtime execution providers.
+
+Central helper used everywhere an ``ort.InferenceSession`` is created, so the
+provider list stays consistent across training (feature extraction), eval, and
+inference (``WakeWordModel``).
+
+Default behavior: prefer CUDA, fall back to CPU — driven by whatever ONNX
+Runtime distribution is installed.  A backend is not a base dependency of the
+package; install exactly one of the ``cpu`` extra (``onnxruntime``) or the
+``gpu`` extra (``onnxruntime-gpu``) — see the README for how to switch.
+
+Override via the ``LIVEKIT_WAKEWORD_ORT_PROVIDERS`` env var (comma-separated
+provider names) — handy for forcing CPU for reproducibility, or opting into
+less-common providers like CoreML / DirectML / ROCm / TensorRT.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from types import ModuleType
+
+_logger = logging.getLogger(__name__)
+
+_DEFAULT_PREFERENCE: tuple[str, ...] = ("CUDAExecutionProvider", "CPUExecutionProvider")
+_ENV_VAR = "LIVEKIT_WAKEWORD_ORT_PROVIDERS"
+
+
+def import_ort() -> ModuleType:
+    """Import ``onnxruntime`` with an actionable error if no backend is installed.
+
+    livekit-wakeword does not bundle an ONNX Runtime backend — exactly one of the
+    ``cpu`` / ``gpu`` extras must be installed.
+    """
+    try:
+        import onnxruntime as ort
+    except ModuleNotFoundError as exc:
+        raise ModuleNotFoundError(
+            "No ONNX Runtime backend is installed. livekit-wakeword does not bundle "
+            "one — install exactly one of:\n"
+            "  pip install 'livekit-wakeword[cpu]'   # CPU-only\n"
+            "  pip install 'livekit-wakeword[gpu]'   # CUDA (see README)"
+        ) from exc
+    return ort
+
+
+def get_providers() -> list[str]:
+    """Return the ONNX Runtime provider list to pass to ``InferenceSession``.
+
+    Resolution order:
+
+    1. If ``LIVEKIT_WAKEWORD_ORT_PROVIDERS`` is set and non-empty, parse it
+       as a comma-separated list and return verbatim.  ORT itself will reject
+       unknown providers at session-creation time with a clear error.
+    2. Otherwise intersect ``onnxruntime.get_available_providers()`` with
+       ``("CUDAExecutionProvider", "CPUExecutionProvider")`` in that order.
+    3. If neither preferred provider is available (unusual — e.g. a non-CPU
+       build without CUDA), return whatever ORT reports as available so
+       session creation still succeeds.
+    """
+    ort = import_ort()
+
+    override = os.environ.get(_ENV_VAR, "").strip()
+    if override:
+        providers = [p.strip() for p in override.split(",") if p.strip()]
+        _logger.info("ORT providers from %s: %s", _ENV_VAR, providers)
+        return providers
+
+    available = set(ort.get_available_providers())
+    providers = [p for p in _DEFAULT_PREFERENCE if p in available]
+    if not providers:
+        providers = list(ort.get_available_providers())
+    _logger.info("ORT providers (auto-selected): %s", providers)
+    return providers
diff --git a/src/livekit/wakeword/eval/evaluate.py b/src/livekit/wakeword/eval/evaluate.py
@@ -7,10 +7,12 @@
 from pathlib import Path
 
 import numpy as np
-import onnxruntime as ort
 
+from .._ort_providers import get_providers, import_ort
 from ..config import WakeWordConfig
 
+ort = import_ort()
+
 logger = logging.getLogger(__name__)
 
 
@@ -194,7 +196,7 @@ def run_eval(config: WakeWordConfig, model_path: str | Path) -> dict[str, float]
     if not model_path.exists():
         raise FileNotFoundError(f"Model not found: {model_path}")
 
-    session = ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"])
+    session = ort.InferenceSession(str(model_path), providers=get_providers())
     logger.info(f"Loaded model from {model_path}")
 
     # Load validation data

diff --git a/src/livekit/wakeword/inference/model.py b/src/livekit/wakeword/inference/model.py
@@ -75,7 +75,9 @@ def load_model(self, model_path: str | Path, model_name: str | None = None) -> N
             model_path: Path to the ONNX wake word classifier.
             model_name: Optional name for the model. If None, derived from filename.
         """
-        import onnxruntime as ort
+        from .._ort_providers import get_providers, import_ort
+
+        ort = import_ort()
 
         model_path = Path(model_path)
         if not model_path.exists():
@@ -86,7 +88,7 @@ def load_model(self, model_path: str | Path, model_name: str | None = None) -> N
 
         session = ort.InferenceSession(
             str(model_path),
-            providers=["CPUExecutionProvider"],
+            providers=get_providers(),
         )
         input_name = session.get_inputs()[0].name
         self._classifiers[model_name] = (session, input_name)

diff --git a/src/livekit/wakeword/models/feature_extractor.py b/src/livekit/wakeword/models/feature_extractor.py
@@ -16,6 +16,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class MelSpectrogramFrontend:
     """Stage 1: Raw audio → mel-spectrogram features.
 
@@ -37,11 +38,13 @@ def __init__(self, onnx_path: str | Path):
         self._init_onnx(onnx_path)
 
     def _init_onnx(self, onnx_path: str | Path) -> None:
-        import onnxruntime as ort
+        from .._ort_providers import get_providers, import_ort
+
+        ort = import_ort()
 
         self._onnx_session = ort.InferenceSession(
             str(onnx_path),
-            providers=["CPUExecutionProvider"],
+            providers=get_providers(),
         )
         self._input_name = self._onnx_session.get_inputs()[0].name
         logger.info(f"Loaded mel ONNX model from {onnx_path}")
@@ -78,6 +81,7 @@ def _forward_onnx(self, audio: np.ndarray) -> np.ndarray:
         mel = mel / 10.0 + 2.0
         return mel
 
+
 class SpeechEmbedding:
     """Stage 2: Google's speech_embedding CNN via ONNX runtime.
 
@@ -90,7 +94,9 @@ class SpeechEmbedding:
     """
 
     def __init__(self, onnx_path: str | Path):
-        import onnxruntime as ort
+        from .._ort_providers import get_providers, import_ort
+
+        ort = import_ort()
 
         if not Path(onnx_path).exists():
             raise FileNotFoundError(
@@ -100,7 +106,7 @@ def __init__(self, onnx_path: str | Path):
 
         self._session = ort.InferenceSession(
             str(onnx_path),
-            providers=["CPUExecutionProvider"],
+            providers=get_providers(),
         )
         self._input_name = self._session.get_inputs()[0].name
         logger.info(f"Loaded embedding ONNX model from {onnx_path}")