nomadkaraoke · beveradb · Mar 23, 2026 · Mar 22, 2026 · Mar 22, 2026 · Mar 22, 2026
diff --git a/.github/workflows/deploy-to-cloudrun.yml b/.github/workflows/deploy-to-cloudrun.yml
@@ -0,0 +1,78 @@
+name: Deploy to Cloud Run
+
+on:
+  # Deploy when a new PyPI release is published
+  workflow_run:
+    workflows: ["Publish to PyPI"]
+    types: [completed]
+
+  # Deploy on changes to Dockerfile or Cloud Run server
+  push:
+    branches: [main]
+    paths:
+      - "Dockerfile.cloudrun"
+      - "audio_separator/remote/deploy_cloudrun.py"
+      - "audio_separator/ensemble_presets.json"
+      - "cloudbuild.yaml"
+
+  # Manual deployment
+  workflow_dispatch:
+
+# Serialize deployments: the push, workflow_run and manual triggers can fire
+# close together, and two overlapping runs could finish out of order and
+# leave the older image serving traffic. Runs are queued, not cancelled, so
+# an in-flight Cloud Build or rollout is never interrupted half-way.
+concurrency:
+  group: deploy-cloudrun
+  cancel-in-progress: false
+
+# Single source of truth for the GCP target used by the build and deploy steps.
+env:
+  GCP_PROJECT: nomadkaraoke
+  GCP_REGION: us-east4
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    # Only run on successful PyPI publish (or push/manual triggers)
+    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
+
+    permissions:
+      contents: read
+      id-token: write  # Required for Workload Identity Federation
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Authenticate to Google Cloud
+        uses: google-github-actions/auth@v2
+        with:
+          workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
+          service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
+
+      - name: Set up Cloud SDK
+        uses: google-github-actions/setup-gcloud@v2
+
+      # Compute the image tag once so the build and deploy steps cannot drift.
+      - name: Compute image tag
+        run: |
+          echo "IMAGE_TAG=${GITHUB_SHA::8}" >> "$GITHUB_ENV"
+
+      # Use Cloud Build for the Docker build — it has native x86 with enough
+      # RAM to load ML models during the build (baking models into the image).
+      - name: Build and push via Cloud Build
+        run: |
+          gcloud builds submit \
+            --config cloudbuild.yaml \
+            --region="${GCP_REGION}" \
+            --project="${GCP_PROJECT}" \
+            --substitutions="SHORT_SHA=${IMAGE_TAG}"
+
+      - name: Deploy to Cloud Run
+        run: |
+          gcloud run services update audio-separator \
+            --image="${GCP_REGION}-docker.pkg.dev/${GCP_PROJECT}/audio-separator/api:${IMAGE_TAG}" \
+            --region="${GCP_REGION}" \
+            --project="${GCP_PROJECT}" \
+            --quiet
diff --git a/Dockerfile.cloudrun b/Dockerfile.cloudrun
@@ -0,0 +1,102 @@
+# Audio Separator API - Cloud Run GPU Deployment
+# Optimized for NVIDIA L4 GPU on Google Cloud Run
+#
+# Models are baked into the image for zero cold-start latency.
+# To update models, rebuild the image.
+#
+# Build: docker build -f Dockerfile.cloudrun -t audio-separator-cloudrun .
+# Run:   docker run --gpus all -p 8080:8080 audio-separator-cloudrun
+
+FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04
+
+# Prevent interactive prompts during package installation
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install Python 3.12 from deadsnakes PPA (onnxruntime-gpu requires >= 3.11)
+# and system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    software-properties-common \
+    && add-apt-repository -y ppa:deadsnakes/ppa \
+    && apt-get update && apt-get install -y --no-install-recommends \
+    # Python 3.12
+    python3.12 \
+    python3.12-dev \
+    python3.12-venv \
+    # FFmpeg
+    ffmpeg \
+    # Audio libraries
+    libsndfile1 \
+    libsndfile1-dev \
+    libsox-dev \
+    sox \
+    libportaudio2 \
+    portaudio19-dev \
+    libasound2-dev \
+    libpulse-dev \
+    libjack-dev \
+    libsamplerate0 \
+    libsamplerate0-dev \
+    # Build tools (for compiling Python packages with C extensions)
+    build-essential \
+    gcc \
+    g++ \
+    pkg-config \
+    # Utilities
+    curl \
+    && rm -rf /var/lib/apt/lists/* \
+    && python3.12 --version && ffmpeg -version
+
+# Set Python 3.12 as default and install pip
+# get-pip.py is downloaded to a file (curl -f) rather than piped, so an HTTP
+# or network failure stops the build here instead of being masked by the pipe.
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 \
+    && update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
+    && curl -fsSL -o /tmp/get-pip.py https://bootstrap.pypa.io/get-pip.py \
+    && python3.12 /tmp/get-pip.py \
+    && rm /tmp/get-pip.py \
+    && python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel
+
+# Install audio-separator with GPU support and API dependencies
+# NOTE: the source tree stays in the COPY layer even after the rm below;
+# keep .dockerignore tight to limit image size.
+COPY . /tmp/audio-separator-src
+RUN cd /tmp/audio-separator-src \
+    && python3 -m pip install --no-cache-dir ".[gpu]" \
+    && python3 -m pip install --no-cache-dir \
+        "fastapi>=0.104.0" \
+        "uvicorn[standard]>=0.24.0" \
+        "python-multipart>=0.0.6" \
+        "filetype>=1.2.0" \
+    && rm -rf /tmp/audio-separator-src
+
+# Set up CUDA library paths
+RUN echo '/usr/local/cuda/lib64' >> /etc/ld.so.conf.d/cuda.conf && ldconfig
+
+# Environment configuration
+ENV MODEL_DIR=/models \
+    STORAGE_DIR=/tmp/storage \
+    PORT=8080 \
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
+    PATH=/usr/local/cuda/bin:$PATH \
+    PYTHONUNBUFFERED=1
+
+# Create directories
+RUN mkdir -p /models /tmp/storage/outputs
+
+# Bake ensemble preset models into the image.
+# These are the models used by the default presets (instrumental_clean + karaoke).
+# Total: ~1-1.5 GB. This eliminates cold-start model download time.
+COPY scripts/download_preset_models.py /tmp/download_preset_models.py
+RUN python3 /tmp/download_preset_models.py && rm /tmp/download_preset_models.py && ls -lh /models/
+
+# Expose Cloud Run default port
+EXPOSE 8080
+
+# Health check for container orchestration
+# NOTE(review): Cloud Run relies on its own startup/liveness probes, not on
+# Docker HEALTHCHECK — confirm a probe is configured on the service.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
+    CMD curl -f http://localhost:8080/health || exit 1
+
+# Run the API server
+CMD ["python3", "-m", "audio_separator.remote.deploy_cloudrun"]
diff --git a/audio_separator/remote/api_client.py b/audio_separator/remote/api_client.py
@@ -32,6 +32,7 @@ def separate_audio(
         file_path: str,
         model: Optional[str] = None,
         models: Optional[List[str]] = None,
+        preset: Optional[str] = None,
         # Output parameters
         output_format: str = "flac",
         output_bitrate: Optional[str] = None,
@@ -76,8 +77,10 @@ def separate_audio(
         files = {"file": (os.path.basename(file_path), open(file_path, "rb"))}
         data = {}
 
-        # Handle model parameters (backwards compatibility)
-        if models:
+        # Handle model/preset parameters
+        if preset:
+            data["preset"] = preset
+        elif models:
             data["models"] = json.dumps(models)
         elif model:
             data["model"] = model
@@ -144,6 +147,7 @@ def separate_audio_and_wait(
         file_path: str,
         model: Optional[str] = None,
         models: Optional[List[str]] = None,
+        preset: Optional[str] = None,
         timeout: int = 600,
         poll_interval: int = 10,
         download: bool = True,
@@ -208,13 +212,17 @@ def separate_audio_and_wait(
         import time
 
         # Submit the separation job with all parameters
-        models_desc = models or ([model] if model else ["default"])
-        self.logger.info(f"Submitting separation job for '{file_path}' with models: {models_desc} (audio-separator v{AUDIO_SEPARATOR_VERSION})")
+        if preset:
+            models_desc = f"preset:{preset}"
+        else:
+            models_desc = models or ([model] if model else ["default"])
+        self.logger.info(f"Submitting separation job for '{file_path}' with {models_desc} (audio-separator v{AUDIO_SEPARATOR_VERSION})")
 
         result = self.separate_audio(
             file_path,
             model,
             models,
+            preset,
             output_format,
             output_bitrate,
             normalization_threshold,

diff --git a/audio_separator/remote/cli.py b/audio_separator/remote/cli.py
@@ -30,8 +30,9 @@ def main():
     separate_parser = subparsers.add_parser("separate", help="Separate audio files")
     separate_parser.add_argument("audio_files", nargs="+", help="Audio file paths to separate")
 
-    # Model selection
+    # Model selection (mutually exclusive: preset, single model, or multiple models)
     model_group = separate_parser.add_mutually_exclusive_group()
+    model_group.add_argument("-p", "--preset", help="Ensemble preset name (e.g. instrumental_clean, karaoke, vocal_balanced)")
     model_group.add_argument("-m", "--model", help="Single model to use for separation")
     model_group.add_argument("--models", nargs="+", help="Multiple models to use for separation")
 
@@ -168,6 +169,7 @@ def handle_separate_command(args, api_client: AudioSeparatorAPIClient, logger: l
             kwargs = {
                 "model": args.model,
                 "models": args.models,
+                "preset": args.preset,
                 "timeout": args.timeout,
                 "poll_interval": args.poll_interval,
                 "download": True,  # Always download in CLI