diff --git a/.github/workflows/deploy-to-cloudrun.yml b/.github/workflows/deploy-to-cloudrun.yml new file mode 100644 index 0000000..1677ddf --- /dev/null +++ b/.github/workflows/deploy-to-cloudrun.yml @@ -0,0 +1,60 @@ +name: Deploy to Cloud Run + +on: + # Deploy when a new PyPI release is published + workflow_run: + workflows: ["Publish to PyPI"] + types: [completed] + + # Deploy on changes to Dockerfile or Cloud Run server + push: + branches: [main] + paths: + - "Dockerfile.cloudrun" + - "audio_separator/remote/deploy_cloudrun.py" + - "audio_separator/ensemble_presets.json" + - "cloudbuild.yaml" + + # Manual deployment + workflow_dispatch: + +jobs: + deploy: + runs-on: ubuntu-latest + # Only run on successful PyPI publish (or push/manual triggers) + if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }} + + permissions: + contents: read + id-token: write # Required for Workload Identity Federation + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Authenticate to Google Cloud + uses: google-github-actions/auth@v2 + with: + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} + service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + + # Use Cloud Build for the Docker build — it has native x86 with enough + # RAM to load ML models during the build (baking models into the image). 
+ - name: Build and push via Cloud Build + run: | + gcloud builds submit \ + --config cloudbuild.yaml \ + --region=us-east4 \ + --project=nomadkaraoke \ + --substitutions=SHORT_SHA=${GITHUB_SHA::8} + + - name: Deploy to Cloud Run + run: | + gcloud run services update audio-separator \ + --image="us-east4-docker.pkg.dev/nomadkaraoke/audio-separator/api:${GITHUB_SHA::8}" \ + --region=us-east4 \ + --project=nomadkaraoke \ + --quiet diff --git a/Dockerfile.cloudrun b/Dockerfile.cloudrun new file mode 100644 index 0000000..f2b104f --- /dev/null +++ b/Dockerfile.cloudrun @@ -0,0 +1,94 @@ +# Audio Separator API - Cloud Run GPU Deployment +# Optimized for NVIDIA L4 GPU on Google Cloud Run +# +# Models are baked into the image for zero cold-start latency. +# To update models, rebuild the image. +# +# Build: docker build -f Dockerfile.cloudrun -t audio-separator-cloudrun . +# Run: docker run --gpus all -p 8080:8080 audio-separator-cloudrun + +FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04 + +# Prevent interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive + +# Install Python 3.12 from deadsnakes PPA (onnxruntime-gpu requires >= 3.11) +# and system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + software-properties-common \ + && add-apt-repository -y ppa:deadsnakes/ppa \ + && apt-get update && apt-get install -y --no-install-recommends \ + # Python 3.12 + python3.12 \ + python3.12-dev \ + python3.12-venv \ + # FFmpeg + ffmpeg \ + # Audio libraries + libsndfile1 \ + libsndfile1-dev \ + libsox-dev \ + sox \ + libportaudio2 \ + portaudio19-dev \ + libasound2-dev \ + libpulse-dev \ + libjack-dev \ + libsamplerate0 \ + libsamplerate0-dev \ + # Build tools (for compiling Python packages with C extensions) + build-essential \ + gcc \ + g++ \ + pkg-config \ + # Utilities + curl \ + && rm -rf /var/lib/apt/lists/* \ + && python3.12 --version && ffmpeg -version + +# Set Python 3.12 as default and install pip +RUN 
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 \ + && update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ + && curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 \ + && python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel + +# Install audio-separator with GPU support and API dependencies +COPY . /tmp/audio-separator-src +RUN cd /tmp/audio-separator-src \ + && pip install --no-cache-dir ".[gpu]" \ + && pip install --no-cache-dir \ + "fastapi>=0.104.0" \ + "uvicorn[standard]>=0.24.0" \ + "python-multipart>=0.0.6" \ + "filetype>=1.2.0" \ + && rm -rf /tmp/audio-separator-src + +# Set up CUDA library paths +RUN echo '/usr/local/cuda/lib64' >> /etc/ld.so.conf.d/cuda.conf && ldconfig + +# Environment configuration +ENV MODEL_DIR=/models \ + STORAGE_DIR=/tmp/storage \ + PORT=8080 \ + LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \ + PATH=/usr/local/cuda/bin:$PATH \ + PYTHONUNBUFFERED=1 + +# Create directories +RUN mkdir -p /models /tmp/storage/outputs + +# Bake ensemble preset models into the image. +# These are the models used by the default presets (instrumental_clean + karaoke). +# Total: ~1-1.5 GB. This eliminates cold-start model download time. 
+COPY scripts/download_preset_models.py /tmp/download_preset_models.py +RUN python3 /tmp/download_preset_models.py && rm /tmp/download_preset_models.py && ls -lh /models/ + +# Expose Cloud Run default port +EXPOSE 8080 + +# Health check for container orchestration +HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \ + CMD curl -f http://localhost:8080/health || exit 1 + +# Run the API server +CMD ["python3", "-m", "audio_separator.remote.deploy_cloudrun"] diff --git a/audio_separator/remote/api_client.py b/audio_separator/remote/api_client.py index a70fed3..7a6d6d2 100644 --- a/audio_separator/remote/api_client.py +++ b/audio_separator/remote/api_client.py @@ -32,6 +32,7 @@ def separate_audio( file_path: str, model: Optional[str] = None, models: Optional[List[str]] = None, + preset: Optional[str] = None, # Output parameters output_format: str = "flac", output_bitrate: Optional[str] = None, @@ -76,8 +77,10 @@ def separate_audio( files = {"file": (os.path.basename(file_path), open(file_path, "rb"))} data = {} - # Handle model parameters (backwards compatibility) - if models: + # Handle model/preset parameters + if preset: + data["preset"] = preset + elif models: data["models"] = json.dumps(models) elif model: data["model"] = model @@ -144,6 +147,7 @@ def separate_audio_and_wait( file_path: str, model: Optional[str] = None, models: Optional[List[str]] = None, + preset: Optional[str] = None, timeout: int = 600, poll_interval: int = 10, download: bool = True, @@ -208,13 +212,17 @@ def separate_audio_and_wait( import time # Submit the separation job with all parameters - models_desc = models or ([model] if model else ["default"]) - self.logger.info(f"Submitting separation job for '{file_path}' with models: {models_desc} (audio-separator v{AUDIO_SEPARATOR_VERSION})") + if preset: + models_desc = f"preset:{preset}" + else: + models_desc = models or ([model] if model else ["default"]) + self.logger.info(f"Submitting separation job for '{file_path}' 
with {models_desc} (audio-separator v{AUDIO_SEPARATOR_VERSION})") result = self.separate_audio( file_path, model, models, + preset, output_format, output_bitrate, normalization_threshold, diff --git a/audio_separator/remote/cli.py b/audio_separator/remote/cli.py index a5667a9..6dfb2ce 100644 --- a/audio_separator/remote/cli.py +++ b/audio_separator/remote/cli.py @@ -30,8 +30,9 @@ def main(): separate_parser = subparsers.add_parser("separate", help="Separate audio files") separate_parser.add_argument("audio_files", nargs="+", help="Audio file paths to separate") - # Model selection + # Model selection (mutually exclusive: preset, single model, or multiple models) model_group = separate_parser.add_mutually_exclusive_group() + model_group.add_argument("-p", "--preset", help="Ensemble preset name (e.g. instrumental_clean, karaoke, vocal_balanced)") model_group.add_argument("-m", "--model", help="Single model to use for separation") model_group.add_argument("--models", nargs="+", help="Multiple models to use for separation") @@ -168,6 +169,7 @@ def handle_separate_command(args, api_client: AudioSeparatorAPIClient, logger: l kwargs = { "model": args.model, "models": args.models, + "preset": args.preset, "timeout": args.timeout, "poll_interval": args.poll_interval, "download": True, # Always download in CLI diff --git a/audio_separator/remote/deploy_cloudrun.py b/audio_separator/remote/deploy_cloudrun.py new file mode 100644 index 0000000..6fa5dc0 --- /dev/null +++ b/audio_separator/remote/deploy_cloudrun.py @@ -0,0 +1,634 @@ +""" +Audio Separator API - Cloud Run GPU Deployment + +A FastAPI service for separating vocals from instrumental tracks using audio-separator, +deployed on Google Cloud Run with L4 GPU acceleration. + +This is the GCP equivalent of deploy_modal.py — same API contract, different infrastructure. +Models are downloaded from GCS on startup and cached in the container's local filesystem. + +Usage with Remote CLI: +1. 
Install audio-separator package: pip install audio-separator +2. Set environment variable: export AUDIO_SEPARATOR_API_URL="https://your-cloudrun-url.run.app" +3. Use the remote CLI: + - audio-separator-remote separate song.mp3 + - audio-separator-remote separate song.mp3 --model UVR-MDX-NET-Inst_HQ_4 + - audio-separator-remote status + - audio-separator-remote models + - audio-separator-remote download +""" + +import asyncio +import hashlib +import json +import logging +import os +import re +import shutil +import threading +import traceback +import typing +import uuid +from importlib.metadata import version +from typing import Optional +from urllib.parse import quote + +import filetype +import uvicorn +from fastapi import FastAPI, File, Form, HTTPException, Response, UploadFile +from fastapi.middleware.cors import CORSMiddleware +from starlette.responses import PlainTextResponse +from starlette.responses import Response as StarletteResponse + +logger = logging.getLogger("audio-separator-api") +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s") + +# Constants +MODEL_DIR = os.environ.get("MODEL_DIR", "/models") +STORAGE_DIR = os.environ.get("STORAGE_DIR", "/tmp/storage") +MODEL_BUCKET = os.environ.get("MODEL_BUCKET", "") +PORT = int(os.environ.get("PORT", "8080")) + +# In-memory job status tracking (one instance handles one job at a time on Cloud Run GPU) +job_status_store: dict[str, dict] = {} + +# Track model readiness +models_ready = False + + +def generate_file_hash(filename: str) -> str: + """Generate a short, stable hash for a filename to use in download URLs.""" + return hashlib.sha256(filename.encode("utf-8")).hexdigest()[:16] + + +try: + AUDIO_SEPARATOR_VERSION = version("audio-separator") +except Exception: + AUDIO_SEPARATOR_VERSION = "unknown" + + +def download_models_from_gcs(): + """Download models from GCS bucket on startup.""" + global models_ready + + if not MODEL_BUCKET: + logger.info("MODEL_BUCKET not 
set, skipping GCS model download (models will be downloaded on demand)") + models_ready = True + return + + try: + from google.cloud import storage + + client = storage.Client() + bucket = client.bucket(MODEL_BUCKET) + blobs = list(bucket.list_blobs()) + + os.makedirs(MODEL_DIR, exist_ok=True) + + for blob in blobs: + local_path = os.path.join(MODEL_DIR, blob.name) + if os.path.exists(local_path): + # Check size to skip already-downloaded models + if os.path.getsize(local_path) == blob.size: + logger.info(f"Model already cached: {blob.name} ({blob.size / 1024 / 1024:.1f} MB)") + continue + + logger.info(f"Downloading model: {blob.name} ({blob.size / 1024 / 1024:.1f} MB)") + os.makedirs(os.path.dirname(local_path), exist_ok=True) + blob.download_to_filename(local_path) + logger.info(f"Downloaded: {blob.name}") + + models_ready = True + logger.info(f"All models ready in {MODEL_DIR}") + + except Exception as e: + logger.error(f"Failed to download models from GCS: {e}") + # Still mark as ready — models can be downloaded on demand by Separator + models_ready = True + + +def separate_audio_sync( + audio_data: bytes, + filename: str, + task_id: str, + models: Optional[list] = None, + preset: Optional[str] = None, + output_format: str = "flac", + output_bitrate: Optional[str] = None, + normalization_threshold: float = 0.9, + amplification_threshold: float = 0.0, + output_single_stem: Optional[str] = None, + invert_using_spec: bool = False, + sample_rate: int = 44100, + use_soundfile: bool = False, + use_autocast: bool = False, + custom_output_names: Optional[dict] = None, + # MDX parameters + mdx_segment_size: int = 256, + mdx_overlap: float = 0.25, + mdx_batch_size: int = 1, + mdx_hop_length: int = 1024, + mdx_enable_denoise: bool = False, + # VR parameters + vr_batch_size: int = 1, + vr_window_size: int = 512, + vr_aggression: int = 5, + vr_enable_tta: bool = False, + vr_high_end_process: bool = False, + vr_enable_post_process: bool = False, + vr_post_process_threshold: 
float = 0.2, + # Demucs parameters + demucs_segment_size: str = "Default", + demucs_shifts: int = 2, + demucs_overlap: float = 0.25, + demucs_segments_enabled: bool = True, + # MDXC parameters + mdxc_segment_size: int = 256, + mdxc_override_model_segment_size: bool = False, + mdxc_overlap: int = 8, + mdxc_batch_size: int = 1, + mdxc_pitch_shift: int = 0, +) -> dict: + """Separate audio into stems. Runs synchronously (Cloud Run GPU handles one job at a time).""" + from audio_separator.separator import Separator + + all_output_files = {} + models_used = [] + + def update_status(status: str, progress: int = 0, error: str = None, files: dict = None): + status_data = { + "task_id": task_id, + "status": status, + "progress": progress, + "original_filename": filename, + "models_used": models_used, + "total_models": len(models) if models else 1, + "current_model_index": 0, + "files": files or {}, + } + if error: + status_data["error"] = error + job_status_store[task_id] = status_data + + try: + os.makedirs(f"{STORAGE_DIR}/outputs/{task_id}", exist_ok=True) + output_dir = f"{STORAGE_DIR}/outputs/{task_id}" + + update_status("processing", 5) + + # Strip existing stem markers from filename (e.g. "_(Vocals)_", "_(Instrumental)_") + # to prevent the Separator from confusing them with output stem names during + # chained separations (Stage 1 output → Stage 2 input). 
+ clean_filename = re.sub(r"_\([^)]+\)_", "_", filename) + input_file_path = os.path.join(output_dir, clean_filename) + with open(input_file_path, "wb") as f: + f.write(audio_data) + + update_status("processing", 10) + + # Build separator kwargs + separator_kwargs = { + "log_level": logging.INFO, + "model_file_dir": MODEL_DIR, + "output_dir": output_dir, + "output_format": output_format, + "output_bitrate": output_bitrate, + "normalization_threshold": normalization_threshold, + "amplification_threshold": amplification_threshold, + "output_single_stem": output_single_stem, + "invert_using_spec": invert_using_spec, + "sample_rate": sample_rate, + "use_soundfile": use_soundfile, + "use_autocast": use_autocast, + "mdx_params": { + "hop_length": mdx_hop_length, + "segment_size": mdx_segment_size, + "overlap": mdx_overlap, + "batch_size": mdx_batch_size, + "enable_denoise": mdx_enable_denoise, + }, + "vr_params": { + "batch_size": vr_batch_size, + "window_size": vr_window_size, + "aggression": vr_aggression, + "enable_tta": vr_enable_tta, + "enable_post_process": vr_enable_post_process, + "post_process_threshold": vr_post_process_threshold, + "high_end_process": vr_high_end_process, + }, + "demucs_params": { + "segment_size": demucs_segment_size, + "shifts": demucs_shifts, + "overlap": demucs_overlap, + "segments_enabled": demucs_segments_enabled, + }, + "mdxc_params": { + "segment_size": mdxc_segment_size, + "batch_size": mdxc_batch_size, + "overlap": mdxc_overlap, + "override_model_segment_size": mdxc_override_model_segment_size, + "pitch_shift": mdxc_pitch_shift, + }, + } + + if preset: + # Use ensemble preset — Separator handles model resolution + separator_kwargs["ensemble_preset"] = preset + logger.info(f"Using ensemble preset: {preset}") + + separator = Separator(**separator_kwargs) + separator.load_model() # Preset models loaded automatically + models_used.append(f"preset:{preset}") + + update_status("processing", 50) + output_files = 
separator.separate(input_file_path, custom_output_names=custom_output_names) + + if not output_files: + error_msg = f"Separation with preset {preset} produced no output files" + update_status("error", 0, error=error_msg) + return {"task_id": task_id, "status": "error", "error": error_msg, "models_used": models_used} + + for f in output_files: + fname = os.path.basename(f) + all_output_files[generate_file_hash(fname)] = fname + + else: + # Traditional multi-model processing (no ensembling) + if models is None or len(models) == 0: + models_to_run = [None] + else: + models_to_run = models + + total_models = len(models_to_run) + + for model_index, model_name in enumerate(models_to_run): + base_progress = 10 + (model_index * 80 // total_models) + model_progress_range = 80 // total_models + + logger.info(f"Processing model {model_index + 1}/{total_models}: {model_name or 'default'}") + update_status("processing", base_progress + (model_progress_range // 4)) + + separator = Separator(**separator_kwargs) + + update_status("processing", base_progress + (model_progress_range // 2)) + if model_name: + separator.load_model(model_name) + models_used.append(model_name) + else: + separator.load_model() + models_used.append("default") + + update_status("processing", base_progress + (3 * model_progress_range // 4)) + + model_custom_output_names = None + if total_models > 1 and custom_output_names: + model_suffix = f"_{models_used[-1].replace('.', '_').replace('/', '_')}" + model_custom_output_names = {stem: f"{name}{model_suffix}" for stem, name in custom_output_names.items()} + elif custom_output_names: + model_custom_output_names = custom_output_names + + output_files = separator.separate(input_file_path, custom_output_names=model_custom_output_names) + + if not output_files: + error_msg = f"Separation with model {models_used[-1]} produced no output files" + update_status("error", 0, error=error_msg) + return {"task_id": task_id, "status": "error", "error": error_msg, 
"models_used": models_used} + + for f in output_files: + fname = os.path.basename(f) + all_output_files[generate_file_hash(fname)] = fname + + update_status("completed", 100, files=all_output_files) + logger.info(f"Separation completed. {len(all_output_files)} output files.") + return {"task_id": task_id, "status": "completed", "files": all_output_files, "models_used": models_used} + + except Exception as e: + logger.error(f"Separation error: {e}") + traceback.print_exc() + update_status("error", 0, error=str(e)) + + # Clean up on error + output_dir = f"{STORAGE_DIR}/outputs/{task_id}" + if os.path.exists(output_dir): + shutil.rmtree(output_dir, ignore_errors=True) + + return {"task_id": task_id, "status": "error", "error": str(e), "models_used": models_used} + + +# --- FastAPI Application --- + +class PrettyJSONResponse(StarletteResponse): + media_type = "application/json" + + def render(self, content: typing.Any) -> bytes: + return json.dumps(content, ensure_ascii=False, allow_nan=False, indent=4, separators=(", ", ": ")).encode("utf-8") + + +web_app = FastAPI( + title="Audio Separator API", + description="Separate vocals from instrumental tracks using AI (Cloud Run GPU)", + version=AUDIO_SEPARATOR_VERSION, +) + +web_app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]) + + +@web_app.post("/separate") +async def separate_audio( + file: UploadFile = File(..., description="Audio file to separate"), + model: Optional[str] = Form(None, description="Single model to use for separation"), + models: Optional[str] = Form(None, description='JSON list of models, e.g. ["model1.ckpt", "model2.onnx"]'), + preset: Optional[str] = Form(None, description="Ensemble preset name (e.g. 
instrumental_clean, karaoke)"), + # Output parameters + output_format: str = Form("flac", description="Output format"), + output_bitrate: Optional[str] = Form(None, description="Output bitrate"), + normalization_threshold: float = Form(0.9), + amplification_threshold: float = Form(0.0), + output_single_stem: Optional[str] = Form(None), + invert_using_spec: bool = Form(False), + sample_rate: int = Form(44100), + use_soundfile: bool = Form(False), + use_autocast: bool = Form(False), + custom_output_names: Optional[str] = Form(None), + # MDX parameters + mdx_segment_size: int = Form(256), + mdx_overlap: float = Form(0.25), + mdx_batch_size: int = Form(1), + mdx_hop_length: int = Form(1024), + mdx_enable_denoise: bool = Form(False), + # VR parameters + vr_batch_size: int = Form(1), + vr_window_size: int = Form(512), + vr_aggression: int = Form(5), + vr_enable_tta: bool = Form(False), + vr_high_end_process: bool = Form(False), + vr_enable_post_process: bool = Form(False), + vr_post_process_threshold: float = Form(0.2), + # Demucs parameters + demucs_segment_size: str = Form("Default"), + demucs_shifts: int = Form(2), + demucs_overlap: float = Form(0.25), + demucs_segments_enabled: bool = Form(True), + # MDXC parameters + mdxc_segment_size: int = Form(256), + mdxc_override_model_segment_size: bool = Form(False), + mdxc_overlap: int = Form(8), + mdxc_batch_size: int = Form(1), + mdxc_pitch_shift: int = Form(0), +) -> dict: + """Upload an audio file and separate it into stems.""" + if not file.filename: + raise HTTPException(status_code=400, detail="No file provided") + + try: + # Parse models parameter + models_list = None + if models: + try: + models_list = json.loads(models) + if not isinstance(models_list, list): + raise ValueError("Models must be a JSON list") + except json.JSONDecodeError as e: + raise HTTPException(status_code=400, detail=f"Invalid JSON in models parameter: {e}") + elif model: + models_list = [model] + + # Parse custom_output_names + 
custom_output_names_dict = None + if custom_output_names: + try: + custom_output_names_dict = json.loads(custom_output_names) + if not isinstance(custom_output_names_dict, dict): + raise ValueError("Custom output names must be a JSON object") + except json.JSONDecodeError as e: + raise HTTPException(status_code=400, detail=f"Invalid JSON in custom_output_names parameter: {e}") + + audio_data = await file.read() + task_id = str(uuid.uuid4()) + + # Set initial status + job_status_store[task_id] = { + "task_id": task_id, + "status": "submitted", + "progress": 0, + "original_filename": file.filename, + "models_used": [f"preset:{preset}"] if preset else (models_list or ["default"]), + "total_models": 1 if preset else (len(models_list) if models_list else 1), + "current_model_index": 0, + "files": {}, + } + + # Run separation in a background thread to not block the event loop + # but keep the request alive (Cloud Run keeps the instance warm) + loop = asyncio.get_event_loop() + await loop.run_in_executor( + None, + lambda: separate_audio_sync( + audio_data, + file.filename, + task_id, + models_list, + preset, + output_format, + output_bitrate, + normalization_threshold, + amplification_threshold, + output_single_stem, + invert_using_spec, + sample_rate, + use_soundfile, + use_autocast, + custom_output_names_dict, + mdx_segment_size, + mdx_overlap, + mdx_batch_size, + mdx_hop_length, + mdx_enable_denoise, + vr_batch_size, + vr_window_size, + vr_aggression, + vr_enable_tta, + vr_high_end_process, + vr_enable_post_process, + vr_post_process_threshold, + demucs_segment_size, + demucs_shifts, + demucs_overlap, + demucs_segments_enabled, + mdxc_segment_size, + mdxc_override_model_segment_size, + mdxc_overlap, + mdxc_batch_size, + mdxc_pitch_shift, + ), + ) + + # Return the final status (completed or error) + return job_status_store.get(task_id, {"task_id": task_id, "status": "error", "error": "Job lost"}) + + except HTTPException: + raise + except Exception as e: + raise 
HTTPException(status_code=500, detail=f"Separation failed: {str(e)}") from e + + +@web_app.get("/status/{task_id}") +async def get_job_status(task_id: str) -> dict: + """Get the status of a separation job.""" + if task_id in job_status_store: + return job_status_store[task_id] + return { + "task_id": task_id, + "status": "not_found", + "progress": 0, + "error": "Job not found - may have been cleaned up or never existed", + } + + +@web_app.get("/download/{task_id}/{file_hash}") +async def download_file(task_id: str, file_hash: str) -> Response: + """Download a separated audio file using its hash identifier.""" + try: + # Look up filename from job status + status_data = job_status_store.get(task_id) + if not status_data: + raise HTTPException(status_code=404, detail="Task not found") + + files_dict = status_data.get("files", {}) + + # Handle both dict (hash→filename) and list (legacy) formats + actual_filename = None + if isinstance(files_dict, dict): + actual_filename = files_dict.get(file_hash) + elif isinstance(files_dict, list): + for fname in files_dict: + if generate_file_hash(fname) == file_hash: + actual_filename = fname + break + + if not actual_filename: + raise HTTPException(status_code=404, detail=f"File with hash {file_hash} not found") + + file_path = f"{STORAGE_DIR}/outputs/{task_id}/{actual_filename}" + if not os.path.exists(file_path): + raise HTTPException(status_code=404, detail=f"File not found on disk: {actual_filename}") + + with open(file_path, "rb") as f: + file_data = f.read() + + detected_type = filetype.guess(file_data) + content_type = detected_type.mime if detected_type and detected_type.mime else "application/octet-stream" + + ascii_filename = "".join(c if ord(c) < 128 else "_" for c in actual_filename) + encoded_filename = quote(actual_filename, safe="") + content_disposition = f'attachment; filename="{ascii_filename}"; filename*=UTF-8\'\'{encoded_filename}' + + return Response(content=file_data, media_type=content_type, 
headers={"Content-Disposition": content_disposition}) + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Download failed: {str(e)}") from e + + +@web_app.get("/models-json") +async def get_available_models() -> PrettyJSONResponse: + """Get list of available separation models.""" + from audio_separator.separator import Separator + + separator = Separator(info_only=True, model_file_dir=MODEL_DIR) + model_list = separator.list_supported_model_files() + return PrettyJSONResponse(content=model_list) + + +@web_app.get("/models") +async def get_simplified_models_list(filter_sort_by: str = None) -> PlainTextResponse: + """Get simplified model list in plain text format.""" + from audio_separator.separator import Separator + + separator = Separator(info_only=True, model_file_dir=MODEL_DIR) + models_data = separator.get_simplified_model_list(filter_sort_by=filter_sort_by) + + if not models_data: + return PlainTextResponse("No models found") + + filename_width = max(len("Model Filename"), max(len(f) for f in models_data.keys())) + arch_width = max(len("Arch"), max(len(info["Type"]) for info in models_data.values())) + stems_width = max(len("Output Stems (SDR)"), max(len(", ".join(info["Stems"])) for info in models_data.values())) + name_width = max(len("Friendly Name"), max(len(info["Name"]) for info in models_data.values())) + total_width = filename_width + arch_width + stems_width + name_width + 15 + + output_lines = [ + "-" * total_width, + f"{'Model Filename':<{filename_width}} {'Arch':<{arch_width}} {'Output Stems (SDR)':<{stems_width}} {'Friendly Name'}", + "-" * total_width, + ] + for fname, info in models_data.items(): + stems = ", ".join(info["Stems"]) + output_lines.append(f"{fname:<{filename_width}} {info['Type']:<{arch_width}} {stems:<{stems_width}} {info['Name']}") + + return PlainTextResponse("\n".join(output_lines)) + + +@web_app.get("/presets") +async def list_presets() -> PrettyJSONResponse: + """List 
available ensemble presets.""" + from audio_separator.separator import Separator + + separator = Separator(info_only=True, model_file_dir=MODEL_DIR) + presets = separator.list_ensemble_presets() + return PrettyJSONResponse(content=presets) + + +@web_app.get("/health") +async def health_check() -> dict: + """Health check endpoint.""" + return { + "status": "healthy", + "service": "audio-separator-api", + "version": AUDIO_SEPARATOR_VERSION, + "models_ready": models_ready, + "platform": "cloud-run", + } + + +@web_app.get("/") +async def root() -> dict: + """Root endpoint with API information.""" + return { + "message": "Audio Separator API", + "version": AUDIO_SEPARATOR_VERSION, + "platform": "cloud-run-gpu", + "description": "Separate vocals from instrumental tracks using AI", + "features": [ + "Ensemble preset support (instrumental_clean, karaoke, etc.)", + "Multiple model processing in single job", + "Full separator parameter compatibility", + "GPU-accelerated processing (NVIDIA L4)", + "All MDX, VR, Demucs, and MDXC architectures supported", + ], + "endpoints": { + "POST /separate": "Upload and separate audio file (supports presets, multiple models, all parameters)", + "GET /status/{task_id}": "Get job status and progress", + "GET /download/{task_id}/{file_hash}": "Download separated file using hash identifier", + "GET /presets": "List available ensemble presets", + "GET /models-json": "List available models (JSON)", + "GET /models": "List available models (plain text)", + "GET /health": "Health check", + }, + } + + +@web_app.on_event("startup") +async def startup_event(): + """Download models from GCS on startup.""" + os.makedirs(MODEL_DIR, exist_ok=True) + os.makedirs(f"{STORAGE_DIR}/outputs", exist_ok=True) + + # Download models in background thread to not block startup probe + thread = threading.Thread(target=download_models_from_gcs, daemon=True) + thread.start() + + +if __name__ == "__main__": + uvicorn.run(web_app, host="0.0.0.0", port=PORT) diff --git 
a/cloudbuild.yaml b/cloudbuild.yaml new file mode 100644 index 0000000..7dca246 --- /dev/null +++ b/cloudbuild.yaml @@ -0,0 +1,25 @@ +# Cloud Build config for building the audio-separator Docker image with baked models. +# Run manually: gcloud builds submit --config cloudbuild.yaml --region=us-east4 +# Uses e2-highcpu-32 machine type for fast builds with enough RAM for model loading. + +steps: + - name: 'gcr.io/cloud-builders/docker' + args: + - 'build' + - '-f' + - 'Dockerfile.cloudrun' + - '-t' + - 'us-east4-docker.pkg.dev/$PROJECT_ID/audio-separator/api:$SHORT_SHA' + - '-t' + - 'us-east4-docker.pkg.dev/$PROJECT_ID/audio-separator/api:latest' + - '.' + +images: + - 'us-east4-docker.pkg.dev/$PROJECT_ID/audio-separator/api:$SHORT_SHA' + - 'us-east4-docker.pkg.dev/$PROJECT_ID/audio-separator/api:latest' + +options: + machineType: 'E2_HIGHCPU_32' + logging: 'CLOUD_LOGGING_ONLY' + +timeout: '3600s' diff --git a/docs/archive/2026-03-22-modal-to-gcp-migration-plan.md b/docs/archive/2026-03-22-modal-to-gcp-migration-plan.md new file mode 100644 index 0000000..d565d2d --- /dev/null +++ b/docs/archive/2026-03-22-modal-to-gcp-migration-plan.md @@ -0,0 +1,339 @@ +# Plan: Modal → GCP Audio Separation Migration + +**Created:** 2026-03-22 +**Branch:** feat/sess-20260321-2314-modal-gcp-migration +**Worktrees:** `karaoke-gen-modal-gcp-migration` (infra + backend), `python-audio-separator-modal-gcp-migration` (server) +**Status:** Draft → Ready for implementation + +## Overview + +Migrate audio stem separation from Modal to a Cloud Run Service with L4 GPU on GCP. This eliminates the only third-party compute dependency, fixes intermittent Modal API failures ("no files were downloaded"), upgrades to latest ensemble models for better quality, and decouples separation from the lyrics review critical path so users can start reviewing lyrics faster. 
+ +### Architecture Decision: Cloud Run GPU Service + +| Factor | Cloud Run GPU | GCE VM + auto-stop | +|--------|--------------|-------------------| +| Idle cost | $0 (scales to zero) | $0 (when stopped) | +| Cold start | ~30-60s (model load from GCS) | ~60-120s (VM boot + model load) | +| Ops overhead | None (serverless) | Moderate (start/stop scripts, health monitoring) | +| GPU available | L4 (24GB VRAM) in us-east4 | T4/L4/A100 | +| Scaling | Automatic | Manual orchestration | +| Cost/job (~12 min GPU) | ~$0.13 | ~$0.07-0.10 (T4) | +| Deployment | Docker image push | Packer image + GCS wheel + SSH restart | + +Cloud Run GPU wins on simplicity. L4 is faster than T4, cold start is acceptable, and per-job cost well under $1. + +### Model Upgrade: Ensemble Presets as Default + +**Current models (single-model):** +| Stage | Model | SDR | Notes | +|-------|-------|-----|-------| +| 1 (instrumental) | `model_bs_roformer_ep_317_sdr_12.9755.ckpt` | 12.97 | Older BS-Roformer | +| 1 (other stems) | `htdemucs_6s.yaml` | — | Demucs 6-stem — **dropping** | +| 2 (karaoke/BV) | `mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt` | 10.20 | Single karaoke model | + +**New defaults (ensemble presets):** +| Stage | Preset | Models | SDR | Notes | +|-------|--------|--------|-----|-------| +| 1 | `instrumental_clean` | Fv7z + Resurrection | ~17.5 | +35% quality, bleedless | +| 2 | `karaoke` | 3 karaoke models (aufr33+gabox_v2+becruily) | ~10.6 | +4% quality, 3-model ensemble | + +**Key design: preset-name references, not model filenames.** karaoke-gen references preset names (`instrumental_clean`, `karaoke`). The audio-separator package resolves preset → models + ensemble algorithm. When better models come out, update presets in audio-separator and release a new version — no karaoke-gen changes needed. 
+ +### Pipeline Decoupling: Separation Off Critical Path + +**Current flow (both gate review):** +``` +Job created +├── Audio worker (separation) ──→ audio_complete=True ─┐ +│ ├→ GENERATING_SCREENS → AWAITING_REVIEW +└── Lyrics worker (transcription) → lyrics_complete=True┘ +``` + +**New flow (lyrics gates review, separation runs in background):** +``` +Job created +├── Audio worker (separation) ──→ audio_complete=True (background, not gating) +│ +└── Lyrics worker (transcription) → lyrics_complete=True → GENERATING_SCREENS → AWAITING_REVIEW + │ + User reviews lyrics + │ + Instrumental review + (waits for audio_complete + if not ready yet) +``` + +**Why this works:** +- Lyrics review (`/app/jobs#/{jobId}/review`) only needs transcription output — no stems needed +- Instrumental review (`/app/jobs#/{jobId}/instrumental`) needs stems — but user typically spends 5+ min on lyrics review, buying time for separation to finish +- In the rare case separation isn't done when user reaches instrumental review, show a "Separation in progress..." waiting state +- Screens worker only truly needs lyrics to generate title/end screens + +### Estimated Timeline + +| Scenario | Stage 1 | Stage 2 | Cold start | Total | +|----------|---------|---------|------------|-------| +| Current (Modal, single models) | 3-5 min | 2-3 min | 0 | 7-11 min | +| New ensemble (Cloud Run L4) | ~4-6 min | ~3-5 min | ~30-60s | ~8-12 min | +| **User-perceived wait** (new) | — | — | — | **0 min** (decoupled) | + +Separation takes slightly longer with ensembles, but users never wait for it — they're reviewing lyrics while it runs. 
+ +## Requirements + +- [ ] Audio separation runs on GCP Cloud Run with L4 GPU +- [ ] Same HTTP API contract as Modal deployment (endpoints, request/response format) +- [ ] `audio-separator-remote` CLI and `AudioSeparatorAPIClient` work unchanged +- [ ] Default models use ensemble presets (`instrumental_clean` + `karaoke`) +- [ ] karaoke-gen references preset names, not model filenames +- [ ] Demucs 6-stem separation dropped from pipeline +- [ ] Scale-to-zero when not processing (no idle GPU cost) +- [ ] Cold start < 60 seconds +- [ ] Per-job cost < $1 +- [ ] Models stored in GCS, loaded on container startup +- [ ] Publicly accessible endpoint with auth token (reuse `admin-tokens` secret) +- [ ] Infrastructure managed via Pulumi in karaoke-gen +- [ ] Separation decoupled from lyrics review critical path +- [ ] Instrumental review page handles "separation still in progress" gracefully +- [ ] Docker image CI lives in python-audio-separator repo, pushes to Artifact Registry + +## Implementation Steps + +### Phase 1: Cloud Run GPU Server (python-audio-separator repo) + +#### Step 1.1 — Create Cloud Run-compatible FastAPI server +- [ ] Create `audio_separator/remote/deploy_cloudrun.py` adapted from `deploy_modal.py` +- [ ] Replace Modal-specific code: + - `modal.Dict` → in-memory `dict` (single instance handles one job at a time) + - `modal.Volume` → local `/tmp` storage + GCS for model cache + - `modal.Function.spawn()` → synchronous processing (no background tasks needed) + - `modal.Image` → Dockerfile + - `modal.App` → standard FastAPI + uvicorn +- [ ] Keep all existing API endpoints identical: + - `POST /separate` — submit separation job + - `GET /status/{task_id}` — return job status + - `GET /download/{task_id}/{file_hash}` — download result file + - `GET /models-json`, `GET /models` — list models + - `GET /health` — health check (with model readiness indicator) + - `GET /` — root info +- [ ] Add model download on startup from GCS bucket 
(`gs://nomadkaraoke-audio-separator-models/`) +- [ ] Add ensemble preset support: accept `preset` parameter in `/separate` that resolves to model list + algorithm +- [ ] Add startup probe endpoint for Cloud Run GPU readiness + +**Design:** Make `/separate` effectively synchronous — process inline, store results in-memory dict + local filesystem. Cloud Run instance stays alive for scale-down timeout (600s), so Stage 2 hits the same warm instance. Async polling API contract preserved for client compatibility. + +#### Step 1.2 — Create Dockerfile +- [ ] Create `Dockerfile.cloudrun` in repo root +- [ ] Base: `nvidia/cuda:12.6.3-runtime-ubuntu22.04` (matches Cloud Run L4 driver support) +- [ ] Install: Python 3.12, FFmpeg, libsndfile, sox, system audio libs +- [ ] Install: `audio-separator[gpu]` from current repo +- [ ] Entrypoint: `python -m audio_separator.remote.deploy_cloudrun` +- [ ] Expose port 8080 +- [ ] Set env: `MODEL_DIR=/models`, `STORAGE_DIR=/tmp/storage` + +#### Step 1.3 — Upload models to GCS +- [ ] Create GCS bucket `nomadkaraoke-audio-separator-models` (us-central1, standard storage) +- [ ] Upload all models needed by default ensemble presets: + - `mel_band_roformer_instrumental_fv7z_gabox.ckpt` (instrumental_clean preset) + - `bs_roformer_instrumental_resurrection_unwa.ckpt` (instrumental_clean preset) + - `mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt` (karaoke preset) + - `mel_band_roformer_karaoke_gabox_v2.ckpt` (karaoke preset) + - `mel_band_roformer_karaoke_becruily.ckpt` (karaoke preset) +- [ ] Total: ~1-1.5 GB of models + +#### Step 1.4 — Local testing +- [ ] Build Docker image locally +- [ ] Test with `docker run --gpus all` (if local GPU available) or CPU mode +- [ ] Verify API compatibility: submit job with `preset=instrumental_clean`, poll status, download files +- [ ] Verify output filename format matches expected pattern: `filename_(StemType)_modelname.ext` +- [ ] Verify ensemble output: ensembled stems have correct naming +- [ 
] Compare output quality with Modal (A/B test on reference songs) + +#### Step 1.5 — CI/CD for Docker image +- [ ] Create `.github/workflows/deploy-to-cloudrun.yml` in python-audio-separator repo +- [ ] Triggers: PyPI release, changes to Dockerfile.cloudrun, manual dispatch +- [ ] Steps: build Docker image → push to Artifact Registry (`us-central1-docker.pkg.dev/nomadkaraoke/audio-separator`) +- [ ] Use Workload Identity Federation for GCP auth + +### Phase 2: GCP Infrastructure (karaoke-gen repo) + +#### Step 2.1 — Artifact Registry +- [ ] Add Artifact Registry Docker repo to Pulumi +- [ ] Repository: `audio-separator` in `us-central1` + +#### Step 2.2 — GCS Model Bucket +- [ ] Create `nomadkaraoke-audio-separator-models` bucket via Pulumi +- [ ] Standard storage class, us-central1 +- [ ] Grant read access to Cloud Run service account + +#### Step 2.3 — Cloud Run GPU Service +- [ ] Create `infrastructure/modules/audio_separator_service.py` +- [ ] Cloud Run Service configuration: + - Image: from Artifact Registry + - GPU: 1x NVIDIA L4 + - CPU: 4 vCPU (minimum required for L4) + - Memory: 16 GiB + - Min instances: 0 (scale to zero) + - Max instances: 2 (handle concurrent jobs) + - Request timeout: 1800s (30 min) + - Scale-down delay: 600s (keep warm between Stage 1 → Stage 2) + - Startup probe: HTTP GET /health, 120s initial delay, 10s period + - Env vars: + - `MODEL_BUCKET=nomadkaraoke-audio-separator-models` + - `MODEL_DIR=/models` + - `ADMIN_TOKEN` (from Secret Manager, reuse existing `admin-tokens`) + - Region: us-central1 + - Ingress: all traffic (public endpoint with auth) + +#### Step 2.4 — Service Account & IAM +- [ ] Create `audio-separator` service account +- [ ] Grant: `storage.objectViewer` on model bucket +- [ ] Grant: `secretmanager.secretAccessor` for admin-tokens +- [ ] Grant: `logging.logWriter`, `monitoring.metricWriter` + +#### Step 2.5 — Wire into Pulumi +- [ ] Add to `infrastructure/__main__.py` +- [ ] Add config constants to 
`infrastructure/config.py` + +### Phase 3: Pipeline Decoupling + Model Upgrade (karaoke-gen repo) + +#### Step 3.1 — Decouple separation from lyrics review path +- [ ] In `backend/services/job_manager.py`: + - Change `check_parallel_processing_complete()` to only check `lyrics_complete` (not `audio_complete`) + - `mark_lyrics_complete()` triggers screens worker on its own (no need to wait for audio) + - `mark_audio_complete()` no longer triggers screens — just sets the flag +- [ ] In `backend/workers/screens_worker.py`: + - Remove validation that `audio_complete` must be True + - Screens only needs lyrics data to generate title/end screens + - Skip instrumental analysis step if audio isn't complete yet (or make it a no-op) +- [ ] Verify: lyrics review page works without stems present + +#### Step 3.2 — Add "waiting for separation" state to instrumental review +- [ ] In frontend instrumental review page (`/app/jobs#/{jobId}/instrumental`): + - Check `state_data.audio_complete` on page load + - If false, show "Audio separation in progress..." 
with a spinner/progress indicator + - Poll job status every 5-10 seconds until `audio_complete=True` + - Once complete, load and display instrumental options as normal +- [ ] Backend: ensure instrumental review API endpoint returns separation status + +#### Step 3.3 — Switch to preset-based model configuration +- [ ] In `backend/workers/audio_worker.py`: + - Replace `DEFAULT_CLEAN_MODEL` with `DEFAULT_INSTRUMENTAL_PRESET = "instrumental_clean"` + - Replace `DEFAULT_BACKING_MODELS` with `DEFAULT_KARAOKE_PRESET = "karaoke"` + - Remove `DEFAULT_OTHER_MODELS` (Demucs dropped) + - Pass `preset=` parameter to API client instead of `models=` +- [ ] In `karaoke_gen/audio_processor.py`: + - Update `_process_audio_separation_remote()` to pass presets + - Stage 1: `api_client.separate_audio_and_wait(audio_file, preset="instrumental_clean", ...)` + - Stage 2: `api_client.separate_audio_and_wait(vocals_file, preset="karaoke", ...)` + - Remove `other_stems_models` parameter (or default to empty) + - Update result organization for ensemble outputs (stem names may include ensemble info) +- [ ] In `audio_separator/remote/api_client.py` (python-audio-separator repo): + - Add `preset` parameter to `separate_audio()` and `separate_audio_and_wait()` + - Client passes `preset` field in multipart form data to API + - API server resolves preset → models + algorithm + +#### Step 3.4 — Update tests +- [ ] Update `tests/unit/test_audio_remote.py`: + - Test preset-based separation calls + - Remove Demucs 6-stem references + - Test new default model/preset names +- [ ] Add test for pipeline decoupling: + - Verify `mark_lyrics_complete()` triggers screens without `audio_complete` + - Verify `mark_audio_complete()` sets flag but doesn't trigger screens +- [ ] Add frontend test for instrumental review waiting state + +### Phase 4: Cutover & Cleanup + +#### Step 4.1 — Deploy and test +- [ ] Deploy Cloud Run GPU service via `pulumi up` +- [ ] Run separation on 3-5 test songs with ensemble presets 
+- [ ] Compare output quality to Modal (listen test) +- [ ] Verify timing: ensemble separation completes within ~8-12 min +- [ ] Test cold start scenario (wait for scale-down, then submit) +- [ ] Test back-to-back jobs (Stage 1 → Stage 2 hits warm instance) +- [ ] Test pipeline decoupling: verify lyrics review available before separation completes +- [ ] Test instrumental review waiting state + +#### Step 4.2 — Update Cloud Run audio worker config +- [ ] Change `AUDIO_SEPARATOR_API_URL` in `infrastructure/modules/cloud_run.py` from Modal URL to Cloud Run URL +- [ ] Deploy via `pulumi up` +- [ ] Run 5-10 production jobs, monitor for errors + +#### Step 4.3 — Monitor (1 week) +- [ ] Watch Cloud Run logs for errors +- [ ] Monitor separation timing in job state_data +- [ ] Check Cloud Run billing (verify per-job cost < $1) +- [ ] Verify scale-to-zero works (no idle GPU charges) +- [ ] Watch for users hitting the "waiting for separation" state — measure frequency + +#### Step 4.4 — Decommission Modal +- [ ] Remove Modal deployment workflow from python-audio-separator repo +- [ ] Delete Modal app +- [ ] Close Modal account +- [ ] Remove `modal` from python-audio-separator dependencies +- [ ] Update `AUDIO_SEPARATOR_API_URL` env var in local `.envrc` files + +## Files to Create/Modify + +### python-audio-separator repo (`python-audio-separator-modal-gcp-migration` worktree) +| File | Action | Description | +|------|--------|-------------| +| `audio_separator/remote/deploy_cloudrun.py` | Create | Cloud Run-compatible FastAPI server (adapted from deploy_modal.py) | +| `audio_separator/remote/api_client.py` | Modify | Add `preset` parameter to separate methods | +| `Dockerfile.cloudrun` | Create | Docker image for Cloud Run GPU deployment | +| `.github/workflows/deploy-to-cloudrun.yml` | Create | CI/CD: build image → push to Artifact Registry | + +### karaoke-gen repo (`karaoke-gen-modal-gcp-migration` worktree) +| File | Action | Description | 
+|------|--------|-------------| +| `infrastructure/modules/audio_separator_service.py` | Create | Pulumi: Cloud Run GPU service + model bucket + IAM | +| `infrastructure/__main__.py` | Modify | Wire up audio separator service | +| `infrastructure/config.py` | Modify | Add audio separator constants | +| `infrastructure/modules/cloud_run.py` | Modify | Update `AUDIO_SEPARATOR_API_URL` to Cloud Run URL | +| `backend/services/job_manager.py` | Modify | Decouple: lyrics_complete alone triggers screens | +| `backend/workers/screens_worker.py` | Modify | Remove audio_complete prerequisite | +| `backend/workers/audio_worker.py` | Modify | Switch to preset-based config, drop Demucs | +| `karaoke_gen/audio_processor.py` | Modify | Pass presets instead of model filenames | +| `frontend/` (instrumental review) | Modify | Add "waiting for separation" state | +| `tests/unit/test_audio_remote.py` | Modify | Update for presets, remove Demucs tests | +| `.github/workflows/deploy-audio-separator.yml` | Create | CI: deploy Cloud Run revision on image push | + +## Testing Strategy + +- **Unit tests:** Preset resolution, pipeline decoupling (lyrics triggers screens alone), model name updates +- **Integration test:** Deploy Cloud Run service, run full separation with ensemble presets, verify output files +- **A/B comparison:** Same songs through Modal (single model) and Cloud Run (ensemble) — quality should be better +- **Pipeline test:** Submit job, verify lyrics review available before separation completes +- **Frontend test:** Playwright E2E for instrumental review waiting state +- **Cold start test:** Wait for scale-down, submit job, measure total time +- **Production E2E:** After cutover, run 10 production jobs through full pipeline + +## Cost Estimate + +| Scenario | Monthly cost | +|----------|-------------| +| 10 jobs/day × 12 min GPU = 2 hrs/day | ~$40/mo | +| 30 jobs/day × 12 min GPU = 6 hrs/day | ~$120/mo | +| Per-job cost (12 min L4 @ $0.67/hr) | ~$0.13 | + +Well under 
$1/job budget. No idle cost due to scale-to-zero. + +## Resolved Questions + +- [x] Cloud Run vs GCE VM → **Cloud Run GPU Service** (simplest, scale-to-zero) +- [x] Which GPU → **L4** (only option on Cloud Run, 24GB VRAM) +- [x] Model upgrade → **Ensemble presets as default** (quality > speed) +- [x] Demucs 6-stem → **Drop it** +- [x] Auth → **Reuse existing `admin-tokens` secret** +- [x] Docker CI → **python-audio-separator repo builds + pushes image** +- [x] Ensemble presets UI → **Backend-only; presets defined in audio-separator package** +- [x] Speed vs quality → **Quality wins; decouple separation from critical path so user never waits** + +## Rollback Plan + +1. **Quick rollback:** Change `AUDIO_SEPARATOR_API_URL` back to Modal URL in Pulumi config, `pulumi up`. Takes ~2 minutes. +2. **Pipeline rollback:** Revert job_manager changes to re-gate screens on `audio_complete`. One commit. +3. **Keep Modal running** during the monitoring period (Phase 4.3). Don't decommission until confident. +4. **Model rollback:** Preset config can be changed back to direct model filenames in one commit. 
diff --git a/scripts/download_preset_models.py b/scripts/download_preset_models.py new file mode 100644 index 0000000..d4603b6 --- /dev/null +++ b/scripts/download_preset_models.py @@ -0,0 +1,19 @@ +"""Download ensemble preset models for baking into Docker image.""" +import json +import importlib.resources as resources +from audio_separator.separator import Separator + +with resources.open_text("audio_separator", "ensemble_presets.json") as f: + presets = json.load(f)["presets"] + +models_to_download = set() +for preset_name in ["instrumental_clean", "karaoke"]: + models_to_download.update(presets[preset_name]["models"]) + +print(f"Downloading {len(models_to_download)} models for ensemble presets...") +for model in sorted(models_to_download): + print(f" Downloading: {model}") + sep = Separator(model_file_dir="/models") + sep.load_model(model) + print(f" Done: {model}") +print("All models downloaded successfully.")