Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions .github/workflows/deploy-to-cloudrun.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Builds the Cloud Run image with Cloud Build and rolls the
# `audio-separator` service forward to it.
name: Deploy to Cloud Run

on:
  # Deploy when a new PyPI release is published
  workflow_run:
    workflows: ["Publish to PyPI"]
    types: [completed]

  # Deploy on changes to anything the Cloud Run image is built from
  push:
    branches: [main]
    paths:
      - "Dockerfile.cloudrun"
      - "audio_separator/remote/deploy_cloudrun.py"
      - "audio_separator/ensemble_presets.json"
      - "cloudbuild.yaml"
      # Dockerfile.cloudrun copies and runs this script to bake models in,
      # so a change to it must rebuild the image too.
      - "scripts/download_preset_models.py"

  # Manual deployment
  workflow_dispatch:

jobs:
  deploy:
    runs-on: ubuntu-latest
    # Only run on successful PyPI publish (or push/manual triggers)
    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}

    # Serialize deployments: without this, two overlapping runs can finish
    # out of order and leave the service on the older image.
    concurrency:
      group: deploy-cloudrun
      cancel-in-progress: false

    permissions:
      contents: read
      id-token: write  # Required for Workload Identity Federation

    # Single source of truth for values used by both gcloud steps.
    env:
      GCP_PROJECT: nomadkaraoke
      GCP_REGION: us-east4

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Authenticate to Google Cloud
        uses: google-github-actions/auth@v2
        with:
          workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v2

      # Compute the tag once so the build and the deploy cannot disagree.
      - name: Compute image tag
        run: echo "IMAGE_TAG=${GITHUB_SHA::8}" >> "$GITHUB_ENV"

      # Use Cloud Build for the Docker build — it has native x86 with enough
      # RAM to load ML models during the build (baking models into the image).
      - name: Build and push via Cloud Build
        run: |
          gcloud builds submit \
            --config cloudbuild.yaml \
            --region="${GCP_REGION}" \
            --project="${GCP_PROJECT}" \
            --substitutions="SHORT_SHA=${IMAGE_TAG}"

      - name: Deploy to Cloud Run
        run: |
          gcloud run services update audio-separator \
            --image="${GCP_REGION}-docker.pkg.dev/${GCP_PROJECT}/audio-separator/api:${IMAGE_TAG}" \
            --region="${GCP_REGION}" \
            --project="${GCP_PROJECT}" \
            --quiet
94 changes: 94 additions & 0 deletions Dockerfile.cloudrun
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Audio Separator API - Cloud Run GPU Deployment
# Optimized for NVIDIA L4 GPU on Google Cloud Run
#
# Models are baked into the image so cold starts skip the model download.
# To update the baked-in models, rebuild the image.
#
# Build: docker build -f Dockerfile.cloudrun -t audio-separator-cloudrun .
# Run: docker run --gpus all -p 8080:8080 audio-separator-cloudrun

FROM nvidia/cuda:12.6.3-runtime-ubuntu22.04

# Keep apt/dpkg from prompting while packages are installed.
ENV DEBIAN_FRONTEND=noninteractive

# System layer, built in a single RUN:
#   1. add the deadsnakes PPA (needs software-properties-common) to get
#      Python 3.12 (the original note says onnxruntime-gpu requires >= 3.11),
#   2. install Python 3.12, FFmpeg, the audio libraries, the compiler
#      toolchain for Python C extensions, and curl,
#   3. drop the apt lists, then print the Python and FFmpeg versions so the
#      build fails if either binary is missing.
RUN apt-get update \
    && apt-get install -y --no-install-recommends software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ffmpeg \
        g++ \
        gcc \
        libasound2-dev \
        libjack-dev \
        libportaudio2 \
        libpulse-dev \
        libsamplerate0 \
        libsamplerate0-dev \
        libsndfile1 \
        libsndfile1-dev \
        libsox-dev \
        pkg-config \
        portaudio19-dev \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
        sox \
    && rm -rf /var/lib/apt/lists/* \
    && python3.12 --version \
    && ffmpeg -version

# Set Python 3.12 as the default `python3`/`python` and install pip.
#
# get-pip.py is downloaded to a file instead of being piped into Python:
# RUN uses /bin/sh, where a failing `curl` on the left of a pipe does not fail
# the command. `-f` also makes curl exit non-zero on an HTTP error instead of
# handing an error page to the interpreter.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \
    && curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py \
    && python3.12 /tmp/get-pip.py \
    && rm -f /tmp/get-pip.py \
    && python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel

# Install audio-separator with GPU support and API dependencies.
# The build context is copied to a temporary directory, the package is
# installed from it with the `gpu` extra, the API server dependencies are
# installed, and the temporary copy is removed.
# NOTE(review): the COPY creates its own image layer, so the `rm -rf` in the
# following RUN presumably does not shrink the final image — confirm, and
# consider a .dockerignore or a build-time mount if image size matters.
COPY . /tmp/audio-separator-src
RUN cd /tmp/audio-separator-src \
&& pip install --no-cache-dir ".[gpu]" \
&& pip install --no-cache-dir \
"fastapi>=0.104.0" \
"uvicorn[standard]>=0.24.0" \
"python-multipart>=0.0.6" \
"filetype>=1.2.0" \
&& rm -rf /tmp/audio-separator-src

# Register the CUDA library directory with the dynamic linker cache.
RUN printf '%s\n' '/usr/local/cuda/lib64' >> /etc/ld.so.conf.d/cuda.conf \
    && ldconfig

# Runtime configuration, one variable per instruction.
# Where models are stored inside the image.
ENV MODEL_DIR=/models
# Scratch space for the API server.
ENV STORAGE_DIR=/tmp/storage
# Port for the API server.
ENV PORT=8080
# Put the CUDA libraries and binaries ahead of the inherited search paths.
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
ENV PATH=/usr/local/cuda/bin:$PATH
# Unbuffered Python stdout/stderr.
ENV PYTHONUNBUFFERED=1

# Create the model and output directories up front.
RUN mkdir -p /models \
    && mkdir -p /tmp/storage/outputs

# Download the ensemble preset models at build time so they ship inside the
# image (the default presets: instrumental_clean + karaoke; roughly 1-1.5 GB
# in total). This removes the model download from cold starts.
# The helper script is deleted once it has run, and /models is listed so the
# build log shows what was baked in.
COPY scripts/download_preset_models.py /tmp/download_preset_models.py
RUN python3 /tmp/download_preset_models.py \
    && rm /tmp/download_preset_models.py \
    && ls -lh /models/

# Expose Cloud Run default port
EXPOSE 8080

# Health check for container orchestration.
# Shell-form CMD, so ${PORT} is expanded inside the container at probe time;
# the probe follows the PORT variable (default 8080) instead of a hard-coded
# port.
# NOTE(review): assumes the API server binds to $PORT — confirm in
# audio_separator/remote/deploy_cloudrun.py.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -f "http://localhost:${PORT:-8080}/health" || exit 1

# Run the API server
CMD ["python3", "-m", "audio_separator.remote.deploy_cloudrun"]
16 changes: 12 additions & 4 deletions audio_separator/remote/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def separate_audio(
file_path: str,
model: Optional[str] = None,
models: Optional[List[str]] = None,
preset: Optional[str] = None,
# Output parameters
output_format: str = "flac",
output_bitrate: Optional[str] = None,
Expand Down Expand Up @@ -76,8 +77,10 @@ def separate_audio(
files = {"file": (os.path.basename(file_path), open(file_path, "rb"))}
data = {}

# Handle model parameters (backwards compatibility)
if models:
# Handle model/preset parameters
if preset:
data["preset"] = preset
elif models:
data["models"] = json.dumps(models)
elif model:
data["model"] = model
Expand Down Expand Up @@ -144,6 +147,7 @@ def separate_audio_and_wait(
file_path: str,
model: Optional[str] = None,
models: Optional[List[str]] = None,
preset: Optional[str] = None,
timeout: int = 600,
poll_interval: int = 10,
download: bool = True,
Expand Down Expand Up @@ -208,13 +212,17 @@ def separate_audio_and_wait(
import time

# Submit the separation job with all parameters
models_desc = models or ([model] if model else ["default"])
self.logger.info(f"Submitting separation job for '{file_path}' with models: {models_desc} (audio-separator v{AUDIO_SEPARATOR_VERSION})")
if preset:
models_desc = f"preset:{preset}"
else:
models_desc = models or ([model] if model else ["default"])
self.logger.info(f"Submitting separation job for '{file_path}' with {models_desc} (audio-separator v{AUDIO_SEPARATOR_VERSION})")

result = self.separate_audio(
file_path,
model,
models,
preset,
output_format,
output_bitrate,
normalization_threshold,
Expand Down
4 changes: 3 additions & 1 deletion audio_separator/remote/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ def main():
separate_parser = subparsers.add_parser("separate", help="Separate audio files")
separate_parser.add_argument("audio_files", nargs="+", help="Audio file paths to separate")

# Model selection
# Model selection (mutually exclusive: preset, single model, or multiple models)
model_group = separate_parser.add_mutually_exclusive_group()
model_group.add_argument("-p", "--preset", help="Ensemble preset name (e.g. instrumental_clean, karaoke, vocal_balanced)")
model_group.add_argument("-m", "--model", help="Single model to use for separation")
model_group.add_argument("--models", nargs="+", help="Multiple models to use for separation")

Expand Down Expand Up @@ -168,6 +169,7 @@ def handle_separate_command(args, api_client: AudioSeparatorAPIClient, logger: l
kwargs = {
"model": args.model,
"models": args.models,
"preset": args.preset,
"timeout": args.timeout,
"poll_interval": args.poll_interval,
"download": True, # Always download in CLI
Expand Down
Loading
Loading