19 changes: 14 additions & 5 deletions .env.example
@@ -23,14 +23,22 @@
STACK_USER=yourusername
OLLAMA_DATA=/home/${STACK_USER}/.ollama

# ─── Intel GPU device nodes ───────────────────────────────────────────────────
GPU_CARD=/dev/dri/card1
GPU_RENDER=/dev/dri/renderD128

# ── Ollama ────────────────────────────────────────────────────────────────────
OLLAMA_PORT=11434
OLLAMA_SSH_KEY=/home/${STACK_USER}/.ssh/id_ed25519_ollama
OLLAMA_SSH_KEY_PUB=/home/${STACK_USER}/.ssh/id_ed25519_ollama.pub
# Memory limits — adjust for your hardware.
# Intel Arc users: 16gb / 32g is a good starting point for a 32GB system.
# NVIDIA GPU users: match shm to your VRAM size.
# CPU-only users: 2gb / 8g (defaults set in docker-compose.yml).
OLLAMA_SHM_SIZE=2gb
OLLAMA_MEM_LIMIT=8g

# ── Intel Arc GPU device nodes (only needed with docker-compose.arc.yml) ──────
# Run scripts/check-arc-gpu.sh to find the correct device paths for your system.
# These values are ignored when using the CPU or NVIDIA compose overlay.
GPU_CARD=/dev/dri/card1
GPU_RENDER=/dev/dri/renderD128
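# Quick manual check (a sketch; node names vary with kernel and boot order):
#   ls -l /dev/dri/
# card* is the display node and renderD* the render/compute node;
# scripts/check-arc-gpu.sh automates this lookup.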

# ── Cloud API Keys ────────────────────────────────────────────────────────────
ANTHROPIC_API_KEY=sk-ant-...
@@ -39,7 +47,8 @@ OPENAI_API_KEY= # optional — leave blank if not using OpenAI

# ── LiteLLM ───────────────────────────────────────────────────────────────────
LITELLM_PORT=4000
LITELLM_MASTER_KEY=sk-local-admin-changeme
# Generate a secure key with: scripts/generate-keys.sh
LITELLM_MASTER_KEY=sk-change-this-before-deployment
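# Equivalent one-liner if you prefer not to run the script (assumes openssl is
# installed; the sk-local- prefix is just the convention used in this stack):
#   LITELLM_MASTER_KEY=sk-local-$(openssl rand -hex 24)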

# ── Olla — unified LLM router ─────────────────────────────────────────────────
OLLA_PORT=40114
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
@@ -23,6 +23,9 @@ jobs:
          docker compose version

      - name: Validate docker-compose.yml syntax
        env:
          LITELLM_MASTER_KEY: ci-validation-key
          STACK_USER: ci-user
        run: docker compose config --quiet
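      # The same validation can be reproduced locally before pushing (a sketch,
      # reusing the workflow's placeholder values):
      #   LITELLM_MASTER_KEY=ci-validation-key STACK_USER=ci-user \
      #     docker compose config --quiet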

      - name: Lint shell scripts
34 changes: 34 additions & 0 deletions docker-compose.arc.yml
@@ -0,0 +1,34 @@
# docker-compose.arc.yml — Intel Arc iGPU overlay
#
# Use with:
# docker compose -f docker-compose.yml -f docker-compose.arc.yml up -d
#
# Requires:
# - Intel Arc GPU (checked by scripts/check-arc-gpu.sh)
# - GPU_CARD and GPU_RENDER set in .env (see .env.example)
# - OLLAMA_SHM_SIZE and OLLAMA_MEM_LIMIT sized for your GPU (default 16gb/32g)
#
# See docs/hardware/arc.md for setup guide and model recommendations.
#
# Source image: https://github.com/Ava-AgentOne/ollama-intel
# Replaces intelanalytics/ipex-llm-inference-cpp-xpu (archived Jan 28, 2026)

services:
  ollama:
    image: ghcr.io/ava-agentone/ollama-intel:latest
    container_name: ollama
    environment:
      - DEVICE=Arc
      - OLLAMA_INTEL_GPU=true
      - ONEAPI_DEVICE_SELECTOR=level_zero:0
      - ZES_ENABLE_SYSMAN=1
      - OLLAMA_DEBUG=1
    devices:
      # GPU card node — may be card0 or card1 depending on boot order.
      # Run scripts/check-arc-gpu.sh to find the correct device paths.
      - ${GPU_CARD:-/dev/dri/card1}:${GPU_CARD:-/dev/dri/card1}
      - ${GPU_RENDER:-/dev/dri/renderD128}:${GPU_RENDER:-/dev/dri/renderD128}
    shm_size: '${OLLAMA_SHM_SIZE:-16gb}'
    mem_limit: '${OLLAMA_MEM_LIMIT:-32g}'
    healthcheck:
      start_period: 60s  # Arc iGPU needs extra time for driver init
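
# To confirm the overlay took effect after start-up (a sketch; exact log
# wording varies between image versions):
#   docker compose -f docker-compose.yml -f docker-compose.arc.yml config | grep -A4 'devices:'
#   docker compose logs ollama | grep -i 'level_zero\|sycl\|gpu'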
30 changes: 30 additions & 0 deletions docker-compose.nvidia.yml
@@ -0,0 +1,30 @@
# docker-compose.nvidia.yml — NVIDIA GPU overlay
#
# Use with:
# docker compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
#
# Requires:
# - NVIDIA GPU with CUDA support
# - NVIDIA Container Toolkit installed:
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
# - nvidia-smi working on the host
# - OLLAMA_SHM_SIZE and OLLAMA_MEM_LIMIT sized for your GPU (default 8gb/16g)
#
# See docs/hardware/nvidia.md for setup guide and model recommendations.

services:
  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    shm_size: '${OLLAMA_SHM_SIZE:-8gb}'
    mem_limit: '${OLLAMA_MEM_LIMIT:-16g}'
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
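
# To confirm the container sees the GPU after start-up (a sketch; the NVIDIA
# Container Toolkit mounts nvidia-smi into the container when the 'utility'
# capability is requested):
#   docker exec ollama nvidia-smi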
70 changes: 39 additions & 31 deletions docker-compose.yml
@@ -1,20 +1,27 @@
###############################################################################
# AI Stack — with Intel Arc iGPU + cloud models + LiteLLM, unified via Olla router
# AI Stack — local LLMs + cloud models + LiteLLM, unified via Olla router
#
# Topology:
#
# Retriever ──[embed]──▶ Olla :40114 ──▶ ollama-arc :11434 (Intel Arc iGPU)
# Retriever ──[embed]──▶ Olla :40114 ──▶ ollama :11434 (local LLM)
# OpenCode ──[tool / provider]──▶ Olla :40114
# ──[provider]───────▶ LiteLLM :4000 ──▶ Claude (Anthropic)
# ──▶ Gemini (Google)
#
# All inference through Olla for unified routing + load balancing.
# OpenCode is the primary AI interface (CLI + Obsidian sidebar plugin).
#
# Hardware profiles (pick one):
# CPU / generic: docker compose up -d (this file)
# Intel Arc iGPU: docker compose -f docker-compose.yml -f docker-compose.arc.yml up -d
# NVIDIA GPU: docker compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
#
# See docs/hardware/ for requirements, tuning, and troubleshooting.
#
# Quick start:
# 1. cp .env.example .env && nano .env
# 2. ./check-arc-gpu.sh # validate GPU_CARD / GPU_RENDER device paths
# 3. docker compose up -d
# 2. scripts/generate-olla-config.sh # generate proxy/olla.yaml
# 3. docker compose up -d # add -f overlay for your GPU
#
# Service URLs:
# Olla status http://localhost:40114/internal/status/endpoints
@@ -24,36 +24,31 @@

services:

  # ─── Ollama (Intel Arc iGPU) ───────────────────────────────────────────────
  # Uses ava-agentone/ollama-intel: community-maintained, iGPU-first
  # Replaces intelanalytics/ipex-llm-inference-cpp-xpu (archived Jan 28, 2026)
  # Source: https://github.com/Ava-AgentOne/ollama-intel
  ollama-arc:
    image: ghcr.io/ava-agentone/ollama-intel:latest
    container_name: ollama-arc
  # ─── Ollama — local LLM inference ─────────────────────────────────────────
  # Default: standard Ollama image (CPU / CUDA).
  # For GPU acceleration, add a hardware overlay:
  #   Intel Arc: -f docker-compose.arc.yml
  #   NVIDIA:    -f docker-compose.nvidia.yml
  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    restart: unless-stopped
    environment:
      - OLLAMA_HOST=0.0.0.0
      - OLLAMA_ORIGINS=app://obsidian.md*
      - OLLAMA_DEBUG=1
      - DEVICE=Arc
      - OLLAMA_INTEL_GPU=true
      - ONEAPI_DEVICE_SELECTOR=level_zero:0
      - ZES_ENABLE_SYSMAN=1
      - OLLAMA_KEEP_ALIVE=-1
    volumes:
      - ${OLLAMA_DATA:-/home/user/.ollama}:/root/.ollama
      - ${OLLAMA_SSH_KEY:-/dev/null}:/root/.ollama/id_ed25519:ro
      - ${OLLAMA_SSH_KEY_PUB:-/dev/null}:/root/.ollama/id_ed25519.pub:ro
    devices:
      # GPU card node — may be card0 or card1 depending on boot
      # check-arc-gpu.sh validates this before stack starts
      - ${GPU_CARD:-/dev/dri/card1}:${GPU_CARD:-/dev/dri/card1}
      - ${GPU_RENDER:-/dev/dri/renderD128}:${GPU_RENDER:-/dev/dri/renderD128}
    ports:
      - "${OLLAMA_PORT:-11434}:11434"
    shm_size: '16gb'
    mem_limit: 32g
    shm_size: '${OLLAMA_SHM_SIZE:-2gb}'
    mem_limit: '${OLLAMA_MEM_LIMIT:-8g}'
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:11434/"]
      interval: 15s
      timeout: 10s
      retries: 5
      start_period: 30s
    networks:
      - ai-net

@@ -73,7 +77,7 @@ services:
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
      - GEMINI_API_KEY=${GEMINI_API_KEY}
      - OPENAI_API_KEY=${OPENAI_API_KEY:-not-set}
      - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-sk-local-admin}
      - LITELLM_MASTER_KEY=${LITELLM_MASTER_KEY:-sk-changeme-set-in-env}
    command: ["--config", "/app/config.yaml", "--port", "4000"]
    healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveness?key=${LITELLM_MASTER_KEY:-sk-local-admin}')"]
@@ -85,7 +89,7 @@ services:
      - ai-net

  # ─── Olla — LLM proxy & load balancer ─────────────────────────────────────
  # Single unified endpoint for all models — local Arc GPU + cloud via LiteLLM.
  # Single unified endpoint for all models — local Ollama + cloud via LiteLLM.
  # LAN node config is NOT baked in here — declare extra nodes as OLLAMA_REMOTE_*
  # variables in your .env, then run scripts/generate-olla-config.sh to produce
  # proxy/olla.yaml before bringing up the stack.
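  # Example .env entries (a sketch; the suffix after OLLAMA_REMOTE_ is an
  # arbitrary label, assumed to become the node name in the generated config):
  #   OLLAMA_REMOTE_NAS=http://192.168.1.51:11434
  #   OLLAMA_REMOTE_DESKTOP=http://192.168.1.52:11434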
@@ -100,8 +104,10 @@ services:
      - ./proxy/olla.yaml:/app/config.yaml:ro  # generated by scripts/generate-olla-config.sh — in .gitignore
      - olla-logs:/app/logs
    depends_on:
      - ollama-arc
      - litellm
      ollama:
        condition: service_healthy
      litellm:
        condition: service_healthy
    healthcheck:
      test: ["CMD", "wget", "--quiet", "--tries=1", "--spider", "http://localhost:40114/internal/health"]
      interval: 30s
@@ -116,7 +122,7 @@ services:
  # Uses sqlite-vec for vector storage, FTS5 for keyword search,
  # and watchdog for live vault indexing.
  # Hybrid search: BM25 (FTS5) + vector similarity, fused via RRF.
  # Embeddings via Olla → ollama-arc (nomic-embed-text).
  # Embeddings via Olla → ollama (nomic-embed-text).
  # API-only: no web UI. Intended for use as an OpenCode tool.
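  # For reference, RRF scores each document as score(d) = Σ_r 1/(k + rank_r(d))
  # across the two rankers; k = 60 is the usual default (the exact constant
  # used by the retriever is an assumption).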
  retriever:
    build: ./retriever
@@ -135,7 +141,8 @@
      - DB_PATH=/data/retriever.db
      - VAULT_PATH=/vault
    depends_on:
      - olla
      olla:
        condition: service_healthy
    networks:
      - ai-net

@@ -154,7 +161,8 @@ services:
      - LISTEN_HOST=0.0.0.0
      - LISTEN_PORT=40115
    depends_on:
      - olla
      olla:
        condition: service_healthy
    networks:
      - ai-net

@@ -165,4 +173,4 @@ volumes:

# ─── Networks ─────────────────────────────────────────────────────────────────
networks:
  ai-net:
  ai-net:
10 changes: 5 additions & 5 deletions scripts/generate-olla-config.sh
@@ -16,7 +16,7 @@
# OLLAMA_REMOTE_NAS=http://192.168.1.51:11434
#
# Fixed nodes (always present, not configurable):
# priority 100 — ollama-arc (local Intel Arc iGPU)
# priority 100 — ollama (local Ollama node — Arc, NVIDIA, or CPU depending on overlay)
# priority 50 — litellm (cloud gateway: Claude, Gemini)
#
# Advanced tuning (optional .env vars):
@@ -82,7 +82,7 @@ fi

echo "→ Generating olla.yaml..."
if [[ ${#REMOTE_URLS[@]} -eq 0 ]]; then
echo " (no OLLAMA_REMOTE_* entries found — only local Arc node + LiteLLM)"
echo " (no OLLAMA_REMOTE_* entries found — only local Ollama node + LiteLLM)"
fi

# ── Write olla.yaml ────────────────────────────────────────────────────────────
@@ -108,9 +108,9 @@ discovery:
  static:
    endpoints:

      # ── Local Intel Arc iGPU node (priority 100) ─────────────────
      - url: "http://ollama-arc:11434"
        name: "ollama-arc"
      # ── Local Ollama node (priority 100) ─────────────────
      - url: "http://ollama:11434"
        name: "ollama"
        type: "ollama"
        priority: 100
        check_interval: 15s
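
      # A remote declared as OLLAMA_REMOTE_NAS=http://192.168.1.51:11434 would
      # be emitted next to this node, roughly like the sketch below (the name is
      # assumed to come from the variable suffix, and the priority value for
      # remotes is an assumption):
      #   - url: "http://192.168.1.51:11434"
      #     name: "nas"
      #     type: "ollama"
      #     priority: 75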