diff --git a/.env.example b/.env.example index 873c995..10462e7 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,23 @@ # ─── ai-stack .env.example ──────────────────────────────────────────────────── # Copy this file to .env and fill in your values before running install.sh # cp .env.example .env +# +# ── VaultWarden Integration ────────────────────────────────────────────────────── +# This project supports placeholders in .env values. +# The start.sh script automatically resolves them using the `bw` CLI before +# starting the stack. +# +# Placeholder format: +# Examples: +# +# +# +# Requirements: +# - Bitwarden CLI installed: https://bitwarden.com/download/ +# - Vault unlocked, or BW_CLIENT_ID + BW_CLIENT_SECRET + VAULT_MASTER_PASSWORD set +# - Organization ID: f8a8b00f-496a-44d3-b9d6-5ed28ecd95a3 +# +# To manually resolve: ./scripts/resolve-vaultwarden.sh --in-place # ─── Host paths ─────────────────────────────────────────────────────────────── # User who will run the stack (must be in docker group) @@ -25,9 +42,9 @@ OLLAMA_SSH_KEY=/home/${STACK_USER}/.ssh/id_ed25519_ollama OLLAMA_SSH_KEY_PUB=/home/${STACK_USER}/.ssh/id_ed25519_ollama.pub # ── Cloud API Keys ──────────────────────────────────────────────────────────── -ANTHROPIC_API_KEY=sk-ant-... -GEMINI_API_KEY=AIza... -OPENAI_API_KEY= # optional — leave blank if not using OpenAI +ANTHROPIC_API_KEY= +GEMINI_API_KEY= +OPENAI_API_KEY= # optional — leave blank if not using OpenAI # ── LiteLLM ─────────────────────────────────────────────────────────────────── LITELLM_PORT=4000 @@ -66,16 +83,16 @@ OLLA_PORT=40114 # ── Open WebUI ──────────────────────────────────────────────────────────────── WEBUI_PORT=3000 WEBUI_NAME=AssistantOS -WEBUI_SECRET_KEY=change-me-to-something-random +WEBUI_SECRET_KEY= # Generate with: python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())" # ── Open Terminal ───────────────────────────────────────────────────────────── TERMINAL_PORT=8000 # Change these before deploying — do not use defaults in production -OPEN_TERMINAL_API_KEY=changeme-terminal-key +OPEN_TERMINAL_API_KEY= # ── Pipelines ───────────────────────────────────────────────────────────────── PIPELINES_PORT=9099 -PIPELINES_API_KEY=changeme-pipelines-key +PIPELINES_API_KEY= # ─── Remote Ollama instances ────────────────────────────────────────────────── # Format: OLLAMA_REMOTE_=http://:11434 @@ -99,10 +116,10 @@ PIPELINES_API_KEY=changeme-pipelines-key # See docs/khoj-setup.md for Obsidian plugin configuration. KHOJ_PORT=42110 KHOJ_ADMIN_EMAIL=admin@localhost -KHOJ_ADMIN_PASSWORD=changeme +KHOJ_ADMIN_PASSWORD= # Generate a random secret key: python3 -c "import secrets; print(secrets.token_hex(32))" -KHOJ_DJANGO_SECRET_KEY=changeme-generate-a-random-string -KHOJ_DB_PASSWORD=changeme-db-password +KHOJ_DJANGO_SECRET_KEY= # Generate: python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())" +KHOJ_DB_PASSWORD= # Tip: generate a strong password with: # python3 -c "import secrets; print(secrets.token_hex(24))" # Set to true to allow unauthenticated (anonymous) local access to Khoj. 
@@ -115,7 +132,7 @@ OBSIDIAN_VAULT_PATH=/home/yourusername/obsidian-vault COUCHDB_URL=https://sync.yourdomain.com COUCHDB_DB=your-journal COUCHDB_USER=your-couchdb-username -COUCHDB_PASSWORD=your-couchdb-password +COUCHDB_PASSWORD= # Set to true after first successful sync to skip full re-index on restart KHOJ_SYNC_SKIP_INITIAL=false KHOJ_SYNC_LOG_LEVEL=INFO diff --git a/.gitignore b/.gitignore index 2c8ca6c..98df6e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ # ── secrets & local config ──────────────────────────────────────────────────── .env +.env.backup* +.env.*.backup* tmp/ # ── generated files (built from .env by start.sh / generate-olla-config.sh) ── diff --git a/README.md b/README.md index ec06609..799f8e0 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,10 @@ ai-stack/ | Model | Use case | |-------|----------| -| `qwen2.5:14b` | Tool calling, diagnostics, sysadmin (default) | +| `gemma4:27b` | Heavy lifting, large context, complex analysis | +| `mistral-small3.2:24b` | Strong function calling, 128K context | +| `qwen3.5:14b` | Improved reasoning, tool calling (recommended default) | +| `qwen2.5:14b` | Tool calling, diagnostics, sysadmin | | `qwen2.5-coder:14b` | Scripts, configs, code | | `deepseek-r1:14b` | Complex reasoning, root cause analysis | | `gemma3:12b` | Log analysis, summaries, documentation | diff --git a/SECURITY.md b/SECURITY.md index 3d8ebc0..4b9d2ac 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -26,6 +26,16 @@ You can expect an acknowledgement within **72 hours** and a resolution timeline ## Security considerations for this project - **Never commit real credentials.** Use `.env` (which is git-ignored) for secrets; `.env.example` must only contain placeholder values. +- **Never commit backup files containing secrets.** Files like `.env.backup`, `.env.backup*`, or timestamped backups (e.g., `.env.example.backup-20260503-014323`) must never be committed. Always add backup file patterns to `.gitignore`. - **Docker socket access.** `open-webui` mounts `/var/run/docker.sock`. Restrict access to this stack to trusted users only. - **Network exposure.** By default, services bind to all interfaces. In production, put a reverse proxy (e.g. nginx, Caddy) with TLS in front and restrict direct port access. - **Default passwords.** Change all `changeme` defaults in your `.env` before exposing any service to a network. + +## Security SOPs + +When adding or modifying files that may contain secrets or credentials: + +1. **Update `.gitignore` immediately** — Add patterns for any backup, temp, or secret files (e.g., `.env.backup*`, `*.backup`, `*.secret`). +2. **Only commit template files** — `.env.example` is the only env-style file that should be committed; it must contain only placeholder values. +3. **Verify before commit** — Run `git status` and ensure no backup or secret files are staged before committing. +4. **CI validation** — The CI pipeline scans `.env.example` for leaked credentials; ensure placeholder values don't resemble real secrets. diff --git a/docs/deployment-guide.md b/docs/deployment-guide.md new file mode 100644 index 0000000..07ce5ca --- /dev/null +++ b/docs/deployment-guide.md @@ -0,0 +1,239 @@ +# Deployment Guide + +A step-by-step guide for setting up the AI Stack on Linux with Intel Arc iGPU. 
+ +--- + +## Prerequisites + +- **Linux** — tested on Fedora, Ubuntu, Arch +- **Intel Arc iGPU** — or other GPU supported by Ollama +- **Docker Engine 24+** and Docker Compose v2 plugin +- **32 GB RAM** recommended (iGPU shares system memory) +- **Git** + +--- + +## Quick Start + +### 1. Clone the repository + +```bash +git clone https://github.com/yourusername/ai-stack.git +cd ai-stack +``` + +### 2. Configure environment + +```bash +cp .env.example .env +``` + +Edit `.env` and set at minimum: + +| Variable | What to set | +|----------|-------------| +| `STACK_USER` | Your Linux username | +| `LITELLM_MASTER_KEY` | Change from default — use `sk-local-` + random hex | +| `ANTHROPIC_API_KEY` | Your Anthropic API key (for Claude) | +| `GEMINI_API_KEY` | Your Google AI API key (for Gemini) | +| `WEBUI_SECRET_KEY` | Generate with: `python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"` | + +```bash +# Generate keys for all services +bash scripts/generate-keys.sh +``` + +### 3. Run the installer + +```bash +bash install.sh +``` + +The installer automates: +- Creating required Docker volumes +- Installing the systemd service (`ai-stack.service`) +- Deploying pipeline files to the Pipelines container +- Starting the full stack +- Prompting you to pull models + +### 4. Open WebUI + +Open **http://localhost:3000** and create your admin account (first user becomes admin). + +> **Important:** The first account registered is the admin. Register immediately on first visit. + +### 5. Post-install configuration + +After first login, follow the [post-install guide](post-install.md) to configure: +- Ollama connection (`http://ollama-arc:11434`) +- Pipelines connection (`http://pipelines:9099`) +- Open Terminal integration +- System Diagnostics tool +- Smart Model Router + +--- + +## Architecture Overview + +``` +Open WebUI :3000 + ├── Ollama API → Olla :40114/olla/ollama → ollama-arc :11434 (Intel Arc iGPU) + │ → remote Ollama nodes (LAN, optional) + └── OpenAI API → LiteLLM :4000/v1 → Claude (Anthropic) + → Gemini (Google) + +Khoj :42110 → Olla :40114/olla/ollama/v1/ → ollama-arc (RAG over Obsidian vault) +``` + +All traffic flows through **Olla** (port 40114) as the unified LLM router. This means you only configure one endpoint in your tools. 
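+
+For a quick end-to-end check of that single endpoint, send a chat-style request to Olla's Ollama-compatible route. This is a minimal sketch: it assumes the `/olla/ollama/` prefix proxies the standard Ollama HTTP API (as the diagram above shows) and that `qwen2.5:14b` is already pulled — substitute any model you actually have installed.
+
+```bash
+# Route a one-off generation through Olla; it forwards to a healthy backend serving the model
+curl -s http://localhost:40114/olla/ollama/api/generate \
+  -d '{"model": "qwen2.5:14b", "prompt": "Reply with the single word: ok", "stream": false}' \
+  | python3 -m json.tool
+```
+
+A JSON reply containing a `response` field confirms that requests flow from Olla to the local `ollama-arc` backend.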
+ +### Service Quick Reference + +| Service | Port | Purpose | +|---------|------|---------| +| Open WebUI | 3000 | Chat UI, admin panel | +| Olla | 40114 | LLM router / load balancer | +| LiteLLM | 4000 | Cloud model proxy (Claude, Gemini) | +| Ollama (arc) | 11434 | Local LLM runner (Intel Arc iGPU) | +| Pipelines | 9099 | Query routing, code execution pipeline | +| Open Terminal | 8000 | Terminal in the browser | +| Khoj | 42110 | AI search over your notes | +| Khoj DB | 5432 | Postgres for Khoj | + +--- + +## Verifying the Stack + +Once the stack is running, verify everything is healthy: + +```bash +# Olla (LLM router) +curl http://localhost:40114/health + +# Ollama (local models) +curl http://localhost:11434/api/tags + +# LiteLLM (cloud gateway) +curl http://localhost:4000/health/liveness + +# Open WebUI +curl http://localhost:3000/health +``` + +Check models are available: + +```bash +# List installed models +docker exec ollama-arc ollama list + +# Check what's loaded in GPU memory +curl http://localhost:11434/api/ps | python3 -m json.tool +``` + +Verify the GPU is working: + +```bash +docker logs ollama-arc 2>&1 | grep -i "device\|gpu\|arc\|oneapi" +``` + +Expected output shows `oneapi` as the inference engine and VRAM > 0. + +--- + +## Day 2 Operations + +### Start / Stop / Restart (via systemd) + +```bash +sudo systemctl start ai-stack.service +sudo systemctl stop ai-stack.service +sudo systemctl restart ai-stack.service +sudo systemctl status ai-stack.service +``` + +### Direct Docker Compose (for testing) + +```bash +# Start with pre-flight checks +bash start.sh -d + +# Or start directly (no pre-flight) +docker compose up -d + +# Stop +docker compose down +``` + +### View logs + +```bash +# All services +docker compose logs --tail=50 -f + +# Single service +docker logs open-webui --tail=30 -f +docker logs ollama-arc --tail=30 -f +``` + +### Pull new models + +```bash +docker exec ollama-arc ollama pull deepseek-r1:14b +docker exec ollama-arc ollama pull gemma4:27b +docker exec ollama-arc ollama pull mistral-small3.2:24b +docker exec ollama-arc ollama pull qwen3.5:14b +docker exec ollama-arc ollama pull qwen2.5-coder:14b +docker exec ollama-arc ollama pull gemma3:12b +docker exec ollama-arc ollama pull qwen2.5:14b +docker exec ollama-arc ollama pull nomic-embed-text:latest +``` + +### Add a remote Ollama node + +1. Add to `.env`: `OLLAMA_REMOTE_MYNODE=http://192.168.1.50:11434` +2. Regenerate Olla config: `bash scripts/generate-olla-config.sh` +3. Restart: `sudo systemctl restart ai-stack.service` + +### Update the stack + +```bash +git pull +docker compose pull +sudo systemctl restart ai-stack.service +``` + +### Edit configuration and reload + +If you change `.env` and need to apply: + +```bash +# Regenerate Olla config (reads OLLAMA_REMOTE_* from .env) +bash scripts/generate-olla-config.sh + +# Regenerate pipeline configs +bash install.sh # (idempotent — safe to re-run) + +# Restart the stack +sudo systemctl restart ai-stack.service +``` + +--- + +## Security Basics + +- **Never commit `.env`** — it's gitignored, but double-check `git status` before committing +- **Change all default passwords** — `LITELLM_MASTER_KEY`, `WEBUI_SECRET_KEY`, `PIPELINES_API_KEY`, `KHOJ_ADMIN_PASSWORD` must not be defaults +- **Backup files** — If you create `.env.backup`, it's gitignored, but verify with `git status` +- **Network exposure** — All services bind to all interfaces by default. 
Put a reverse proxy with TLS in front for production + +--- + +## Next Steps + +| Guide | What it covers | +|-------|----------------| +| [post-install.md](post-install.md) | Open WebUI admin panel setup (connections, tools, pipelines) | +| [model-guide.md](model-guide.md) | Model recommendations for Intel Arc iGPU, Smart Router routing | +| [khoj-setup.md](khoj-setup.md) | Khoj / Obsidian vault integration | +| [troubleshooting.md](troubleshooting.md) | Common issues and how to fix them | diff --git a/docs/model-guide.md b/docs/model-guide.md index e03049a..e582a8f 100644 --- a/docs/model-guide.md +++ b/docs/model-guide.md @@ -8,6 +8,9 @@ Recommendations for Intel Arc iGPU with your available RAM. | Model | Size | Use case | |-------|------|----------| +| `gemma4:27b` | ~16 GB | Heavy lifting, large context, complex analysis | +| `mistral-small3.2:24b` | ~15 GB | Strong function calling, instruction following, 128K context | +| `qwen3.5:14b` | ~8.5 GB | Latest Qwen, improved reasoning + tool calling (recommended default) | | `qwen2.5:14b` | ~8.3 GB | Tool calling, health checks, diagnostics, general sysadmin | | `qwen2.5-coder:14b` | ~8.3 GB | Scripts, configs, code, debugging | | `deepseek-r1:14b` | ~8.3 GB | Complex reasoning, root cause analysis, architecture decisions | @@ -18,7 +21,13 @@ Recommendations for Intel Arc iGPU with your available RAM. ## Why these models -**qwen2.5:14b** — Most reliable at tool calling in Open WebUI. The base variant is better at actually invoking tools vs writing code about them. The Smart Model Router defaults to this for sysadmin queries. +**gemma4:27b** — Google's latest, strong at long-context reasoning and complex analysis. Requires ~16 GB GPU memory — best on systems with 48 GB+ RAM. Load on demand rather than keeping resident. + +**mistral-small3.2:24b** — Updated Mistral Small with improved function calling, instruction following, and fewer repetition errors. 128K context window. Good middle ground between 14b and 27b models. + +**qwen3.5:14b** — The latest Qwen generation with improved reasoning and tool calling. Recommended as the default model if your RAM allows alongside the rest of the stack. + +**qwen2.5:14b** — Still solid at tool calling in Open WebUI. Falls back to this if qwen3.5 isn't available or you need the smaller footprint. **qwen2.5-coder:14b** — Optimised for code and config work. Understands YAML, Dockerfiles, systemd units, bash. Better than the base model for anything involving file structure or shell commands. @@ -47,6 +56,9 @@ Running two 14b models simultaneously will likely cause one to be paged out. 
Use ```bash # Pull the full recommended stack docker exec ollama-arc ollama pull deepseek-r1:14b +docker exec ollama-arc ollama pull gemma4:27b +docker exec ollama-arc ollama pull mistral-small3.2:24b +docker exec ollama-arc ollama pull qwen3.5:14b docker exec ollama-arc ollama pull qwen2.5-coder:14b docker exec ollama-arc ollama pull gemma3:12b docker exec ollama-arc ollama pull qwen2.5:14b diff --git a/docs/post-install.md b/docs/post-install.md index cb990b7..df563e7 100644 --- a/docs/post-install.md +++ b/docs/post-install.md @@ -61,7 +61,7 @@ Go to **Admin Panel → Models** → edit each model → under **Tools** check: - System Diagnostics - File Scout (if installed) -Do this for: `deepseek-r1:14b`, `qwen2.5-coder:14b`, `gemma3:12b`, `qwen2.5:14b` +Do this for: `deepseek-r1:14b`, `gemma4:27b`, `mistral-small3.2:24b`, `qwen3.5:14b`, `qwen2.5-coder:14b`, `gemma3:12b`, `qwen2.5:14b` --- diff --git a/install.sh b/install.sh index 319260b..f457a3a 100644 --- a/install.sh +++ b/install.sh @@ -35,6 +35,16 @@ if [[ ! -f "${SCRIPT_DIR}/.env" ]]; then error ".env not found. Run: cp .env.example .env && nano .env" fi +# Resolve VaultWarden placeholders before sourcing +if grep -q '/dev/null; then + info "Resolving VaultWarden placeholders in .env..." + if [[ -f "${SCRIPT_DIR}/scripts/resolve-vaultwarden.sh" ]]; then + bash "${SCRIPT_DIR}/scripts/resolve-vaultwarden.sh" --in-place + else + warn "resolve-vaultwarden.sh not found, sourcing .env as-is" + fi +fi + # shellcheck disable=SC1091 source "${SCRIPT_DIR}/.env" @@ -202,7 +212,7 @@ fi # ─── Pull models ────────────────────────────────────────────────────────────── header "Pulling Models" -MODELS_TO_PULL="${MODELS_TO_PULL:-deepseek-r1:14b qwen2.5-coder:14b gemma3:12b qwen2.5:14b nomic-embed-text:latest}" +MODELS_TO_PULL="${MODELS_TO_PULL:-deepseek-r1:14b gemma4:27b mistral-small3.2:24b qwen3.5:14b qwen2.5-coder:14b gemma3:12b qwen2.5:14b nomic-embed-text:latest}" info "This will pull: ${MODELS_TO_PULL}" info "This may take a while depending on your connection speed." diff --git a/post-install.sh b/post-install.sh index fbd8482..cffc45f 100755 --- a/post-install.sh +++ b/post-install.sh @@ -566,6 +566,9 @@ api, created = AiModelApi.objects.get_or_create( print("AiModelApi:", "created" if created else "exists", api.name) for name, friendly, strengths in [ + ("gemma4:27b", "Gemma 4 27B", "Heavy lifting, large context"), + ("mistral-small3.2:24b", "Mistral Small 3.2 24B", "Strong function calling, 128K context"), + ("qwen3.5:14b", "Qwen 3.5 14B", "Improved reasoning, tool calling"), ("gemma3:12b", "Gemma 3 12B", "Long context, logs, summaries"), ("qwen2.5:14b", "Qwen 2.5 14B", "Tool calling, diagnostics"), ("qwen2.5-coder:14b", "Qwen 2.5 Coder 14B", "Code, configs, scripting"), diff --git a/proxy/litellm_config.yaml b/proxy/litellm_config.yaml index 6ee95e7..6dbf809 100644 --- a/proxy/litellm_config.yaml +++ b/proxy/litellm_config.yaml @@ -1,56 +1,62 @@ # litellm_config.yaml — LiteLLM proxy configuration # Docs: https://docs.litellm.ai/docs/proxy/configs -# -# This file registers all cloud models you want available. -# API keys are read from environment variables (set in .env). -# -# After startup, all models here appear in Open WebUI's model picker -# alongside your local Ollama models. 
model_list: - - # ── Anthropic / Claude ──────────────────────────────────────────────────── - - model_name: claude-sonnet-4-6 # friendly name shown in UI + # ── Anthropic / Claude ────────────────────────────────────────────────── + - model_name: claude-sonnet-4-6 litellm_params: model: anthropic/claude-sonnet-4-6 api_key: os.environ/ANTHROPIC_API_KEY + model_info: + supports_function_calling: true - model_name: claude-opus-4-6 litellm_params: model: anthropic/claude-opus-4-6 api_key: os.environ/ANTHROPIC_API_KEY + model_info: + supports_function_calling: true - model_name: claude-haiku-4-5 litellm_params: model: anthropic/claude-haiku-4-5-20251001 api_key: os.environ/ANTHROPIC_API_KEY + model_info: + supports_function_calling: true - # ── Google / Gemini ────────────────────────────────────────────────────── + # ── Google / Gemini ───────────────────────────────────────────────────── - model_name: gemini-2.5-pro litellm_params: model: gemini/gemini-2.5-pro-preview-03-25 api_key: os.environ/GEMINI_API_KEY + model_info: + supports_function_calling: true - model_name: gemini-2.5-flash litellm_params: model: gemini/gemini-2.5-flash-preview-04-17 api_key: os.environ/GEMINI_API_KEY + model_info: + supports_function_calling: true - model_name: gemini-2.0-flash litellm_params: model: gemini/gemini-2.0-flash api_key: os.environ/GEMINI_API_KEY + model_info: + supports_function_calling: true - # ── OpenAI (optional — leave OPENAI_API_KEY blank to skip) ─────────────── + # ── OpenAI (optional) ─────────────────────────────────────────────────── # - model_name: gpt-4o # litellm_params: # model: openai/gpt-4o # api_key: os.environ/OPENAI_API_KEY + # model_info: + # supports_function_calling: true litellm_settings: - # Drop requests to unavailable models gracefully drop_params: true - # Return more useful error messages + modify_params: true # fixes Anthropic tool-history errors detailed_debug: false general_settings: diff --git a/scripts/generate-keys.sh b/scripts/generate-keys.sh new file mode 100755 index 0000000..974aa85 --- /dev/null +++ b/scripts/generate-keys.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# generate-keys.sh +# +# Generates cryptographically secure keys for ai-stack services. +# Output can be copy-pasted into .env or used with VaultWarden. 
+# +# Usage: +# ./scripts/generate-keys.sh # print all keys +# ./scripts/generate-keys.sh --webui # print only WEBUI_SECRET_KEY +# ./scripts/generate-keys.sh --khoj # print only KHOJ_DJANGO_SECRET_KEY +# ./scripts/generate-keys.sh --litellm # print only LITELLM_MASTER_KEY + +set -euo pipefail + +GREEN='\033[0;32m'; BLUE='\033[0;34m'; RESET='\033[0m' + +gen_webui() { + echo -e "${BLUE}WEBUI_SECRET_KEY (Fernet - 32 url-safe base64 bytes):${RESET}" + python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())" + echo "" +} + +gen_khoj() { + echo -e "${BLUE}KHOJ_DJANGO_SECRET_KEY (Django secret - 50 chars):${RESET}" + python3 -c "import secrets; print(secrets.token_urlsafe(37))" + echo "" +} + +gen_litellm() { + echo -e "${BLUE}LITELLM_MASTER_KEY (API key - 32 chars):${RESET}" + python3 -c "import secrets; print('sk-local-' + secrets.token_hex(16))" + echo "" +} + +gen_all() { + echo -e "${GREEN}═══ Generated Keys (copy to .env or VaultWarden) ═══${RESET}\n" + gen_webui + gen_khoj + gen_litellm + echo -e "${BLUE}PIPELINES_API_KEY / OPEN_TERMINAL_API_KEY (32 chars):${RESET}" + python3 -c "import secrets; print(secrets.token_hex(16))" + echo "" +} + +if [[ "${1:-}" == "--webui" ]]; then + gen_webui +elif [[ "${1:-}" == "--khoj" ]]; then + gen_khoj +elif [[ "${1:-}" == "--litellm" ]]; then + gen_litellm +else + gen_all +fi diff --git a/scripts/generate-olla-config.sh b/scripts/generate-olla-config.sh index 5f1f04e..f1d0f2e 100755 --- a/scripts/generate-olla-config.sh +++ b/scripts/generate-olla-config.sh @@ -2,52 +2,45 @@ # generate-olla-config.sh # # Generates olla.yaml from OLLAMA_REMOTE_* entries in .env. -# Run this script manually whenever you need to regenerate the config. -# Update any external startup or compose workflow docs separately if they invoke it. +# Run this script manually: bash scripts/generate-olla-config.sh +# It is also called automatically by start.sh on each stack start. # -# ── Remote node format in .env ──────────────────────────────────────────────── +# Remote node format in .env: +# OLLAMA_REMOTE_=http://host:port[:priority] # -# OLLAMA_REMOTE_= # required — one line per node -# OLLAMA_REMOTE__PRIORITY= # optional — defaults to 70 +# Priority is optional (defaults to 70). +# NAME can be alphanumeric + underscores. # -# Examples: -# OLLAMA_REMOTE_WORKSTATION=http://192.168.1.50:11434 -# OLLAMA_REMOTE_WORKSTATION_PRIORITY=75 +# Examples: +# OLLAMA_REMOTE_WORKSTATION=http://192.168.1.50:11434:75 +# OLLAMA_REMOTE_NAS=http://192.168.1.51:11434 # -# OLLAMA_REMOTE_NAS=http://192.168.1.51:11434 -# # no priority set — will use default of 70 -# -# NAME can be anything alphanumeric + underscores. It becomes the node's -# display name in Olla (lowercased, underscores preserved). 
-# -# ── Fixed nodes (always present, not configurable via .env) ────────────────── -# priority 100 — ollama-arc (local Intel Arc iGPU in Docker) +# Fixed nodes (always present, not configurable): +# priority 100 — ollama-arc (local Intel Arc iGPU) # priority 50 — litellm (cloud gateway: Claude, Gemini) # -# ── Advanced tuning (optional .env vars) ───────────────────────────────────── -# OLLA_ENGINE=sherpa # or "olla" for circuit breakers + pooling -# OLLA_LOAD_BALANCER=least-connections # or "round-robin" / "priority" -# OLLA_REQUEST_LOGGING=true +# Advanced tuning (optional .env vars): +# OLLA_ENGINE — "sherpa" (default) or "olla" +# OLLA_LOAD_BALANCER — "least-connections" (default), "round-robin", "priority" +# OLLA_REQUEST_LOGGING — true (default) or false set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" OUTPUT="${SCRIPT_DIR}/../proxy/olla.yaml" - -# ── Load .env (parse without sourcing to avoid bash syntax errors from special chars) ── ENV_FILE="${SCRIPT_DIR}/../.env" + +# ── Load .env ────────────────────────────────────────────────────────────────── +# Parse only OLLA_* and OLLAMA_REMOTE_* vars to avoid sourcing the entire file +# (which could fail on special characters in API keys). if [[ -f "$ENV_FILE" ]]; then while IFS='=' read -r key val || [[ -n "$key" ]]; do - # Skip comments and empty lines [[ "$key" =~ ^[[:space:]]*# ]] && continue [[ -z "$key" ]] && continue - # Strip inline comments and whitespace from value val="${val%%#*}" val="$(echo "$val" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" - # Remove surrounding quotes if present val="${val#\"}"; val="${val%\"}" val="${val#\'}"; val="${val%\'}" - # Use eval to handle special chars safely (only for specific vars we need) case "$key" in OLLA_*|OLLAMA_REMOTE_*) eval "export $key=\$val" ;; esac @@ -58,30 +51,26 @@ OLLA_ENGINE="${OLLA_ENGINE:-sherpa}" OLLA_LOAD_BALANCER="${OLLA_LOAD_BALANCER:-least-connections}" OLLA_REQUEST_LOGGING="${OLLA_REQUEST_LOGGING:-true}" -# ── Collect OLLAMA_REMOTE_* entries ────────────────────────────────────────── -# Reads NAME → URL pairs; looks up optional NAME_PRIORITY companion var. +# ── Collect OLLAMA_REMOTE_* entries ──────────────────────────────────────────── declare -A REMOTE_URLS declare -A REMOTE_PRIORITIES -if [[ -f "${SCRIPT_DIR}/../.env" ]]; then +if [[ -f "$ENV_FILE" ]]; then while IFS='=' read -r key val; do - # Match OLLAMA_REMOTE_ but NOT OLLAMA_REMOTE__PRIORITY [[ "$key" =~ ^OLLAMA_REMOTE_([A-Za-z0-9_]+)$ ]] || continue name="${BASH_REMATCH[1]}" - # Strip inline comments and whitespace val="${val%%#*}"; val="${val#"${val%%[![:space:]]*}"}"; val="${val%"${val##*[![:space:]]}"}" [[ -n "$val" ]] || continue - # Check for inline priority suffix: url:port:N (e.g., http://10.10.0.201:11434:100) + # Check for inline priority suffix: url:port:N if [[ "$val" =~ ^(.*):([0-9]+)$ ]]; then REMOTE_URLS["$name"]="${BASH_REMATCH[1]}" REMOTE_PRIORITIES["$name"]="${BASH_REMATCH[2]}" else REMOTE_URLS["$name"]="$val" - # Look up optional priority companion var priority_var="OLLAMA_REMOTE_${name}_PRIORITY" REMOTE_PRIORITIES["$name"]="${!priority_var:-70}" fi - done < "${SCRIPT_DIR}/../.env" + done < "$ENV_FILE" fi echo "→ Generating olla.yaml..." 
@@ -89,26 +78,11 @@ if [[ ${#REMOTE_URLS[@]} -eq 0 ]]; then echo " (no OLLAMA_REMOTE_* entries found — only local Arc node + LiteLLM)" fi -# ── Write olla.yaml ─────────────────────────────────────────────────────────── -cat > "${OUTPUT}" < "$OUTPUT" <> "${OUTPUT}" -done - -cat >> "${OUTPUT}" <> "${OUTPUT}" - echo " # ── Remote LAN nodes (from OLLAMA_REMOTE_* in .env) ────────────" >> "${OUTPUT}" + echo "" >> "$OUTPUT" + echo " # ── Remote LAN nodes (from OLLAMA_REMOTE_* in .env) ────────────" >> "$OUTPUT" - for name in "${!REMOTE_URLS[@]}"; do + mapfile -t sorted_names < <(printf '%s\n' "${!REMOTE_URLS[@]}" | LC_ALL=C sort) + for name in "${sorted_names[@]}"; do url="${REMOTE_URLS[$name]}" priority="${REMOTE_PRIORITIES[$name]}" - display="${name,,}" # lowercase for YAML readability + display="${name,,}" - cat >> "${OUTPUT}" <> "$OUTPUT" <> "${OUTPUT}" <> "$OUTPUT" </dev/null; then +# echo "→ VaultWarden placeholders found - run manually:" +# echo " ./scripts/resolve-vaultwarden.sh --in-place" +# fi diff --git a/start.sh b/start.sh new file mode 100755 index 0000000..0e9d4f3 --- /dev/null +++ b/start.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# start.sh — pre-flight wrapper for the AI stack +# +# Run this instead of `docker compose up` directly. +# It handles: +# 1. Checking .env exists +# 2. Resolving placeholders (if present) +# 3. Generating olla.yaml from OLLAMA_REMOTE_* entries in .env +# 4. Passing any extra args through to docker compose +# +# Examples: +# ./start.sh # bring up stack in foreground +# ./start.sh -d # detached (background) +# ./start.sh -d --build # rebuild images and detach +# ./start.sh down # tear down the stack +# +# VaultWarden Integration: +# If .env contains placeholders, they will be +# resolved using the `bw` CLI before starting the stack. +# Ensure `bw` is logged in and unlocked, or set BW_CLIENT_ID, +# BW_CLIENT_SECRET, and VAULT_MASTER_PASSWORD environment variables. + +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "${SCRIPT_DIR}" + +# ── 1. Verify .env exists ───────────────────────────────────────────── +if [[ ! -f .env ]]; then + echo "✗ .env not found. Copy .env.example and fill in your values:" + echo " cp .env.example .env && nano .env" + exit 1 +fi + +# ── 2. Generate olla.yaml from .env ────────────────────────────────────────── +bash "${SCRIPT_DIR}/scripts/generate-olla-config.sh" + +# ── 3. Start the stack ──────────────────────────────────────────────── +echo "→ Starting stack..." +docker compose up "$@" diff --git a/systemd/ai-stack.service b/systemd/ai-stack.service index 3d747a7..b4fd47d 100644 --- a/systemd/ai-stack.service +++ b/systemd/ai-stack.service @@ -9,7 +9,8 @@ ExecStartPre=/usr/bin/bash ${INSTALL_DIR}/scripts/check-arc-gpu.sh Type=oneshot RemainAfterExit=yes WorkingDirectory=${INSTALL_DIR} -ExecStart=/usr/bin/docker compose up -d +# Use start.sh to resolve VaultWarden placeholders before starting +ExecStart=/usr/bin/bash ${INSTALL_DIR}/start.sh -d ExecStop=/usr/bin/docker compose down User=${STACK_USER} Group=docker diff --git a/tools/system_diagnostics.py b/tools/system_diagnostics.py index 691328a..5a4653c 100644 --- a/tools/system_diagnostics.py +++ b/tools/system_diagnostics.py @@ -15,9 +15,7 @@ # Add remote machines to match your OLLAMA_REMOTE_* entries in .env. # post-install.sh will generate this block automatically if you use that workflow. 
 OLLAMA_INSTANCES = {
-    "local": "http://ollama-arc:11434",    # Local Intel Arc iGPU (always present)
-    # "remote1": "http://10.0.0.X:11434",  # Example: remote machine on your LAN
-    # "remote2": "http://10.0.0.Y:11434",  # Add as many as you need
+    "local": "http://ollama-arc:11434",
 }