37 changes: 27 additions & 10 deletions .env.example
@@ -1,6 +1,23 @@
# ─── ai-stack .env.example ────────────────────────────────────────────────────
# Copy this file to .env and fill in your values before running install.sh
# cp .env.example .env
#
# ── VaultWarden Integration ──────────────────────────────────────────────────────
# This project supports <vaultwarden:path> placeholders in .env values.
# The start.sh script automatically resolves them using the `bw` CLI before
# starting the stack.
#
# Placeholder format: <vaultwarden:collection/path>
# Examples:
# <vaultwarden:ai-cluster/services/open-webui>
# <vaultwarden:ai-cluster/infra/anthropic-api>
#
# Requirements:
# - Bitwarden CLI installed: https://bitwarden.com/download/
# - Vault unlocked, or BW_CLIENT_ID + BW_CLIENT_SECRET + VAULT_MASTER_PASSWORD set
# - Organization ID: f8a8b00f-496a-44d3-b9d6-5ed28ecd95a3
#
# To manually resolve: ./scripts/resolve-vaultwarden.sh --in-place
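#
# Illustrative before/after (the resolved value below is hypothetical):
#   before: ANTHROPIC_API_KEY=<vaultwarden:ai-cluster/infra/anthropic-api>
#   after:  ANTHROPIC_API_KEY=sk-ant-xxxxxxxxxxxx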

# ─── Host paths ───────────────────────────────────────────────────────────────
# User who will run the stack (must be in docker group)
@@ -25,9 +42,9 @@ OLLAMA_SSH_KEY=/home/${STACK_USER}/.ssh/id_ed25519_ollama
OLLAMA_SSH_KEY_PUB=/home/${STACK_USER}/.ssh/id_ed25519_ollama.pub

# ── Cloud API Keys ────────────────────────────────────────────────────────────
ANTHROPIC_API_KEY=sk-ant-...
GEMINI_API_KEY=AIza...
OPENAI_API_KEY= # optional — leave blank if not using OpenAI
ANTHROPIC_API_KEY=<vaultwarden:ai-cluster/infra/anthropic-api>
GEMINI_API_KEY=<vaultwarden:ai-cluster/infra/gemini-api>
OPENAI_API_KEY=<vaultwarden:ai-cluster/infra/openai-api> # optional — leave blank if not using OpenAI

# ── LiteLLM ───────────────────────────────────────────────────────────────────
LITELLM_PORT=4000
@@ -66,16 +83,16 @@ OLLA_PORT=40114
# ── Open WebUI ────────────────────────────────────────────────────────────────
WEBUI_PORT=3000
WEBUI_NAME=AssistantOS
WEBUI_SECRET_KEY=change-me-to-something-random
WEBUI_SECRET_KEY=<vaultwarden:ai-cluster/services/open-webui> # Generate with: python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"

# ── Open Terminal ─────────────────────────────────────────────────────────────
TERMINAL_PORT=8000
# Change these before deploying — do not use defaults in production
OPEN_TERMINAL_API_KEY=changeme-terminal-key
OPEN_TERMINAL_API_KEY=<vaultwarden:ai-cluster/services/open-terminal>

# ── Pipelines ─────────────────────────────────────────────────────────────────
PIPELINES_PORT=9099
PIPELINES_API_KEY=changeme-pipelines-key
PIPELINES_API_KEY=<vaultwarden:ai-cluster/services/pipelines>

# ─── Remote Ollama instances ──────────────────────────────────────────────────
# Format: OLLAMA_REMOTE_<name>=http://<ip>:11434
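# Example (the same hypothetical node used in docs/deployment-guide.md):
#   OLLAMA_REMOTE_MYNODE=http://192.168.1.50:11434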
@@ -99,10 +116,10 @@ PIPELINES_API_KEY=changeme-pipelines-key
# See docs/khoj-setup.md for Obsidian plugin configuration.
KHOJ_PORT=42110
KHOJ_ADMIN_EMAIL=admin@localhost
KHOJ_ADMIN_PASSWORD=changeme
KHOJ_ADMIN_PASSWORD=<vaultwarden:ai-cluster/services/khoj>
# Generate a random secret key: python3 -c "import secrets; print(secrets.token_hex(32))"
KHOJ_DJANGO_SECRET_KEY=changeme-generate-a-random-string
KHOJ_DB_PASSWORD=changeme-db-password
KHOJ_DJANGO_SECRET_KEY=<vaultwarden:ai-cluster/services/khoj (notes)> # Generate: python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
KHOJ_DB_PASSWORD=<vaultwarden:ai-cluster/services/khoj-db>
# Tip: generate a strong password with:
# python3 -c "import secrets; print(secrets.token_hex(24))"
# Set to true to allow unauthenticated (anonymous) local access to Khoj.
@@ -115,7 +132,7 @@ OBSIDIAN_VAULT_PATH=/home/yourusername/obsidian-vault
COUCHDB_URL=https://sync.yourdomain.com
COUCHDB_DB=your-journal
COUCHDB_USER=your-couchdb-username
COUCHDB_PASSWORD=your-couchdb-password
COUCHDB_PASSWORD=<vaultwarden:ai-cluster/services/khoj-sync-couchdb>
# Set to true after first successful sync to skip full re-index on restart
KHOJ_SYNC_SKIP_INITIAL=false
KHOJ_SYNC_LOG_LEVEL=INFO
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,5 +1,7 @@
# ── secrets & local config ────────────────────────────────────────────────────
.env
.env.backup*
.env.*.backup*
tmp/

# ── generated files (built from .env by start.sh / generate-olla-config.sh) ──
5 changes: 4 additions & 1 deletion README.md
@@ -84,7 +84,10 @@ ai-stack/

| Model | Use case |
|-------|----------|
| `qwen2.5:14b` | Tool calling, diagnostics, sysadmin (default) |
| `gemma4:27b` | Heavy lifting, large context, complex analysis |
| `mistral-small3.2:24b` | Strong function calling, 128K context |
| `qwen3.5:14b` | Improved reasoning, tool calling (recommended default) |
| `qwen2.5:14b` | Tool calling, diagnostics, sysadmin |
| `qwen2.5-coder:14b` | Scripts, configs, code |
| `deepseek-r1:14b` | Complex reasoning, root cause analysis |
| `gemma3:12b` | Log analysis, summaries, documentation |
10 changes: 10 additions & 0 deletions SECURITY.md
@@ -26,6 +26,16 @@ You can expect an acknowledgement within **72 hours** and a resolution timeline
## Security considerations for this project

- **Never commit real credentials.** Use `.env` (which is git-ignored) for secrets; `.env.example` must only contain placeholder values.
- **Never commit backup files containing secrets.** Backup files such as `.env.backup`, anything matching `.env.backup*`, and timestamped copies (e.g., `.env.example.backup-20260503-014323`) must never be committed. Always add backup file patterns to `.gitignore`.
- **Docker socket access.** `open-webui` mounts `/var/run/docker.sock`. Restrict access to this stack to trusted users only.
- **Network exposure.** By default, services bind to all interfaces. In production, put a reverse proxy (e.g. nginx, Caddy) with TLS in front and restrict direct port access.
- **Default passwords.** Change all `changeme` defaults in your `.env` before exposing any service to a network.

## Security SOPs

When adding or modifying files that may contain secrets or credentials:

1. **Update `.gitignore` immediately** — Add patterns for any backup, temp, or secret files (e.g., `.env.backup*`, `*.backup`, `*.secret`).
2. **Only commit template files** — `.env.example` is the only env-style file that should be committed; it must contain only placeholder values.
3. **Verify before commit** — Run `git status` and ensure no backup or secret files are staged before committing (a pre-commit hook sketch follows this list).
4. **CI validation** — The CI pipeline scans `.env.example` for leaked credentials; ensure placeholder values don't resemble real secrets.
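
A minimal pre-commit hook can automate step 3. This is a sketch, assuming the ignore patterns above; adapt the regex to your repository's layout:

```bash
#!/usr/bin/env bash
# .git/hooks/pre-commit — refuse to commit env or backup files.
# .env.example (placeholders only) is the one env-style file allowed.
bad=$(git diff --cached --name-only \
  | grep -E '(^|/)\.env($|\.)|\.backup' \
  | grep -vE '(^|/)\.env\.example$' || true)
if [ -n "$bad" ]; then
  echo "Refusing to commit files that may contain secrets:"
  echo "$bad"
  exit 1
fi
```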
239 changes: 239 additions & 0 deletions docs/deployment-guide.md
@@ -0,0 +1,239 @@
# Deployment Guide

A step-by-step guide for setting up the AI Stack on Linux with Intel Arc iGPU.

---

## Prerequisites

- **Linux** — tested on Fedora, Ubuntu, Arch
- **Intel Arc iGPU** — or other GPU supported by Ollama
- **Docker Engine 24+** and Docker Compose v2 plugin
- **32 GB RAM** recommended (iGPU shares system memory)
- **Git**

---

## Quick Start

### 1. Clone the repository

```bash
git clone https://github.com/yourusername/ai-stack.git
cd ai-stack
```

### 2. Configure environment

```bash
cp .env.example .env
```

Edit `.env` and set at minimum:

| Variable | What to set |
|----------|-------------|
| `STACK_USER` | Your Linux username |
| `LITELLM_MASTER_KEY` | Change from default — use `sk-local-` + random hex |
| `ANTHROPIC_API_KEY` | Your Anthropic API key (for Claude) |
| `GEMINI_API_KEY` | Your Google AI API key (for Gemini) |
| `WEBUI_SECRET_KEY` | Generate with: `python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"` |

```bash
# Generate keys for all services
bash scripts/generate-keys.sh
```
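
If you prefer not to run the helper script, the same kinds of values can be generated by hand (patterns follow the table above; treat them as illustrative):

```bash
# LITELLM_MASTER_KEY — "sk-local-" plus random hex
python3 -c "import secrets; print('sk-local-' + secrets.token_hex(24))"

# WEBUI_SECRET_KEY — Fernet key, as in the table above
python3 -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
```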

### 3. Run the installer

```bash
bash install.sh
```

The installer automates:
- Creating required Docker volumes
- Installing the systemd service (`ai-stack.service`)
- Deploying pipeline files to the Pipelines container
- Starting the full stack
- Prompting you to pull models
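
Once the installer finishes, a quick sanity check (using the systemd unit it installs):

```bash
systemctl status ai-stack.service --no-pager
docker compose ps
```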

### 4. Open WebUI

Open **http://localhost:3000** and create your admin account (first user becomes admin).

> **Important:** The first account registered is the admin. Register immediately on first visit.

### 5. Post-install configuration

After first login, follow the [post-install guide](post-install.md) to configure:
- Ollama connection (`http://ollama-arc:11434`)
- Pipelines connection (`http://pipelines:9099`)
- Open Terminal integration
- System Diagnostics tool
- Smart Model Router

---

## Architecture Overview

```
Open WebUI :3000
├── Ollama API → Olla :40114/olla/ollama → ollama-arc :11434 (Intel Arc iGPU)
│ → remote Ollama nodes (LAN, optional)
└── OpenAI API → LiteLLM :4000/v1 → Claude (Anthropic)
→ Gemini (Google)

Khoj :42110 → Olla :40114/olla/ollama/v1/ → ollama-arc (RAG over Obsidian vault)
```

All traffic flows through **Olla** (port 40114) as the unified LLM router. This means you only configure one endpoint in your tools.
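
For example, an Ollama-style model listing can go through the router instead of hitting `ollama-arc` directly. The `/olla/ollama` prefix is taken from the diagram above; confirm the exact route against your generated Olla config:

```bash
# Through the router (the one endpoint your tools configure)
curl http://localhost:40114/olla/ollama/api/tags

# Direct to the local Ollama, bypassing Olla
curl http://localhost:11434/api/tags
```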

### Service Quick Reference

| Service | Port | Purpose |
|---------|------|---------|
| Open WebUI | 3000 | Chat UI, admin panel |
| Olla | 40114 | LLM router / load balancer |
| LiteLLM | 4000 | Cloud model proxy (Claude, Gemini) |
| Ollama (arc) | 11434 | Local LLM runner (Intel Arc iGPU) |
| Pipelines | 9099 | Query routing, code execution pipeline |
| Open Terminal | 8000 | Terminal in the browser |
| Khoj | 42110 | AI search over your notes |
| Khoj DB | 5432 | Postgres for Khoj |

---

## Verifying the Stack

Once the stack is running, verify everything is healthy:

```bash
# Olla (LLM router)
curl http://localhost:40114/health

# Ollama (local models)
curl http://localhost:11434/api/tags

# LiteLLM (cloud gateway)
curl http://localhost:4000/health/liveness

# Open WebUI
curl http://localhost:3000/health
```
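
To check all four in one pass, a small loop over the endpoints above:

```bash
for url in \
  http://localhost:40114/health \
  http://localhost:11434/api/tags \
  http://localhost:4000/health/liveness \
  http://localhost:3000/health; do
  printf '%-45s %s\n' "$url" "$(curl -s -o /dev/null -w '%{http_code}' "$url")"
done
```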

Check models are available:

```bash
# List installed models
docker exec ollama-arc ollama list

# Check what's loaded in GPU memory
curl http://localhost:11434/api/ps | python3 -m json.tool
```

Verify the GPU is working:

```bash
docker logs ollama-arc 2>&1 | grep -i "device\|gpu\|arc\|oneapi"
```

Expected output shows `oneapi` as the inference engine and VRAM > 0.
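
As a final smoke test, run a short prompt against a local model (any pulled model works; `qwen2.5:14b` is used here as an example):

```bash
docker exec ollama-arc ollama run qwen2.5:14b "Reply with one short sentence."
```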

---

## Day 2 Operations

### Start / Stop / Restart (via systemd)

```bash
sudo systemctl start ai-stack.service
sudo systemctl stop ai-stack.service
sudo systemctl restart ai-stack.service
sudo systemctl status ai-stack.service
```

### Direct Docker Compose (for testing)

```bash
# Start with pre-flight checks
bash start.sh -d

# Or start directly (no pre-flight)
docker compose up -d

# Stop
docker compose down
```

### View logs

```bash
# All services
docker compose logs --tail=50 -f

# Single service
docker logs open-webui --tail=30 -f
docker logs ollama-arc --tail=30 -f
```

### Pull new models

```bash
docker exec ollama-arc ollama pull deepseek-r1:14b
docker exec ollama-arc ollama pull gemma4:27b
docker exec ollama-arc ollama pull mistral-small3.2:24b
docker exec ollama-arc ollama pull qwen3.5:14b
docker exec ollama-arc ollama pull qwen2.5-coder:14b
docker exec ollama-arc ollama pull gemma3:12b
docker exec ollama-arc ollama pull qwen2.5:14b
docker exec ollama-arc ollama pull nomic-embed-text:latest
```

### Add a remote Ollama node

1. Add to `.env`: `OLLAMA_REMOTE_MYNODE=http://192.168.1.50:11434`
2. Regenerate Olla config: `bash scripts/generate-olla-config.sh`
3. Restart: `sudo systemctl restart ai-stack.service`
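
Combined as commands, using the example values from step 1:

```bash
echo 'OLLAMA_REMOTE_MYNODE=http://192.168.1.50:11434' >> .env
bash scripts/generate-olla-config.sh
sudo systemctl restart ai-stack.service
```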

### Update the stack

```bash
git pull
docker compose pull
sudo systemctl restart ai-stack.service
```

### Edit configuration and reload

If you change `.env` and need to apply:

```bash
# Regenerate Olla config (reads OLLAMA_REMOTE_* from .env)
bash scripts/generate-olla-config.sh

# Regenerate pipeline configs
bash install.sh # (idempotent — safe to re-run)

# Restart the stack
sudo systemctl restart ai-stack.service
```

---

## Security Basics

- **Never commit `.env`** — it's gitignored, but double-check `git status` before committing
- **Change all default passwords** — `LITELLM_MASTER_KEY`, `WEBUI_SECRET_KEY`, `PIPELINES_API_KEY`, `KHOJ_ADMIN_PASSWORD` must not be defaults
- **Backup files** — If you create `.env.backup`, it's gitignored, but verify with `git status`
- **Network exposure** — All services bind to all interfaces by default. Put a reverse proxy with TLS in front for production (a minimal sketch follows this list)
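
A minimal reverse-proxy sketch with Caddy, assuming a hypothetical hostname and the Open WebUI port from this guide (Caddy provisions TLS automatically for public DNS names):

```bash
cat > Caddyfile <<'EOF'
webui.example.com {
    reverse_proxy 127.0.0.1:3000
}
EOF
caddy run --config Caddyfile
```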

---

## Next Steps

| Guide | What it covers |
|-------|----------------|
| [post-install.md](post-install.md) | Open WebUI admin panel setup (connections, tools, pipelines) |
| [model-guide.md](model-guide.md) | Model recommendations for Intel Arc iGPU, Smart Router routing |
| [khoj-setup.md](khoj-setup.md) | Khoj / Obsidian vault integration |
| [troubleshooting.md](troubleshooting.md) | Common issues and how to fix them |
14 changes: 13 additions & 1 deletion docs/model-guide.md
@@ -8,6 +8,9 @@ Recommendations for Intel Arc iGPU with your available RAM.

| Model | Size | Use case |
|-------|------|----------|
| `gemma4:27b` | ~16 GB | Heavy lifting, large context, complex analysis |
| `mistral-small3.2:24b` | ~15 GB | Strong function calling, instruction following, 128K context |
| `qwen3.5:14b` | ~8.5 GB | Latest Qwen, improved reasoning + tool calling (recommended default) |
| `qwen2.5:14b` | ~8.3 GB | Tool calling, health checks, diagnostics, general sysadmin |
| `qwen2.5-coder:14b` | ~8.3 GB | Scripts, configs, code, debugging |
| `deepseek-r1:14b` | ~8.3 GB | Complex reasoning, root cause analysis, architecture decisions |
@@ -18,7 +21,13 @@ Recommendations for Intel Arc iGPU with your available RAM.

## Why these models

**qwen2.5:14b** — Most reliable at tool calling in Open WebUI. The base variant is better at actually invoking tools vs writing code about them. The Smart Model Router defaults to this for sysadmin queries.
**gemma4:27b** — Google's latest, strong at long-context reasoning and complex analysis. Requires ~16 GB GPU memory — best on systems with 48 GB+ RAM. Load on demand rather than keeping resident.

**mistral-small3.2:24b** — Updated Mistral Small with improved function calling, instruction following, and fewer repetition errors. 128K context window. Good middle ground between 14b and 27b models.

**qwen3.5:14b** — The latest Qwen generation with improved reasoning and tool calling. Recommended as the default model if your RAM allows alongside the rest of the stack.

**qwen2.5:14b** — Still solid at tool calling in Open WebUI. Fall back to this if qwen3.5 isn't available or you need a smaller footprint.

**qwen2.5-coder:14b** — Optimised for code and config work. Understands YAML, Dockerfiles, systemd units, bash. Better than the base model for anything involving file structure or shell commands.

@@ -47,6 +56,9 @@ Running two 14b models simultaneously will likely cause one to be paged out. Use
```bash
# Pull the full recommended stack
docker exec ollama-arc ollama pull deepseek-r1:14b
docker exec ollama-arc ollama pull gemma4:27b
docker exec ollama-arc ollama pull mistral-small3.2:24b
docker exec ollama-arc ollama pull qwen3.5:14b
docker exec ollama-arc ollama pull qwen2.5-coder:14b
docker exec ollama-arc ollama pull gemma3:12b
docker exec ollama-arc ollama pull qwen2.5:14b
```