-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
55 lines (53 loc) · 1.67 KB
/
docker-compose.yml
File metadata and controls
55 lines (53 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
---
# Podman/Docker Compose stack for a local Markdown RAG system:
#   rag     - the RAG server, built from ./app, exposed on host port 8000
#   watcher - watches the vault and POSTs to the rag service's /reindex endpoint
# Ollama itself runs on the HOST (Metal GPU on macOS), not in a container,
# and is reached through host.containers.internal.
services:
  rag:
    build:
      context: ./app
    container_name: markdown-rag
    environment:
      TIMEZONE: Europe/London
      TZ: Europe/London
      # Ollama runs on the host (Metal GPU on macOS) rather than in a container.
      # host.containers.internal resolves to the host from inside Podman containers.
      OLLAMA_BASE_URL: http://host.containers.internal:11434
      # Model names default to the Makefile variables (GENERATOR_MODEL / EMBED_MODEL).
      # When started via 'make up' those are exported into the environment and
      # picked up here. The :- fallbacks ensure 'podman compose up' works directly
      # without going through make.
      # NOTE(review): 'gemma4-26b-q4xl:latest' is an unusual-looking Ollama tag —
      # confirm it matches a model actually pulled on the host.
      GENERATOR_MODEL: ${GENERATOR_MODEL:-gemma4-26b-q4xl:latest}
      EMBED_MODEL: ${EMBED_MODEL:-nomic-embed-text}
      VAULT_PATH: /vault
      INDEX_PATH: /index/chroma
      SYSTEM_PROMPT_FILE: /app/system_prompt.txt
      # Numeric settings are quoted so Compose passes them through as strings.
      CHUNK_SIZE: "900"
      CHUNK_OVERLAP: "150"
      TOP_K: "5"
      TEMPERATURE: "0.0"
      NUM_CTX: "8192"
      WATCH_DEBOUNCE_SECS: "3"
      # NOTE(review): presumably -1 means "no generation-length cap" per Ollama's
      # num_predict convention — confirm against app code.
      NUM_PREDICT: "-1"
      ANONYMIZED_TELEMETRY: "False"
      CHROMA_TELEMETRY: "False"
      REINDEX_ON_START: "true"
      RETRIEVAL_POOL: "400"
    volumes:
      # ${VAR:?err} fails fast with a clear message when HOST_VAULT_PATH is
      # unset; a bare ${HOST_VAULT_PATH} would silently expand to "" and
      # produce a cryptic mount error from the engine instead.
      - ${HOST_VAULT_PATH:?HOST_VAULT_PATH must be set to the host path of the Markdown vault}:/vault:ro
      - chroma_index:/index
    ports:
      - "8000:8000"
    command: ["/bin/bash", "/app/run.sh"]
  watcher:
    build:
      context: ./app
    container_name: markdown-rag-watcher
    depends_on:
      # service_started only waits for the rag container to start, not for the
      # server inside it to be ready; the watcher only POSTs on file changes,
      # so a briefly-unready rag is tolerable here.
      rag:
        condition: service_started
    environment:
      WATCH_PATH: /vault
      # In-network URL: 'rag' resolves via the default compose network.
      RAG_URL: http://rag:8000/reindex
      WATCH_DEBOUNCE_SECS: "3"
    volumes:
      # Same fail-fast guard as the rag service's vault mount.
      - ${HOST_VAULT_PATH:?HOST_VAULT_PATH must be set to the host path of the Markdown vault}:/vault:ro
    command: ["python", "watcher.py"]

# Named volume persisting the Chroma index across container recreations.
volumes:
  chroma_index: