-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
138 lines (110 loc) · 4.89 KB
/
Makefile
File metadata and controls
138 lines (110 loc) · 4.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# ── model configuration ───────────────────────────────────────────────────────
# These are the Ollama model names used by both the running stack and the
# bootstrap target. Override on the command line to switch models without
# editing any file:
#
# make ollama-bootstrap GENERATOR_MODEL=llama3.2:latest
# make up GENERATOR_MODEL=llama3.2:latest
#
# The values are exported so podman compose inherits them as environment
# variables, and docker-compose.yml references them as ${GENERATOR_MODEL} /
# ${EMBED_MODEL} (with the same defaults as fallback for direct compose runs).
# Default Ollama model names. `?=` only assigns when the variable is not
# already set, so a command-line or environment value takes precedence.
GENERATOR_MODEL ?= gemma4-26b-q4xl:latest
EMBED_MODEL ?= nomic-embed-text

# Export both so that commands started from recipes inherit them.
export GENERATOR_MODEL
export EMBED_MODEL
# Command-style targets (none of them names a file to build), grouped by area.
# Several .PHONY lines accumulate into one list.
.PHONY: up down restart ps logs logs-watcher shell check
.PHONY: machine-start machine-init
.PHONY: ollama-bootstrap ollama-status
.PHONY: reindex reindex-scan reindex-files reindex-status
.PHONY: retrieve retrieve-dated parse-dates ask ask-stream chat
.PHONY: test-install test
# Build the images and start the compose stack in the background.
up:
	podman compose --file docker-compose.yml up --detach --build
# Tear down the compose stack described by docker-compose.yml.
down:
	podman compose --file docker-compose.yml down
# Follow the log output of the markdown-rag container.
logs:
	podman logs --follow markdown-rag
# Follow the log output of the markdown-rag-watcher container.
logs-watcher:
	podman logs --follow markdown-rag-watcher
# Pull the generator and embedding models with the Ollama CLI on the host.
#
# The recipe first verifies that host Ollama answers on localhost:11434 and
# aborts with a hint (on stderr) when it does not. 'ollama pull' is idempotent:
# it checks the local digest against the registry and skips the download if
# the model is already current, so this target is safe to re-run at any time.
#
# These notes are Make comments placed outside the recipe, so they are neither
# echoed nor handed to the shell. The model names are quoted so that an
# overridden value cannot be word-split by the shell.
ollama-bootstrap:
	@curl -sf http://localhost:11434/api/version >/dev/null || \
		{ echo "ERROR: Ollama not reachable at localhost:11434." >&2; \
		  echo " Start it with: ollama serve" >&2; exit 1; }
	@echo "Pulling generator model: $(GENERATOR_MODEL)"
	ollama pull "$(GENERATOR_MODEL)"
	@echo "Pulling embed model: $(EMBED_MODEL)"
	ollama pull "$(EMBED_MODEL)"
	@echo "Bootstrap complete. Run 'make ollama-status' to verify."
# Show the host Ollama version, the models it has pulled, and the model names
# this stack is configured to use.
#
# The version JSON is fetched first and only handed to Python once curl has
# succeeded. An unreachable server therefore produces the one-line error below
# instead of a JSON-decoding traceback caused by an empty pipe, and a Python
# failure is no longer misreported as "not reachable".
#
# The required model names are printed for manual comparison with the
# `ollama list` output; nothing is checked automatically.
ollama-status:
	@version_json=$$(curl -sf http://localhost:11434/api/version) \
		|| { echo "ERROR: Ollama not reachable at localhost:11434" >&2; exit 1; }; \
	printf '%s' "$$version_json" \
		| python3 -c "import sys,json; print('Ollama', json.load(sys.stdin).get('version','?'))"
	@echo ""
	@echo "Pulled models:"
	@ollama list
	@echo ""
	@echo "Required by this stack (Makefile defaults, override with make var):"
	@echo " GENERATOR_MODEL = $(GENERATOR_MODEL)"
	@echo " EMBED_MODEL = $(EMBED_MODEL)"
# POST to the /reindex endpoint on localhost:8000 and pretty-print the reply.
reindex:
	curl --silent --request POST http://localhost:8000/reindex | jq '.'
# POST to the /reindex/scan endpoint on localhost:8000 and pretty-print the reply.
reindex-scan:
	curl --silent --request POST http://localhost:8000/reindex/scan | jq '.'
# Prompt for a comma-separated list of files (relative to the vault), turn it
# into {"files": [...]} with surrounding whitespace trimmed from each entry,
# and POST it to /reindex/files.
#
# - The end-of-string anchor in the jq regex is written `$$`: a single `$`
#   followed by `"` would be expanded by make as an (empty) variable and
#   corrupt the jq program.
# - `printf` + `read -r` replaces `read -p`, which is a bash extension and is
#   rejected by other /bin/sh implementations; `-r` keeps backslashes literal.
# - Empty input is rejected up front instead of posting an empty body.
reindex-files:
	@printf 'Comma-separated files (relative to vault): '; \
	read -r FILES; \
	[ -n "$$FILES" ] || { echo "ERROR: no files given." >&2; exit 1; }; \
	JSON=$$(printf '%s' "$$FILES" | jq -R 'split(",") | map(gsub("^\\s+|\\s+$$"; "")) | {files: .}'); \
	curl -s -X POST http://localhost:8000/reindex/files \
		-H "Content-Type: application/json" -d "$$JSON" | jq .
# GET the /reindex/status endpoint on localhost:8000 and pretty-print the reply.
reindex-status:
	curl --silent --request GET http://localhost:8000/reindex/status | jq '.'
# Prompt for a query and GET /retrieve with q=<query> and k=5, pretty-printing
# the JSON reply. curl URL-encodes both parameters.
#
# The prompt uses `printf` + `read -r` rather than `read -p`: `-p` is a bash
# extension that other /bin/sh implementations reject, and `-r` stops the
# shell from interpreting backslashes typed into the query.
retrieve:
	@printf 'Query: '; \
	read -r Q; \
	curl -s -G "http://localhost:8000/retrieve" \
		--data-urlencode "q=$$Q" \
		--data-urlencode "k=5" | jq .
# Prompt for a query and GET /retrieve/dated with q=<query> and k=5,
# pretty-printing the JSON reply. curl URL-encodes both parameters.
#
# The prompt uses `printf` + `read -r` rather than `read -p`: `-p` is a bash
# extension that other /bin/sh implementations reject, and `-r` stops the
# shell from interpreting backslashes typed into the query.
retrieve-dated:
	@printf 'Query: '; \
	read -r Q; \
	curl -s -G "http://localhost:8000/retrieve/dated" \
		--data-urlencode "q=$$Q" \
		--data-urlencode "k=5" | jq .
# Prompt for a query and GET /utils/parse-dates with q=<query>, pretty-printing
# the JSON reply. curl URL-encodes the parameter.
#
# The prompt uses `printf` + `read -r` rather than `read -p`: `-p` is a bash
# extension that other /bin/sh implementations reject, and `-r` stops the
# shell from interpreting backslashes typed into the query.
parse-dates:
	@printf 'Query: '; \
	read -r Q; \
	curl -s -G "http://localhost:8000/utils/parse-dates" \
		--data-urlencode "q=$$Q" | jq .
# Prompt for a question, POST it to /query as {"question": ...} and print the
# `.answer` field of the reply as raw text.
#
# jq builds the request body so the question is JSON-escaped correctly; `$$q`
# reaches jq as its own `$q` variable. The prompt uses `printf` + `read -r`
# rather than `read -p`: `-p` is a bash extension that other /bin/sh
# implementations reject, and `-r` keeps typed backslashes literal.
ask:
	@printf 'Q: '; \
	read -r Q; \
	curl -s -X POST http://localhost:8000/query \
		-H "Content-Type: application/json" \
		-d "$$(jq -n --arg q "$$Q" '{question:$$q}')" | jq -r '.answer'
# Prompt for a question and POST it to /query/stream as {"question": ...},
# printing the response unbuffered as it arrives; the trailing `echo` ends the
# output with a newline.
#
# jq builds the request body so the question is JSON-escaped correctly; `$$q`
# reaches jq as its own `$q` variable. The prompt uses `printf` + `read -r`
# rather than `read -p`: `-p` is a bash extension that other /bin/sh
# implementations reject, and `-r` keeps typed backslashes literal.
ask-stream:
	@printf 'Q: '; \
	read -r Q; \
	curl --no-buffer -s -X POST http://localhost:8000/query/stream \
		-H "Content-Type: application/json" \
		-d "$$(jq -n --arg q "$$Q" '{question:$$q}')" ; echo
# Run chat.py with the interpreter from the local .venv virtualenv.
chat:
	.venv/bin/python ./chat.py
# Open an interactive bash session inside the markdown-rag container.
shell:
	podman exec --interactive --tty markdown-rag bash
# Print the podman compose and podman versions. Each command is followed by
# `|| true` so that a failing command does not abort the target.
check:
	podman compose version || true
	podman version || true
# List the containers of the compose stack described by docker-compose.yml.
ps:
	podman compose --file docker-compose.yml ps
# Restart the services of the compose stack described by docker-compose.yml.
restart:
	podman compose --file docker-compose.yml restart
# Start the podman machine; no name is given, so podman chooses which one.
machine-start:
	podman machine start
# Create a podman machine with 4 CPUs, memory 8192 and disk size 50.
# NOTE(review): units are presumably MiB for memory and GiB for disk — confirm
# against the installed podman's `machine init` help.
machine-init:
	podman machine init \
		--cpus 4 \
		--memory 8192 \
		--disk-size 50
# ── unit tests ────────────────────────────────────────────────────────────────
# Tests run locally (outside the container) against app/ source code.
# chromadb/spacy/langchain_ollama are stubbed in conftest.py because their
# pydantic-v1 native extensions are incompatible with the host Python.
# Create the local .venv virtualenv, upgrade its pip, then install the pinned
# test dependencies listed in requirements-dev.lock.
test-install:
	python3 -m venv .venv
	.venv/bin/pip install --quiet --upgrade pip
	.venv/bin/pip install --quiet --requirement requirements-dev.lock
# Run the full suite under tests/ with the .venv interpreter, measuring
# coverage of app/ and printing the term-missing coverage report.
test:
	.venv/bin/python -m pytest tests/ \
		--cov=app \
		--cov-report=term-missing