-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathscript.nmd.sh
More file actions
executable file
·321 lines (291 loc) · 9.41 KB
/
script.nmd.sh
File metadata and controls
executable file
·321 lines (291 loc) · 9.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
#!/usr/bin/env bash
# Strict mode: abort on unhandled errors (-e), on unset variables (-u) and on a
# failure in any stage of a pipeline (pipefail).
set -euo pipefail
# TODO: focused on docker task driver: will likely need exec in the near future
######################## SETUP
DOCS_URI='https://github.com/nirv-ai/docs/blob/main/nomad/README.md'
# Absolute directory containing this script, independent of the caller's cwd.
SCRIPTS_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]%/}")" &>/dev/null && pwd)"
# Quoted so a checkout path containing whitespace is not word-split.
SCRIPTS_DIR_PARENT="$(dirname -- "$SCRIPTS_DIR")"
# PLATFORM UTILS
# Every *.sh file under utils/ is sourced into this shell. If the glob matches
# nothing, the literal pattern is passed to `source`, which fails and (because
# of `set -e`) stops the script early.
for util in "$SCRIPTS_DIR"/utils/*.sh; do
  # shellcheck source=/dev/null
  source "$util"
done
######################## INTERFACE
# Statements are ordered so that every variable is assigned before anything
# that is derived from it. With `set -u` active, any referenced variable that
# is still unset at this point aborts the script.

# Caller-overridable location (an existing non-empty value wins).
: "${APP_IAC_NOMAD_DIR:=${APP_IAC_PATH}/nomad}"

# TLS material handed to the nomad CLI through its environment; each one falls
# back to a file under ${CERTS_DIR_HOST}/${MAD_HOSTNAME} when not preset.
: "${NOMAD_CACERT:=${CERTS_DIR_HOST}/${MAD_HOSTNAME}/ca.pem}"
: "${NOMAD_CLIENT_CERT:=${CERTS_DIR_HOST}/${MAD_HOSTNAME}/cli-0.pem}"
: "${NOMAD_CLIENT_KEY:=${CERTS_DIR_HOST}/${MAD_HOSTNAME}/cli-0-key.pem}"
export NOMAD_CACERT NOMAD_CLIENT_CERT NOMAD_CLIENT_KEY

# Secrets live under ${JAIL}; configuration sources live under ${CONFIGS_DIR}.
JAIL_MAD_KEYS="${JAIL}/nomad/keys"
JAIL_MAD_TOKENS="${JAIL}/nomad/tokens"
NOMAD_CONF_CLIENT="${CONFIGS_DIR}/nomad/client"
NOMAD_CONF_GLOBALS="${CONFIGS_DIR}/nomad/global"
NOMAD_CONF_SERVER="${CONFIGS_DIR}/nomad/server"
NOMAD_CONF_STACKS="${CONFIGS_DIR}/nomad/stacks"
NOMAD_GOSSIP_FILENAME='server.gossip.key'
: "${NOMAD_SERVER_PORT:=4646}"
NOMAD_DATA_DIR_BASE=/tmp/nomad

# Address the nomad CLI talks to; derived from the hostname and port above.
: "${NOMAD_ADDR:=https://${MAD_HOSTNAME}:${NOMAD_SERVER_PORT}}"
export NOMAD_ADDR
JAIL_KEY_GOSSIP="${JAIL_MAD_KEYS}/${NOMAD_GOSSIP_FILENAME}"

# Name -> value snapshot of the variables that should be printed when
# NIRV_SCRIPT_DEBUG=1 (presumably consumed by echo_debug_interface, which is
# defined outside this file section). Add a name to the list to expose it.
declare -A EFFECTIVE_INTERFACE=()
for iface_var in \
  APP_IAC_PATH \
  DOCS_URI \
  JAIL_KEY_GOSSIP \
  NOMAD_ADDR \
  NOMAD_CACERT \
  NOMAD_CLIENT_CERT \
  NOMAD_CLIENT_KEY \
  NOMAD_CONF_CLIENT \
  NOMAD_CONF_GLOBALS \
  NOMAD_CONF_SERVER \
  NOMAD_CONF_STACKS \
  SCRIPTS_DIR_PARENT \
  NOMAD_DATA_DIR_BASE; do
  # ${!iface_var} reads the variable whose name is stored in iface_var.
  EFFECTIVE_INTERFACE[$iface_var]=${!iface_var}
done
unset iface_var
######################## CREDIT CHECK
# Pre-flight checks that run on every invocation, before any command is
# dispatched. The helpers used here are not defined in this file (presumably
# they come from the sourced utils/*.sh); each throw_* call receives a path,
# an HTTP-style status code and a message.
echo_debug_interface
# add additional checks and balances below this line
# use standard http response codes
# Paths are quoted so a path containing whitespace stays a single argument and
# does not shift the status code / message arguments.
throw_missing_dir "$SCRIPTS_DIR_PARENT" 500 "somethings wrong: cant find myself in filesystem"
throw_missing_file "$NOMAD_CACERT" 400 "all cmds require cert auth pem"
throw_missing_file "$NOMAD_CLIENT_CERT" 400 "all cmds require cli pem"
throw_missing_file "$NOMAD_CLIENT_KEY" 400 "all cmds require cli key pem"
######################## FNS
# Asks for sudo and then kills the service named "nomad".
# Best effort: a failing kill is ignored, so the function still succeeds.
# Depends on request_sudo and kill_service_by_name (shell-init/services.sh).
kill_nomad_service() {
  # TODO: this doesnt seem to kill the client which is weird
  request_sudo 'kill service with name nomad'
  if ! kill_service_by_name nomad; then
    : # nothing to do: a failed kill is deliberately not treated as an error
  fi
}
# Copies the nomad configuration sources into the IAC directory tree.
#
# Globals read: CONFIGS_DIR, NOMAD_CONF_CLIENT, NOMAD_CONF_GLOBALS,
#               NOMAD_CONF_SERVER, NOMAD_CONF_STACKS, APP_IAC_NOMAD_DIR
# Steps:
#   1. run use_hashi_fmt over ${CONFIGS_DIR}/nomad
#   2. server: copy globals + server confs into ${APP_IAC_NOMAD_DIR}/server
#   3. client: copy client + globals confs into ${APP_IAC_NOMAD_DIR}/client
#   4. copy the stacks into ${APP_IAC_NOMAD_DIR}/stacks
# After steps 2 and 3 the target dir is passed to `nomad config validate`;
# a validation failure is reported by nomad but does not stop the sync.
# All paths are quoted so directories containing whitespace work, and the loop
# variables are local so they no longer leak into the caller's scope.
sync_local_configs() {
  use_hashi_fmt "${CONFIGS_DIR}/nomad" nomad

  local agent_kind conf_src iac_dir
  local -a conf_sources
  for agent_kind in server client; do
    # copy order per agent kind matches the original lists
    case $agent_kind in
      server) conf_sources=("$NOMAD_CONF_GLOBALS" "$NOMAD_CONF_SERVER") ;;
      client) conf_sources=("$NOMAD_CONF_CLIENT" "$NOMAD_CONF_GLOBALS") ;;
    esac

    echo_debug "syncing nomad $agent_kind confs"
    iac_dir="${APP_IAC_NOMAD_DIR}/${agent_kind}"
    mkdir -p -- "$iac_dir"
    for conf_src in "${conf_sources[@]}"; do
      cp_to_dir "$conf_src" "$iac_dir"
    done

    echo_info "validating confs: $iac_dir"
    # validation is advisory only: keep going even when nomad rejects the confs
    nomad config validate "$iac_dir" || true
  done

  echo_debug 'copying nomad stacks'
  local iac_stacks_dir="${APP_IAC_NOMAD_DIR}/stacks"
  mkdir -p -- "$iac_stacks_dir"
  cp_to_dir "$NOMAD_CONF_STACKS" "$iac_stacks_dir"
}
# Generates a gossip encryption key with
# `nomad operator gossip keyring generate` and stores it in $JAIL_KEY_GOSSIP.
#
# Globals read: JAIL_MAD_KEYS (created if missing), JAIL_KEY_GOSSIP
# Returns: non-zero when the key could not be generated or written.
#
# The key is captured first and written afterwards, so a failing nomad call
# no longer truncates a key file that already exists. The redirect target is
# quoted because an unquoted path with whitespace is an "ambiguous redirect".
create_gossip_key() {
  echo_debug 'creating gossip key'
  mkdir -p -- "$JAIL_MAD_KEYS"

  local gossip_key
  gossip_key=$(nomad operator gossip keyring generate) || return
  printf '%s\n' "$gossip_key" >"$JAIL_KEY_GOSSIP"
}
# Scaffolds a new stack (nomad job file) and moves it into the stack configs.
#
# Arguments: $1 - stack name (required); the file is named <name>.nomad
# Globals read: NOMAD_CONF_STACKS
# Steps:
#   1. `nomad job init -short <name>.nomad` in the current directory
#   2. replace the line matching `job "example"` with `job "<name>" {`
#      (uses GNU sed in-place editing and the one-line `c\` form)
#   3. move the file to ${NOMAD_CONF_STACKS}/<name>.nomad
#   4. run sync_local_configs
# `name` is local so it does not overwrite a caller's variable, and the file
# operands are quoted so paths containing whitespace work.
create_new_stack() {
  local name=${1:?stack name required}
  local stack_file="${name}.nomad"

  echo_debug "creating new stack $stack_file"
  nomad job init -short "$stack_file"

  echo_debug "updating stack name in $stack_file"
  sed -i -e "/job \"example\"/c\job \"$name\" {" -- "$stack_file"

  echo_debug "moving stack $stack_file to configs"
  mv -- "$stack_file" "${NOMAD_CONF_STACKS}/${stack_file}"

  echo_debug "syncing nomad configs"
  sync_local_configs
}
# Runs `nomad plan` for a stack so the caller can review the changes and
# obtain the index number needed by `run <name> <index>`.
#
# Arguments: $1 - stack name (required)
# Globals read: NOMAD_CONF_STACKS, SCRIPTS_DIR_PARENT
# Files used:
#   ${NOMAD_CONF_STACKS}/<name>.nomad               - the job file
#   ${SCRIPTS_DIR_PARENT}/<name>/.env.compose.json  - passed as -var-file
# Both files are checked with throw_missing_file (status 404) first.
# Variables are local and every path is quoted, so whitespace in a path no
# longer splits it into several arguments.
get_stack_plan() {
  local name=${1:?stack name required}
  # TODO: this should be APP_IAC_PATH or whatever when working
  local stack_file="${NOMAD_CONF_STACKS}/${name}.nomad"
  local env_file="${SCRIPTS_DIR_PARENT}/${name}/.env.compose.json"

  throw_missing_file "$stack_file" 404 'stack file doesnt exist'
  throw_missing_file "$env_file" 404 'env file doesnt exist'

  echo_debug "creating job plan for $name"
  echo_info "execute this plan: run $name indexNumber"
  nomad plan -var-file="$env_file" "$stack_file"
}
# Submits a stack with `nomad job run -check-index <index>`.
#
# Arguments: $1 - stack name (required)
#            $2 - index passed to -check-index (required)
# Globals read: NOMAD_CONF_STACKS, SCRIPTS_DIR_PARENT
# Files used:
#   ${NOMAD_CONF_STACKS}/<name>.nomad               - the job file
#   ${SCRIPTS_DIR_PARENT}/<name>/.env.compose.json  - passed as -var-file
# Both files are checked with throw_missing_file (status 404) first.
# Variables are local and every expansion is quoted, so whitespace in a path
# no longer splits it into several arguments.
run_stack() {
  local name=${1:?stack name required}
  local index=${2:?index required}
  # TODO: this should be APP_IAC_PATH or whatever when working
  local stack_file="${NOMAD_CONF_STACKS}/${name}.nomad"
  local env_file="${SCRIPTS_DIR_PARENT}/${name}/.env.compose.json"

  throw_missing_file "$stack_file" 404 'stack file doesnt exist'
  throw_missing_file "$env_file" 404 'env file doesnt exist'

  echo_debug "running stack $name at index $index"
  echo_debug '\t job failures? get the allocation id from the job status'
  echo_debug '\t execute: get status job jobName'
  echo_debug '\t execute: get status loc allocId\n\n'
  nomad job run -check-index "$index" -var-file="$env_file" "$stack_file"
}
# Starts one or more nomad agents in the background through `sudo -b`.
#
# Arguments: $1 - agent type: server | client (required)
#            $2 - number of agents to start (default 1)
# Globals read: APP_IAC_NOMAD_DIR, NOMAD_DATA_DIR_BASE, JAIL_KEY_GOSSIP
# Per agent N (0-based):
#   config dir : ${APP_IAC_NOMAD_DIR}/<type>
#   data dir   : ${NOMAD_DATA_DIR_BASE}/sN (server) or /cN (client)
#   node name  : <type>-sN.<hostname> or <type>-cN.<hostname>
# Servers additionally get -bootstrap-expect=<total> and -encrypt=<gossip key>.
# Any other type is handed to invalid_request.
# Returns: non-zero when the gossip key cannot be read (server only) or when
#          launching an agent fails.
#
# The gossip key is read once, up front: a failing `$(cat ...)` inside the
# argument list would not stop the script and would start the server with an
# empty -encrypt value. All variables are local and all paths are quoted.
start_agent() {
  local type=${1:?either server|client agent must be specified}
  local total=${2:-1}
  local conf_dir="${APP_IAC_NOMAD_DIR}/${type}"
  throw_missing_dir "$conf_dir" 400 "$conf_dir doesnt exist"
  mkdir -p -- "$NOMAD_DATA_DIR_BASE"
  # request_sudo "chowning $NOMAD_DATA_DIR_BASE"
  # sudo chown -R nomad:nomad $NOMAD_DATA_DIR_BASE

  local host name gossip_key
  local -i i=0
  # TODO: we need to add -dev-connect
  case $type in
    server)
      gossip_key=$(cat -- "$JAIL_KEY_GOSSIP") || return
      host=$(hostname)
      request_sudo "starting $total nomad $type agent(s)"
      while [ "$i" -lt "$total" ]; do
        name="s${i}"
        sudo -b nomad agent \
          -bootstrap-expect="$total" \
          -config="$conf_dir" \
          -data-dir="${NOMAD_DATA_DIR_BASE}/${name}" \
          -encrypt="$gossip_key" \
          -node="${type}-${name}.${host}" \
          -server
        i=$((i + 1))
      done
      ;;
    client)
      host=$(hostname)
      request_sudo "starting $total nomad $type agent(s)"
      while [ "$i" -lt "$total" ]; do
        name="c${i}"
        sudo -b nomad agent \
          -client \
          -config="$conf_dir" \
          -data-dir="${NOMAD_DATA_DIR_BASE}/${name}" \
          -node="${type}-${name}.${host}"
        i=$((i + 1))
      done
      ;;
    *) invalid_request ;;
  esac
}
######################## EXECUTE
# nomad alloc fs locId [dirName]
# nomad alloc exec
# nomad acl policy apply
# nomad operator autopilot get-config
# nomad job history -p job_name # todo
# add this: https://github.com/hashicorp/damon

# Command dispatcher; receives the script's positional arguments unchanged.
#
#   sync-confs                    copy + validate configs (sync_local_configs)
#   kill                          kill the nomad service
#   gc                            nomad system gc
#   start <server|client>         start an agent
#   create gossipkey              create the gossip key
#   create stack <name>           scaffold a new stack
#   create plan <name>            plan a stack
#   get self|server               agent info / server members
#   get service|client|stack|loc|eval|dep [id]
#                                 list everything, or show the given id/name
#   get logs <task> <allocId>     follow the logs of a task in an allocation
#   run <name> <index>            run a stack at the given index
#   rm <name>                     stop + purge a job (failure is ignored)
#   stop <name>                   stop a job
#   dockerlogs | dockerlogs-kill  follow all container logs / stop following
# Anything else is handed to invalid_request.
#
# Variables are local to the dispatcher; the `get` branch switches on the
# defaulted $cmdname instead of the raw $2, so `get` without a subcommand
# reaches invalid_request rather than dying on an unbound variable (set -u).
nmd_main() {
  local cmd=${1:-}
  case $cmd in
    sync-confs) sync_local_configs ;;
    kill) kill_nomad_service ;;
    gc) nomad system gc ;;
    start) start_agent "${2:?agent type server|client required}" ;;
    create)
      local what=${2:-}
      case $what in
        gossipkey) create_gossip_key ;;
        stack) create_new_stack "${3:?stack name required}" ;;
        plan) get_stack_plan "${3:?stack name required}" ;;
        *) invalid_request ;;
      esac
      ;;
    get)
      local cmdname=${2:-}
      # optional third argument: the id/name to narrow the query to
      local target=${3:-}
      case $cmdname in
        self) nomad agent-info -json ;;
        service)
          if [[ -z $target ]]; then
            nomad service list -json
          else
            nomad service info -verbose -json "$target"
          fi
          ;;
        server) nomad server members -verbose ;;
        client)
          if [[ -z $target ]]; then
            echo_debug 'retrieving client(s) status'
            nomad node status -verbose -json
          else
            # $target can be -self if executed on a client agent
            echo_debug "retrieving status for client $target"
            nomad node status -verbose -json "$target"
          fi
          ;;
        stack)
          if [[ -z $target ]]; then
            nomad status -verbose
          else
            nomad job status -verbose "$target"
          fi
          ;;
        loc)
          if [[ -z $target ]]; then
            nomad alloc status -json
          else
            nomad alloc status -verbose -stats -json "$target"
          fi
          ;;
        eval)
          if [[ -z $target ]]; then
            nomad eval list -json
          else
            nomad eval status -verbose -json "$target"
          fi
          ;;
        dep)
          if [[ -z $target ]]; then
            nomad deployment list -verbose -json
          else
            nomad deployment status -verbose -json "$target"
          fi
          ;;
        logs)
          local task_name=${3:?task name required}
          local alloc_id=${4:?allocation id required}
          echo_debug "fetching logs for task $task_name in allocation $alloc_id"
          nomad alloc logs -f "$alloc_id" "$task_name"
          ;;
        *) invalid_request ;;
      esac
      ;;
    run) run_stack "${2:?stack name required}" "${3:?job index required}" ;;
    rm) # this purges the job, but doesnt stop the running containers
      local rm_name=${2:?stack name required}
      nomad job stop -purge -yes "$rm_name" || true
      ;;
    stop)
      local stop_name=${2:?stack name is required}
      printf 'stopping job %s\n' "$stop_name"
      nomad job stop "$stop_name"
      ;;
    # TODO: move these to the dockerlogs.sh file
    dockerlogs)
      # @see https://stackoverflow.com/questions/36756751/view-logs-for-all-docker-containers-simultaneously
      echo_debug 'following logs for all running containers'
      mkdir -p /tmp/dockerlogs
      local c
      # word splitting of the name list is intended: one container per word
      for c in $(docker ps -a --format="{{.Names}}"); do
        docker logs -f "$c" >"/tmp/dockerlogs/$c.log" 2>"/tmp/dockerlogs/$c.err" &
        # remember the follower's pid so dockerlogs-kill can stop it later
        echo "$!" >"/tmp/dockerlogs/$c.pid"
      done
      tail -f /tmp/dockerlogs/*.{log,err}
      ;;
    dockerlogs-kill)
      local pidfile this_pid
      for pidfile in /tmp/dockerlogs/*.pid; do
        # an unmatched glob yields the literal pattern: skip non-files
        [[ -f $pidfile ]] || continue
        this_pid=$(<"$pidfile")
        echo_info "killing docker -f: pid $this_pid"
        kill -9 "$this_pid" || true
      done
      # -f: an empty or missing log dir is not an error
      rm -f -- /tmp/dockerlogs/*
      ;;
    *) invalid_request ;;
  esac
}

nmd_main "$@"