-
Notifications
You must be signed in to change notification settings - Fork 21
Expand file tree
/
Copy pathsubmit.yml.erb
More file actions
153 lines (146 loc) · 4.45 KB
/
submit.yml.erb
File metadata and controls
153 lines (146 loc) · 4.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
<%-
  # Extra Slurm flags contributed by each node_type choice. The gpu-* and any-*
  # variants share the same core-count constraint; node types not listed here
  # contribute nothing.
  node_type_slurm_args = {
    "gpu-40core" => ["--constraint", "40core"],
    "gpu-48core" => ["--constraint", "48core"],
    "any-40core" => ["--constraint", "40core"],
    "any-48core" => ["--constraint", "48core"],
    "hugemem"    => ["--partition", "hugemem"],
    "largemem"   => ["--partition", "largemem"],
    "debug"      => ["--partition", "debug"],
  }

  # Every job asks for one node with num_cores tasks on it.
  base_slurm_args = ["--nodes", "1", "--ntasks-per-node", "#{num_cores.to_i}"]
  base_slurm_args += node_type_slurm_args.fetch(node_type, [])

  # Final argument list rendered under script.native for Slurm clusters.
  slurm_args = base_slurm_args
-%>
---
# Batch Connect settings emitted for every cluster type.
batch_connect:
  template: "basic"
  conn_params:
    - csrf_token
<% if cluster !~ /kubernetes/ -%>
# Non-Kubernetes clusters: pass the precomputed Slurm arguments through as
# native scheduler flags.
script:
  <%- if node_type =~ /gpu/ -%>
  # Node types matching /gpu/ additionally request one GPU per node.
  gpus_per_node: 1
  <%- end -%>
  native:
    <%- slurm_args.each do |arg| -%>
    - "<%= arg %>"
    <%- end -%>
<% elsif cluster =~ /kubernetes/
   # Per-cluster values interpolated into the Kubernetes pod spec that follows.
   if node_type =~ /pitzer/
     compute_cluster = "pitzer"
     apps_path = "/apps"
     # Memory per core with hyperthreading enabled
     memory_mb = num_cores.to_i * 4000
   else
     # Fail loudly instead of rendering a broken pod spec: without this branch
     # compute_cluster, apps_path and memory_mb would all be nil, producing an
     # empty cluster name, a bare "Mi" memory request and "/apps//..." paths.
     raise ArgumentError, "unsupported node_type #{node_type.inspect} for Kubernetes cluster #{cluster.inspect}"
   end
   # Mount name => host path. Each entry is mounted into the container at the
   # same path it has on the host.
   mounts = {
     'home'    => OodSupport::User.new.home,
     'support' => OodSupport::User.new('support').home,
     'project' => '/fs/project',
     'scratch' => '/fs/scratch',
     'ess'     => '/fs/ess',
   }
-%>
# Kubernetes clusters: describe the pod that runs the staged job script.
script:
  # Requested hours (bc_num_hours) converted to seconds.
  wall_time: "<%= bc_num_hours.to_i * 3600 %>"
  <%- if node_type =~ /gpu/ -%>
  # Node types matching /gpu/ additionally request one GPU per node.
  gpus_per_node: 1
  <%- end -%>
  native:
    container:
      name: "rstudio-server"
      image: "docker-registry.osc.edu/ondemand/ondemand-base-rhel7:0.11.1"
      image_pull_policy: "IfNotPresent"
      # Run the staged job script through a bash login shell.
      command: ["/bin/bash","-l","<%= staged_root %>/job_script_content.sh"]
      restart_policy: 'OnFailure'
      env:
        CLUSTER: "<%= compute_cluster %>"
        KUBECONFIG: "/dev/null"
        TMPDIR: "/tmp"
      labels:
        osc.edu/cluster: "<%= compute_cluster %>"
      port: "8080"
      cpu: "<%= num_cores %>"
      memory: "<%= memory_mb %>Mi"
    configmap:
      files:
        # RStudio logging: debug-level file logs written under the staged
        # root's logs directory.
        - filename: "logging.conf"
          data: |
            [*]
            log-level=debug
            logger-type=file
            log-dir=<%= Pathname.new(staged_root).join('logs') %>
          mount_path: '/etc/rstudio/logging.conf'
          sub_path: 'logging.conf'
        # RStudio database location.
        - filename: "database.conf"
          data: |
            directory=/tmp/lib/rstudio-server
          mount_path: '/etc/rstudio/database.conf'
          sub_path: 'database.conf'
        # Helper script: stores base64($2) under key $1 in the secret named
        # "<hostname>-secret". All expansions are quoted and the base64 output
        # is joined onto one line so long values or values containing
        # whitespace are not word-split before reaching jq.
        # NOTE(review): `set -x` traces every command, including the expanded
        # value of $2 - confirm that is acceptable if the value is sensitive.
        - filename: 'k8_helper'
          data: |
            #!/usr/bin/env bash
            set -x
            KEY="$1"
            VALUE="$(printf '%s' "$2" | base64 | tr -d '\n')"
            CFG="$(hostname)-secret"
            kubectl get secret "${CFG}" -o json | jq --arg key "$KEY" --arg value "$VALUE" '.data[$key] = $value' | kubectl apply -f -
          mount_path: '/opt/open_ondemand/helpers'
    mounts:
      # One host-directory mount per entry of the mounts hash, exposed at the
      # same path inside the container. Values are quoted so an empty or
      # unusual expansion still renders as a string.
      <%- mounts.each_pair do |name, mount| -%>
      - type: host
        name: "<%= name %>"
        host_type: Directory
        path: "<%= mount %>"
        destination_path: "<%= mount %>"
      <%- end -%>
      # Fixed host mounts: munge socket, Slurm config, SSSD and nsswitch files.
      - type: host
        name: munge-socket
        host_type: Socket
        path: /var/run/munge/munge.socket.2
        destination_path: /var/run/munge/munge.socket.2
      - type: host
        name: slurm-conf
        host_type: Directory
        path: /etc/slurm
        destination_path: /etc/slurm
      - type: host
        name: sssd-pipes
        host_type: Directory
        path: /var/lib/sss/pipes
        destination_path: /var/lib/sss/pipes
      - type: host
        name: sssd-conf
        host_type: Directory
        path: /etc/sssd
        destination_path: /etc/sssd
      - type: host
        name: nsswitch
        host_type: File
        path: /etc/nsswitch.conf
        destination_path: /etc/nsswitch.conf
      # Cluster-specific software: the cluster's lmod.sh becomes a profile.d
      # script, and its Intel and apps trees are mounted from the host.
      - type: host
        name: lmod-init
        host_type: File
        path: "/apps/<%= compute_cluster %>/lmod/lmod.sh"
        destination_path: /etc/profile.d/lmod.sh
      - type: host
        name: intel
        host_type: Directory
        path: "/nfsroot/<%= compute_cluster %>/opt/intel"
        destination_path: /opt/intel
      - type: host
        name: apps
        host_type: Directory
        path: "/apps/<%= compute_cluster %>"
        destination_path: "<%= apps_path %>"
    node_selector:
      node-role.kubernetes.io/ondemand: ''
<% end -%>