-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprinceQoS.lua
More file actions
150 lines (119 loc) · 3.12 KB
/
princeQoS.lua
File metadata and controls
150 lines (119 loc) · 3.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/bin/env lua
local princeQoS = { }
local princeUtils = require "princeUtils"
local princeStakeholders = require "princeStakeholders"
local two_days = princeUtils.two_days
local seven_days = princeUtils.seven_days
local unlimited_time = princeUtils.unlimited_time
local slurm_log = princeUtils.slurm_log
local user_log = princeUtils.user_log
-- Known QoS definitions, keyed by QoS name.
--
-- Fields of each entry, as consumed by qos_is_valid():
--   time_min, time_max  a job passes the time check only when
--                       time_min < time_limit <= time_max
--                       (the lower bound is exclusive, the upper inclusive).
--   users (optional)    when present, the current user_netid must be found in
--                       this list by princeUtils.in_table(); entries without
--                       a `users` field skip that check.
--
-- Each key appears exactly once: a repeated key in a table constructor
-- silently discards the earlier value.
local qos_all = {
  cpu48 = {
    time_min = 0,
    time_max = two_days
  },
  cpu168 = {
    time_min = two_days,
    time_max = seven_days
  },
  gpu48 = {
    time_min = 0,
    time_max = two_days
  },
  gpu168 = {
    time_min = two_days,
    time_max = seven_days
  },

  -- special QoS with user access control
  cpuplus = {
    time_min = 0,
    time_max = seven_days,
    users = { "rg187", "ll1488" }
  },
  cpu365 = {
    time_min = seven_days,
    time_max = unlimited_time,
    users = princeStakeholders.users_with_unlimited_wall_time
  },
  gpuplus = {
    time_min = 0,
    time_max = seven_days,
    users = { "wang" }
  },
  knl = {
    time_min = 0,
    time_max = seven_days,
    -- NOTE(review): empty list; presumably nobody is authorized for this QoS
    -- (depends on how princeUtils.in_table treats an empty table) - confirm.
    users = { }
  },
  sysadm = {
    time_min = 0,
    time_max = seven_days,
    users = { "wang", "hpcadmin" }
  }
}
-- Module-level job state. setup_parameters() overwrites all three values;
-- assign_qos() and qos_is_valid() read them instead of taking arguments.

-- Job time limit; qos_is_valid() compares it with a QoS's time_min/time_max
-- and reports it to the user as minutes.
local time_limit = 0
-- Identifier looked up in a QoS's `users` list by qos_is_valid(); nil until
-- setup_parameters() provides one.
local user_netid = nil
-- When truthy, assign_qos() picks a "gpu*" QoS and qos_is_valid() requires a
-- QoS name starting with "gpu"; otherwise "cpu" is used.
local gpu_job = false
--- Choose a default QoS name from the current module state.
-- Reads the module-level `time_limit` and `gpu_job` values; takes no
-- arguments. The thresholds are looked up on princeUtils at call time.
-- @treturn string|nil "gpu48"/"cpu48" when time_limit <= princeUtils.two_days,
--   "gpu168"/"cpu168" when time_limit <= princeUtils.seven_days, and nil when
--   the time limit exceeds both thresholds.
local function assign_qos()
  -- The QoS name is "<device prefix><hour bucket>".
  local prefix
  if gpu_job then
    prefix = "gpu"
  else
    prefix = "cpu"
  end

  if time_limit <= princeUtils.two_days then
    return prefix .. "48"
  end

  if time_limit <= princeUtils.seven_days then
    return prefix .. "168"
  end

  -- Longer than seven days: no default QoS applies.
  return nil
end
--- Check whether a QoS name may be used with the current module state.
-- Uses the module-level `user_netid`, `gpu_job` and `time_limit` values and
-- the `qos_all` table. Every rejection is reported through user_log() before
-- returning false.
--
-- Checks, in order:
--   1. a QoS name and a user netid are both present;
--   2. the name exists in qos_all;
--   3. if the QoS has a `users` list, the netid is in it;
--   4. "sysadm" is accepted at this point, skipping the remaining checks;
--   5. the name starts with "gpu" for GPU jobs, "cpu" otherwise;
--   6. time_min < time_limit <= time_max for that QoS.
--
-- @param qos_name QoS name to validate (may be nil)
-- @treturn boolean true when all applicable checks pass, false otherwise
local function qos_is_valid(qos_name)
  if qos_name == nil then
    user_log("No QoS setup")
    return false
  end

  if user_netid == nil then
    user_log("No user netid available")
    return false
  end

  local entry = qos_all[qos_name]
  if entry == nil then
    user_log("QoS '%s' is not valid", qos_name)
    return false
  end

  -- Access control applies only to entries that carry a `users` list.
  local allowed_users = entry.users
  if allowed_users ~= nil and not princeUtils.in_table(allowed_users, user_netid) then
    user_log("No authorized QoS '%s'", qos_name)
    return false
  end

  -- Authorized sysadm use is exempt from the device and time checks below.
  if qos_name == "sysadm" then
    return true
  end

  -- The first three characters of the name must match the job's device type.
  local required_prefix = gpu_job and "gpu" or "cpu"
  local mismatch_message = gpu_job
    and "Invalid QoS '%s' for GPU jobs"
    or "Invalid QoS '%s' for CPU jobs"
  if string.sub(qos_name, 1, 3) ~= required_prefix then
    user_log(mismatch_message, qos_name)
    return false
  end

  -- Lower bound is exclusive, upper bound is inclusive.
  if time_limit <= entry.time_min or time_limit > entry.time_max then
    user_log("Job time limit does not match QoS '%s', it should between %d and %d mins, job wall time is %d mins",
      qos_name, entry.time_min, entry.time_max, time_limit)
    return false
  end

  return true
end
--- Store the job parameters later read by assign_qos() and qos_is_valid().
-- Overwrites the module-level `time_limit`, `user_netid` and `gpu_job`.
-- @tparam table args fields read:
--   time_limit  stored as-is; replaced by 1 when nil or false (0 is kept,
--               since 0 is truthy in Lua)
--   user_netid  stored as-is; nil or false is stored as nil
--   gpu_job     stored as-is when truthy; otherwise false
local function setup_parameters(args)
  local requested_limit = args.time_limit
  if requested_limit then
    time_limit = requested_limit
  else
    time_limit = 1
  end

  local netid = args.user_netid
  if netid then
    user_netid = netid
  else
    user_netid = nil
  end

  local gpu_flag = args.gpu_job
  if gpu_flag then
    gpu_job = gpu_flag
  else
    gpu_job = false
  end
end
-- Public interface: expose the local functions above on the module table
-- under the same names.
princeQoS.setup_parameters = setup_parameters
princeQoS.assign_qos = assign_qos
princeQoS.qos_is_valid = qos_is_valid
-- Emit a log line through princeUtils.slurm_log whenever this chunk is run.
slurm_log("To load princeQoS.lua")
-- Hand the module table back to the caller (e.g. the result of require).
return princeQoS