-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdeep_model_adni_wrapper.py
More file actions
381 lines (300 loc) · 13.8 KB
/
deep_model_adni_wrapper.py
File metadata and controls
381 lines (300 loc) · 13.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
############################
## Wrapper for Deep Model ##
############################
#import methods
from call_gpml import *
from get_activation import *
from compute_error import *
from call_deep_pgp import *
import os
import csv
import numpy as np
import itertools
import pathlib
#set to use GPU... '0' or '1'
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
######################
##### PREP DATA ######
######################
print('----- PREPARING DATA -----')
#define directories
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
CSV_DATA_DIR = os.path.join(CURRENT_DIR, 'adni_data_all_norm_MMSE.csv') #data from Oggi
ID_DIR = os.path.join(CURRENT_DIR, 'Patient_RIDs_more_than_10_Visits_Less_Than_82_5_Perc_Missing_NoHeaders.csv')
#GT_MEAN_FOLDER_DIR = os.path.join(CURRENT_DIR, 'gt_mean')
#pathlib.Path(GT_MEAN_FOLDER_DIR).mkdir(parents=True, exist_ok=True) #makes folder
#
#GT_MEAN_EXTRACTED_FOLDER_DIR = os.path.join(CURRENT_DIR, 'gt_mean_extracted')
#pathlib.Path(GT_MEAN_EXTRACTED_FOLDER_DIR).mkdir(parents=True, exist_ok=True) #makes folder
#
#ERROR_FOLDER_DIR = os.path.join(CURRENT_DIR, 'mean_squared_error')
#pathlib.Path(ERROR_FOLDER_DIR).mkdir(parents=True, exist_ok=True) #makes folder
#create csv files
gt_mean_csv_name = 'ground_truth_mean.csv'
GT_MEAN_DIR = os.path.join(CURRENT_DIR, gt_mean_csv_name)
gt_mean_extracted_csv_name = 'ground_truth_mean_extracted.csv'
GT_MEAN_EXTRACTED_DIR = os.path.join(CURRENT_DIR, gt_mean_extracted_csv_name)
error_csv_name = 'mean_squared_error.csv'
ERROR_DIR = os.path.join(CURRENT_DIR, error_csv_name)
#write csv headers
with open(GT_MEAN_DIR, 'w') as mean_csv_file:
w = csv.writer(mean_csv_file)
w.writerow(['ID', 'ground truth', 'base model mu', 'source model mu', 'adapted model mu', 'target model mu'])
with open(GT_MEAN_EXTRACTED_DIR, 'w') as mean_extracted_csv_file:
w = csv.writer(mean_extracted_csv_file)
w.writerow(['ID', 'ground truth', 'base model mu', 'source model mu', 'adapted model mu', 'target model mu'])
with open(ERROR_DIR, 'w') as error_csv_file:
w = csv.writer(error_csv_file)
w.writerow(['ID', 'base model error', 'source model error', 'adapted model error', 'target model error'])
#create list of IDs
with open(ID_DIR, 'r') as f:
reader = csv.reader(f)
ID_all = list(reader)
ID_all = list(itertools.chain.from_iterable(ID_all)) #flatten list
ID_all = list(map(int, ID_all)) #cast each element as int
#loop for 10 folds
#for i in range(0, 1):
for i in range(0, 10): #0 to 9
tst_ind = ID_all[i*10:i*10+10]
tr_ind_source = np.setdiff1d(ID_all, tst_ind)
#create X_all, Y_all, value_all
with open(CSV_DATA_DIR, 'r', encoding = 'utf8') as csvfile:
reader = csv.reader(csvfile, delimiter = " ", quotechar = '|')
reader = list(reader)[0:]
X_all = {}
Y_all = {}
value_all = {}
for line in reader:
line = line[0].split(',')
line = list(map(float, line))
ID = int(line[0])
x_line = list(map(float, line[3:-2]))
y_line = [line[-2]]
value_line = [line[-1]]
if ID in ID_all: #check if ID is in ID_all
if ID in X_all:
X_all[ID] = np.vstack((X_all[ID], x_line))
Y_all[ID] = np.vstack((Y_all[ID], y_line))
value_all[ID] = np.vstack((value_all[ID], value_line))
else:
X_all[ID] = x_line
Y_all[ID] = y_line
value_all[ID] = value_line
# #make each value a list of floats
# X_all = {k: v.tolist() for k, v in X_all.items()}
# Y_all = {k: v.tolist() for k, v in Y_all.items()}
# value_all = {k: v.tolist() for k, v in value_all.items()}
#create x_s, y_s
x_dict_s = {key:value for key, value in X_all.items() if key in tr_ind_source}
y_dict_s = {key:value for key, value in Y_all.items() if key in tr_ind_source}
x_list_s = tuple(x_dict_s.values())
y_list_s = tuple(y_dict_s.values())
x_s = np.vstack(x_list_s)
y_s = np.vstack(y_list_s)
#create x_a, y_a
x_a = {key:value for key, value in X_all.items() if key in tst_ind}
y_a = {key:value for key, value in Y_all.items() if key in tst_ind}
#ground truth
g_t = y_a
#create g_t_all
g_list_t = tuple(g_t.values())
g_t_all = np.vstack(g_list_t)
g_t_all = list(g_t_all.flatten())
#create xtest_all, ytest_all
x_dict_test_all = {key:value for key, value in X_all.items() if key in tst_ind}
y_dict_test_all = {key:value for key, value in Y_all.items() if key in tst_ind}
x_list_test_all = tuple(x_dict_test_all.values())
y_list_test_all = tuple(y_dict_test_all.values())
xtest_all = np.vstack(x_list_test_all)
ytest_all = np.vstack(y_list_test_all)
#create indices_all
indices_loop = 0
indices_all = []
add_value = 0
for ID in tst_ind:
print('ID:', ID)
values = value_all[ID]
values_array = np.array(values)
values_array = np.insert(values_array, 0, 0)
values_array = values_array[:-1]
values_array = np.reshape(values_array, (len(g_t[ID]),1))
indices = np.where(values_array == 0)
indices = np.ndarray.tolist(indices[0])
add_indices = list(map(lambda i:i + add_value, indices))
indices_all.extend(add_indices)
add_value = add_value + len(g_t[ID])
indices_loop+=1
######################
#### BASE MODEL ######
######################
print('----- BUILDING BASE MODEL -----')
from keras.optimizers import Adadelta
from keras.layers import Input, Dense
from keras.models import Model as kerasModel
from keras.callbacks import EarlyStopping
from kgp.models import Model as kgpModel
from kgp.layers import GP
#build model
inputs = Input(shape = (x_s.shape[1], ))
deep_layer = Dense(128, activation = 'relu')(inputs)
outputs = Dense(1, activation = 'linear')(deep_layer) #last layer - units equal output dimension
base_model = kerasModel(inputs, outputs)
base_model.compile(optimizer = Adadelta(), loss = 'mse', metrics = ['accuracy'])
#fit on training data
cb = [EarlyStopping(monitor = 'loss',
min_delta = 0,
patience = 2,
verbose = 1,
mode = 'auto')]
base_model.fit(x = x_s, y = y_s,
validation_data = (xtest_all, ytest_all),
batch_size = 100,
epochs = 5,
callbacks = cb,
verbose = 1)
#get modified input --> z
x_s_activations = get_activations(base_model, x_s)
z = x_s_activations[-2]
######################
# OPTIMIZE WITH GPML #
######################
print('----- OPTIMIZING WITH GPML -----')
#calculate initial parameters
max_x_s = np.amax(z, axis = 0)
min_x_s = np.amin(z, axis = 0)
initial_lik = np.log(np.sqrt(0.1*np.var(y_s)))
initial_cov = np.array([[np.log(np.median(max_x_s - min_x_s))], [np.log(np.sqrt(np.var(y_s)))]])
#input features for GP
dimensions = z.shape[1]
initial_hyp = {'lik': initial_lik, 'mean': [], 'cov': initial_cov}
train_opt = {}
inf_method = 'infExact'
mean_fxn = 'meanZero'
cov_fxn = 'covSEiso'
lik_fxn = 'likGauss'
dlik_fxn = 'dlikExact'
#train model until error minimizes
loop = 1
all_error = []
avg_error_diff = 0
#while loop < 4 or avg_error_diff > -0.005:
while loop < 3: #will add error condition later...
print('LOOP COUNT:', loop)
if loop == 1:
z_modified = z
hyp = initial_hyp
gpml_model, optimized_params = trainGP(z_modified, y_s, dimensions, hyp, train_opt, inf_method, mean_fxn, cov_fxn, lik_fxn, dlik_fxn)
######################
##### FIT MODEL ######
######################
print('----- FITTING MODEL -----')
#prepare GP layer with optimized parameters
gp_layer = GP(
hyp = optimized_params,
inf = 'infExact',
dlik = 'dlikExact',
# opt = {'cg_maxit': 100, 'cg_tol': 1e-6}, #doesn't affect result
mean = 'meanZero',
# grid_kwargs={'eq': 1, 'k': 100.}, #doesn't affect result
cov = 'covSEiso',
batch_size = 100,
nb_train_samples = z.shape[0])
#build model with GP layer
outputs = gp_layer(deep_layer)
deep_model = kgpModel(inputs = inputs, outputs = outputs)
#compile model
deep_model.compile(optimizer = Adadelta(), loss = 'mse', metrics = ['accuracy'])
#fit model
deep_model.fit(x_s, y_s,
validation_data = (xtest_all, ytest_all),
batch_size = 100,
epochs = 5,
callbacks = cb,
verbose = 1)
#predict and compute error
y_predict, var_predict = deep_model.predict(xtest_all, x_s, y_s, return_var = True, verbose = 0)
y_predict = list(y_predict[0].flatten())
g_t_extracted = list(map(lambda i:g_t_all[i], indices_all))
y_predict_extracted = list(map(lambda i:y_predict[i], indices_all))
error = mean_absolute_error(y_predict_extracted, g_t_extracted)
all_error.append(error)
#get z for next iteration
x_s_activations = get_activations(deep_model, x_s)
z_modified = x_s_activations[-2]
#get hyperparameters for next iteration
hyp = optimized_params
print('parameters for next iteration', hyp)
loop += 1
#TRAIN ADAPTATION AND TARGET MODELS
print('----- TRAINING ADAPTATION AND TARGET MODELS -----')
error_all = {}
#make folders
# GT_MEAN_FOLD_FOLDER = os.path.join(GT_MEAN_FOLDER_DIR, fold[i]) #name of folder
# pathlib.Path(GT_MEAN_FOLD_FOLDER).mkdir(parents=True, exist_ok=True) #make folder
# GT_MEAN_EXTRACTED_FOLD_FOLDER = os.path.join(GT_MEAN_EXTRACTED_FOLDER_DIR, fold[i]) #name of folder
# pathlib.Path(GT_MEAN_EXTRACTED_FOLD_FOLDER).mkdir(parents=True, exist_ok=True) #make folder
# ERROR_FOLD_FOLDER_DIR = os.path.join(ERROR_FOLDER_DIR, fold[i]) #name of folder
# pathlib.Path(ERROR_FOLD_FOLDER_DIR).mkdir(parents=True, exist_ok=True) #make folder
#output mean and variance predictions for source, adaptation, and target models
#iterate over test patients
for ID in tst_ind:
print('----- TEST PATIENT: %s -----'%(ID))
x_a_patient = x_a[ID][:-1,:]
y_a_patient = y_a[ID][:-1,:]
xtest = x_a[ID]
#predictions for baseline model
m_b_patient = base_model.predict(xtest)
m_b_patient = m_b_patient.flatten().tolist()
predictions = call_deep_pgp(deep_model, gp_layer, x_a_patient, y_a_patient, x_s, y_s, xtest)
g_t_patient = g_t[ID]
g_t_patient = list(g_t_patient.flatten())
m_s_patient = predictions['source model mu']
m_a_patient = predictions['adapted model mu']
m_t_patient = predictions['target model mu']
values = value_all[ID]
values_array = np.array(values)
values_array = np.insert(values_array, 0, 0)
values_array = values_array[:-1]
values_array = np.reshape(values_array, (len(g_t_patient),1))
indices = np.where(values_array == 0)
indices = np.ndarray.tolist(indices[0])
#extract only values with valid data
g_t_extracted = list(map(lambda i:g_t_patient[i], indices))
m_b_extracted = list(map(lambda i:m_b_patient[i], indices))
m_s_extracted = list(map(lambda i:m_s_patient[i], indices))
m_a_extracted = list(map(lambda i:m_a_patient[i], indices))
m_t_extracted = list(map(lambda i:m_t_patient[i], indices))
#compute mean absolute error
e_b = mean_absolute_error(m_b_extracted, g_t_extracted)
e_s = mean_absolute_error(m_s_extracted, g_t_extracted)
e_a = mean_absolute_error(m_a_extracted, g_t_extracted)
e_t = mean_absolute_error(m_t_extracted, g_t_extracted)
error_all[ID] = [e_b, e_s, e_a, e_t]
#write ground truth and mu values to csv
with open(GT_MEAN_DIR, 'a') as mean_csv_file:
w = csv.writer(mean_csv_file)
for i in range(len(g_t_patient)):
w.writerow([ID, g_t_patient[i], m_b_patient[i], m_s_patient[i], m_a_patient[i], m_t_patient[i]])
#write ground truth and mu values to csv
with open(GT_MEAN_EXTRACTED_DIR, 'a') as mean_extracted_csv_file:
w = csv.writer(mean_extracted_csv_file)
for i in range(len(g_t_extracted)):
w.writerow([ID, g_t_extracted[i], m_b_extracted[i], m_s_extracted[i], m_a_extracted[i], m_t_extracted[i]])
#write error values to csv
with open(ERROR_DIR, 'a') as error_csv_file:
w = csv.writer(error_csv_file)
for key, value in error_all.items():
value = list(value)
w.writerow([key, value[0], value[1], value[2], value[3]])
#write average error values to csv
column_sums = None
with open (ERROR_DIR) as error_csv_file:
all_lines = error_csv_file.readlines()
lines = all_lines[1:]
rows_of_numbers = [map(float, line.split(',')) for line in lines]
sums = map(sum, zip(*rows_of_numbers))
averages = [sum_item / len(lines) for sum_item in sums]
averages[0] = 'average error'
with open (ERROR_DIR, 'a') as error_csv_file:
w = csv.writer(error_csv_file)
w.writerow(averages)