-
Notifications
You must be signed in to change notification settings - Fork 1
/
loyo_testing.py
158 lines (139 loc) · 6.65 KB
/
loyo_testing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import sys
import numpy as np
# Seed NumPy's global RNG from the <numpy seed> command-line argument.
# This runs at import time and before the keras imports further down;
# presumably the ordering is deliberate for reproducibility -- do not reorder.
# A missing or non-integer argument raises IndexError/ValueError right here.
np.random.seed(int(sys.argv[2]))
import tensorflow as tf
# Seed TensorFlow from the <tensorflow seed> command-line argument.
# NOTE(review): tf.set_random_seed looks like the TensorFlow 1.x spelling --
# confirm against the TensorFlow version this project pins.
tf.set_random_seed(int(sys.argv[3]))
import utils
import models
from keras.callbacks import EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import ShuffleSplit
import matplotlib.pyplot as plt
import pandas as pd
import os
import datetime
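'''
Run the script as:
    python loyo_testing.py <architecture> <numpy seed> <tensorflow seed>

<architecture> is one of: mlp, cnn, cnn_augmented.
<numpy seed> and <tensorflow seed> are integers used to seed the two libraries.

For example:
    python loyo_testing.py mlp 1 1
'''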
# Keyword arguments handed to utils.load_loyo_data for each supported
# architecture. The keys are also the set of valid <architecture> values.
_LOAD_KWARGS = {
    'mlp': {'scale': True, 'get_hand': True,
            'remove_oprreadup': False, 'remove_oprfortraining': True},
    'cnn': {'get_images': True, 'scale': True},
    'cnn_augmented': {'get_images': True, 'get_hand': True, 'scale': True},
}


def _fit_and_predict(architecture, x_train, y_train, x_test,
                     train_idxs, val_idxs, fit_kwargs):
    """Train a fresh model on one train/validation split and predict x_test.

    Args:
        architecture: one of the keys of _LOAD_KWARGS.
        x_train: training inputs; for 'cnn_augmented' it is indexed as
            x_train[0] (passed on as the image input) and x_train[1]
            (passed on as the hand-feature input).
        y_train: training targets, indexable by an index array.
        x_test: inputs of the left-out year.
        train_idxs: indices of the samples used for fitting.
        val_idxs: indices of the samples used for validation / early stopping.
        fit_kwargs: extra keyword arguments forwarded to the Keras fit call.

    Returns:
        The model's predictions for x_test.

    Raises:
        ValueError: if architecture is not a supported name.
    """
    y_train_cv = y_train[train_idxs]
    y_val = y_train[val_idxs]
    # NOTE(review): 32 is used both as the mlp batch size and in
    # steps_per_epoch; presumably it matches the generators' batch size --
    # confirm against utils.image_generator / ImageDataGenerator.flow.
    steps_per_epoch = (len(y_train_cv) // 32) + 1

    if architecture == 'mlp':
        x_train_cv = x_train[train_idxs]
        x_val = x_train[val_idxs]
        model = models.mlp(input_shape=x_train_cv[0].shape)
        model.fit(x_train_cv, y_train_cv, batch_size=32,
                  validation_data=(x_val, y_val),
                  **fit_kwargs)
        return model.predict(x_test)

    if architecture == 'cnn':
        x_train_cv = x_train[train_idxs]
        x_val = x_train[val_idxs]
        model = models.cnn(input_shape=x_train_cv[0].shape,
                           y_train_std=np.std(y_train_cv))
        # Augment the training images with random horizontal/vertical flips.
        dataflow = ImageDataGenerator(
            horizontal_flip=True, vertical_flip=True,
        ).flow(x_train_cv, y_train_cv)
        model.fit_generator(dataflow,
                            validation_data=(x_val, y_val),
                            steps_per_epoch=steps_per_epoch,
                            **fit_kwargs)
        return utils.predict_with_rotations(model, x_test,
                                            architecture=architecture)

    if architecture == 'cnn_augmented':
        # Two-input model: split both inputs with the same indices.
        x_train_cv = [x_train[0][train_idxs], x_train[1][train_idxs]]
        x_val = [x_train[0][val_idxs], x_train[1][val_idxs]]
        model = models.cnn_augmented(image_shape=x_train_cv[0][0].shape,
                                     hand_shape=x_train_cv[1][0].shape,
                                     y_train_std=np.std(y_train_cv))
        model.fit_generator(utils.image_generator(x_train_cv, y_train_cv),
                            validation_data=(x_val, y_val),
                            steps_per_epoch=steps_per_epoch,
                            shuffle=False,
                            **fit_kwargs)
        return utils.predict_with_rotations(model, x_test,
                                            architecture=architecture)

    raise ValueError(f'Invalid architecture name: {architecture}')


if __name__ == '__main__':
    # --- input -----------------------------------------------------------
    architecture = sys.argv[1]
    # Fail fast, before any directory or file is created.
    if architecture not in _LOAD_KWARGS:
        raise ValueError(f'Invalid architecture name: {architecture}')
    np_seed = str(sys.argv[2])
    tf_seed = str(sys.argv[3])

    callbacks = [EarlyStopping(monitor='val_loss', min_delta=0.05,
                               patience=10, restore_best_weights=True)]
    fit_kwargs = {'epochs': 1000,
                  'verbose': 2,
                  'callbacks': callbacks}

    # Values collected per left-out year for the final MAE plot.
    x_plot = []
    y_plot = []
    y_error = []

    col_names = ['Leave Out Year', 'MAE', 'RMSE', 'R^2']
    metric_names = col_names[1:]

    # --- output locations --------------------------------------------------
    final_directory = os.path.join(os.getcwd(), 'LOYO_results', 'seeds')
    os.makedirs(final_directory, exist_ok=True)
    filename = os.path.join(final_directory,
                            f'LOYO_np{np_seed}_tf{tf_seed}.csv')

    # Results are appended, so only emit the header when the file is new or
    # empty; otherwise a re-run would insert a second header row mid-file.
    if not os.path.exists(filename) or os.path.getsize(filename) == 0:
        with open(filename, 'a') as f:
            # The trailing comma matches the format of the data rows below.
            f.write(','.join(col_names) + ',' + '\n')

    # --- leave-one-year-out loop -------------------------------------------
    for leave_out_year in range(2010, 2019):  # update years here (end is exclusive)
        print(f'Loading data for year: {leave_out_year}...')
        x_train, x_test, y_train, y_test, ids = utils.load_loyo_data(
            leave_out_year, **_LOAD_KWARGS[architecture])
        train_hurricane_names = ids

        # A single random 90/10 train/validation split of the training set.
        # No random_state is passed, so the split follows NumPy's global RNG.
        n_splits = 1
        ss = ShuffleSplit(n_splits=n_splits, test_size=0.1)
        metrics = {'MAE': [],
                   'RMSE': [],
                   'R^2': []}

        for i, (train_idxs, val_idxs) in enumerate(ss.split(train_hurricane_names)):
            print(f'\n--- Fold {i+1} of {n_splits} ---')
            y_predict = _fit_and_predict(architecture, x_train, y_train, x_test,
                                         train_idxs, val_idxs, fit_kwargs)

            tmp_metrics = utils.compute_metrics(y_test, y_predict, print_them=True)
            for name in metrics:
                metrics[name].append(tmp_metrics[name])

            # Output predicted labels along with true labels for the RI
            # analysis. With n_splits > 1 each fold overwrites this file.
            df_pred = pd.DataFrame({'y_test': list(y_test.reshape([-1, ])),
                                    'y_predict': list(y_predict.reshape([-1, ]))})
            # NOTE(review): there is no separator between the tf seed and the
            # year; the name is kept as-is in case other scripts rely on it.
            pred_filename = os.path.join(
                final_directory,
                f'LOYO_pred_np{np_seed}_tf{tf_seed}{leave_out_year}.csv')
            df_pred.to_csv(pred_filename, index=False)

        # Print metrics (mean +/- two standard deviations over the folds).
        print(f'\n--- Cross Validation Test Metrics for year: {leave_out_year} ---')
        for k, v in metrics.items():
            print(f'{k}: {np.mean(v):.2f} +/- {2 * np.std(v):.2f}')

        # Append this year's row to the results CSV.
        row = [str(leave_out_year)]
        row.extend(
            f'{np.mean(metrics[name]):.2f} +/- {2 * np.std(metrics[name]):.2f}'
            for name in metric_names
        )
        with open(filename, 'a') as f:
            f.write(','.join(row) + ',' + '\n')

        # Update plot values (MAE only).
        x_plot.append(leave_out_year)
        y_plot.append(np.mean(metrics['MAE']))
        y_error.append(2 * np.std(metrics['MAE']))

    # --- plot MAE per left-out year ------------------------------------------
    x_labels = [str(year) for year in x_plot]
    plt.figure()
    plt.errorbar(x_plot, y_plot, yerr=y_error)
    plt.title("MAE vs Year Left Out")
    plt.xlabel("Year Left Out")
    plt.ylabel("Mean Absolute Error")
    # Set ticks, labels and rotation in one call so the rotation applies to
    # the final labels.
    plt.xticks(x_plot, x_labels, rotation=45)
    plt.savefig(os.path.splitext(filename)[0] + ".png")