-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathevaluate.py
More file actions
121 lines (95 loc) · 4.62 KB
/
evaluate.py
File metadata and controls
121 lines (95 loc) · 4.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
""" Trains and evaluates the model on the different emotions """
import argparse
import ConfigParser
import imp
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import utils
def load_model(model_name, X_train, y_train, optimization_parameters, sklearn_model=None):
    """
    Dynamically load and instantiate a model implementation from ``models/``.

    :param model_name: name of the module under ``models/`` that defines a ``Model`` class
    :param X_train: training data (# of samples x # of features)
    :param y_train: labels for the training data (# of samples * 1)
    :param optimization_parameters: hyper-parameter grid handed through to the model
    :param sklearn_model: optional sklearn estimator class the model wraps
    :return: the instantiated model object
    """
    # Import the module by file path so model implementations can be
    # dropped into models/ without being registered anywhere.
    module_path = 'models/%s.py' % (model_name)
    loaded_module = imp.load_source(model_name, module_path)
    return loaded_module.Model(X_train, y_train, optimization_parameters, sklearn_model)
def get_labels(data):
    """
    Extract the gold label for every training example, per emotion.

    :param data: dictionary mapping an emotion name to a list of examples,
                 where each example is a sequence whose LAST element is the
                 numeric intensity label
    :return: dictionary mapping each emotion to a numpy array of labels,
             one entry per example, in the original order
    """
    # .items() instead of the Python-2-only .iteritems(): identical behavior
    # on Python 2 and keeps the helper usable on Python 3 as well.
    return {emotion: np.array([example[-1] for example in examples])
            for emotion, examples in data.items()}
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", help="the model to evaluate data on")
    parser.add_argument("-f", "--features", nargs="*", help="the type of features to use for the model")
    parser.add_argument("-ems", "--metrics", nargs="*", help="the metrics to evaluate the model")
    parser.add_argument("-opt", "--optimize", help="whether or not to optimize the model")
    # BUG FIX: a missing comma ("-sv" "--save") concatenated the two strings
    # into a single bogus flag named "-sv--save"; "--save" never existed.
    parser.add_argument("-sv", "--save", help="whether or not to save the model")
    args = parser.parse_args()

    if args.model == "baseline":  # load the baseline parameters from the config file
        print("Loading the baseline config file...")
        config = ConfigParser.ConfigParser()
        config.read("BASELINE.ini")
        model_name = "baseline"
        sklearn_model_name = config.get("BASELINE", "model")
        features = config.get("BASELINE", "features").split(",")
        metrics = [config.get("BASELINE", "metrics")]
        optimize = False
        # BUG FIX: the baseline branch never defined `save`, so any later
        # use of it would raise NameError on the baseline path.
        save = False
    else:
        model_name = args.model
        features = args.features
        # BUG FIX: metrics was assigned args.features, silently discarding
        # whatever the user passed via -ems/--metrics.
        metrics = args.metrics
        # BUG FIX: `'optimization' in args` tested a nonexistent attribute
        # (the dest is `optimize`) and was always False, and `'save' in args`
        # was always True because the attribute always exists on the
        # Namespace. Use the actual flag values instead.
        optimize = bool(args.optimize)
        save = bool(args.save)
        sklearn_model_name = ""

    # training data is a dictionary: {anger: tweets, fear: tweets, joy: tweets, sadness: tweets}
    training_data = utils.load_data.load_training_data("data/train", "data/dev")
    print("Train Data Statistics...\n")
    print("Number of anger tweets {0}".format(len(training_data["anger"])))
    print("Number of fear tweets {0}".format(len(training_data["fear"])))
    print("Number of joy tweets {0}".format(len(training_data["joy"])))
    print("Number of sadness tweets {0}".format(len(training_data["sadness"])))
    print("\n")

    y_train = get_labels(training_data)
    # Rebuild each tweet as a single whitespace-joined string per emotion.
    # (.items() rather than Python-2-only .iteritems() for py2/py3 compatibility.)
    train_corpus = {emotion: [" ".join(val[0]) for val in values]
                    for emotion, values in training_data.items()}

    print("Featurizers being used: ")
    for idx, feature in enumerate(features, start=1):
        print("  {0}. {1}".format(idx, feature))
    featurizer = utils.generate_features.Featurizer(features, train_corpus)
    X_train = featurizer.generate_all_features()
    print("Feature length: {0}".format(X_train["anger"].shape[1]))
    print("\n")

    # Hyper-parameter grids, keyed by the sklearn model name from the config
    # (the empty key covers keras-based models that take no sklearn grid).
    optimization_parameters = {
        'RandomForestRegressor': {
            'n_estimators': [10, 20, 30, 50, 100, 500]
        },
        'SVR': {'C': [0.001, 0.01, 0.1, 1, 10]},
        '': ''
    }

    if sklearn_model_name:
        print("Using model: {0}".format(sklearn_model_name))
    else:
        print("Using model: {0}".format(model_name))
    print("\n")

    if sklearn_model_name:
        # SECURITY FIX: the config value was previously passed to eval();
        # resolve it through an explicit whitelist of imported estimators
        # instead so arbitrary config text cannot execute code.
        sklearn_models = {
            "RandomForestRegressor": RandomForestRegressor,
            "SVR": SVR,
        }
        model = load_model(model_name, X_train, y_train,
                           optimization_parameters[sklearn_model_name],
                           sklearn_models[sklearn_model_name])
    else:
        # load a model that uses keras (no sklearn estimator to pass in)
        model = load_model(model_name, X_train, y_train,
                           optimization_parameters[sklearn_model_name], None)

    # BUG FIX: `optimize == 'True'` compared a boolean against the string
    # 'True' and was always False, so hyper-parameter search never ran.
    if optimize:
        best_model_params = model.optimize("pearson_correlation")  # metric to optimize models on
        emotions_cv_scores = model.train(best_model_params)
    else:
        emotions_cv_scores = model.train()

    print("10-Fold CV Scores (Pearson Correlation) ")
    for emotion, score in emotions_cv_scores.items():
        print("Emotion: {0}, Score: {1}".format(emotion, score))
    # list(...) so np.mean works on Python 3 dict views as well as py2 lists
    print("AVG Pearson Correlation: ", np.mean(list(emotions_cv_scores.values())))