-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmajority_vote_classifier.py
More file actions
101 lines (89 loc) · 4.2 KB
/
majority_vote_classifier.py
File metadata and controls
101 lines (89 loc) · 4.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""
Majority vote classifier class for ensemble learning.
An alternative to sklearn.ensemble.VotingClassifier.
"""
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.preprocessing import LabelEncoder
import six # pip install six to solve deprecation warning
from sklearn.pipeline import _name_estimators
import numpy as np
import operator
class MajorityVoteClassifier(BaseEstimator, ClassifierMixin):
    """A majority vote ensemble classifier.

    Parameters:
        classifiers : array-like, shape = [n_classifiers] or [1, n_classifiers]
            Different classifiers for the ensemble. Either a flat sequence of
            estimators, or a sequence whose first element is that sequence
            (the nested form this class originally required).
        vote : str, {'classlabel', 'probability'}
            Default : 'classlabel'
            If 'classlabel' the prediction is based on the argmax of class labels.
            Else if 'probability', the argmax of the sum of probabilities is used
            to predict the class label (recommended for calibrated classifiers).
        weights : array-like, shape = [n_classifiers]
            Optional, default: None
            If a list of int or float values is provided, the classifiers are
            weighted by importance. Uses uniform weights if weights=None.
    """

    def __init__(self, classifiers, vote='classlabel', weights=None):
        # Constructor arguments are stored untouched so that get_params /
        # clone can round-trip them.
        self.classifiers = classifiers
        self.named_classifiers = dict(_name_estimators(self._base_classifiers()))
        self.vote = vote
        self.weights = weights

    def _base_classifiers(self):
        """Return the flat sequence of base estimators held in self.classifiers."""
        first = self.classifiers[0]
        # Nested form ([[clf1, clf2, ...]]): the estimators live in element 0.
        if isinstance(first, (list, tuple, np.ndarray)):
            return first
        # Flat form ([clf1, clf2, ...]), as described in the class docstring.
        return self.classifiers

    def fit(self, x, y):
        """Fit classifiers.

        Parameters:
            x: {array-like, sparse matrix}, shape = [n_samples, n_features]
                Matrix of training samples.
            y: array-like, shape = [n_samples]
                Vector of target class labels.
        Returns:
            self : object
        Raises:
            ValueError: if vote is not 'classlabel' or 'probability', or if the
                number of weights differs from the number of classifiers.
        """
        if self.vote not in ('classlabel', 'probability'):
            raise ValueError("vote must be 'probability' or 'classlabel'; "
                             "got (vote={0!r})".format(self.vote))
        estimators = self._base_classifiers()
        if self.weights is not None and len(self.weights) != len(estimators):
            raise ValueError('Number of classifiers and weights must be equal; '
                             'got {0} weights, {1} classifiers'
                             .format(len(self.weights), len(estimators)))

        # Use LabelEncoder to ensure class labels start with 0, which is
        # important for the np.argmax call in self.predict
        self.labelenc_ = LabelEncoder()
        self.labelenc_.fit(y)
        self.classes_ = self.labelenc_.classes_
        # The encoded targets are the same for every classifier: compute once.
        encoded_y = self.labelenc_.transform(y)
        # Fit clones so the estimators passed in by the caller stay unfitted.
        self.classifiers_ = [clone(clf).fit(x, encoded_y) for clf in estimators]
        return self

    def predict(self, x):
        """Predict class labels for x.

        Parameters:
            x: {array-like, sparse matrix}, shape = [n_samples, n_features]
                Matrix of samples to classify.
        Returns:
            maj_vote : array-like, shape = [n_samples]
                Predicted class labels.
        """
        if self.vote == 'probability':
            maj_vote = np.argmax(self.predict_proba(x), axis=1)
        else:  # 'classlabel' vote
            # One row per sample, one column per classifier.
            predictions = np.asarray([clf.predict(x) for clf in self.classifiers_]).T
            # (Weighted) count of the votes for each encoded label, per sample.
            maj_vote = np.apply_along_axis(
                lambda votes: np.argmax(np.bincount(votes, weights=self.weights)),
                axis=1, arr=predictions)
        # Both branches yield encoded label indices; map back to original labels.
        return self.labelenc_.inverse_transform(maj_vote)

    def predict_proba(self, x):
        """Predict class probabilities for x.

        Parameters:
            x: {array-like, sparse matrix}, shape = [n_samples, n_features]
                Input vectors, where n_samples is the number of samples and
                n_features is the number of features.
        Returns:
            avg_proba: array-like, shape = [n_samples, n_classes]
                Weighted average probability for each class per sample.
        """
        probabilities = np.asarray([clf.predict_proba(x) for clf in self.classifiers_])
        # Average over the classifier axis (axis 0).
        return np.average(probabilities, axis=0, weights=self.weights)

    def get_params(self, deep=True):
        """Get classifier parameter names for GridSearch.

        Parameters:
            deep : bool, default True
                If True, also return the named base classifiers and their
                parameters, keyed as '<name>__<parameter>'.
        Returns:
            params : dict mapping parameter names to values.
        """
        # Always include this estimator's own constructor parameters so that
        # set_params(vote=...) / set_params(weights=...) are accepted.
        out = super(MajorityVoteClassifier, self).get_params(deep=False)
        if not deep:
            return out
        out.update(self.named_classifiers)
        for name, step in self.named_classifiers.items():
            for key, value in step.get_params(deep=True).items():
                out['{0}__{1}'.format(name, key)] = value
        return out