-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrainClassifier.py
More file actions
32 lines (25 loc) · 1.23 KB
/
Copy pathtrainClassifier.py
File metadata and controls
32 lines (25 loc) · 1.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from nltk.classify import NaiveBayesClassifier
from nltk.classify import MaxentClassifier
from nltk.classify import DecisionTreeClassifier
from featureFunctions import *
def trainClassifier():
    """Train and return a Naive Bayes polarity classifier.

    Builds one labelled feature entry from the positive corpus and one from
    the negative corpus (via ``feature_tuple`` from ``featureFunctions``),
    then hands both to ``NaiveBayesClassifier.train``.

    Progress messages are written to stdout.

    Returns:
        The object returned by ``NaiveBayesClassifier.train``.
    """
    # Parenthesised single-argument print: identical output on Python 2,
    # and valid syntax on Python 3 (the bare print statement is not).
    print("Determining polarity of features...")

    # NLTK classifiers work on "featstructs": simple dictionaries mapping a
    # feature name to a feature value. Booleans indicate that the set
    # (a tweet) does (or doesn't) contain a feature.
    # For more information: http://www.nltk.org/howto/featstruct.html
    #
    # Per the original author's notes, each call below is expected to yield
    # a (dict, label) tuple, where dict holds (word, boolean) key-value pairs
    # and label marks the polarity -- TODO confirm against feature_tuple.
    posfeats = feature_tuple("corpora/finalPositiveCorpus.txt", "r", "pos")
    negfeats = feature_tuple("corpora/finalNegativeCorpus.txt", "r", "neg")

    # A list of (dict, label) tuples, one for each label.
    trainfeats = [posfeats, negfeats]

    print("Training the classifier...")

    # Classifier.train()
    #   input:  list of (dict, label) tuples, one for each label (pos, neg)
    #   output: trained classifier that can identify each label
    # NOTE: the original author also experimented with
    # MaxentClassifier.train(trainfeats) and
    # DecisionTreeClassifier.train(trainfeats) as alternatives here.
    classifier = NaiveBayesClassifier.train(trainfeats)
    return classifier