-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcf.py
More file actions
83 lines (63 loc) · 1.97 KB
/
cf.py
File metadata and controls
83 lines (63 loc) · 1.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import numpy as np
from read_data import *
from scipy.spatial.distance import cosine
from scipy import stats
from math import *
# Raise NumPy's summarization threshold so arrays of up to 10000 elements are
# printed in full instead of being abbreviated with "...".
np.set_printoptions(threshold=10000)
def cos_dist(X1, X2, missing=99.99):
    """Return the cosine distance between two vectors, ignoring missing entries.

    A position contributes only when neither vector holds the ``missing``
    sentinel there; the dot product and both norms are accumulated over
    those shared positions alone.

    Args:
        X1: First sequence of numeric values; its length drives the loop.
        X2: Second sequence, indexed at every position of ``X1``.
        missing: Sentinel value that marks an absent entry.

    Returns:
        ``1.0 - dot / (|X1| * |X2|)`` over the shared positions, or ``1.0``
        when that ratio is undefined (no shared positions, or one of the
        restricted vectors has zero norm).

    Raises:
        IndexError: If ``X2`` is shorter than ``X1``.
    """
    norm1_sq = 0
    norm2_sq = 0
    dot = 0
    for i, a in enumerate(X1):
        if a == missing or X2[i] == missing:
            continue
        b = X2[i]
        norm1_sq += a ** 2
        norm2_sq += b ** 2
        dot += a * b

    denom = sqrt(norm1_sq) * sqrt(norm2_sq)
    if denom == 0:
        # Nothing comparable: report "no similarity" rather than dividing by
        # zero (which raised for Python floats and produced nan for NumPy).
        return 1.0
    return 1.0 - dot / denom
def jaccard_dist(X1, X2, missing=99.99):
    """Return a Jaccard-style distance between two vectors.

    Positions where either vector holds the ``missing`` sentinel are
    skipped. Every remaining position is treated as one element per vector:
    equal values count once in both the intersection and the union, while
    differing values add two distinct elements to the union.

    Args:
        X1: First sequence of values; its length drives the loop.
        X2: Second sequence, indexed at every position of ``X1``.
        missing: Sentinel value that marks an absent entry.

    Returns:
        ``1 - intersection / union`` as a float in ``[0, 1]``, or ``1.0``
        when no position is comparable (the ratio would be 0 / 0).

    Raises:
        IndexError: If ``X2`` is shorter than ``X1``.
    """
    intersection = 0.0
    union = 0.0
    for i, a in enumerate(X1):
        if a == missing or X2[i] == missing:
            continue
        if a == X2[i]:
            intersection += 1
            union += 1
        else:
            union += 2

    if union == 0:
        # No shared observations: treat the pair as maximally distant
        # instead of raising ZeroDivisionError.
        return 1.0
    return 1 - intersection / union
def classify(X, X_train, Y_train, metric=None):
    """Predict the label of ``X`` from its single nearest training sample.

    Args:
        X: The sample to classify.
        X_train: Sequence of training samples.
        Y_train: Labels, aligned index-for-index with ``X_train``.
        metric: Callable ``metric(train_sample, X)`` returning a distance.
            Defaults to ``jaccard_dist``; pass ``cos_dist`` to switch.

    Returns:
        The entry of ``Y_train`` belonging to the closest training sample.
        When several samples are equally close, the earliest one wins.

    Raises:
        ValueError: If ``X_train`` is empty.
    """
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if metric is None:
        metric = jaccard_dist

    best_index = 0
    # Start from infinity rather than 1 so metrics whose range exceeds 1
    # (e.g. cosine distance, up to 2) can still select a true nearest sample.
    best_dist = float("inf")
    for i, sample in enumerate(X_train):
        dist = metric(sample, X)
        if dist < best_dist:
            best_index = i
            best_dist = dist
    return Y_train[best_index]
def multi_classify(X, X_train, Y_train, k=29, metric=None):
    """Predict the label of ``X`` by majority vote among its k nearest samples.

    Args:
        X: The sample to classify.
        X_train: Sequence of training samples.
        Y_train: Scalar labels, aligned index-for-index with ``X_train``.
        k: Number of nearest neighbours that vote. If it exceeds the number
            of training samples, every sample votes.
        metric: Callable ``metric(train_sample, X)`` returning a distance.
            Defaults to ``jaccard_dist``; pass ``cos_dist`` to switch.

    Returns:
        The most frequent label among the k nearest neighbours, as a NumPy
        scalar. When several labels tie, the smallest one is returned.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``X_train`` is empty.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(X_train) == 0:
        raise ValueError("X_train must contain at least one sample")
    if metric is None:
        metric = jaccard_dist

    distances = [metric(sample, X) for sample in X_train]
    # sorted() is stable, so equally distant samples keep their training order.
    nearest = sorted(range(len(distances)), key=distances.__getitem__)[:k]
    votes = [Y_train[i] for i in nearest]

    # np.unique returns the labels in sorted order and argmax picks the first
    # maximum, so ties go to the smallest label. This replaces
    # stats.mode(ys)[0][0], which fails on SciPy releases where the mode of a
    # 1-D input is a scalar.
    labels, counts = np.unique(votes, return_counts=True)
    return labels[np.argmax(counts)]
def test(X_train, Y_train, X_test, Y_test):
    """Return the accuracy of ``multi_classify`` on a labelled test set.

    Args:
        X_train: Training samples handed to ``multi_classify``.
        Y_train: Training labels handed to ``multi_classify``.
        X_test: Samples to classify.
        Y_test: Expected labels, aligned index-for-index with ``X_test``.

    Returns:
        The fraction of test samples whose predicted label equals the
        expected one, as a float in ``[0, 1]``.

    Raises:
        ValueError: If ``X_test`` is empty (accuracy would be undefined).
    """
    total = len(X_test)
    if total == 0:
        raise ValueError("X_test must contain at least one sample")

    errors = sum(
        1
        for i, sample in enumerate(X_test)
        if multi_classify(sample, X_train, Y_train) != Y_test[i]
    )
    return 1 - float(errors) / total
# Load the train / validation / test splits; the validation split and the
# headers are unpacked but not used by the statements in this section.
X_train, Y_train, X_val, Y_val, X_test, Y_test, headers = get_split_data()

# Evaluate the classifier on the held-out test split and report the score.
accuracy = test(X_train, Y_train, X_test, Y_test)
print(accuracy)