-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathmain.py
More file actions
111 lines (89 loc) · 2.75 KB
/
main.py
File metadata and controls
111 lines (89 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# author: Bing-Cheng Chen
import numpy as np
from scipy.stats import multivariate_normal
from numpy import genfromtxt
def Estep(X, prior, mu, sigma):
    """E-step of EM for a Gaussian mixture: compute responsibilities.

    Args:
        X: (N, D) data matrix.
        prior: (K,) mixture weights.
        mu: (K, D) component means.
        sigma: (K, D, D) component covariance matrices.

    Returns:
        (N, K) matrix gama_mat where gama_mat[i, k] = p(z_i = k | x_i),
        i.e. prior[k] * N(x_i; mu[k], sigma[k]) normalized over k.
    """
    N = X.shape[0]
    K = len(prior)
    gama_mat = np.zeros((N, K))
    # Evaluate each component's density at all N points in one scipy call
    # (the original recomputed every pdf twice inside per-point loops).
    for k in range(K):
        gama_mat[:, k] = prior[k] * multivariate_normal.pdf(X, mu[k, :], sigma[k, :, :])
    # Normalize each row so responsibilities sum to 1 over the K components.
    gama_mat /= gama_mat.sum(axis=1, keepdims=True)
    return gama_mat
def Mstep(X, gama_mat):
    """M-step of EM for a Gaussian mixture: re-estimate the parameters.

    Args:
        X: (N, D) data matrix.
        gama_mat: (N, K) responsibilities from the E-step.

    Returns:
        mu: (K, D) updated means.
        sigma: (K, D, D) updated covariance matrices.
        prior: (K,) updated mixture weights.

    Note: the original hard-coded reshape(..., (2, 1)) so it only worked
    for D == 2; this version works for any dimensionality.
    """
    N, D = X.shape
    K = gama_mat.shape[1]
    # Effective number of points assigned to each component.
    Nk = gama_mat.sum(axis=0)
    # Weighted means: (K, N) @ (N, D) -> (K, D), each row divided by Nk.
    mu = (gama_mat.T @ X) / Nk[:, None]
    sigma = np.zeros((K, D, D))
    for k in range(K):
        diff = X - mu[k]                      # (N, D) deviations from mean k
        # Responsibility-weighted sum of outer products, normalized by Nk.
        sigma[k] = (gama_mat[:, k, None] * diff).T @ diff / Nk[k]
    prior = Nk / N
    return mu, sigma, prior
def logLike(X, prior, Mu, Sigma):
    """Return the total log-likelihood of X under the mixture model.

    Args:
        X: (N, D) data matrix.
        prior: (K,) mixture weights.
        Mu: (K, D) component means.
        Sigma: (K, D, D) component covariances.

    Returns:
        sum_i log( sum_k prior[k] * N(x_i; Mu[k], Sigma[k]) ).
    """
    num_points = np.shape(X)[0]
    num_components = len(prior)
    # P[i, k] holds the density of point i under component k.
    P = np.zeros([num_points, num_components])
    for i in range(num_points):
        for k in range(num_components):
            P[i, k] = multivariate_normal.pdf(X[i], Mu[k, :], Sigma[k, :, :])
    # Weight each column by its prior, sum over components, take logs.
    return np.sum(np.log(P @ prior))
def main():
    """Fit a K=4 Gaussian mixture to TrainingData_GMM.csv with EM.

    Runs E/M iterations until the log-likelihood improves by less than
    0.01 or 150 iterations elapse, then pickles [prior, mu, sigma,
    ll_evol] to results.pkl.
    """
    # Reading the data file.
    X = genfromtxt('TrainingData_GMM.csv', delimiter=',')
    print('training data shape:', X.shape)
    N, D = X.shape
    K = 4

    # Initialization: fixed means spread around the unit square,
    # identity covariances, uniform mixing weights.
    mu = np.zeros((K, D))
    sigma = np.zeros((K, D, D))
    mu[0] = [1, 0]
    mu[1] = [0, 1]
    mu[2] = [0, 0]
    mu[3] = [1, 1]
    for k in range(K):
        # np.eye(D) instead of a hard-coded 2x2 literal so the
        # initialization matches whatever dimensionality the data has.
        sigma[k] = np.eye(D)
    prior = np.ones(K) / K

    iteration = 0  # renamed from `iter`, which shadowed the builtin
    prevll = -999999
    ll_evol = []
    print('initialization of the params:')
    print('mu:\n', mu)
    print('sigma:\n', sigma)
    print('prior:', prior)

    # Iterate with E-step and M-step until convergence or the cap.
    while True:
        W = Estep(X, prior, mu, sigma)
        mu, sigma, prior = Mstep(X, W)
        ll_train = logLike(X, prior, mu, sigma)
        print('iter:', iteration, 'log likelihood:', ll_train)
        ll_evol.append(ll_train)
        iteration = iteration + 1
        if iteration > 150 or abs(ll_train - prevll) < 0.01:
            break
        # (removed a dead no-op expression `abs(ll_train - prevll)`
        # that sat here in the original)
        prevll = ll_train

    import pickle
    with open('results.pkl', 'wb') as f:
        pickle.dump([prior, mu, sigma, ll_evol], f)


if __name__ == '__main__':
    main()