-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathlogistic_regression.py
More file actions
204 lines (166 loc) · 6.33 KB
/
logistic_regression.py
File metadata and controls
204 lines (166 loc) · 6.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import functools
from typing import Callable, Tuple
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import auto_diff as ad
def logistic_regression(X: ad.Node, W: ad.Node, b: ad.Node) -> ad.Node:
"""Construct the computational graph of a logistic regression model.
Parameters
----------
X: ad.Node
A node in shape (batch_size, in_features), denoting the input data.
W: ad.Node
A node in shape (in_features, num_classes), denoting the the weight
in logistic regression.
b: ad.Node
A node in shape (num_classes,), denoting the bias term in
logistic regression.
Returns
-------
logits: ad.Node
The logits predicted for the batch of input.
When evaluating, it should have shape (batch_size, num_classes).
"""
"""TODO: Your code here"""
def softmax_loss(Z: ad.Node, y_one_hot: ad.Node, batch_size: int) -> ad.Node:
"""Construct the computational graph of average softmax loss over
a batch of logits.
Parameters
----------
Z: ad.Node
A node in of shape (batch_size, num_classes), containing the
logits for the batch of instances.
y_one_hot: ad.Node
A node in of shape (batch_size, num_classes), containing the
one-hot encoding of the ground truth label for the batch of instances.
batch_size: int
The size of the mini-batch.
Returns
-------
loss: ad.Node
Average softmax loss over the batch.
When evaluating, it should be a zero-rank array (i.e., shape is `()`).
Note
----
1. In this assignment, you do not have to implement a numerically
stable version of softmax loss.
2. You may find that in other machine learning frameworks, the
softmax loss function usually does not take the batch size as input.
Try to think about why our softmax loss may need the batch size.
"""
"""TODO: Your code here"""
def sgd_epoch(
f_run_model: Callable[
[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
],
X: np.ndarray,
y: np.ndarray,
W: np.ndarray,
b: np.ndarray,
batch_size: int,
lr: float,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Run an epoch of SGD for the logistic regression model
on training data with regard to the given mini-batch size
and learning rate.
Parameters
----------
f_run_model: Callable[
[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
]
The function to run the forward and backward computation
at the same time for logistic regression model.
It takes the training data, training label, model weight
and bias as inputs, and returns the logits, loss value,
weight gradient and bias gradient in order.
Please check `f_run_model` in the `train_model` function below.
X: np.ndarray
The training data in shape (num_examples, in_features).
y: np.ndarray
The training labels in shape (num_examples,).
W: np.ndarray
The weight of the logistic regression model.
b: np.ndarray
The bias of the logistic regression model.
batch_size: int
The mini-batch size.
lr: float
The learning rate.
Returns
-------
W_updated: np.ndarray
The model weight after update in this epoch.
b_updated: np.ndarray
The model weight after update in this epoch.
loss: np.ndarray
The average training loss of this epoch.
"""
"""TODO: Your code here"""
def train_model():
"""Train a logistic regression model with handwritten digit dataset.
Note
----
Your implementation should NOT make changes to this function.
"""
# - Set up the training settings.
num_epochs = 100
batch_size = 50
lr = 0.05
# - Define the forward graph.
x = ad.Variable(name="x")
W = ad.Variable(name="W")
b = ad.Variable(name="b")
y_predict = logistic_regression(x, W, b)
# - Construct the backward graph.
y_groundtruth = ad.Variable(name="y")
loss = softmax_loss(y_predict, y_groundtruth, batch_size)
grad_W, grad_b = ad.gradients(loss, nodes=[W, b])
# - Create the evaluator.
evaluator = ad.Evaluator([y_predict, loss, grad_W, grad_b])
test_evaluator = ad.Evaluator([y_predict])
# - Load the dataset.
# Take 80% of data for training, and 20% for testing.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
digits.data, digits.target, test_size=0.2, random_state=0
)
num_classes = 10
in_features = functools.reduce(lambda x1, x2: x1 * x2, digits.images[0].shape, 1)
# - Initialize model weights.
np.random.seed(0)
stdv = 1.0 / np.sqrt(num_classes)
W_val = np.random.uniform(-stdv, stdv, (in_features, num_classes))
b_val = np.random.uniform(-stdv, stdv, (num_classes,))
def f_run_model(X_val, y_val, W_val, b_val):
"""The function to compute the forward and backward graph.
It returns the logits, loss, and gradients for model weights.
"""
z_val, loss_val, grad_W_val, grad_b_val = evaluator.run(
input_values={x: X_val, y_groundtruth: y_val, W: W_val, b: b_val}
)
return z_val, loss_val, grad_W_val, grad_b_val
def f_eval_model(X_val, W_val, b_val):
"""The function to compute the forward graph only and returns the prediction."""
logits = test_evaluator.run({x: X_val, W: W_val, b: b_val})
return np.argmax(logits[0], axis=1)
# - Train the model.
for epoch in range(num_epochs):
X_train, y_train = shuffle(X_train, y_train)
W_val, b_val, loss_val = sgd_epoch(
f_run_model, X_train, y_train, W_val, b_val, batch_size, lr
)
# - Evaluate the model on the test data.
predict_label = f_eval_model(X_test, W_val, b_val)
print(
f"Epoch {epoch}: test accuracy = {np.mean(predict_label == y_test)}, "
f"loss = {loss_val}"
)
# Return the final test accuracy.
predict_label = f_eval_model(X_test, W_val, b_val)
return np.mean(predict_label == y_test)
if __name__ == "__main__":
print(f"Final test accuracy: {train_model()}")