-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathSoftMax.hpp
More file actions
109 lines (88 loc) · 3.5 KB
/
SoftMax.hpp
File metadata and controls
109 lines (88 loc) · 3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#pragma once
#include "Matrix.hpp"
#include "Optimizer.hpp"
class SoftMax{
public:
SoftMax(){};
SoftMax(const int inputDim, const int classNum):
weight(MatD::Zero(inputDim, classNum)), bias(VecD::Zero(classNum))
{}
SoftMax(const int inputDim, const int classNum, const int exception_, const Real gamma_, const Real mPlus_, const Real mMinus_):
weight(MatD::Zero(inputDim, classNum)), bias(VecD::Zero(classNum)), exception(exception_), gamma(gamma_), mPlus(mPlus_), mMinus(mMinus_)
{}
class Grad;
MatD weight; VecD bias;
void calcDist(const VecD& input, VecD& output);
Real calcLoss(const VecD& output, const int label);
Real calcLoss(const VecD& output, const VecD& goldOutput);
void backward(const VecD& input, const VecD& output, const int label, VecD& deltaFeature, SoftMax::Grad& grad);
void backward(const VecD& input, const VecD& output, const VecD& goldOutput, VecD& deltaFeature, SoftMax::Grad& grad);
void backwardAttention(const VecD& input, const VecD& output, const VecD& deltaOut, VecD& deltaFeature, SoftMax::Grad& grad);
void sgd(const SoftMax::Grad& grad, const Real learningRate);
void save(std::ofstream& ofs);
void load(std::ifstream& ifs);
void operator += (const SoftMax& softmax);
void operator /= (const Real val);
//for ranking loss
int exception;
Real gamma, mPlus, mMinus;
void calcScore(const VecD& input, VecD& output);
Real calcRankingLoss(const VecD& output, const int label);
void backwardRankingLoss(const VecD& input, const VecD output, const int label, VecD& deltaFeature, SoftMax::Grad& grad);
};
class SoftMax::Grad{
public:
  Grad(): gradHist(0){}
  // Gradient buffer shaped like `softmax`'s parameters, zero-initialized.
  Grad(const SoftMax& softmax):
    gradHist(0)
  {
    this->weight = MatD::Zero(softmax.weight.rows(), softmax.weight.cols());
    this->bias = VecD::Zero(softmax.bias.rows());
  }
  // FIX (rule of three): `gradHist` is an owning raw pointer, lazily
  // allocated by adagrad()/momentum(), and was never deleted (leak).
  // The implicit copy operations also aliased it, which would double-free
  // once a destructor existed. Copies now deep-copy the history.
  // NOTE(review): if any caller relied on copies *sharing* optimizer
  // history, that behavior changes -- verify call sites.
  ~Grad(){
    delete this->gradHist;
  }
  Grad(const SoftMax::Grad& grad):
    gradHist(grad.gradHist == 0 ? 0 : new SoftMax::Grad(*grad.gradHist)),
    weight(grad.weight), bias(grad.bias)
  {}
  SoftMax::Grad& operator=(const SoftMax::Grad& grad){
    if (this != &grad){
      // allocate first so a failed allocation leaves *this intact
      SoftMax::Grad* hist = grad.gradHist == 0 ? 0 : new SoftMax::Grad(*grad.gradHist);
      delete this->gradHist;
      this->gradHist = hist;
      this->weight = grad.weight;
      this->bias = grad.bias;
    }
    return *this;
  }

  SoftMax::Grad* gradHist; // owned; squared-gradient / velocity history, 0 until first adagrad()/momentum() call
  MatD weight; VecD bias;  // accumulated gradients, same shapes as the model parameters

  // Reset accumulated gradients to zero (history is kept).
  void init(){
    this->weight.setZero();
    this->bias.setZero();
  }
  // Squared L2 norm of the accumulated gradients.
  Real norm(){
    return this->weight.squaredNorm()+this->bias.squaredNorm();
  }
  // L2 regularization: adds lambda * parameters to the gradient.
  // NOTE(review): the single-target overload skips the bias term while the
  // two-target overload includes it -- confirm this asymmetry is intended.
  void l2reg(const Real lambda, const SoftMax& s){
    this->weight += lambda*s.weight;
  }
  void l2reg(const Real lambda, const SoftMax& s, const SoftMax& target){
    this->weight += lambda*(s.weight-target.weight);
    this->bias += lambda*(s.bias-target.bias);
  }
  void sgd(const Real learningRate, SoftMax& softmax){
    Optimizer::sgd(this->weight, learningRate, softmax.weight);
    Optimizer::sgd(this->bias, learningRate, softmax.bias);
  }
  // AdaGrad update; allocates the squared-gradient history on first use.
  void adagrad(const Real learningRate, SoftMax& softmax, const Real initVal = 1.0){
    if (this->gradHist == 0){
      this->gradHist = new SoftMax::Grad(softmax);
      this->gradHist->weight.fill(initVal);
      this->gradHist->bias.fill(initVal);
    }
    Optimizer::adagrad(this->weight, learningRate, this->gradHist->weight, softmax.weight);
    Optimizer::adagrad(this->bias, learningRate, this->gradHist->bias, softmax.bias);
  }
  // Momentum update; allocates the velocity history on first use.
  void momentum(const Real learningRate, const Real m, SoftMax& softmax){
    if (this->gradHist == 0){
      this->gradHist = new SoftMax::Grad(softmax);
      this->gradHist->weight.fill(0.0);
      this->gradHist->bias.fill(0.0);
    }
    Optimizer::momentum(this->weight, learningRate, m, this->gradHist->weight, softmax.weight);
    Optimizer::momentum(this->bias, learningRate, m, this->gradHist->bias, softmax.bias);
  }
  // Accumulate another worker's gradients (history is not merged).
  void operator += (const SoftMax::Grad& grad){
    this->weight += grad.weight;
    this->bias += grad.bias;
  }
  void operator /= (const Real val){
    this->weight /= val;
    this->bias /= val;
  }
};