forked from prajjwalmehta123/Lenet5
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsubsampling.cpp
More file actions
123 lines (105 loc) · 5 KB
/
subsampling.cpp
File metadata and controls
123 lines (105 loc) · 5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#include "subsampling.h"

#include <cstddef>
#include <stdexcept>
#include <vector>
using namespace std;
// Default constructor: performs no work of its own; members keep whatever
// default initialisation their declarations give them.
subsampling::subsampling() = default;
// Configures a pooling layer for square feature maps.
//
// kernel_size       edge length of the square pooling window
// stride            step between consecutive windows
// image_size        edge length of each (square) input feature map
// num_feature_maps  number of feature maps per sample
//
// CUDA build: only gpuImplementation and output_image_size are set here.
// CPU build: the geometry is stored on the object and the pooled edge length
// is derived as (image_size - kernel_size) / stride + 1 (integer division).
subsampling::subsampling(int kernel_size, int stride, int image_size, int num_feature_maps) {
#ifdef USE_CUDA
    gpuImplementation = std::make_unique<SubsamplingGPU>(kernel_size, stride, image_size, num_feature_maps);
    output_image_size = gpuImplementation->getOutputSize();
#else
    // Input maps are square, so height and width share one pooled edge length.
    const int pooledEdge = (image_size - kernel_size) / stride + 1;

    this->num_feature_maps = num_feature_maps;
    this->image_size = image_size;
    this->stride = stride;
    this->kernel_size = kernel_size;

    inputHeight = image_size;
    inputWidth = image_size;
    pooledHeight = pooledEdge;
    pooledWidth = pooledEdge;
    output_image_size = pooledEdge;
#endif
}
// Forward pass: average-pools every feature map of every sample in the batch.
//
// inputBatch  one flat vector per sample, read as num_feature_maps maps of
//             image_size x image_size floats, row-major, stored back to back.
// returns     one flat vector per sample holding num_feature_maps pooled maps
//             laid out the same way.
// throws      std::invalid_argument (CPU build only) when a sample holds fewer
//             than num_feature_maps * image_size * image_size values.
//
// The CPU build also stores a copy of the batch in inputDataBatch (backward()
// reads the per-sample size from it) and refreshes output_image_size.
std::vector<std::vector<float>> subsampling::average_pooling(const vector<vector<float>>& inputBatch) {
#ifdef USE_CUDA
    return gpuImplementation->forward(inputBatch);
#else
    const int featureHeight = image_size;
    const int featureWidth = image_size;
    const int featureSize = featureHeight * featureWidth;  // elements in one input map
    const int pooled_ht = (featureHeight - kernel_size) / stride + 1;
    const int pooled_wdth = (featureWidth - kernel_size) / stride + 1;
    const int pooledFeatureSize = pooled_ht * pooled_wdth;  // elements in one pooled map
    const int totalOutputSize = num_feature_maps * pooledFeatureSize;

    // Validate before entering the parallel region: an exception must not
    // escape an OpenMP worksharing loop, and a short sample would otherwise be
    // read out of bounds.
    const std::size_t mapCount = num_feature_maps > 0 ? static_cast<std::size_t>(num_feature_maps) : 0;
    const std::size_t requiredInputSize = mapCount * static_cast<std::size_t>(featureSize);
    for (const std::vector<float>& sample : inputBatch) {
        if (sample.size() < requiredInputSize) {
            throw std::invalid_argument(
                "subsampling::average_pooling: input sample is smaller than "
                "num_feature_maps * image_size * image_size");
        }
    }

    inputDataBatch = inputBatch;
    output_image_size = pooled_ht;

    // A signed bound keeps the loop comparison free of signed/unsigned mixing.
    const int batchCount = static_cast<int>(inputBatch.size());
    const float windowArea = static_cast<float>(kernel_size * kernel_size);
    std::vector<std::vector<float>> output(inputBatch.size(), std::vector<float>(totalOutputSize, 0.0f));

    // Each iteration touches only its own output[image_idx], so samples can be
    // processed independently.
    #pragma omp parallel for
    for (int image_idx = 0; image_idx < batchCount; ++image_idx) {
        const std::vector<float>& image = inputBatch[image_idx];
        std::vector<float>& pooled = output[image_idx];

        for (int feature = 0; feature < num_feature_maps; ++feature) {
            // Index the flat buffers in place instead of copying each map out.
            const int inputBase = feature * featureSize;
            const int pooledBase = feature * pooledFeatureSize;

            for (int i = 0; i < pooled_ht; ++i) {
                for (int j = 0; j < pooled_wdth; ++j) {
                    float sum = 0.0f;
                    for (int m = 0; m < kernel_size; ++m) {
                        const int rowBase = inputBase + (i * stride + m) * featureWidth + j * stride;
                        for (int n = 0; n < kernel_size; ++n) {
                            sum += image[rowBase + n];
                        }
                    }
                    pooled[pooledBase + i * pooled_wdth + j] = sum / windowArea;
                }
            }
        }
    }
    return output;
#endif
}
// Backward pass of average pooling: spreads each pooled-output gradient evenly
// over the kernel_size x kernel_size input window that produced it.
//
// gradOutputBatch  one flat vector per sample, read as num_feature_maps maps of
//                  pooledHeight x pooledWidth gradients, row-major, back to back.
// returns          one flat gradient vector per sample, laid out as
//                  num_feature_maps maps of inputHeight x inputWidth values.
// throws           std::invalid_argument (CPU build only) when a gradient
//                  sample holds fewer than
//                  num_feature_maps * pooledHeight * pooledWidth values.
std::vector<std::vector<float>> subsampling::backward(const std::vector<std::vector<float>>& gradOutputBatch) {
#ifdef USE_CUDA
    return gpuImplementation->backward(gradOutputBatch);
#else
    const std::size_t batchSize = gradOutputBatch.size();
    const int inputMapSize = inputHeight * inputWidth;
    const int pooledMapSize = pooledHeight * pooledWidth;
    const std::size_t mapCount = num_feature_maps > 0 ? static_cast<std::size_t>(num_feature_maps) : 0;

    // Size the result from the layer geometry so this works even when no
    // forward pass has filled inputDataBatch; keep the cached sample size when
    // it is larger so the returned shape still matches what forward received.
    std::size_t totalInputSize = mapCount * static_cast<std::size_t>(inputMapSize);
    if (!inputDataBatch.empty() && inputDataBatch.front().size() > totalInputSize) {
        totalInputSize = inputDataBatch.front().size();
    }

    // A short gradient sample would otherwise be read out of bounds below.
    const std::size_t requiredGradSize = mapCount * static_cast<std::size_t>(pooledMapSize);
    for (const std::vector<float>& gradSample : gradOutputBatch) {
        if (gradSample.size() < requiredGradSize) {
            throw std::invalid_argument(
                "subsampling::backward: gradient sample is smaller than "
                "num_feature_maps * pooledHeight * pooledWidth");
        }
    }

    const float windowArea = static_cast<float>(kernel_size * kernel_size);
    std::vector<std::vector<float>> gradInputBatch(batchSize, std::vector<float>(totalInputSize, 0.0f));

    for (std::size_t image_idx = 0; image_idx < batchSize; ++image_idx) {
        const std::vector<float>& gradOutputFlat = gradOutputBatch[image_idx];
        std::vector<float>& gradInputFlat = gradInputBatch[image_idx];

        for (int feature = 0; feature < num_feature_maps; ++feature) {
            const int featureInputStartIdx = feature * inputMapSize;
            const int featureOutputStartIdx = feature * pooledMapSize;

            for (int ph = 0; ph < pooledHeight; ++ph) {
                for (int pw = 0; pw < pooledWidth; ++pw) {
                    // Every input in the window contributed 1/area of the mean,
                    // so each receives the same share of the gradient.
                    const float gradInputValue =
                        gradOutputFlat[featureOutputStartIdx + ph * pooledWidth + pw] / windowArea;

                    for (int kh = 0; kh < kernel_size; ++kh) {
                        const int rowStart =
                            featureInputStartIdx + (ph * stride + kh) * inputWidth + pw * stride;
                        for (int kw = 0; kw < kernel_size; ++kw) {
                            // Accumulate: windows overlap when stride < kernel_size.
                            gradInputFlat[rowStart + kw] += gradInputValue;
                        }
                    }
                }
            }
        }
    }
    return gradInputBatch;
#endif
}