Lenet5/subsampling.cpp at main · aakritipp/Lenet5 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#include "subsampling.h"

using namespace std;
// Default constructor: assigns nothing explicitly. Any in-class member
// initializers declared in the header (not visible here) still apply.
subsampling::subsampling() = default;

/// Builds a pooling layer for square feature maps.
///
/// @param kernel_size       Side length of the square pooling window.
/// @param stride            Step between consecutive pooling windows.
/// @param image_size        Side length of each square input feature map.
/// @param num_feature_maps  Number of feature maps stored per image.
///
/// With USE_CUDA defined, the arguments are forwarded to SubsamplingGPU and
/// only output_image_size is recorded on this object. Otherwise the layer
/// geometry is cached in members for the CPU forward/backward passes.
subsampling::subsampling(int kernel_size, int stride, int image_size, int num_feature_maps) {
#ifdef USE_CUDA
    gpuImplementation = std::make_unique<SubsamplingGPU>(kernel_size, stride, image_size, num_feature_maps);
    output_image_size = gpuImplementation->getOutputSize();
#else
    // Number of window positions along one side when no padding is applied.
    // Input maps are square, so height and width share this value.
    const int pooled_side = (image_size - kernel_size) / stride + 1;

    // Layer hyper-parameters.
    this->kernel_size = kernel_size;
    this->stride = stride;
    this->num_feature_maps = num_feature_maps;

    // Input geometry.
    this->image_size = image_size;
    this->inputHeight = image_size;
    this->inputWidth = image_size;

    // Output geometry.
    this->pooledHeight = pooled_side;
    this->pooledWidth = pooled_side;
    this->output_image_size = pooled_side;
#endif
}
std::vector<std::vector<float>> subsampling::average_pooling(const vector<vector<float>>& inputBatch) {
    #ifdef USE_CUDA
    return gpuImplementation->forward(inputBatch);
    #else
    inputDataBatch = inputBatch;
    size_t batch_size = inputBatch.size();

    int featureSize = (image_size) * (image_size); // Size of one feature map
    int featureHeight = image_size;
    int featureWidth = image_size;

    int pooled_ht = (featureHeight - kernel_size) / stride + 1;
    int pooled_wdth = (featureWidth - kernel_size) / stride + 1;
    int pooledFeatureSize = pooled_ht * pooled_wdth;
    output_image_size = pooled_ht;
    int totalOutputSize = num_feature_maps * pooledFeatureSize;

    std::vector<std::vector<float>> output(batch_size, std::vector<float>(totalOutputSize, 0.0f));

    #pragma omp parallel for
    for (int image_idx = 0; image_idx < batch_size; ++image_idx) {
        const std::vector<float>& image = inputBatch[image_idx];
        std::vector<float> pooled_image(totalOutputSize, 0.0f);

        // Loop over each feature map
        for (int feature = 0; feature < num_feature_maps; ++feature) {
            int featureStartIndex = feature * featureSize;
            std::vector<float> featureMap(image.begin() + featureStartIndex, image.begin() + featureStartIndex + featureSize);

            // Perform average pooling on the feature map
            for (int i = 0; i < pooled_ht; ++i) {
                for (int j = 0; j < pooled_wdth; ++j) {
                    float sum = 0.0f;
                    for (int m = 0; m < kernel_size; ++m) {
                        for (int n = 0; n < kernel_size; ++n) {
                            int rowIndex = i * stride + m;
                            int colIndex = j * stride + n;
                            int index = rowIndex * featureWidth + colIndex;
                            sum += featureMap[index];
                        }
                    }
                    int pooledIndex = feature * pooledFeatureSize + i * pooled_wdth + j;
                    pooled_image[pooledIndex] = sum / (kernel_size * kernel_size);
                }
            }
        }
        output[image_idx] = pooled_image;
    }
    return output;
    #endif
}


std::vector<std::vector<float>> subsampling::backward(const std::vector<std::vector<float>>& gradOutputBatch) {
    size_t batchSize = gradOutputBatch.size();
    size_t totalInputSize = inputDataBatch[0].size();
    #ifdef USE_CUDA
    return gpuImplementation->backward(gradOutputBatch);
    #else
    // Initialize gradInputBatch with zeros
    std::vector<std::vector<float>> gradInputBatch(batchSize, std::vector<float>(totalInputSize, 0.0f));

    // Perform backpropagation
    for (size_t image_idx = 0; image_idx < batchSize; ++image_idx) {
        const std::vector<float>& gradOutputFlat = gradOutputBatch[image_idx];
        std::vector<float>& gradInputFlat = gradInputBatch[image_idx];

        // Loop over each feature map
        for (int feature = 0; feature < num_feature_maps; ++feature) {
            int featureInputStartIdx = feature * inputHeight * inputWidth;
            int featureOutputStartIdx = feature * pooledHeight * pooledWidth;

            // Loop over pooled feature map dimensions
            for (int ph = 0; ph < pooledHeight; ++ph) {
                for (int pw = 0; pw < pooledWidth; ++pw) {
                    int outputIdx = featureOutputStartIdx + ph * pooledWidth + pw;
                    float gradOutputValue = gradOutputFlat[outputIdx];

                    // Distribute gradient equally to each input in the pooling window
                    float gradInputValue = gradOutputValue / (kernel_size * kernel_size);

                    // Loop over the pooling window
                    for (int kh = 0; kh < kernel_size; ++kh) {
                        for (int kw = 0; kw < kernel_size; ++kw) {
                            int h_in = ph * stride + kh;
                            int w_in = pw * stride + kw;
                            int inputIdx = featureInputStartIdx + h_in * inputWidth + w_in;

                            // Accumulate gradients
                            gradInputFlat[inputIdx] += gradInputValue;
                        }
                    }
                }
            }
        }
    }
    return gradInputBatch;
    #endif
}