From fd05441c718f2f6716517547fd0d889b9cf79b0a Mon Sep 17 00:00:00 2001 From: 0xSooki <0xsooki@gmail.com> Date: Thu, 5 Jun 2025 11:05:08 +0200 Subject: [PATCH 1/5] add OpenMP Parallel Implementation for Belief Propagation Decoder --- cpp_test/TestBPDecoder.cpp | 168 ++++++------ src_cpp/bp.hpp | 281 ++++++++++----------- src_python/ldpc/bp_decoder/__init__.pyi | 20 ++ src_python/ldpc/bp_decoder/_bp_decoder.pxd | 1 + src_python/ldpc/bp_decoder/_bp_decoder.pyx | 66 ++++- 5 files changed, 304 insertions(+), 232 deletions(-) diff --git a/cpp_test/TestBPDecoder.cpp b/cpp_test/TestBPDecoder.cpp index 13ce6f49..ffd0dfbb 100644 --- a/cpp_test/TestBPDecoder.cpp +++ b/cpp_test/TestBPDecoder.cpp @@ -4,6 +4,7 @@ #include "gf2codes.hpp" using namespace std; +using namespace std::chrono; TEST(BpEntry, init) { auto e = ldpc::bp::BpEntry(); @@ -15,7 +16,6 @@ TEST(BpEntry, init) { } TEST(BpSparse, init) { - int n = 3; auto pcm = ldpc::bp::BpSparse(n - 1, n); for (int i = 0; i < (n - 1); i++) { @@ -74,8 +74,7 @@ TEST(BpDecoderTest, InitializationWithOptionalParametersTest) { int random_schedule = 0; // Initialize decoder using input arguments and optional parameters - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, bp_method, bp_schedule, - min_sum_scaling_factor, omp_threads, serial_schedule, random_schedule); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, bp_method, bp_schedule, min_sum_scaling_factor, omp_threads, serial_schedule, random_schedule); // Check if member variables are set correctly EXPECT_TRUE(pcm == decoder.pcm); @@ -112,15 +111,13 @@ TEST(BpDecoderTest, InitialiseLogDomainBpTest) { for (int i = 0; i < decoder.bit_count; i++) { EXPECT_EQ(log((1 - channel_probabilities[i]) / channel_probabilities[i]), decoder.initial_log_prob_ratios[i]); - for (auto &e: decoder.pcm.iterate_column(i)) { + for (auto& e : decoder.pcm.iterate_column(i)) { EXPECT_EQ(decoder.initial_log_prob_ratios[i], e.bit_to_check_msg); } } } - TEST(BpDecoder, product_sum_parallel) { - int n = 3; auto pcm = ldpc::bp::BpSparse(n - 1, n); for (int i = 0; i < (n - 1); i++) { @@ -131,8 +128,7 @@ TEST(BpDecoder, product_sum_parallel) { auto channel_probabilities = vector(pcm.n, 0.1); // Initialize decoder using input arguments - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::PRODUCT_SUM, - ldpc::bp::PARALLEL, 79879879); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::PRODUCT_SUM, ldpc::bp::PARALLEL, 79879879); // Check if member variables are set correctly EXPECT_TRUE(pcm == decoder.pcm); @@ -155,12 +151,11 @@ TEST(BpDecoder, product_sum_parallel) { {0, 1, 0}}; auto count = 0; - for (auto syndrome: syndromes) { + for (auto syndrome : syndromes) { auto decoding = decoder.decode(syndrome); ASSERT_EQ(expected_decoding[count], decoding); count++; } - } TEST(BpDecoder, ProdSumParallel_RepetitionCode5) { @@ -174,8 +169,7 @@ TEST(BpDecoder, ProdSumParallel_RepetitionCode5) { auto channel_probabilities = vector(pcm.n, 0.1); // Initialize decoder using input arguments - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::PRODUCT_SUM, - ldpc::bp::PARALLEL, 4324234); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::PRODUCT_SUM, ldpc::bp::PARALLEL, 4324234); auto syndromes = vector>{{0, 0, 0, 0}, {0, 0, 0, 1}, @@ -189,14 +183,13 @@ TEST(BpDecoder, ProdSumParallel_RepetitionCode5) { {0, 1, 0, 1, 0}}; auto count = 0; - for (auto syndrome: syndromes) { + for (auto syndrome : syndromes) { auto decoding = decoder.decode(syndrome); ASSERT_EQ(expected_decoding[count], decoding); count++; } } - TEST(BpDecoder, MinSum_RepetitionCode5) { int n = 5; auto pcm = ldpc::bp::BpSparse(n - 1, n); @@ -208,8 +201,7 @@ TEST(BpDecoder, MinSum_RepetitionCode5) { auto channel_probabilities = vector(pcm.n, 0.1); // Initialize decoder using input arguments - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, - ldpc::bp::PARALLEL, 1); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, 1); auto syndromes = vector>{{0, 0, 0, 0}, {0, 0, 0, 1}, @@ -223,14 +215,13 @@ TEST(BpDecoder, MinSum_RepetitionCode5) { {0, 1, 0, 1, 0}}; auto count = 0; - for (auto syndrome: syndromes) { + for (auto syndrome : syndromes) { auto decoding = decoder.decode(syndrome); ASSERT_EQ(expected_decoding[count], decoding); count++; } } - TEST(BpDecoder, ProdSumSerial_RepetitionCode5) { int n = 5; auto pcm = ldpc::bp::BpSparse(n - 1, n); @@ -242,8 +233,7 @@ TEST(BpDecoder, ProdSumSerial_RepetitionCode5) { auto channel_probabilities = vector(pcm.n, 0.1); // Initialize decoder using input arguments - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::PRODUCT_SUM, - ldpc::bp::SERIAL, 4324234); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::PRODUCT_SUM, ldpc::bp::SERIAL, 4324234); auto syndromes = vector>{{0, 0, 0, 0}, {0, 0, 0, 1}, @@ -257,14 +247,13 @@ TEST(BpDecoder, ProdSumSerial_RepetitionCode5) { {0, 1, 0, 1, 0}}; auto count = 0; - for (auto syndrome: syndromes) { + for (auto syndrome : syndromes) { auto decoding = decoder.decode(syndrome); ASSERT_EQ(expected_decoding[count], decoding); count++; } } - TEST(BpDecoder, MinSum_Serial_RepetitionCode5) { int n = 5; auto pcm = ldpc::bp::BpSparse(n - 1, n); @@ -276,8 +265,7 @@ TEST(BpDecoder, MinSum_Serial_RepetitionCode5) { auto channel_probabilities = vector(pcm.n, 0.1); // Initialize decoder using input arguments - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, - ldpc::bp::SERIAL, 1); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 1); auto syndromes = vector>{{0, 0, 0, 0}, {0, 0, 0, 1}, @@ -291,7 +279,7 @@ TEST(BpDecoder, MinSum_Serial_RepetitionCode5) { {0, 1, 0, 1, 0}}; auto count = 0; - for (auto syndrome: syndromes) { + for (auto syndrome : syndromes) { auto decoding = decoder.decode(syndrome); ASSERT_EQ(expected_decoding[count], decoding); count++; @@ -299,7 +287,6 @@ TEST(BpDecoder, MinSum_Serial_RepetitionCode5) { } TEST(BpDecoder, min_sum_parallel) { - int n = 3; auto pcm = ldpc::bp::BpSparse(n - 1, n); for (int i = 0; i < (n - 1); i++) { @@ -310,8 +297,7 @@ TEST(BpDecoder, min_sum_parallel) { auto channel_probabilities = vector(pcm.n, 0.1); // Initialize decoder using input arguments - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, - ldpc::bp::PARALLEL, 0.625); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, 0.625); // Check if member variables are set correctly EXPECT_TRUE(pcm == decoder.pcm); @@ -334,17 +320,15 @@ TEST(BpDecoder, min_sum_parallel) { {0, 1, 0}}; auto count = 0; - for (auto syndrome: syndromes) { + for (auto syndrome : syndromes) { auto decoding = decoder.decode(syndrome); ldpc::sparse_matrix_util::print_vector(decoder.log_prob_ratios); ASSERT_EQ(expected_decoding[count], decoding); count++; } - } TEST(BpDecoder, min_sum_single_scan) { - int n = 3; auto pcm = ldpc::bp::BpSparse(n - 1, n); for (int i = 0; i < (n - 1); i++) { @@ -355,8 +339,7 @@ TEST(BpDecoder, min_sum_single_scan) { auto channel_probabilities = vector(pcm.n, 0.1); // Initialize decoder using input arguments - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, - ldpc::bp::PARALLEL, 0.625); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, 0.625); // Check if member variables are set correctly EXPECT_TRUE(pcm == decoder.pcm); @@ -379,13 +362,12 @@ TEST(BpDecoder, min_sum_single_scan) { {0, 1, 0}}; auto count = 0; - for (auto syndrome: syndromes) { + for (auto syndrome : syndromes) { auto decoding = decoder.bp_decode_single_scan(syndrome); ldpc::sparse_matrix_util::print_vector(decoder.log_prob_ratios); ASSERT_EQ(expected_decoding[count], decoding); count++; } - } TEST(BpDecoder, min_sum_relative_schedule) { @@ -399,8 +381,7 @@ TEST(BpDecoder, min_sum_relative_schedule) { auto channel_probabilities = vector(pcm.n, 0.1); // Initialize decoder using input arguments - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, - ldpc::bp::SERIAL_RELATIVE, 0.625); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL_RELATIVE, 0.625); // Check if member variables are set correctly EXPECT_TRUE(pcm == decoder.pcm); @@ -422,7 +403,7 @@ TEST(BpDecoder, min_sum_relative_schedule) { {1, 0, 0}, {0, 1, 0}}; auto count = 0; - for (auto syndrome: syndromes) { + for (auto syndrome : syndromes) { auto decoding = decoder.bp_decode_serial(syndrome); ldpc::sparse_matrix_util::print_vector(decoder.log_prob_ratios); ASSERT_EQ(expected_decoding[count], decoding); @@ -433,79 +414,59 @@ TEST(BpDecoder, min_sum_relative_schedule) { TEST(BpDecoder, random_schedule_seed) { { auto pcm = ldpc::gf2codes::rep_code(4); - auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.1, 0.2, 0.3, 0.4}, - 100, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, - 0.625, 1, vector{2, 3, 1, 0}, - 1234); + auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.1, 0.2, 0.3, 0.4}, 100, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 0.625, 1, vector{2, 3, 1, 0}, 1234); auto expected_order = vector{2, 3, 1, 0}; ASSERT_EQ(bpd.random_schedule_seed, -1); ASSERT_EQ(expected_order, bpd.serial_schedule_order); - } { auto pcm = ldpc::gf2codes::rep_code(4); - auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.1, 0.2, 0.3, 0.4}, 100, ldpc::bp::MINIMUM_SUM, - ldpc::bp::SERIAL, 0.625, 1, ldpc::bp::NULL_INT_VECTOR, 0); + auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.1, 0.2, 0.3, 0.4}, 100, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 0.625, 1, ldpc::bp::NULL_INT_VECTOR, 0); auto expected_order = vector{0, 1, 2, 3}; ASSERT_EQ(bpd.random_schedule_seed, 0); ASSERT_EQ(expected_order, bpd.serial_schedule_order); - } { auto pcm = ldpc::gf2codes::rep_code(4); - auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.1, 0.2, 0.3, 0.4}, - 100, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, - 0.625, 1, ldpc::bp::NULL_INT_VECTOR, 4); + auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.1, 0.2, 0.3, 0.4}, 100, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 0.625, 1, ldpc::bp::NULL_INT_VECTOR, 4); ASSERT_EQ(bpd.random_schedule_seed, 4); } - } TEST(BpDecoder, relative_serial_schedule_order) { - { // should order correctly + { // should order correctly auto pcm = ldpc::gf2codes::rep_code(3); - auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.2, 0.3, 0.1}, - 1, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL_RELATIVE, - 0.625, 1, ldpc::bp::NULL_INT_VECTOR, - -1); // todo what should be default here? + auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.2, 0.3, 0.1}, 1, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL_RELATIVE, 0.625, 1, ldpc::bp::NULL_INT_VECTOR, + -1); // todo what should be default here? auto dummy_syndr = std::vector{0, 0}; bpd.decode(dummy_syndr); auto expected_order = vector{2, 0, 1}; ASSERT_EQ(bpd.random_schedule_seed, -1); ASSERT_EQ(expected_order, bpd.serial_schedule_order); - } - {// should overwrite initial schedule + { // should overwrite initial schedule auto pcm = ldpc::gf2codes::rep_code(3); - auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.3, 0.2, 0.1}, 1, - ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL_RELATIVE, - 0.625, 1, vector{0, 1, 2}, - -1); + auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.3, 0.2, 0.1}, 1, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL_RELATIVE, 0.625, 1, vector{0, 1, 2}, -1); auto dummy_syndr = std::vector{0, 0}; bpd.decode(dummy_syndr); auto expected_order = vector{2, 1, 0}; ASSERT_EQ(bpd.random_schedule_seed, -1); ASSERT_EQ(expected_order, bpd.serial_schedule_order); - } - {// should order according to LLRs + { // should order according to LLRs auto pcm = ldpc::gf2codes::rep_code(3); - auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.1, 0.01, 0.01}, 2, - ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL_RELATIVE, - 0.625, 1, vector{0, 1, 2}, - -1); + auto bpd = ldpc::bp::BpDecoder(pcm, vector{0.1, 0.01, 0.01}, 2, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL_RELATIVE, 0.625, 1, vector{0, 1, 2}, -1); auto dummy_syndr = std::vector{1, 0}; bpd.decode(dummy_syndr); auto expected_order = vector{1, 2, 0}; ASSERT_EQ(bpd.random_schedule_seed, -1); ASSERT_EQ(expected_order, bpd.serial_schedule_order); - } } -//received vector decoding +// received vector decoding TEST(BpDecoder, ProdSumSerial_RepCode5) { int n = 5; auto pcm = ldpc::gf2codes::rep_code(n); @@ -513,26 +474,85 @@ TEST(BpDecoder, ProdSumSerial_RepCode5) { auto channel_probabilities = vector(pcm.n, 0.1); // Initialize decoder using input arguments - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::PRODUCT_SUM, - ldpc::bp::SERIAL, 4324234, ldpc::bp::AUTO); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, maximum_iterations, ldpc::bp::PRODUCT_SUM, ldpc::bp::SERIAL, 4324234, ldpc::bp::AUTO); auto received_vectors = vector>{{0, 0, 0, 0, 1}, {0, 1, 1, 0, 0}, {1, 0, 0, 1, 1}}; auto expected_decoding = vector>{{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, - {1, 1, 1, 1, 1}}; // todo I'm not sure I understand these cases + {1, 1, 1, 1, 1}}; // todo I'm not sure I understand these cases auto count = 0; - for (auto received_vector: received_vectors) { + for (auto received_vector : received_vectors) { auto decoding = decoder.decode(received_vector); ASSERT_EQ(expected_decoding[count], decoding); count++; } } +TEST(BpDecoderParallel, ThreadScaling) { + int n = 1200; + int m = 600; + auto pcm = create_random_ldpc(m, n, 5); + auto channel_probabilities = vector(n, 0.04); + auto syndrome = create_random_syndrome(m, 0.08); + + std::vector thread_counts = {1, 2, 4, 8}; + std::vector times; + + for (int threads : thread_counts) { + auto decoder_serial = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 0.625); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, 0.625, threads); + + auto start = high_resolution_clock::now(); + auto result = decoder.decode(syndrome); + auto end = high_resolution_clock::now(); + auto duration = duration_cast(end - start); + times.push_back(duration.count()); + std::cout << "Threads: " << threads << ", Time: " << duration.count() << " μs\n"; + + start = high_resolution_clock::now(); + auto result2 = decoder_serial.decode(syndrome); + end = high_resolution_clock::now(); + duration = duration_cast(end - start); + std::cout << "Serial Time: " << duration.count() << " μs\n"; + EXPECT_EQ(result, result2) << "Decoding results should match for serial and parallel versions"; + } +} + +ldpc::bp::BpSparse create_random_ldpc(int m, int n, int avg_row_weight = 4) { + auto pcm = ldpc::bp::BpSparse(m, n); + std::uniform_int_distribution col_dist(0, n - 1); + std::mt19937 rng; + rng.seed(42); + for (int i = 0; i < m; i++) { + std::set selected_cols; + while (selected_cols.size() < avg_row_weight) { + int col = col_dist(rng); + if (selected_cols.find(col) == selected_cols.end()) { + selected_cols.insert(col); + pcm.insert_entry(i, col); + } + } + } + return pcm; +} + +std::vector create_random_syndrome(int m, double error_prob = 0.1) { + std::mt19937 rng; + rng.seed(42); + + std::vector syndrome(m); + std::bernoulli_distribution error_dist(error_prob); + + for (int i = 0; i < m; i++) { + syndrome[i] = error_dist(rng) ? 1 : 0; + } + return syndrome; +} -int main(int argc, char **argv) { +int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } \ No newline at end of file diff --git a/src_cpp/bp.hpp b/src_cpp/bp.hpp index e5561c06..e4ab81c5 100644 --- a/src_cpp/bp.hpp +++ b/src_cpp/bp.hpp @@ -9,7 +9,7 @@ #include #include #include -#include // required for std::runtime_error +#include // required for std::runtime_error #include #include "math.h" @@ -40,7 +40,7 @@ namespace ldpc { const std::vector NULL_INT_VECTOR = {}; class BpEntry : public ldpc::sparse_matrix_base::EntryBase { - public: + public: double bit_to_check_msg = 0.0; double check_to_bit_msg = 0.0; @@ -50,8 +50,8 @@ namespace ldpc { class BpDecoder { // TODO properties should be private and only accessible via getters and setters - public: - BpSparse &pcm; + public: + BpSparse& pcm; std::vector channel_probabilities; int check_count; int bit_count; @@ -75,23 +75,19 @@ namespace ldpc { ldpc::rng::RandomListShuffle rng_list_shuffle; BpDecoder( - BpSparse &parity_check_matrix, - std::vector channel_probabilities, - int maximum_iterations = 0, - BpMethod bp_method = PRODUCT_SUM, - BpSchedule schedule = PARALLEL, - double min_sum_scaling_factor = 0.625, - int omp_threads = 1, - const std::vector &serial_schedule = NULL_INT_VECTOR, - int random_schedule_seed = -1, // TODO what should be default here? 0 is set but -1 is checked in decode method? - bool random_schedule_at_every_iteration = true, - BpInputType bp_input_type = AUTO) : - pcm(parity_check_matrix), channel_probabilities(std::move(channel_probabilities)), - check_count(pcm.m), bit_count(pcm.n), maximum_iterations(maximum_iterations), bp_method(bp_method), - schedule(schedule), ms_scaling_factor(min_sum_scaling_factor), - iterations(0) //the parity check matrix is passed in by reference + BpSparse& parity_check_matrix, + std::vector channel_probabilities, + int maximum_iterations = 0, + BpMethod bp_method = PRODUCT_SUM, + BpSchedule schedule = PARALLEL, + double min_sum_scaling_factor = 0.625, + int omp_threads = 1, + const std::vector& serial_schedule = NULL_INT_VECTOR, + int random_schedule_seed = -1, // TODO what should be default here? 0 is set but -1 is checked in decode method? + bool random_schedule_at_every_iteration = true, + BpInputType bp_input_type = AUTO) + : pcm(parity_check_matrix), channel_probabilities(std::move(channel_probabilities)), check_count(pcm.m), bit_count(pcm.n), maximum_iterations(maximum_iterations), bp_method(bp_method), schedule(schedule), ms_scaling_factor(min_sum_scaling_factor), iterations(0) // the parity check matrix is passed in by reference { - this->initial_log_prob_ratios.resize(bit_count); this->log_prob_ratios.resize(bit_count); this->candidate_syndrome.resize(check_count); @@ -102,10 +98,9 @@ namespace ldpc { this->random_schedule_at_every_iteration = random_schedule_at_every_iteration; this->bp_input_type = bp_input_type; - if (this->channel_probabilities.size() != this->bit_count) { throw std::runtime_error( - "Channel probabilities vector must have length equal to the number of bits"); + "Channel probabilities vector must have length equal to the number of bits"); } if (serial_schedule != NULL_INT_VECTOR) { this->serial_schedule_order = serial_schedule; @@ -118,9 +113,9 @@ namespace ldpc { this->rng_list_shuffle.seed(this->random_schedule_seed); } - //Initialise OMP thread pool - // this->omp_thread_count = omp_threads; - // this->set_omp_thread_count(this->omp_thread_count); + // Initialise OMP thread pool + // this->omp_thread_count = omp_threads; + // this->set_omp_thread_count(this->omp_thread_count); } ~BpDecoder() = default; @@ -135,17 +130,15 @@ namespace ldpc { // initialise BP for (int i = 0; i < this->bit_count; i++) { this->initial_log_prob_ratios[i] = std::log( - (1 - this->channel_probabilities[i]) / this->channel_probabilities[i]); + (1 - this->channel_probabilities[i]) / this->channel_probabilities[i]); - for (auto &e: this->pcm.iterate_column(i)) { + for (auto& e : this->pcm.iterate_column(i)) { e.bit_to_check_msg = this->initial_log_prob_ratios[i]; } } } - std::vector decode(std::vector &input_vector) { - - + std::vector decode(std::vector& input_vector) { if ((this->bp_input_type == AUTO && input_vector.size() == this->bit_count) || this->bp_input_type == RECEIVED_VECTOR) { auto syndrome = pcm.mulvec(input_vector); @@ -163,128 +156,128 @@ namespace ldpc { } return this->decoding; - } - if (schedule == PARALLEL) { return bp_decode_parallel(input_vector); } if (schedule == SERIAL || schedule == SERIAL_RELATIVE) { return bp_decode_serial(input_vector); - } else { throw std::runtime_error("Invalid BP schedule"); } - + } else { + throw std::runtime_error("Invalid BP schedule"); + } } - std::vector &bp_decode_parallel(std::vector &syndrome) { - + std::vector& bp_decode_parallel(std::vector& syndrome) { this->converge = 0; this->initialise_log_domain_bp(); - //main interation loop + // main interation loop for (int it = 1; it <= this->maximum_iterations; it++) { - if (this->bp_method == PRODUCT_SUM) { +#pragma omp parallel for num_threads(this->omp_thread_count) for (int i = 0; i < this->check_count; i++) { - this->candidate_syndrome[i] = 0; - double temp = 1.0; - for (auto &e: this->pcm.iterate_row(i)) { + for (auto& e : this->pcm.iterate_row(i)) { e.check_to_bit_msg = temp; temp *= std::tanh(e.bit_to_check_msg / 2); } temp = 1; - for (auto &e: this->pcm.reverse_iterate_row(i)) { + for (auto& e : this->pcm.reverse_iterate_row(i)) { e.check_to_bit_msg *= temp; - int message_sign = syndrome[i] != 0u ? -1.0 : 1.0; - e.check_to_bit_msg = - message_sign * std::log((1 + e.check_to_bit_msg) / (1 - e.check_to_bit_msg)); + int const message_sign = syndrome[i] != 0U ? -1.0 + : 1.0; + e.check_to_bit_msg = message_sign * std::log((1 + e.check_to_bit_msg) / + (1 - e.check_to_bit_msg)); temp *= std::tanh(e.bit_to_check_msg / 2); } } } else if (this->bp_method == MINIMUM_SUM) { - - double alpha; - if(this->ms_scaling_factor == 0.0) { - alpha = 1.0 - std::pow(2.0, -1.0*it); - } - else { + double alpha = NAN; + if (this->ms_scaling_factor == 0.0) { + alpha = 1.0 - std::pow(2.0, -1.0 * it); + } else { alpha = this->ms_scaling_factor; } - //check to bit updates +// check to bit updates +#pragma omp parallel for num_threads(this->omp_thread_count) for (int i = 0; i < check_count; i++) { - - this->candidate_syndrome[i] = 0; int total_sgn = 0; int sgn = 0; total_sgn = syndrome[i]; double temp = std::numeric_limits::max(); - for (auto &e: this->pcm.iterate_row(i)) { + for (auto& e : this->pcm.iterate_row(i)) { if (e.bit_to_check_msg <= 0) { total_sgn += 1; } e.check_to_bit_msg = temp; - double abs_bit_to_check_msg = std::abs(e.bit_to_check_msg); - if (abs_bit_to_check_msg < temp) { - temp = abs_bit_to_check_msg; - } + double const abs_bit_to_check_msg = + std::abs(e.bit_to_check_msg); + temp = std::min(abs_bit_to_check_msg, temp); } temp = std::numeric_limits::max(); - for (auto &e: this->pcm.reverse_iterate_row(i)) { + for (auto& e : this->pcm.reverse_iterate_row(i)) { sgn = total_sgn; if (e.bit_to_check_msg <= 0) { sgn += 1; } - if (temp < e.check_to_bit_msg) { - e.check_to_bit_msg = temp; - } + e.check_to_bit_msg = std::min(temp, + e.check_to_bit_msg); - int message_sign = (sgn % 2 == 0) ? 1.0 : -1.0; - - e.check_to_bit_msg *= message_sign * alpha; + int const message_sign = (sgn % 2 == 0) ? 1.0 : -1.0; - - double abs_bit_to_check_msg = std::abs(e.bit_to_check_msg); - if (abs_bit_to_check_msg < temp) { - temp = abs_bit_to_check_msg; - } + e.check_to_bit_msg *= message_sign * alpha; + double const abs_bit_to_check_msg = + std::abs(e.bit_to_check_msg); + temp = std::min(abs_bit_to_check_msg, temp); } - } } + std::fill(this->candidate_syndrome.begin(), + this->candidate_syndrome.end(), + 0); - //compute log probability ratios - for (int i = 0; i < this->bit_count; i++) { - double temp = initial_log_prob_ratios[i]; - for (auto &e: this->pcm.iterate_column(i)) { - e.bit_to_check_msg = temp; - temp += e.check_to_bit_msg; - // if(isnan(temp)) temp = e.bit_to_check_msg; +#pragma omp parallel num_threads(this->omp_thread_count) + { + std::vector local_syndrome(this->check_count, 0); +#pragma omp for + for (int i = 0; i < this->bit_count; i++) { + double temp = initial_log_prob_ratios[i]; + for (auto& e : this->pcm.iterate_column(i)) { + e.bit_to_check_msg = temp; + temp += e.check_to_bit_msg; + } + this->log_prob_ratios[i] = temp; + if (temp <= 0) { + this->decoding[i] = 1; + for (auto& e : this->pcm.iterate_column(i)) { + local_syndrome[e.row_index] ^= 1; + } + } else { + this->decoding[i] = 0; + } } - //make hard decision on basis of log probability ratio for bit i - this->log_prob_ratios[i] = temp; - // if(isnan(log_prob_ratios[i])) log_prob_ratios[i] = initial_log_prob_ratios[i]; - if (temp <= 0) { - this->decoding[i] = 1; - for (auto &e: this->pcm.iterate_column(i)) { - this->candidate_syndrome[e.row_index] ^= 1; +#pragma omp critical + { + for (int i = 0; i < this->check_count; i++) { + this->candidate_syndrome[i] ^= local_syndrome[i]; } - } else { - this->decoding[i] = 0; } } - if (std::equal(candidate_syndrome.begin(), candidate_syndrome.end(), syndrome.begin())) { + if (std::equal(candidate_syndrome.begin(), + candidate_syndrome.end(), + syndrome.begin())) { this->converge = true; } @@ -294,25 +287,21 @@ namespace ldpc { return this->decoding; } - - //compute bit to check update +// compute bit to check update +#pragma omp parallel for num_threads(this->omp_thread_count) for (int i = 0; i < bit_count; i++) { double temp = 0; - for (auto &e: this->pcm.reverse_iterate_column(i)) { + for (auto& e : this->pcm.reverse_iterate_column(i)) { e.bit_to_check_msg += temp; temp += e.check_to_bit_msg; } } - } - return this->decoding; - } - std::vector &bp_decode_single_scan(std::vector &syndrome) { - + std::vector& bp_decode_single_scan(std::vector& syndrome) { converge = 0; int CONVERGED = 0; @@ -321,16 +310,14 @@ namespace ldpc { for (int i = 0; i < bit_count; i++) { this->initial_log_prob_ratios[i] = std::log( - (1 - this->channel_probabilities[i]) / this->channel_probabilities[i]); + (1 - this->channel_probabilities[i]) / this->channel_probabilities[i]); this->log_prob_ratios[i] = this->initial_log_prob_ratios[i]; - } // initialise_log_domain_bp(); - //main interation loop + // main interation loop for (int it = 1; it <= maximum_iterations; it++) { - if (CONVERGED != 0) { continue; } @@ -343,9 +330,8 @@ namespace ldpc { this->log_prob_ratios = this->initial_log_prob_ratios; } - //check to bit updates + // check to bit updates for (int i = 0; i < check_count; i++) { - this->candidate_syndrome[i] = 0; int total_sgn = 0; @@ -355,7 +341,7 @@ namespace ldpc { double bit_to_check_msg = NAN; - for (auto &e: pcm.iterate_row(i)) { + for (auto& e : pcm.iterate_row(i)) { if (it == 1) { e.check_to_bit_msg = 0; } @@ -371,7 +357,7 @@ namespace ldpc { } temp = std::numeric_limits::max(); - for (auto &e: pcm.reverse_iterate_row(i)) { + for (auto& e : pcm.reverse_iterate_row(i)) { sgn = total_sgn; if (it == 1) { e.check_to_bit_msg = 0; @@ -388,24 +374,18 @@ namespace ldpc { e.check_to_bit_msg = message_sign * ms_scaling_factor * e.bit_to_check_msg; this->log_prob_ratios[e.col_index] += e.check_to_bit_msg; - double abs_bit_to_check_msg = std::abs(bit_to_check_msg); if (abs_bit_to_check_msg < temp) { temp = abs_bit_to_check_msg; } - } - - } - - - //compute hard decisions and calculate syndrome + // compute hard decisions and calculate syndrome for (int i = 0; i < bit_count; i++) { if (this->log_prob_ratios[i] <= 0) { this->decoding[i] = 1; - for (auto &e: pcm.iterate_column(i)) { + for (auto& e : pcm.iterate_column(i)) { this->candidate_syndrome[e.row_index] ^= 1; } } else { @@ -426,28 +406,23 @@ namespace ldpc { converge = (CONVERGED != 0); return decoding; } - } - converge = (CONVERGED != 0); return decoding; - } - std::vector &bp_decode_serial(std::vector &syndrome) { + std::vector& bp_decode_serial(std::vector& syndrome) { int check_index = 0; this->converge = false; // initialise BP this->initialise_log_domain_bp(); for (int it = 1; it <= maximum_iterations; it++) { - double alpha; - if(this->ms_scaling_factor == 0.0) { - alpha = 1.0 - std::pow(2.0, -1.0*it); - } - else { + if (this->ms_scaling_factor == 0.0) { + alpha = 1.0 - std::pow(2.0, -1.0 * it); + } else { alpha = this->ms_scaling_factor; } @@ -455,28 +430,27 @@ namespace ldpc { this->rng_list_shuffle.shuffle(this->serial_schedule_order); } else if (this->schedule == BpSchedule::SERIAL_RELATIVE) { // resort by LLRs in each iteration to ensure that the most reliable bits are considered first - std::sort(this->serial_schedule_order.begin(), this->serial_schedule_order.end(), - [this, it](int bit1, int bit2) { - if (it != 1) { - return this->log_prob_ratios[bit1] > this->log_prob_ratios[bit2]; - } else { - return std::log( - (1 - channel_probabilities[bit1]) / channel_probabilities[bit1]) > - std::log((1 - channel_probabilities[bit2]) / - channel_probabilities[bit2]); - } - }); + std::sort(this->serial_schedule_order.begin(), this->serial_schedule_order.end(), [this, it](int bit1, int bit2) { + if (it != 1) { + return this->log_prob_ratios[bit1] > this->log_prob_ratios[bit2]; + } else { + return std::log( + (1 - channel_probabilities[bit1]) / channel_probabilities[bit1]) > + std::log((1 - channel_probabilities[bit2]) / + channel_probabilities[bit2]); + } + }); } - for (int bit_index: this->serial_schedule_order) { + for (int bit_index : this->serial_schedule_order) { double temp = NAN; this->log_prob_ratios[bit_index] = std::log( - (1 - channel_probabilities[bit_index]) / channel_probabilities[bit_index]); + (1 - channel_probabilities[bit_index]) / channel_probabilities[bit_index]); if (this->bp_method == 0) { - for (auto &e: this->pcm.iterate_column(bit_index)) { + for (auto& e : this->pcm.iterate_column(bit_index)) { check_index = e.row_index; e.check_to_bit_msg = 1.0; - for (auto &g: this->pcm.iterate_row(check_index)) { + for (auto& g : this->pcm.iterate_row(check_index)) { if (&g != &e) { e.check_to_bit_msg *= tanh(g.bit_to_check_msg / 2); } @@ -487,11 +461,11 @@ namespace ldpc { this->log_prob_ratios[bit_index] += e.check_to_bit_msg; } } else if (this->bp_method == 1) { - for (auto &e: pcm.iterate_column(bit_index)) { + for (auto& e : pcm.iterate_column(bit_index)) { check_index = e.row_index; int sgn = syndrome[check_index]; temp = std::numeric_limits::max(); - for (auto &g: this->pcm.iterate_row(check_index)) { + for (auto& g : this->pcm.iterate_row(check_index)) { if (&g != &e) { double abs_bit_to_check_msg = std::abs(g.bit_to_check_msg); if (abs_bit_to_check_msg < temp) { @@ -514,7 +488,7 @@ namespace ldpc { this->decoding[bit_index] = 0; } temp = 0; - for (auto &e: this->pcm.reverse_iterate_column(bit_index)) { + for (auto& e : this->pcm.reverse_iterate_column(bit_index)) { e.bit_to_check_msg += temp; temp += e.check_to_bit_msg; } @@ -531,8 +505,8 @@ namespace ldpc { return this->decoding; } - std::vector & - soft_info_decode_serial(std::vector &soft_info_syndrome, double cutoff, double sigma) { + std::vector& + soft_info_decode_serial(std::vector& soft_info_syndrome, double cutoff, double sigma) { // compute the syndrome log-likelihoods and initialize hard syndrome std::vector syndrome; this->soft_syndrome = soft_info_syndrome; @@ -559,21 +533,20 @@ namespace ldpc { } if (this->random_schedule_at_every_iteration && omp_thread_count == 1) { // reorder schedule elements randomly - shuffle(serial_schedule_order.begin(), serial_schedule_order.end(), - std::default_random_engine(random_schedule_seed)); + shuffle(serial_schedule_order.begin(), serial_schedule_order.end(), std::default_random_engine(random_schedule_seed)); } check_indices_updated.clear(); - for (auto bit_index: serial_schedule_order) { + for (auto bit_index : serial_schedule_order) { double temp = NAN; log_prob_ratios[bit_index] = std::log( - (1 - channel_probabilities[bit_index]) / channel_probabilities[bit_index]); - for (auto &check_nbr: pcm.iterate_column(bit_index)) { + (1 - channel_probabilities[bit_index]) / channel_probabilities[bit_index]); + for (auto& check_nbr : pcm.iterate_column(bit_index)) { // first, we compute the min absolute value of neighbours excluding the current recipient check_index = check_nbr.row_index; int sgn = 0; temp = std::numeric_limits::max(); - for (auto &g: pcm.iterate_row(check_index)) { + for (auto& g : pcm.iterate_row(check_index)) { if (&g != &check_nbr) { if (std::abs(g.bit_to_check_msg) < temp) { temp = std::abs(g.bit_to_check_msg); @@ -599,11 +572,11 @@ namespace ldpc { if (check_node_sgn == syndrome[check_index]) { if (std::abs(check_nbr.bit_to_check_msg) < min_bit_to_check_msg) { this->soft_syndrome[check_index] = - pow(-1, syndrome[check_index]) * - std::abs(check_nbr.bit_to_check_msg); + pow(-1, syndrome[check_index]) * + std::abs(check_nbr.bit_to_check_msg); } else { this->soft_syndrome[check_index] = - pow(-1, syndrome[check_index]) * min_bit_to_check_msg; + pow(-1, syndrome[check_index]) * min_bit_to_check_msg; } } else { syndrome[check_index] ^= 1; @@ -623,7 +596,7 @@ namespace ldpc { decoding[bit_index] = 0; } temp = 0; - for (auto &e: pcm.reverse_iterate_column(bit_index)) { + for (auto& e : pcm.reverse_iterate_column(bit_index)) { e.bit_to_check_msg += temp; temp += e.check_to_bit_msg; } @@ -651,7 +624,7 @@ namespace ldpc { return decoding; } }; - } -} // namespace ldpc::bp + } // namespace bp +} // namespace ldpc #endif \ No newline at end of file diff --git a/src_python/ldpc/bp_decoder/__init__.pyi b/src_python/ldpc/bp_decoder/__init__.pyi index e3ee8f35..188b128b 100644 --- a/src_python/ldpc/bp_decoder/__init__.pyi +++ b/src_python/ldpc/bp_decoder/__init__.pyi @@ -337,6 +337,26 @@ class BpDecoder(BpDecoderBase): ValueError If the length of the input input_vector does not match the number of rows in the parity check matrix. """ + + def bp_decode_parallel(self, input_vector: np.ndarray) -> np.ndarray: + """ + Parallel belief propagation decoder using OpenMP. + + This method performs belief propagation decoding using OpenMP parallelization + for improved performance on multi-core systems. The number of threads used + is controlled by the omp_thread_count parameter. + + Parameters + ---------- + input_vector : np.ndarray + The input vector for decoding. Can be either a syndrome vector (for syndrome decoding) + or a received vector (for received vector decoding). + + Returns + ------- + numpy.ndarray + A 1D numpy array of length equal to the number of columns in the parity check matrix. + """ @property diff --git a/src_python/ldpc/bp_decoder/_bp_decoder.pxd b/src_python/ldpc/bp_decoder/_bp_decoder.pxd index 41493aea..68a656f9 100644 --- a/src_python/ldpc/bp_decoder/_bp_decoder.pxd +++ b/src_python/ldpc/bp_decoder/_bp_decoder.pxd @@ -77,6 +77,7 @@ cdef extern from "bp.hpp" namespace "ldpc::bp": int random_schedule_seed bool random_schedule_at_every_iteration vector[uint8_t] decode(vector[uint8_t]& syndrome) + vector[uint8_t] bp_decode_parallel(vector[uint8_t]& syndrome) vector[uint8_t] soft_info_decode_serial(vector[double]& soft_syndrome, double cutoff, double sigma) void set_omp_thread_count(int count) BpInputType bp_input_type diff --git a/src_python/ldpc/bp_decoder/_bp_decoder.pyx b/src_python/ldpc/bp_decoder/_bp_decoder.pyx index ca665c5f..89dececc 100644 --- a/src_python/ldpc/bp_decoder/_bp_decoder.pyx +++ b/src_python/ldpc/bp_decoder/_bp_decoder.pyx @@ -502,8 +502,7 @@ cdef class BpDecoderBase: Returns: int: The number of threads used. """ - if self.bpd.omp_thread_count != 1: - warnings.warn("The OpenMP functionality is not yet implemented") + # Note: OpenMP is implemented for bp_decode_parallel method return self.bpd.omp_thread_count @omp_thread_count.setter @@ -520,8 +519,7 @@ cdef class BpDecoderBase: raise TypeError("The omp_thread_count must be specified as a\ positive integer.") self.bpd.set_omp_thread_count(value) - if self.bpd.omp_thread_count != 1: - warnings.warn("The OpenMP functionality is not yet implemented") + # Note: OpenMP is implemented for bp_decode_parallel method @property def random_schedule_seed(self) -> int: @@ -661,6 +659,66 @@ cdef class BpDecoder(BpDecoderBase): out = np.zeros(self.n,dtype=DTYPE) for i in range(self.n): out[i] = self.bpd.decoding[i] return out + + def bp_decode_parallel(self, input_vector: np.ndarray) -> np.ndarray: + """ + Parallel belief propagation decoder using OpenMP acceleration. + + This method performs belief propagation decoding using OpenMP parallelization + for improved performance on multi-core systems. The number of threads used + is controlled by the omp_thread_count parameter. + + Parameters + ---------- + input_vector : np.ndarray + The input vector for decoding. Can be either a syndrome vector (for syndrome decoding) + or a received vector (for received vector decoding). + + Returns + ------- + numpy.ndarray + A 1D numpy array of length equal to the number of columns in the parity check matrix. + + Raises + ------ + ValueError + If the length of the input input_vector does not match the number of rows in the parity check matrix. + """ + + if(self.bpd.bp_input_type == SYNDROME and not len(input_vector)==self.m): + raise ValueError(f"The input_vector must have length {self.m} (for syndrome decoding). Not length {len(input_vector)}.") + elif(self.bpd.bp_input_type == RECEIVED_VECTOR and not len(input_vector)==self.n): + raise ValueError(f"The input_vector must have length {self.n} (for received vector decoding). Not length {len(input_vector)}.") + elif(self.bpd.bp_input_type == AUTO and not (len(input_vector)==self.m or len(input_vector)==self.n)): + raise ValueError(f"The input_vector must have length {self.m} (for syndrome decoding) or length {self.n} (for received vector decoding). Not length {len(input_vector)}.") + + cdef int i + cdef bool zero_input_vector = True + DTYPE = input_vector.dtype + + cdef int len_input_vector = len(input_vector) + + if(self.bpd.bp_input_type == SYNDROME or (self.bpd.bp_input_type == AUTO and len(input_vector)==self.m)): + for i in range(len_input_vector): + self._syndrome[i] = input_vector[i] + if self._syndrome[i]: zero_input_vector = False + if zero_input_vector: + self.bpd.converge = True + return np.zeros(self.bit_count,dtype=DTYPE) + self.bpd.bp_decode_parallel(self._syndrome) + + elif(self.bpd.bp_input_type == RECEIVED_VECTOR or (self.bpd.bp_input_type == AUTO and len(input_vector)==self.n)): + for i in range(len_input_vector): + self._received_vector[i] = input_vector[i] + if self._received_vector[i]: zero_input_vector = False + if zero_input_vector: + self.bpd.converge = True + return np.zeros(self.bit_count,dtype=DTYPE) + self.bpd.bp_decode_parallel(self._received_vector) + + out = np.zeros(self.n,dtype=DTYPE) + for i in range(self.n): out[i] = self.bpd.decoding[i] + return out @property From f3f8cdd61e4259f3ddd7d8a0c47a6f56d97fdcb4 Mon Sep 17 00:00:00 2001 From: 0xSooki <0xsooki@gmail.com> Date: Thu, 5 Jun 2025 15:39:09 +0200 Subject: [PATCH 2/5] update benchmark --- cpp_test/TestBPDecoder.cpp | 60 +++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/cpp_test/TestBPDecoder.cpp b/cpp_test/TestBPDecoder.cpp index ffd0dfbb..9cf70646 100644 --- a/cpp_test/TestBPDecoder.cpp +++ b/cpp_test/TestBPDecoder.cpp @@ -491,36 +491,6 @@ TEST(BpDecoder, ProdSumSerial_RepCode5) { } } -TEST(BpDecoderParallel, ThreadScaling) { - int n = 1200; - int m = 600; - auto pcm = create_random_ldpc(m, n, 5); - auto channel_probabilities = vector(n, 0.04); - auto syndrome = create_random_syndrome(m, 0.08); - - std::vector thread_counts = {1, 2, 4, 8}; - std::vector times; - - for (int threads : thread_counts) { - auto decoder_serial = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 0.625); - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, 0.625, threads); - - auto start = high_resolution_clock::now(); - auto result = decoder.decode(syndrome); - auto end = high_resolution_clock::now(); - auto duration = duration_cast(end - start); - times.push_back(duration.count()); - std::cout << "Threads: " << threads << ", Time: " << duration.count() << " μs\n"; - - start = high_resolution_clock::now(); - auto result2 = decoder_serial.decode(syndrome); - end = high_resolution_clock::now(); - duration = duration_cast(end - start); - std::cout << "Serial Time: " << duration.count() << " μs\n"; - EXPECT_EQ(result, result2) << "Decoding results should match for serial and parallel versions"; - } -} - ldpc::bp::BpSparse create_random_ldpc(int m, int n, int avg_row_weight = 4) { auto pcm = ldpc::bp::BpSparse(m, n); std::uniform_int_distribution col_dist(0, n - 1); @@ -552,6 +522,36 @@ std::vector create_random_syndrome(int m, double error_prob = 0.1) { return syndrome; } +TEST(BpDecoder, Benchmark) { + int n = 1200; + int m = 600; + auto pcm = create_random_ldpc(m, n, 5); + auto channel_probabilities = vector(n, 0.04); + auto syndrome = create_random_syndrome(m, 0.08); + + std::vector thread_counts = {1, 2, 4, 8}; + std::vector times; + + for (int threads : thread_counts) { + auto decoder_serial = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 0.625); + auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, 0.625, threads); + + auto start = high_resolution_clock::now(); + auto result = decoder.decode(syndrome); + auto end = high_resolution_clock::now(); + auto duration = duration_cast(end - start); + times.push_back(duration.count()); + std::cout << "Threads: " << threads << ", Time: " << duration.count() << " μs\n"; + + start = high_resolution_clock::now(); + auto result2 = decoder_serial.decode(syndrome); + end = high_resolution_clock::now(); + duration = duration_cast(end - start); + std::cout << "Serial Time: " << duration.count() << " μs\n"; + EXPECT_EQ(result, result2) << "Decoding results should match for serial and parallel versions"; + } +} + int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); From 7a934c8636c1f1df72614d8bc1de6a38780a3d92 Mon Sep 17 00:00:00 2001 From: 0xSooki <0xsooki@gmail.com> Date: Thu, 5 Jun 2025 21:43:14 +0200 Subject: [PATCH 3/5] add cycles parameter for bp benchmarks --- cpp_test/TestBPDecoder.cpp | 52 ++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/cpp_test/TestBPDecoder.cpp b/cpp_test/TestBPDecoder.cpp index 9cf70646..2656940d 100644 --- a/cpp_test/TestBPDecoder.cpp +++ b/cpp_test/TestBPDecoder.cpp @@ -523,32 +523,46 @@ std::vector create_random_syndrome(int m, double error_prob = 0.1) { } TEST(BpDecoder, Benchmark) { - int n = 1200; - int m = 600; + int n = 10000; + int m = 5000; + int cycles = 10; auto pcm = create_random_ldpc(m, n, 5); auto channel_probabilities = vector(n, 0.04); auto syndrome = create_random_syndrome(m, 0.08); std::vector thread_counts = {1, 2, 4, 8}; - std::vector times; + std::vector ptimes; + std::vector stimes; for (int threads : thread_counts) { - auto decoder_serial = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 0.625); - auto decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, 0.625, threads); - - auto start = high_resolution_clock::now(); - auto result = decoder.decode(syndrome); - auto end = high_resolution_clock::now(); - auto duration = duration_cast(end - start); - times.push_back(duration.count()); - std::cout << "Threads: " << threads << ", Time: " << duration.count() << " μs\n"; - - start = high_resolution_clock::now(); - auto result2 = decoder_serial.decode(syndrome); - end = high_resolution_clock::now(); - duration = duration_cast(end - start); - std::cout << "Serial Time: " << duration.count() << " μs\n"; - EXPECT_EQ(result, result2) << "Decoding results should match for serial and parallel versions"; + auto serial_decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 0.625); + auto parallel_decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, 0.625, threads); + std::vector serial_times(1000); + std::vector parallel_times(1000); + for (int i = 0; i < cycles; i++) { + auto start = high_resolution_clock::now(); + auto result = parallel_decoder.decode(syndrome); + auto end = high_resolution_clock::now(); + auto duration = duration_cast(end - start); + parallel_times.push_back(duration.count()); + + start = high_resolution_clock::now(); + auto result2 = serial_decoder.decode(syndrome); + end = high_resolution_clock::now(); + duration = duration_cast(end - start); + serial_times.push_back(duration.count()); + EXPECT_EQ(result, result2) << "Decoding results should match for serial and parallel versions"; + } + ptimes.push_back((long)std::accumulate(parallel_times.begin(), parallel_times.end(), 0)/cycles); + stimes.push_back((long)std::accumulate(serial_times.begin(), serial_times.end(), 0)/cycles); + parallel_times.clear(); + serial_times.clear(); + } + for (size_t i = 0; i < thread_counts.size(); i++) { + std::cout << "Threads: " << thread_counts[i] + << ", Parallel avg: " << ptimes[i] << " μs" + << ", Serial avg: " << stimes[i] << " μs" + << ", Speedup: " << (double)stimes[i] / ptimes[i] << "x" << std::endl; } } From 346da4f9db60d5078c40925e436f8178d4c6501a Mon Sep 17 00:00:00 2001 From: 0xSooki <0xsooki@gmail.com> Date: Thu, 5 Jun 2025 22:36:29 +0200 Subject: [PATCH 4/5] refactor bp benchmark --- cpp_test/TestBPDecoder.cpp | 67 +++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/cpp_test/TestBPDecoder.cpp b/cpp_test/TestBPDecoder.cpp index 2656940d..9648998a 100644 --- a/cpp_test/TestBPDecoder.cpp +++ b/cpp_test/TestBPDecoder.cpp @@ -523,46 +523,67 @@ std::vector create_random_syndrome(int m, double error_prob = 0.1) { } TEST(BpDecoder, Benchmark) { - int n = 10000; - int m = 5000; - int cycles = 10; + const int n = 10000; + const int m = 5000; + const int cycles = 10; + const int max_iterations = 40; + const double channel_prob = 0.04; + const double syndrome_error_prob = 0.08; + const double scaling_factor = 0.625; + auto pcm = create_random_ldpc(m, n, 5); - auto channel_probabilities = vector(n, 0.04); - auto syndrome = create_random_syndrome(m, 0.08); + auto channel_probabilities = vector(n, channel_prob); + auto syndrome = create_random_syndrome(m, syndrome_error_prob); std::vector thread_counts = {1, 2, 4, 8}; - std::vector ptimes; - std::vector stimes; + std::vector parallel_times; + std::vector serial_times; for (int threads : thread_counts) { - auto serial_decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, 0.625); - auto parallel_decoder = ldpc::bp::BpDecoder(pcm, channel_probabilities, 40, ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, 0.625, threads); - std::vector serial_times(1000); - std::vector parallel_times(1000); + auto serial_decoder = ldpc::bp::BpDecoder( + pcm, channel_probabilities, max_iterations, + ldpc::bp::MINIMUM_SUM, ldpc::bp::SERIAL, scaling_factor + ); + auto parallel_decoder = ldpc::bp::BpDecoder( + pcm, channel_probabilities, max_iterations, + ldpc::bp::MINIMUM_SUM, ldpc::bp::PARALLEL, scaling_factor, threads + ); + + std::vector parallel_durations; + std::vector serial_durations; + parallel_durations.reserve(cycles); + serial_durations.reserve(cycles); + for (int i = 0; i < cycles; i++) { auto start = high_resolution_clock::now(); - auto result = parallel_decoder.decode(syndrome); + auto parallel_result = parallel_decoder.decode(syndrome); auto end = high_resolution_clock::now(); auto duration = duration_cast(end - start); - parallel_times.push_back(duration.count()); + parallel_durations.push_back(duration.count()); start = high_resolution_clock::now(); - auto result2 = serial_decoder.decode(syndrome); + auto serial_result = serial_decoder.decode(syndrome); end = high_resolution_clock::now(); duration = duration_cast(end - start); - serial_times.push_back(duration.count()); - EXPECT_EQ(result, result2) << "Decoding results should match for serial and parallel versions"; + serial_durations.push_back(duration.count()); + + EXPECT_EQ(parallel_result, serial_result) + << "Decoding results should match for serial and parallel versions"; } - ptimes.push_back((long)std::accumulate(parallel_times.begin(), parallel_times.end(), 0)/cycles); - stimes.push_back((long)std::accumulate(serial_times.begin(), serial_times.end(), 0)/cycles); - parallel_times.clear(); - serial_times.clear(); + + long avg_parallel = std::accumulate(parallel_durations.begin(), parallel_durations.end(), 0L) / cycles; + long avg_serial = std::accumulate(serial_durations.begin(), serial_durations.end(), 0L) / cycles; + + parallel_times.push_back(avg_parallel); + serial_times.push_back(avg_serial); } + for (size_t i = 0; i < thread_counts.size(); i++) { + double speedup = static_cast(serial_times[i]) / parallel_times[i]; std::cout << "Threads: " << thread_counts[i] - << ", Parallel avg: " << ptimes[i] << " μs" - << ", Serial avg: " << stimes[i] << " μs" - << ", Speedup: " << (double)stimes[i] / ptimes[i] << "x" << std::endl; + << ", Parallel avg: " << parallel_times[i] << " μs" + << ", Serial avg: " << serial_times[i] << " μs" + << ", Speedup: " << speedup << "x" << std::endl; } } From 9d5752f78db17e548dcc65851480b521a29df51e Mon Sep 17 00:00:00 2001 From: 0xSooki <0xsooki@gmail.com> Date: Wed, 11 Jun 2025 08:31:14 +0200 Subject: [PATCH 5/5] add conditional openmp include --- src_cpp/bp.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src_cpp/bp.hpp b/src_cpp/bp.hpp index e4ab81c5..23c0d428 100644 --- a/src_cpp/bp.hpp +++ b/src_cpp/bp.hpp @@ -11,6 +11,9 @@ #include #include // required for std::runtime_error #include +#ifdef _OPENMP +#include +#endif #include "math.h" #include "sparse_matrix_base.hpp"