From 6499d8dc489c80e7e0fdbed0c87f74012417ffaa Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Fri, 28 Mar 2025 15:04:53 +0100 Subject: [PATCH 01/13] Potential Runtime Improvement of GaussTraceFilter.cpp^ --- .../source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp index e819450dfcf..9dbe24853a8 100644 --- a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp +++ b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp @@ -1007,7 +1007,7 @@ namespace OpenMS { //store map of abort reasons for failed seeds FeatureMap abort_map; - abort_map.reserve(abort_reasons_.size()); + abort_map.reserve( abort_reasons_.size()); Size counter = 0; for (std::map::iterator it2 = abort_reasons_.begin(); it2 != abort_reasons_.end(); ++it2, ++counter) { From 60442f0554a8d029bdc1848679da8c205353f897 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Fri, 28 Mar 2025 15:26:16 +0100 Subject: [PATCH 02/13] patched From 44fefeb34fa59767483b4e92356ae419371bcdf0 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Wed, 9 Apr 2025 12:34:56 +0200 Subject: [PATCH 03/13] modified XMLHandler.h/cpp Changed Implementation of appendASCII added some helping functions to class StringManager --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 19 +++- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 101 +++++++++++++++--- 2 files changed, 101 insertions(+), 19 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 9d4f98665ce..38279edbb9f 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -224,8 +224,18 @@ namespace OpenMS // Converts from a wide-character string to a narrow-character string. inline static String toNative_(const XMLCh* str) - { - return String(unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + { + String r; + XMLSize_t l = xercesc::XMLString::stringLen(str); + if(isASCII(str, l)) + { + appendASCII(str,l,r); + } + else + { + r = (unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + } + return r; } // Converts from a wide-character string to a narrow-character string. @@ -283,7 +293,12 @@ namespace OpenMS { return toNative_(str); } + /// Checks if supplied if chars in XMLCh* can be encoded with ASCII + static bool isASCII(const XMLCh * chars, const XMLSize_t length); + /// Compresses eight 8x16bit Chars in XMLCh* to 8x8bit Chars by cutting upper byte + static void compress64 (const XMLCh * input_it, char* output_it); + /** * @brief Transcodes the supplied XMLCh* and appends it to the OpenMS String * diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 6af312971b6..8298d78afa5 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -302,7 +303,7 @@ namespace OpenMS::Internal } // no value, although there should be a numerical value else if (term.xref_type != ControlledVocabulary::CVTerm::NONE && term.xref_type != ControlledVocabulary::CVTerm::XSD_STRING && // should be numerical - !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the binary data array, not the 'value=' attribute! + !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the bits data array, not the 'value=' attribute! ) { warning(LOAD, String("The CV term '") + accession + " - " + term.name + "' used in tag '" + parent_tag + "' should have a numerical value. The value is '" + value + "'."); @@ -425,34 +426,100 @@ namespace OpenMS::Internal StringManager::~StringManager() = default; + void StringManager::compress64 (const XMLCh* input_it, char* output_it) { + alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); + simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); + simde_mm_storel_epi64((simde__m128i*)output_it, compressed); + } + + bool StringManager::isASCII(const XMLCh * chars, const XMLSize_t length) { + + + std::div_t quotient_and_remainder = std::div(length, 8); + size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient + size_t remainder = quotient_and_remainder.rem; + // std::cout << "Remainer: " << remainder << std::endl; + // std::cout << "Quotient: " << quotient << std::endl; + // cout << "length: " << length << endl; + + const XMLCh* it = chars; + const XMLCh* end = it + (quotient * 8); + simde__m128i mask = simde_mm_set1_epi16(0xFF00); + bool bitmask = true; + while (it != end && bitmask){ + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)it); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i andOP = simde_mm_and_si128(bits, mask); + simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); + bitmask = simde_mm_movemask_epi8(cmp) == 0xFFFF; + // bitmask = simde_mm_testz_si128(bits, mask); + it+=8; + } + + end += remainder; + while (it != end && bitmask){ + bitmask = *it & 0xFF00; + it++; + } + + return bitmask; + } + void StringManager::appendASCII(const XMLCh * chars, const XMLSize_t length, String & result) { - // XMLCh are characters in UTF16 (usually stored as 16bit unsigned - // short but this is not guaranteed). - // We know that the Base64 string here can only contain plain ASCII - // and all bytes except the least significant one will be zero. Thus - // we can convert to char directly (only keeping the least - // significant byte). + // XMLCh are characters in UTF16 (usually stored as 16bit unsigned + // short but this is not guaranteed). + // We know that the Base64 string here can only contain plain ASCII + // and all bytes except the least significant one will be zero. Thus + // we can convert to char directly (only keeping the least + // significant byte). + + + + + std::div_t quotient_and_remainder = std::div(length, 8); + size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient + size_t remainder = quotient_and_remainder.rem; + // std::cout << "Remainer: " << remainder << std::endl; + // std::cout << "Quotient: " << quotient << std::endl; + // cout << "length: " << length << endl; + const XMLCh* it = chars; - const XMLCh* end = it + length; + const XMLCh* end = it + (quotient * 8); + // std::cout << "Anzahl der Elemente zwischen it1 und it2: " + // << std::distance(it, end) << std::endl; size_t curr_size = result.size(); result.resize(curr_size + length); std::string::iterator str_it = result.begin(); std::advance(str_it, curr_size); + // int i = 0; + + //Copy Block of 8 chars at a time. Then jumps to the next eight Blocks while (it!=end) - { - *str_it = (char)*it; - ++str_it; - ++it; + { + // std::cout << "Aktueller Wert: " << *it << std::endl; + + compress64(it, &(*str_it)); + // printf("loop: %d\n", i); + str_it += 8; + it += 8; + // i++; } - // This is ca. 50 % faster than - // for (size_t i = 0; i < length; i++) - // { - // result[curr_size + i] = (char)chars[i]; - // } + + + end = it + remainder; + + while (it!=end) + { + *str_it = static_cast(*it & 0xFF); + // std::cout << "Aktueller Wert: " << *str_it << std::endl; + str_it ++; + it ++; + // i++; + } } From d35a0435d973e749e83035d7240d802678df3af0 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Fri, 11 Apr 2025 15:28:52 +0200 Subject: [PATCH 04/13] added test for XMLHeader, Debugged it --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 16 +- .../class_tests/openms/executables.cmake | 1 + .../openms/source/XMLHandler_test.cpp | 154 ++++++++++++++++++ 3 files changed, 166 insertions(+), 5 deletions(-) create mode 100644 src/tests/class_tests/openms/source/XMLHandler_test.cpp diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 8298d78afa5..b9094061287 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -438,14 +438,20 @@ namespace OpenMS::Internal std::div_t quotient_and_remainder = std::div(length, 8); size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient size_t remainder = quotient_and_remainder.rem; - // std::cout << "Remainer: " << remainder << std::endl; - // std::cout << "Quotient: " << quotient << std::endl; - // cout << "length: " << length << endl; + std::cout << "Remainer: " << remainder << std::endl; + std::cout << "Quotient: " << quotient << std::endl; + std::cout << "length: " << length << endl; const XMLCh* it = chars; const XMLCh* end = it + (quotient * 8); simde__m128i mask = simde_mm_set1_epi16(0xFF00); bool bitmask = true; + + if (length == 0) + { + return false; + } + while (it != end && bitmask){ simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)it); simde__m128i zero = simde_mm_setzero_si128(); @@ -458,10 +464,10 @@ namespace OpenMS::Internal end += remainder; while (it != end && bitmask){ - bitmask = *it & 0xFF00; + bitmask = !(*it & 0xFF00); it++; } - + std::cout << "bitmask: " << bitmask << std::endl; return bitmask; } diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index 656625cea4c..83b2508d6b0 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -251,6 +251,7 @@ set(format_executables_list UnimodXMLFile_test XMassFile_test XMLFile_test + XMLHandler_test XMLValidator_test XQuestResultXMLFile_test XTandemInfile_test diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp new file mode 100644 index 00000000000..41232f755e4 --- /dev/null +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -0,0 +1,154 @@ + +#include +#include +#include +#include +#include + +#include + +using namespace OpenMS::Internal; + + + + + +START_TEST(StringManager, "$Id$") + + +const XMLCh russianHello[] = { + 0x041F, 0x0440, 0x0438, 0x0432, 0x0435, 0x0442, 0x043C, + 0x0438, 0x0440, // "Привет мир" (Hello World in Russian) +}; +XMLSize_t r_length = xercesc::XMLString::stringLen(russianHello); + +const XMLCh ascii[] = { + 0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F, + 0x0072,0x006C,0x0064,0x0021}; +XMLSize_t a_length = xercesc::XMLString::stringLen(ascii); + +const XMLCh mixed[] = { + 0x0048, 0x0065,0x0432, 0x0435, 0x0442, 0x043C, 0x006F, + 0x0072,0x006C,0x0064, 0x0021 }; +XMLSize_t m_length = xercesc::XMLString::stringLen(mixed); + +const XMLCh empty[] = {0}; +XMLSize_t e_length = xercesc::XMLString::stringLen(empty); +std::cout << e_length << std::endl; + +const XMLCh upperBoundary [] = {0x00FF,0x00FF}; +XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); + +bool isAscii = false; + +START_SECTION(if input is ascii) + isAscii = StringManager::isASCII(ascii,a_length); + std::cout << "1 \n"; + TEST_TRUE(isAscii) + isAscii = StringManager::isASCII(russianHello,r_length); + std::cout << "2 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(mixed,m_length); + std::cout << "3 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(empty,e_length); + std::cout << "4 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(upperBoundary,u_length); + std::cout << "5 \n"; + TEST_TRUE(isAscii) +END_SECTION + +const XMLCh eight_block_negative[] = {0xFFFF,0xFFFE,0xFFFB,0xFFF6,0xFFEC,0xFFCE,0xFF9C,0xFE00}; + +const XMLCh eight_block[] = {0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F}; + +const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0xFFFF,0xFFFE,0xFFFB,0xFFF6}; + +const XMLCh eight_block_kadabra[] = { + 0x004B, // K + 0x0041, // A + 0x0044, // D + 0x0041, // A + 0x0042, // B + 0x0052, // R + 0x0041, // A + 0x0021 // ! +}; + +START_SECTION(if Utf16 to Ascii Compression works right) + char* output1 = new char [9]; + output1[8] = '\0'; + StringManager::compress64(eight_block,output1); + std::string res1_str = "Hello,Wo"; + std::string o1_str (output1); + TEST_STRING_EQUAL(o1_str,res1_str); + delete[] output1; + + + char* output2 = new char [9]; + output2 [8] = '\0'; + char res2 [9] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; + res2[8] = '\0'; + StringManager::compress64(eight_block_negative,output2); + std::string res2_str (res2); + std::string o2_str(output2); + TEST_STRING_EQUAL(o2_str, res2_str); + delete[] output2; + + char* output3 = new char [9]; + output3 [8] = '\0'; + char res3 [9] = {0x42,0x45,0x4C,0x41,0x00,0x00,0x00,0x00}; + res3[8] = '\0'; + StringManager::compress64(eight_block_mixed,output3); + std::string res3_str (res3); + std::string o3_str(output3); + TEST_STRING_EQUAL(o3_str, res3_str); + delete[] output3; + + char* output4 = new char [13]; + output4 [0] ='A'; + output4 [1] ='B'; + output4 [2] ='R'; + output4 [3] ='A'; + output3 [12] = '\0'; + + StringManager::compress64(eight_block_kadabra,(output4+4)); + std::string res4_str = "ABRAKADABRA!"; + std::string o4_str(output4); + TEST_STRING_EQUAL(o4_str, res4_str); + delete[] output4; + +END_SECTION + +//Tests Number of Chars not Dividable by 8 +OpenMS::String o5_str; +std::string res5_str = "Hello,World!"; + +//Checks how the Function handles Data thats already stored in Output string +OpenMS::String o6_str = "Gruess Gott und "; +std::string res6_str = "Gruess Gott und Hello,World!"; + +OpenMS::String o7_str; +std::string res7_str = ""; + + +START_SECTION(if appendASCII works) + + StringManager::appendASCII(ascii,a_length,o5_str); + TEST_STRING_EQUAL(o5_str, res5_str); + + StringManager::appendASCII(ascii,a_length,o6_str); + TEST_STRING_EQUAL(o6_str, res6_str); + + StringManager::appendASCII(empty,e_length,o7_str); + TEST_STRING_EQUAL(o7_str, res7_str); + std::cout << o7_str.size() << std::endl; + +END_SECTION + +END_TEST + + + + From 9a2d0b4f8d258969b25a54ccf375dc8a79639ef1 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Mon, 14 Apr 2025 14:35:54 +0200 Subject: [PATCH 05/13] t rid of print statements in function --- src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index b9094061287..81c7862b30b 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -438,9 +438,9 @@ namespace OpenMS::Internal std::div_t quotient_and_remainder = std::div(length, 8); size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient size_t remainder = quotient_and_remainder.rem; - std::cout << "Remainer: " << remainder << std::endl; - std::cout << "Quotient: " << quotient << std::endl; - std::cout << "length: " << length << endl; + // std::cout << "Remainer: " << remainder << std::endl; + // std::cout << "Quotient: " << quotient << std::endl; + // std::cout << "length: " << length << endl; const XMLCh* it = chars; const XMLCh* end = it + (quotient * 8); @@ -467,7 +467,7 @@ namespace OpenMS::Internal bitmask = !(*it & 0xFF00); it++; } - std::cout << "bitmask: " << bitmask << std::endl; + // std::cout << "bitmask: " << bitmask << std::endl; return bitmask; } From ee6bd12d64111936733b172e0888f1d89f60cabf Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 15 Apr 2025 13:50:43 +0200 Subject: [PATCH 06/13] Moved the function in the right part of the String Manager Class --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 81c7862b30b..41225fbb038 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -418,14 +418,6 @@ namespace OpenMS::Internal } } - //******************************************************************************************************************* - - StringManager::StringManager() - = default; - - StringManager::~StringManager() - = default; - void StringManager::compress64 (const XMLCh* input_it, char* output_it) { alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); @@ -467,7 +459,6 @@ namespace OpenMS::Internal bitmask = !(*it & 0xFF00); it++; } - // std::cout << "bitmask: " << bitmask << std::endl; return bitmask; } @@ -526,7 +517,17 @@ namespace OpenMS::Internal it ++; // i++; } - } + //******************************************************************************************************************* + + StringManager::StringManager() + = default; + + StringManager::~StringManager() + = default; + + + + } // namespace OpenMS // namespace Internal From c3155c4f778ac7dbe304c9eac1583baf37194e54 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Thu, 17 Apr 2025 13:42:47 +0200 Subject: [PATCH 07/13] fferent Impelmentation of append/isAscii with potentially silghtly less overhead --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 58 +++++++------------ 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 41225fbb038..01f4f335b09 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -427,15 +427,10 @@ namespace OpenMS::Internal bool StringManager::isASCII(const XMLCh * chars, const XMLSize_t length) { - std::div_t quotient_and_remainder = std::div(length, 8); - size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient - size_t remainder = quotient_and_remainder.rem; - // std::cout << "Remainer: " << remainder << std::endl; - // std::cout << "Quotient: " << quotient << std::endl; - // std::cout << "length: " << length << endl; - - const XMLCh* it = chars; - const XMLCh* end = it + (quotient * 8); + size_t quotient = length / 8; // Ganzzahliger Quotient + size_t remainder = length % 8; + + const XMLCh* input_ptr = chars; simde__m128i mask = simde_mm_set1_epi16(0xFF00); bool bitmask = true; @@ -444,20 +439,19 @@ namespace OpenMS::Internal return false; } - while (it != end && bitmask){ - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)it); + for (size_t i = 0; i < quotient && bitmask; i++) + { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_ptr); simde__m128i zero = simde_mm_setzero_si128(); simde__m128i andOP = simde_mm_and_si128(bits, mask); simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); bitmask = simde_mm_movemask_epi8(cmp) == 0xFFFF; - // bitmask = simde_mm_testz_si128(bits, mask); - it+=8; + input_ptr+=8; } - end += remainder; - while (it != end && bitmask){ - bitmask = !(*it & 0xFF00); - it++; + for (size_t i = 0; i < remainder && bitmask; i++) + { + bitmask = !(input_ptr[i] & 0xFF00); } return bitmask; } @@ -471,7 +465,10 @@ namespace OpenMS::Internal // we can convert to char directly (only keeping the least // significant byte). + size_t quotient = length / 8; + size_t remainder = length % 8; + const XMLCh* input_ptr = chars; std::div_t quotient_and_remainder = std::div(length, 8); @@ -489,33 +486,20 @@ namespace OpenMS::Internal size_t curr_size = result.size(); result.resize(curr_size + length); - std::string::iterator str_it = result.begin(); - std::advance(str_it, curr_size); - // int i = 0; + char* output_ptr = &result[curr_size]; //Copy Block of 8 chars at a time. Then jumps to the next eight Blocks - while (it!=end) + for (size_t i = 0; i < quotient; i++) { - // std::cout << "Aktueller Wert: " << *it << std::endl; - - compress64(it, &(*str_it)); - // printf("loop: %d\n", i); - str_it += 8; - it += 8; - // i++; + compress64(input_ptr, output_ptr); + input_ptr += 8; + output_ptr += 8; } - - end = it + remainder; - - while (it!=end) + for (size_t i = 0; i < remainder; i++) { - *str_it = static_cast(*it & 0xFF); - // std::cout << "Aktueller Wert: " << *str_it << std::endl; - str_it ++; - it ++; - // i++; + output_ptr[i] = static_cast(input_ptr[i] & 0xFF); } } From 109411ae2a00539bba6eb448ff60e79685168ea0 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Thu, 17 Apr 2025 14:53:22 +0200 Subject: [PATCH 08/13] Test formatted to Conventions --- src/tests/class_tests/openms/source/XMLHandler_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index 41232f755e4..d3fed30bd98 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -41,7 +41,7 @@ XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); bool isAscii = false; -START_SECTION(if input is ascii) +START_SECTION(isASCII(const XMLCh * chars, const XMLSize_t length)) isAscii = StringManager::isASCII(ascii,a_length); std::cout << "1 \n"; TEST_TRUE(isAscii) @@ -76,7 +76,7 @@ const XMLCh eight_block_kadabra[] = { 0x0021 // ! }; -START_SECTION(if Utf16 to Ascii Compression works right) +START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) char* output1 = new char [9]; output1[8] = '\0'; StringManager::compress64(eight_block,output1); @@ -133,7 +133,7 @@ OpenMS::String o7_str; std::string res7_str = ""; -START_SECTION(if appendASCII works) +START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) StringManager::appendASCII(ascii,a_length,o5_str); TEST_STRING_EQUAL(o5_str, res5_str); From 9229e841deb25821ae743ea3adcdc3b2b60810ae Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 22 Apr 2025 16:49:09 +0200 Subject: [PATCH 09/13] Changed the packus_epi16 function with the shuffle function for improved performance inside the compress64 function. Edited the the XMLHandler_test to work with new implemantation and got rid of new/delete: --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 5 +- .../openms/source/XMLHandler_test.cpp | 62 +++++++------------ 2 files changed, 28 insertions(+), 39 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 01f4f335b09..259eee24f23 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -420,7 +420,10 @@ namespace OpenMS::Internal void StringManager::compress64 (const XMLCh* input_it, char* output_it) { alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); - simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); + // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); + const simde__m128i shuffleMask = simde_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, + -1, -1, -1, -1, -1, -1, -1, -1); + simde__m128i compressed = simde_mm_shuffle_epi8(bits,shuffleMask); simde_mm_storel_epi64((simde__m128i*)output_it, compressed); } diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index d3fed30bd98..19e56614144 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -24,19 +24,18 @@ XMLSize_t r_length = xercesc::XMLString::stringLen(russianHello); const XMLCh ascii[] = { 0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F, - 0x0072,0x006C,0x0064,0x0021}; + 0x0072,0x006C,0x0064,0x0021, 0x0000}; XMLSize_t a_length = xercesc::XMLString::stringLen(ascii); const XMLCh mixed[] = { 0x0048, 0x0065,0x0432, 0x0435, 0x0442, 0x043C, 0x006F, - 0x0072,0x006C,0x0064, 0x0021 }; + 0x0072,0x006C,0x0064, 0x0021, 0x0000 }; XMLSize_t m_length = xercesc::XMLString::stringLen(mixed); const XMLCh empty[] = {0}; XMLSize_t e_length = xercesc::XMLString::stringLen(empty); -std::cout << e_length << std::endl; -const XMLCh upperBoundary [] = {0x00FF,0x00FF}; +const XMLCh upperBoundary [] = {0x00FF,0x00FF,0x0000}; XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); bool isAscii = false; @@ -59,11 +58,11 @@ START_SECTION(isASCII(const XMLCh * chars, const XMLSize_t length)) TEST_TRUE(isAscii) END_SECTION -const XMLCh eight_block_negative[] = {0xFFFF,0xFFFE,0xFFFB,0xFFF6,0xFFEC,0xFFCE,0xFF9C,0xFE00}; +const XMLCh eight_block_negative[] = {0x0148,0x0165,0x016C,0x016C,0x016F,0x012C,0x0157,0x016F}; const XMLCh eight_block[] = {0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F}; -const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0xFFFF,0xFFFE,0xFFFB,0xFFF6}; +const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0x0142,0x0145,0x014C,0x0141}; const XMLCh eight_block_kadabra[] = { 0x004B, // K @@ -77,47 +76,34 @@ const XMLCh eight_block_kadabra[] = { }; START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) - char* output1 = new char [9]; - output1[8] = '\0'; - StringManager::compress64(eight_block,output1); + std::string o1_str(8,'\0'); + StringManager::compress64(eight_block,o1_str.data()); std::string res1_str = "Hello,Wo"; - std::string o1_str (output1); TEST_STRING_EQUAL(o1_str,res1_str); - delete[] output1; - char* output2 = new char [9]; - output2 [8] = '\0'; - char res2 [9] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; - res2[8] = '\0'; - StringManager::compress64(eight_block_negative,output2); - std::string res2_str (res2); - std::string o2_str(output2); + std::string o2_str(8,'\0'); + StringManager::compress64(eight_block_negative,o2_str.data()); + std::string res2_str = res1_str; TEST_STRING_EQUAL(o2_str, res2_str); - delete[] output2; - - char* output3 = new char [9]; - output3 [8] = '\0'; - char res3 [9] = {0x42,0x45,0x4C,0x41,0x00,0x00,0x00,0x00}; - res3[8] = '\0'; - StringManager::compress64(eight_block_mixed,output3); - std::string res3_str (res3); - std::string o3_str(output3); + + + std::string o3_str(8,'\0'); + // char res3 [9] = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; + // res3[8] = '\0'; + StringManager::compress64(eight_block_mixed,o3_str.data()); + std::string res3_str = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; TEST_STRING_EQUAL(o3_str, res3_str); - delete[] output3; - - char* output4 = new char [13]; - output4 [0] ='A'; - output4 [1] ='B'; - output4 [2] ='R'; - output4 [3] ='A'; - output3 [12] = '\0'; + + std::string o4_str(12,'\0'); + o4_str [0] ='A'; + o4_str [1] ='B'; + o4_str [2] ='R'; + o4_str [3] ='A'; - StringManager::compress64(eight_block_kadabra,(output4+4)); + StringManager::compress64(eight_block_kadabra,((o4_str.data())+4)); std::string res4_str = "ABRAKADABRA!"; - std::string o4_str(output4); TEST_STRING_EQUAL(o4_str, res4_str); - delete[] output4; END_SECTION From 7faf6a151b3743713c2a271873de25304e8d6267 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 11:17:14 +0200 Subject: [PATCH 10/13] Implemented strLength function with simde for potential runtime improvement --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 4 ++- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 25 +++++++++++++++++++ .../openms/source/XMLHandler_test.cpp | 9 +++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 38279edbb9f..6124c6cb206 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -226,7 +226,7 @@ namespace OpenMS inline static String toNative_(const XMLCh* str) { String r; - XMLSize_t l = xercesc::XMLString::stringLen(str); + XMLSize_t l = strLength(str); if(isASCII(str, l)) { appendASCII(str,l,r); @@ -252,6 +252,8 @@ namespace OpenMS /// Destructor ~StringManager(); + static int strLength(const XMLCh* input_ptr); + /// Transcode the supplied C string to a xerces string inline static XercesString convert(const char * str) { diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 259eee24f23..16546ba55ff 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -418,6 +418,31 @@ namespace OpenMS::Internal } } + int StringManager::strLength(const XMLCh* input_ptr) { + size_t processedChars = 0; + XMLCh* pos_ptr = const_cast(input_ptr); + + while (true) { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)pos_ptr); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i cmpZero = simde_mm_cmpeq_epi16(bits, zero); + uint16_t zeroMask = simde_mm_movemask_epi8(cmpZero); + + if (zeroMask != 0x0000) { + int bytePosZero = __builtin_ctz(zeroMask); + int charPosZero = bytePosZero / 2; + pos_ptr += charPosZero; + return processedChars + charPosZero; + } + + pos_ptr += 8; + processedChars += 8; + } + + // Reached max length without finding null terminator + return 0; + } + void StringManager::compress64 (const XMLCh* input_it, char* output_it) { alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index 19e56614144..b55c6ad27d7 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -133,6 +133,15 @@ START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & END_SECTION +START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) + int o_length = StringManager::strLength(ascii); + TEST_EQUAL(o_length, a_length); + o_length = StringManager::strLength(empty); + TEST_EQUAL(o_length, e_length); + o_length = StringManager::strLength(upperBoundary); + TEST_EQUAL(o_length, u_length); +END_SECTION + END_TEST From 8c2a1b6952ff7bbfb42d07cd9d7c80972ad61ac5 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 16:11:49 +0200 Subject: [PATCH 11/13] Added strLength Method using simde for Potential runtime improvement. Added a for loop to prevent this method from crossing page Boundaries --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 1 + .../source/FORMAT/HANDLERS/MzMLHandler.cpp | 5 +++- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 28 +++++++++++++++---- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 6124c6cb206..5f4dcc89777 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -21,6 +21,7 @@ #include #include +#include #include #include diff --git a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp index f643342819c..23b62e46c8a 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp @@ -18,8 +18,11 @@ #include #include + #include +using namespace std::literals; + namespace OpenMS::Internal { @@ -267,7 +270,7 @@ namespace OpenMS::Internal UInt meta_string_array_index = 0; for (Size i = 0; i < input_data.size(); i++) //loop over all binary data arrays { - if (input_data[i].meta.getName() != "m/z array" && input_data[i].meta.getName() != "intensity array") // is meta data array? + if (input_data[i].meta.getName() != "m/z array"sv && input_data[i].meta.getName() != "intensity array"sv) // is meta data array? { if (input_data[i].data_type == MzMLHandlerHelper::BinaryData::DT_FLOAT) { diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 16546ba55ff..8abee4b900e 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -421,8 +421,20 @@ namespace OpenMS::Internal int StringManager::strLength(const XMLCh* input_ptr) { size_t processedChars = 0; XMLCh* pos_ptr = const_cast(input_ptr); + size_t align = (size_t)pos_ptr % 16; + // Prevents Page boundary crossing + for (size_t i = 0; i < align; i++) + { + if (pos_ptr[i] == 0) + { + return processedChars + i; + } + processedChars++; + pos_ptr++; + }; - while (true) { + while (true) + { simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)pos_ptr); simde__m128i zero = simde_mm_setzero_si128(); simde__m128i cmpZero = simde_mm_cmpeq_epi16(bits, zero); @@ -443,8 +455,9 @@ namespace OpenMS::Internal return 0; } - void StringManager::compress64 (const XMLCh* input_it, char* output_it) { - alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); + void StringManager::compress64 (const XMLCh* input_it, char* output_it) + { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); const simde__m128i shuffleMask = simde_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1); @@ -467,13 +480,18 @@ namespace OpenMS::Internal return false; } - for (size_t i = 0; i < quotient && bitmask; i++) + for (size_t i = 0; i < quotient; i++) { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_ptr); simde__m128i zero = simde_mm_setzero_si128(); simde__m128i andOP = simde_mm_and_si128(bits, mask); simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); - bitmask = simde_mm_movemask_epi8(cmp) == 0xFFFF; + + if (simde_mm_movemask_epi8(cmp) != 0xFFFF) + { + bitmask = false; + } input_ptr+=8; } From c0e1284dc44499b825035c65a97a8b9687f61e82 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 16:44:33 +0200 Subject: [PATCH 12/13] Added Description of strLength method for documentation --- src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 5f4dcc89777..51ef962aa9f 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -253,6 +253,7 @@ namespace OpenMS /// Destructor ~StringManager(); + /// Calculates the length of a XMLCh* string using SIMDe static int strLength(const XMLCh* input_ptr); /// Transcode the supplied C string to a xerces string From fdf5629224201e84628e030cf6397bb02e75d58c Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 23:27:28 +0200 Subject: [PATCH 13/13] code is now up to the coding conventions --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 2 +- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 197 +++++++++--------- 2 files changed, 99 insertions(+), 100 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 51ef962aa9f..0563d765809 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -11,7 +11,7 @@ #include #include -#include // StringList + #include #include #include diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 8abee4b900e..6441a048bbc 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -418,134 +418,133 @@ namespace OpenMS::Internal } } - int StringManager::strLength(const XMLCh* input_ptr) { - size_t processedChars = 0; + int StringManager::strLength(const XMLCh* input_ptr) + { + size_t processed_chars = 0; XMLCh* pos_ptr = const_cast(input_ptr); size_t align = (size_t)pos_ptr % 16; - // Prevents Page boundary crossing - for (size_t i = 0; i < align; i++) + + // Prevent crossing page boundaries + for (size_t i = 0; i < align; ++i) { if (pos_ptr[i] == 0) { - return processedChars + i; + return processed_chars + i; } - processedChars++; - pos_ptr++; - }; + ++processed_chars; + ++pos_ptr; + } - while (true) + while (true) { - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)pos_ptr); - simde__m128i zero = simde_mm_setzero_si128(); - simde__m128i cmpZero = simde_mm_cmpeq_epi16(bits, zero); - uint16_t zeroMask = simde_mm_movemask_epi8(cmpZero); + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(pos_ptr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i cmp_zero = simde_mm_cmpeq_epi16(bits, zero); + uint16_t zero_mask = simde_mm_movemask_epi8(cmp_zero); - if (zeroMask != 0x0000) { - int bytePosZero = __builtin_ctz(zeroMask); - int charPosZero = bytePosZero / 2; - pos_ptr += charPosZero; - return processedChars + charPosZero; - } + if (zero_mask != 0x0000) + { + int byte_pos_zero = __builtin_ctz(zero_mask); + int char_pos_zero = byte_pos_zero / 2; + pos_ptr += char_pos_zero; + return processed_chars + char_pos_zero; + } - pos_ptr += 8; - processedChars += 8; + pos_ptr += 8; + processed_chars += 8; } + } + + void StringManager::compress64(const XMLCh* inputIt, char* outputIt) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputIt)); + + // Select every second byte (little-endian lower byte of each UTF-16 character) + const simde__m128i shuffleMask = simde_mm_setr_epi8( + 0, 2, 4, 6, 8, 10, 12, 14, + -1, -1, -1, -1, -1, -1, -1, -1 + ); + + simde__m128i compressed = simde_mm_shuffle_epi8(bits, shuffleMask); - // Reached max length without finding null terminator - return 0; + // Store the lower 64 bits (8 ASCII characters) + simde_mm_storel_epi64(reinterpret_cast(outputIt), compressed); } - void StringManager::compress64 (const XMLCh* input_it, char* output_it) + bool StringManager::isASCII(const XMLCh* chars, const XMLSize_t length) + { + if (length == 0) { - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); - // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); - const simde__m128i shuffleMask = simde_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, - -1, -1, -1, -1, -1, -1, -1, -1); - simde__m128i compressed = simde_mm_shuffle_epi8(bits,shuffleMask); - simde_mm_storel_epi64((simde__m128i*)output_it, compressed); + return false; } - bool StringManager::isASCII(const XMLCh * chars, const XMLSize_t length) { + Size quotient = length / 8; + Size remainder = length % 8; - - size_t quotient = length / 8; // Ganzzahliger Quotient - size_t remainder = length % 8; + const XMLCh* inputPtr = chars; + simde__m128i mask = simde_mm_set1_epi16(0xFF00); + bool bitmask = true; - const XMLCh* input_ptr = chars; - simde__m128i mask = simde_mm_set1_epi16(0xFF00); - bool bitmask = true; + // Process blocks of 8 UTF-16 characters using SIMD + for (Size i = 0; i < quotient; ++i) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputPtr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i andOp = simde_mm_and_si128(bits, mask); + simde__m128i cmp = simde_mm_cmpeq_epi16(andOp, zero); - if (length == 0) + if (simde_mm_movemask_epi8(cmp) != 0xFFFF) { - return false; + bitmask = false; + break; } - for (size_t i = 0; i < quotient; i++) - { - - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_ptr); - simde__m128i zero = simde_mm_setzero_si128(); - simde__m128i andOP = simde_mm_and_si128(bits, mask); - simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); - - if (simde_mm_movemask_epi8(cmp) != 0xFFFF) - { - bitmask = false; - } - input_ptr+=8; - } - - for (size_t i = 0; i < remainder && bitmask; i++) + inputPtr += 8; + } + + // Check remaining characters individually + for (Size i = 0; i < remainder && bitmask; ++i) + { + if (inputPtr[i] & 0xFF00) { - bitmask = !(input_ptr[i] & 0xFF00); + bitmask = false; + break; } - return bitmask; } - void StringManager::appendASCII(const XMLCh * chars, const XMLSize_t length, String & result) + return bitmask; + } + + void StringManager::appendASCII(const XMLCh* chars, const XMLSize_t length, String& result) { - // XMLCh are characters in UTF16 (usually stored as 16bit unsigned - // short but this is not guaranteed). - // We know that the Base64 string here can only contain plain ASCII - // and all bytes except the least significant one will be zero. Thus - // we can convert to char directly (only keeping the least - // significant byte). - - size_t quotient = length / 8; - size_t remainder = length % 8; - - const XMLCh* input_ptr = chars; - - - std::div_t quotient_and_remainder = std::div(length, 8); - size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient - size_t remainder = quotient_and_remainder.rem; - // std::cout << "Remainer: " << remainder << std::endl; - // std::cout << "Quotient: " << quotient << std::endl; - // cout << "length: " << length << endl; - - - const XMLCh* it = chars; - const XMLCh* end = it + (quotient * 8); - // std::cout << "Anzahl der Elemente zwischen it1 und it2: " - // << std::distance(it, end) << std::endl; - - size_t curr_size = result.size(); - result.resize(curr_size + length); - char* output_ptr = &result[curr_size]; - - //Copy Block of 8 chars at a time. Then jumps to the next eight Blocks - for (size_t i = 0; i < quotient; i++) - { - compress64(input_ptr, output_ptr); - input_ptr += 8; - output_ptr += 8; + // XMLCh are characters in UTF16 (usually stored as 16-bit unsigned + // short but this is not guaranteed). + // We know that the Base64 string here can only contain plain ASCII + // and all bytes except the least significant one will be zero. Thus + // we can convert to char directly (only keeping the least + // significant byte). + + Size quotient = length / 8; + Size remainder = length % 8; + + const XMLCh* inputPtr = chars; + + Size currentSize = result.size(); + result.resize(currentSize + length); + char* outputPtr = &result[currentSize]; + + // Copy blocks of 8 characters at a time + for (Size i = 0; i < quotient; ++i) + { + compress64(inputPtr, outputPtr); + inputPtr += 8; + outputPtr += 8; } - - - for (size_t i = 0; i < remainder; i++) - { - output_ptr[i] = static_cast(input_ptr[i] & 0xFF); + + // Copy any remaining characters individually + for (Size i = 0; i < remainder; ++i) + { + outputPtr[i] = static_cast(inputPtr[i] & 0xFF); } }