From 75dd711e9e6e23dcd5fbffbabe3473243777bcdd Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Mon, 13 Jan 2025 09:51:01 +0100 Subject: [PATCH 01/34] propose new IM type --- src/openms/include/OpenMS/IONMOBILITY/IMTypes.h | 1 + src/openms/source/IONMOBILITY/IMTypes.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h index 80845d5c6f8..d30546140ca 100644 --- a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h +++ b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h @@ -46,6 +46,7 @@ namespace OpenMS CONCATENATED, ///< ion mobility frame is stacked in a single spectrum (i.e. has an IM float data array) MULTIPLE_SPECTRA,///< ion mobility is recorded as multiple spectra per frame (i.e. has one IM annotation per spectrum) MIXED, ///< an MSExperiment contains both CONCATENATED and MULTIPLE_SPECTRA + CENTROIDED, ///< ion mobility of peaks after centroiding in IM dimension. Ion mobility is annotated in a single float data array (i.e., each peak might have a different IM value in the data array) SIZE_OF_IMFORMAT }; /// Names of IMFormat diff --git a/src/openms/source/IONMOBILITY/IMTypes.cpp b/src/openms/source/IONMOBILITY/IMTypes.cpp index 991d7d2b318..7bf21cd337a 100644 --- a/src/openms/source/IONMOBILITY/IMTypes.cpp +++ b/src/openms/source/IONMOBILITY/IMTypes.cpp @@ -16,7 +16,7 @@ namespace OpenMS { const std::string NamesOfDriftTimeUnit[] = {"", "ms", "1/K0", "FAIMS_CV"}; - const std::string NamesOfIMFormat[] = {"none", "concatenated", "multiple_spectra", "mixed"}; + const std::string NamesOfIMFormat[] = {"none", "concatenated", "multiple_spectra", "mixed", "centroided"}; DriftTimeUnit toDriftTimeUnit(const std::string& dtu_string) From a2727e04b4319e79985d7047b2d79bd27c021d11 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 11:26:19 +0100 Subject: [PATCH 02/34] add stub for PeakPickerIM --- .../PROCESSING/CENTROIDING/PeakPickerIM.h | 26 ++++++ .../PROCESSING/CENTROIDING/sources.cmake | 1 + .../PROCESSING/CENTROIDING/PeakPickerIM.cpp | 18 ++++ .../PROCESSING/CENTROIDING/sources.cmake | 1 + src/pyOpenMS/pxds/PeakPickerIM.pxd | 17 ++++ .../class_tests/openms/executables.cmake | 1 + .../openms/source/PeakPickerIM_test.cpp | 91 +++++++++++++++++++ 7 files changed, 155 insertions(+) create mode 100644 src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h create mode 100644 src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp create mode 100644 src/pyOpenMS/pxds/PeakPickerIM.pxd create mode 100644 src/tests/class_tests/openms/source/PeakPickerIM_test.cpp diff --git a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h new file mode 100644 index 00000000000..541116ba3d2 --- /dev/null +++ b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h @@ -0,0 +1,26 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Timo Sachsenberg $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include + +namespace OpenMS +{ + /** + @brief Peak picking algorithm for ion mobility data + + @ingroup PeakPicking + */ + class OPENMS_DLLAPI PeakPickerIM + { + public: + static pickIMTraces(MSSpectrum& spectrum); + }; + +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/sources.cmake b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/sources.cmake index 9b0eb7b73f3..89c08ace988 100644 --- a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/sources.cmake +++ b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/sources.cmake @@ -5,6 +5,7 @@ set(directory include/OpenMS/PROCESSING/CENTROIDING) set(sources_list_h PeakPickerHiRes.h PeakPickerIterative.h +PeakPickerIM.h ) ### add path to the filenames diff --git a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp new file mode 100644 index 00000000000..1d41aa91d8e --- /dev/null +++ b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp @@ -0,0 +1,18 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Timo Sachsenberg $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#include + +namespace OpenMS +{ + void PeakPickerIM::pickIMTraces(MSSpectrum& spectrum) + { + // Implementation here + } + +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/source/PROCESSING/CENTROIDING/sources.cmake b/src/openms/source/PROCESSING/CENTROIDING/sources.cmake index 2b711b6d653..128169e8225 100644 --- a/src/openms/source/PROCESSING/CENTROIDING/sources.cmake +++ b/src/openms/source/PROCESSING/CENTROIDING/sources.cmake @@ -5,6 +5,7 @@ set(directory source/PROCESSING/CENTROIDING) set(sources_list PeakPickerHiRes.cpp PeakPickerIterative.cpp +PeakPickerIM.cpp ) ### add path to the filenames diff --git a/src/pyOpenMS/pxds/PeakPickerIM.pxd b/src/pyOpenMS/pxds/PeakPickerIM.pxd new file mode 100644 index 00000000000..7600a0de9d6 --- /dev/null +++ b/src/pyOpenMS/pxds/PeakPickerIM.pxd @@ -0,0 +1,17 @@ +from libcpp cimport bool +from Types cimport * +from MSSpectrum cimport * + +cdef extern from "" namespace "OpenMS": + + cdef cppclass PeakPickerIM(DefaultParamHandler): + # wrap-doc: + # Peak picking algorithm for ion mobility data + + PeakPickerIM() except + nogil + PeakPickerIM(PeakPickerIM &) except + nogil + +cdef extern from "" namespace "OpenMS::PeakPickerIM": + + # static members + void pickIMTraces(MSSpectrum& s) except + nogil # wrap-attach:PeakPickerIM wrap-doc:use trace detection for IM peak picking. diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index ee14ae722f4..ba45be45a0d 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -569,6 +569,7 @@ set(transformations_executables_list ModelDescription_test PeakPickerHiRes_test PeakPickerIterative_test + PeakPickerIM_test PeakWidthEstimator_test SeedListGenerator_test TraceFitter_test diff --git a/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp b/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp new file mode 100644 index 00000000000..3575cc069bc --- /dev/null +++ b/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp @@ -0,0 +1,91 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Timo Sachsenberg $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#include +#include +#include + +/////////////////////////// +#include +/////////////////////////// + +using namespace OpenMS; +using namespace std; + +START_TEST(PeakPickerIM, "$Id$") + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// + +PeakPickerIM* ptr = nullptr; +PeakPickerIM* nullPointer = nullptr; + +START_SECTION((PeakPickerIM())) + ptr = new PeakPickerIM(); + TEST_NOT_EQUAL(ptr, nullPointer) +END_SECTION + +START_SECTION((virtual ~PeakPickerIM())) + delete ptr; +END_SECTION + + +// create dummy ion mobility spectrum +{ + MSSpectrum input; + + const size_t points_per_im = 10; + const size_t num_im_values = 10; + + // For each ion mobility value (1.0 to 10.0) + for (double im = 1.0; im <= 10.0; im += 1.0) + { + double baseIntensity = (im <= 5.0) ? + 200.0 + ((im - 1.0) * 200.0) : // Increasing intensity from IM 1.0 to 5.0 + 1000.0 - ((im - 5.0) * 200.0); // Decreasing intensity from IM 6.0 to 10.0 + + // Create 10 data points for each ion mobility value + for (size_t i = 0; i < points_per_im; i++) + { + double mz = 100.0 + (i * 0.01); + double intensity = baseIntensity + (i * 20.0); + input.emplace_back(mz, intensity); + } + } + + // Set up the ion mobility array + input.getFloatDataArrays().resize(1); + input.getFloatDataArrays()[0].setName("Ion Mobility"); + + // Add ion mobility values (each repeated 10 times) + for (double im = 1.0; im <= 10.0; im += 1.0) + { + for (size_t i = 0; i < points_per_im; i++) + { + input.getFloatDataArrays()[0].push_back(im); + } + } +} + +START_SECTION(void pickIMTraces(MSSpectrum& spectrum)) +{ + PeakPickerIM pp_im; + pp_im.pickIMTraces(input); + // TODO adapt + TEST_EQUAL(output.size(), 10) + TEST_REAL_SIMILAR(output[0].getIntensity(), 450) + TEST_REAL_SIMILAR(output[0].getMZ(), 100.02) + + TEST_EQUAL(output.getFloatDataArrays().size(), 1) + TEST_EQUAL(output.getFloatDataArrays()[0].getName(), "Ion Mobility") + TEST_REAL_SIMILAR(output.getFloatDataArrays()[0][0], 150.0) + } +} +END_SECTION + +END_TEST \ No newline at end of file From 7d71674737621764088ab21b45729b388c1510ef Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 12:15:05 +0100 Subject: [PATCH 03/34] make it compile --- .../PROCESSING/CENTROIDING/PeakPickerIM.h | 4 ++-- .../openms/source/PeakPickerIM_test.cpp | 21 ++++++++----------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h index 541116ba3d2..faa0a8e7c8a 100644 --- a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h +++ b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h @@ -8,7 +8,7 @@ #pragma once -#include +#include namespace OpenMS { @@ -20,7 +20,7 @@ namespace OpenMS class OPENMS_DLLAPI PeakPickerIM { public: - static pickIMTraces(MSSpectrum& spectrum); + static void pickIMTraces(MSSpectrum& spectrum); }; } // namespace OpenMS \ No newline at end of file diff --git a/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp b/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp index 3575cc069bc..39afa73d7a0 100644 --- a/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp +++ b/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include /////////////////////////// #include @@ -36,11 +36,9 @@ END_SECTION // create dummy ion mobility spectrum +MSSpectrum input; { - MSSpectrum input; - const size_t points_per_im = 10; - const size_t num_im_values = 10; // For each ion mobility value (1.0 to 10.0) for (double im = 1.0; im <= 10.0; im += 1.0) @@ -77,14 +75,13 @@ START_SECTION(void pickIMTraces(MSSpectrum& spectrum)) PeakPickerIM pp_im; pp_im.pickIMTraces(input); // TODO adapt - TEST_EQUAL(output.size(), 10) - TEST_REAL_SIMILAR(output[0].getIntensity(), 450) - TEST_REAL_SIMILAR(output[0].getMZ(), 100.02) - - TEST_EQUAL(output.getFloatDataArrays().size(), 1) - TEST_EQUAL(output.getFloatDataArrays()[0].getName(), "Ion Mobility") - TEST_REAL_SIMILAR(output.getFloatDataArrays()[0][0], 150.0) - } + TEST_EQUAL(input.size(), 10) + TEST_REAL_SIMILAR(input[0].getIntensity(), 450) + TEST_REAL_SIMILAR(input[0].getMZ(), 100.02) + + TEST_EQUAL(input.getFloatDataArrays().size(), 1) + TEST_EQUAL(input.getFloatDataArrays()[0].getName(), "Ion Mobility") + TEST_REAL_SIMILAR(input.getFloatDataArrays()[0][0], 150.0) } END_SECTION From 94adc43324f1425aae2b3960ce208ce32ee623ae Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 13:06:37 +0100 Subject: [PATCH 04/34] todo for IMTypes --- src/openms/source/IONMOBILITY/IMTypes.cpp | 24 ++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/openms/source/IONMOBILITY/IMTypes.cpp b/src/openms/source/IONMOBILITY/IMTypes.cpp index 7bf21cd337a..4f83065c2d6 100644 --- a/src/openms/source/IONMOBILITY/IMTypes.cpp +++ b/src/openms/source/IONMOBILITY/IMTypes.cpp @@ -73,24 +73,30 @@ namespace OpenMS if (occs.empty()) { return IMFormat::NONE; - } - if (occs.size() == 1 && (occs.find(IMFormat::CONCATENATED) != occs.end() || occs.find(IMFormat::MULTIPLE_SPECTRA) != occs.end())) + } + + if (occs.size() == 1) { - return *occs.begin(); + auto format = *occs.begin(); + if(!(format != IMFormat::CONCATENATED + || format != IMFormat::MULTIPLE_SPECTRA + || format != IMFormat::CENTROIDED)) + { + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "subfunction returned invalid value(s)", "Number of different values: " + String(occs.size())); + } + return format; } - if (occs.size() == 2 && occs.find(IMFormat::CONCATENATED) != occs.end() && occs.find(IMFormat::MULTIPLE_SPECTRA) != occs.end()) + else { return IMFormat::MIXED; - } - - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "subfunction returned invalid value(s)", "Number of different values: " + String(occs.size())); + } } IMFormat IMTypes::determineIMFormat(const MSSpectrum& spec) { bool has_float_data = spec.containsIMData(); // cache value; query is 'expensive' bool has_drift_time = spec.getDriftTime() != DRIFTTIME_NOT_SET; - if (has_float_data && has_drift_time) + if (has_float_data && has_drift_time) // TODO: possible. CONCATENATED or CENTROIDED { const auto& fda = spec.getFloatDataArrays()[spec.getIMData().first]; String array_val = fda.empty() ? "[empty]" : String(fda[0]); @@ -99,7 +105,7 @@ namespace OpenMS if (has_float_data) { - return IMFormat::CONCATENATED; + return IMFormat::CONCATENATED; // TODO: or CENTROIDED } else if (has_drift_time) { From c82a56a919cfdbf5d0ac1c07b8381ad84e336c13 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 13:39:26 +0100 Subject: [PATCH 05/34] add UNKNOWN to IMFormat --- .../include/OpenMS/IONMOBILITY/IMTypes.h | 1 + .../OpenMS/METADATA/SpectrumSettings.h | 12 ++++++++++- src/openms/source/IONMOBILITY/IMTypes.cpp | 10 +++++++++- .../source/METADATA/SpectrumSettings.cpp | 11 ++++++++++ .../PROCESSING/CENTROIDING/PeakPickerIM.cpp | 20 +++++++++++++++++-- 5 files changed, 50 insertions(+), 4 deletions(-) diff --git a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h index d30546140ca..c49360b9fa7 100644 --- a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h +++ b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h @@ -47,6 +47,7 @@ namespace OpenMS MULTIPLE_SPECTRA,///< ion mobility is recorded as multiple spectra per frame (i.e. has one IM annotation per spectrum) MIXED, ///< an MSExperiment contains both CONCATENATED and MULTIPLE_SPECTRA CENTROIDED, ///< ion mobility of peaks after centroiding in IM dimension. Ion mobility is annotated in a single float data array (i.e., each peak might have a different IM value in the data array) + UNKNOWN, ///< ion mobility format not yet determined SIZE_OF_IMFORMAT }; /// Names of IMFormat diff --git a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h index 6e7672245c4..8dfc807ad68 100644 --- a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h +++ b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Timo Sachsenberg $ -// $Authors: Marc Sturm $ +// $Authors: Marc Sturm, Timo Sachsenberg $ // -------------------------------------------------------------------------- #pragma once @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -78,6 +79,14 @@ namespace OpenMS ///sets the spectrum type void setType(SpectrumType type); + /// @brief sets the IMFormat of the spectrum + /// @param im_type + void setIMFormat(const IMFormat& im_type); + + /// @brief returns the IMFormat of the spectrum + /// @return IMFormat of the spectrum + IMFormat getIMFormat() const; + /// returns the native identifier for the spectrum, used by the acquisition software. const String & getNativeID() const; /// sets the native identifier for the spectrum, used by the acquisition software. @@ -142,6 +151,7 @@ namespace OpenMS protected: SpectrumType type_; + IMFormat im_type_; String native_id_; String comment_; InstrumentSettings instrument_settings_; diff --git a/src/openms/source/IONMOBILITY/IMTypes.cpp b/src/openms/source/IONMOBILITY/IMTypes.cpp index 4f83065c2d6..a9e101b6953 100644 --- a/src/openms/source/IONMOBILITY/IMTypes.cpp +++ b/src/openms/source/IONMOBILITY/IMTypes.cpp @@ -16,7 +16,7 @@ namespace OpenMS { const std::string NamesOfDriftTimeUnit[] = {"", "ms", "1/K0", "FAIMS_CV"}; - const std::string NamesOfIMFormat[] = {"none", "concatenated", "multiple_spectra", "mixed", "centroided"}; + const std::string NamesOfIMFormat[] = {"none", "concatenated", "multiple_spectra", "mixed", "centroided", "unknown"}; DriftTimeUnit toDriftTimeUnit(const std::string& dtu_string) @@ -94,6 +94,14 @@ namespace OpenMS IMFormat IMTypes::determineIMFormat(const MSSpectrum& spec) { + // First check if format is already set and not UNKNOWN + IMFormat current_format = spec.getIMFormat(); + if (current_format != IMFormat::UNKNOWN) + { + return current_format; + } + + // If format is UNKNOWN, determine it bool has_float_data = spec.containsIMData(); // cache value; query is 'expensive' bool has_drift_time = spec.getDriftTime() != DRIFTTIME_NOT_SET; if (has_float_data && has_drift_time) // TODO: possible. CONCATENATED or CENTROIDED diff --git a/src/openms/source/METADATA/SpectrumSettings.cpp b/src/openms/source/METADATA/SpectrumSettings.cpp index e87db2c9bc2..c29a8080c59 100644 --- a/src/openms/source/METADATA/SpectrumSettings.cpp +++ b/src/openms/source/METADATA/SpectrumSettings.cpp @@ -21,6 +21,7 @@ namespace OpenMS SpectrumSettings::SpectrumSettings() : MetaInfoInterface(), type_(UNKNOWN), + im_type_(IMFormat::UNKNOWN), native_id_(), comment_(), instrument_settings_(), @@ -94,6 +95,16 @@ namespace OpenMS type_ = type; } + void SpectrumSettings::setIMFormat(const IMFormat& im_type) + { + im_type_ = im_type; + } + + IMFormat SpectrumSettings::getIMFormat() const + { + return im_type_; + } + const String & SpectrumSettings::getComment() const { return comment_; diff --git a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp index 1d41aa91d8e..5d31edbe5b2 100644 --- a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp +++ b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp @@ -11,8 +11,24 @@ namespace OpenMS { void PeakPickerIM::pickIMTraces(MSSpectrum& spectrum) - { - // Implementation here + { + // determine IM format of spectrum + IMFormat format = IMTypes::determineIMFormat(spectrum); + switch (format) + { + case IMFormat::NONE: + return; // no IM data + case IMFormat::CENTROIDED: + return; // already centroided + case IMFormat::CONCATENATED: + // TODO call peak picking algorithm for concatenated IM data + + // set format to centroided + spectrum.setIMFormat(IMFormat::CENTROIDED); + break; + default: + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Unknown IMFormat", String(NamesOfIMFormat[(size_t)format])); + } } } // namespace OpenMS \ No newline at end of file From e25c41d4d49d5c9079b157eb50efca11d572eba5 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 13:44:59 +0100 Subject: [PATCH 06/34] allow both drift time and IM arrays --- src/openms/source/IONMOBILITY/IMTypes.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/openms/source/IONMOBILITY/IMTypes.cpp b/src/openms/source/IONMOBILITY/IMTypes.cpp index a9e101b6953..9cc58c931d7 100644 --- a/src/openms/source/IONMOBILITY/IMTypes.cpp +++ b/src/openms/source/IONMOBILITY/IMTypes.cpp @@ -104,16 +104,14 @@ namespace OpenMS // If format is UNKNOWN, determine it bool has_float_data = spec.containsIMData(); // cache value; query is 'expensive' bool has_drift_time = spec.getDriftTime() != DRIFTTIME_NOT_SET; - if (has_float_data && has_drift_time) // TODO: possible. CONCATENATED or CENTROIDED - { - const auto& fda = spec.getFloatDataArrays()[spec.getIMData().first]; - String array_val = fda.empty() ? "[empty]" : String(fda[0]); - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "MSSpectrum contains both an float-data-array and a single drift time. At most one is allowed per spectrum!", String("Array: ") + array_val + ", ... <> Spec: " + spec.getDriftTime()); - } if (has_float_data) { - return IMFormat::CONCATENATED; // TODO: or CENTROIDED + if (has_drift_time) + { + OPENMS_LOG_DEBUG << "both drift time and IM data array found in spectrum " << spec.getNativeID() << "\n. Support for both is experimental." << std::endl; + } + return IMFormat::CONCATENATED; // TODO: or CENTROIDED. for now assume that no picking was done (otherwise we would have annotated it) } else if (has_drift_time) { From d6b5ba65d0785e27c5a6a8ce3025c1caedc9d623 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 13:50:11 +0100 Subject: [PATCH 07/34] add to pxd --- src/pyOpenMS/pxds/MSSpectrum.pxd | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pyOpenMS/pxds/MSSpectrum.pxd b/src/pyOpenMS/pxds/MSSpectrum.pxd index 7797164d224..801d457e9cd 100644 --- a/src/pyOpenMS/pxds/MSSpectrum.pxd +++ b/src/pyOpenMS/pxds/MSSpectrum.pxd @@ -85,9 +85,12 @@ cdef extern from "" namespace "OpenMS": void setDriftTime(double) except + nogil # wrap-doc:Sets the drift time (-1 if not set) DriftTimeUnit getDriftTimeUnit() except + nogil String getDriftTimeUnitAsString() except + nogil - void setDriftTimeUnit(DriftTimeUnit dt) except + nogil + void setDriftTimeUnit(DriftTimeUnit dt) except + nogil - bool containsIMData() except + nogil + IMFormat getIMType() except + nogil # wrap-doc:Returns the ion mobility format + void setIMType(IMFormat im_type) except + nogil # wrap-doc:Sets the ion mobility format + + bool containsIMData() except + nogil libcpp_pair[Size, DriftTimeUnit] getIMData() except + nogil # wrap-ignore wrap-doc:Returns position of ion mobility float data array and drift time unit unsigned int getMSLevel() except + nogil # wrap-doc:Returns the MS level From dcab937163a9fb3d4adbac0575b25d924a1d5911 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 15:38:05 +0100 Subject: [PATCH 08/34] apply reviewer suggestions --- .../include/OpenMS/IONMOBILITY/IMTypes.h | 2 +- .../OpenMS/METADATA/SpectrumSettings.h | 14 ++++++++------ .../source/METADATA/SpectrumSettings.cpp | 19 ------------------- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h index c49360b9fa7..206dbb183a8 100644 --- a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h +++ b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h @@ -46,7 +46,7 @@ namespace OpenMS CONCATENATED, ///< ion mobility frame is stacked in a single spectrum (i.e. has an IM float data array) MULTIPLE_SPECTRA,///< ion mobility is recorded as multiple spectra per frame (i.e. has one IM annotation per spectrum) MIXED, ///< an MSExperiment contains both CONCATENATED and MULTIPLE_SPECTRA - CENTROIDED, ///< ion mobility of peaks after centroiding in IM dimension. Ion mobility is annotated in a single float data array (i.e., each peak might have a different IM value in the data array) + CENTROIDED, ///< ion mobility of peaks after centroiding in IM dimension. Ion mobility is annotated in a single float data array (i.e., each peak might have a different IM value in the data array); identical to CONCATENATED in terms of data layout. UNKNOWN, ///< ion mobility format not yet determined SIZE_OF_IMFORMAT }; diff --git a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h index 8dfc807ad68..32ae3bfb3be 100644 --- a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h +++ b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h @@ -53,13 +53,13 @@ namespace OpenMS static const std::string NamesOfSpectrumType[SIZE_OF_SPECTRUMTYPE]; /// Constructor - SpectrumSettings(); + SpectrumSettings() = default; /// Copy constructor SpectrumSettings(const SpectrumSettings &) = default; /// Move constructor - SpectrumSettings(SpectrumSettings&&) = default; + SpectrumSettings(SpectrumSettings&&) noexcept = default; /// Destructor - ~SpectrumSettings(); + ~SpectrumSettings() noexcept = default; // Assignment operator SpectrumSettings & operator=(const SpectrumSettings &) = default; @@ -83,7 +83,9 @@ namespace OpenMS /// @param im_type void setIMFormat(const IMFormat& im_type); - /// @brief returns the IMFormat of the spectrum + /// @brief returns the IMFormat of the spectrum if set. Otherwise UNKNOWN (default). + /// + /// Note: If UNKNOWN, use IMFormat::determineIMFormat to determine the IMFormat based on the data. /// @return IMFormat of the spectrum IMFormat getIMFormat() const; @@ -150,8 +152,8 @@ namespace OpenMS protected: - SpectrumType type_; - IMFormat im_type_; + SpectrumType type_ = UNKNOWN; + IMFormat im_type_ = IMFormat::UNKNOWN; String native_id_; String comment_; InstrumentSettings instrument_settings_; diff --git a/src/openms/source/METADATA/SpectrumSettings.cpp b/src/openms/source/METADATA/SpectrumSettings.cpp index c29a8080c59..94b9f6d1510 100644 --- a/src/openms/source/METADATA/SpectrumSettings.cpp +++ b/src/openms/source/METADATA/SpectrumSettings.cpp @@ -9,7 +9,6 @@ #include #include -#include // for equality using namespace std; @@ -18,24 +17,6 @@ namespace OpenMS const std::string SpectrumSettings::NamesOfSpectrumType[] = {"Unknown", "Centroid", "Profile"}; - SpectrumSettings::SpectrumSettings() : - MetaInfoInterface(), - type_(UNKNOWN), - im_type_(IMFormat::UNKNOWN), - native_id_(), - comment_(), - instrument_settings_(), - source_file_(), - acquisition_info_(), - precursors_(), - products_(), - identification_(), - data_processing_() - { - } - - SpectrumSettings::~SpectrumSettings() = default; - bool SpectrumSettings::operator==(const SpectrumSettings & rhs) const { return MetaInfoInterface::operator==(rhs) && From 97b65df7aa7bdf11c08b480166c950c76d26428a Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 15:39:47 +0100 Subject: [PATCH 09/34] fix logic --- src/openms/source/IONMOBILITY/IMTypes.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/openms/source/IONMOBILITY/IMTypes.cpp b/src/openms/source/IONMOBILITY/IMTypes.cpp index 9cc58c931d7..3b41dc92a12 100644 --- a/src/openms/source/IONMOBILITY/IMTypes.cpp +++ b/src/openms/source/IONMOBILITY/IMTypes.cpp @@ -78,9 +78,9 @@ namespace OpenMS if (occs.size() == 1) { auto format = *occs.begin(); - if(!(format != IMFormat::CONCATENATED + if(format != IMFormat::CONCATENATED || format != IMFormat::MULTIPLE_SPECTRA - || format != IMFormat::CENTROIDED)) + || format != IMFormat::CENTROIDED) { throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "subfunction returned invalid value(s)", "Number of different values: " + String(occs.size())); } From 7ea05eb456de31d548c723ec53a4aba3742d2c53 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 15:46:50 +0100 Subject: [PATCH 10/34] fix logic add test --- src/openms/source/IONMOBILITY/IMTypes.cpp | 12 ++++++------ src/tests/class_tests/openms/source/IMTypes_test.cpp | 10 ++++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/openms/source/IONMOBILITY/IMTypes.cpp b/src/openms/source/IONMOBILITY/IMTypes.cpp index 3b41dc92a12..534405d6c3a 100644 --- a/src/openms/source/IONMOBILITY/IMTypes.cpp +++ b/src/openms/source/IONMOBILITY/IMTypes.cpp @@ -78,12 +78,12 @@ namespace OpenMS if (occs.size() == 1) { auto format = *occs.begin(); - if(format != IMFormat::CONCATENATED - || format != IMFormat::MULTIPLE_SPECTRA - || format != IMFormat::CENTROIDED) - { - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "subfunction returned invalid value(s)", "Number of different values: " + String(occs.size())); - } + if (format != IMFormat::CONCATENATED + && format != IMFormat::MULTIPLE_SPECTRA + && format != IMFormat::CENTROIDED) + { + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "subfunction returned invalid value(s)", "Number of different values: " + String(occs.size())); + } return format; } else diff --git a/src/tests/class_tests/openms/source/IMTypes_test.cpp b/src/tests/class_tests/openms/source/IMTypes_test.cpp index 3aa347c25c3..b7e7b4ccba6 100644 --- a/src/tests/class_tests/openms/source/IMTypes_test.cpp +++ b/src/tests/class_tests/openms/source/IMTypes_test.cpp @@ -125,6 +125,16 @@ START_SECTION(static IMFormat determineIMFormat(const MSExperiment& exp)) TEST_EQUAL(IMTypes::determineIMFormat(exp) == IMFormat::MIXED, true) } + { + // test invalid format validation + MSExperiment exp; + MSSpectrum spec; + spec.setIMType(IMFormat::MIXED); // this should trigger the validation check + exp.addSpectrum(spec); + TEST_EXCEPTION(Exception::InvalidValue, IMTypes::determineIMFormat(exp)) + ) + } + { // set both ... invalid! auto IMwithFDA2 = IMwithFDA; From 8eb8f27c443a236a1fa2a76efc17c6f9c71801b1 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 15:46:50 +0100 Subject: [PATCH 11/34] fix logic add test --- src/openms/source/IONMOBILITY/IMTypes.cpp | 12 ++++++------ src/tests/class_tests/openms/source/IMTypes_test.cpp | 9 +++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/openms/source/IONMOBILITY/IMTypes.cpp b/src/openms/source/IONMOBILITY/IMTypes.cpp index 3b41dc92a12..534405d6c3a 100644 --- a/src/openms/source/IONMOBILITY/IMTypes.cpp +++ b/src/openms/source/IONMOBILITY/IMTypes.cpp @@ -78,12 +78,12 @@ namespace OpenMS if (occs.size() == 1) { auto format = *occs.begin(); - if(format != IMFormat::CONCATENATED - || format != IMFormat::MULTIPLE_SPECTRA - || format != IMFormat::CENTROIDED) - { - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "subfunction returned invalid value(s)", "Number of different values: " + String(occs.size())); - } + if (format != IMFormat::CONCATENATED + && format != IMFormat::MULTIPLE_SPECTRA + && format != IMFormat::CENTROIDED) + { + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "subfunction returned invalid value(s)", "Number of different values: " + String(occs.size())); + } return format; } else diff --git a/src/tests/class_tests/openms/source/IMTypes_test.cpp b/src/tests/class_tests/openms/source/IMTypes_test.cpp index 3aa347c25c3..9cdc22799d0 100644 --- a/src/tests/class_tests/openms/source/IMTypes_test.cpp +++ b/src/tests/class_tests/openms/source/IMTypes_test.cpp @@ -125,6 +125,15 @@ START_SECTION(static IMFormat determineIMFormat(const MSExperiment& exp)) TEST_EQUAL(IMTypes::determineIMFormat(exp) == IMFormat::MIXED, true) } + { + // test invalid format validation + MSExperiment exp; + MSSpectrum spec; + spec.setIMType(IMFormat::MIXED); // this should trigger the validation check + exp.addSpectrum(spec); + TEST_EXCEPTION(Exception::InvalidValue, IMTypes::determineIMFormat(exp)) + } + { // set both ... invalid! auto IMwithFDA2 = IMwithFDA; From d255658f90e5c21357e996f365f6bb26853ac5dc Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 15:56:57 +0100 Subject: [PATCH 12/34] check for MIXED already in setIMFormat --- src/openms/source/METADATA/SpectrumSettings.cpp | 4 ++++ src/tests/class_tests/openms/source/IMTypes_test.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/openms/source/METADATA/SpectrumSettings.cpp b/src/openms/source/METADATA/SpectrumSettings.cpp index 94b9f6d1510..4f2f4f73cbd 100644 --- a/src/openms/source/METADATA/SpectrumSettings.cpp +++ b/src/openms/source/METADATA/SpectrumSettings.cpp @@ -78,6 +78,10 @@ namespace OpenMS void SpectrumSettings::setIMFormat(const IMFormat& im_type) { + if (im_type == IMFormat::MIXED) + { + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Single spectrum can't have MIXED ion mobility format.", "SIZE_OF_IMFORMAT"); + } im_type_ = im_type; } diff --git a/src/tests/class_tests/openms/source/IMTypes_test.cpp b/src/tests/class_tests/openms/source/IMTypes_test.cpp index 9cdc22799d0..cc038ba93f3 100644 --- a/src/tests/class_tests/openms/source/IMTypes_test.cpp +++ b/src/tests/class_tests/openms/source/IMTypes_test.cpp @@ -129,7 +129,7 @@ START_SECTION(static IMFormat determineIMFormat(const MSExperiment& exp)) // test invalid format validation MSExperiment exp; MSSpectrum spec; - spec.setIMType(IMFormat::MIXED); // this should trigger the validation check + spec.setIMFormat(IMFormat::MIXED); // this should trigger the validation check because a single spectrum can't be mixed exp.addSpectrum(spec); TEST_EXCEPTION(Exception::InvalidValue, IMTypes::determineIMFormat(exp)) } From 478d48410a6b193282dd1f536b53a388ad91c89b Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 16:17:04 +0100 Subject: [PATCH 13/34] update tests --- src/openms/include/OpenMS/METADATA/MetaInfoInterface.h | 4 ++-- src/openms/source/METADATA/MetaInfoInterface.cpp | 6 ------ src/tests/class_tests/openms/source/IMTypes_test.cpp | 7 ++----- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h b/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h index 4cb6306f6d8..d8ce4c028bc 100644 --- a/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h +++ b/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h @@ -107,8 +107,8 @@ namespace OpenMS /// Creates the MetaInfo object if it does not exist inline void createIfNotExists_(); - /// Pointer to the MetaInfo object (0 by default) - MetaInfo* meta_; + /// Pointer to the MetaInfo object + MetaInfo* meta_ = nullptr; }; } // namespace OpenMS diff --git a/src/openms/source/METADATA/MetaInfoInterface.cpp b/src/openms/source/METADATA/MetaInfoInterface.cpp index ad0df11ef03..1e4e8ae8b29 100644 --- a/src/openms/source/METADATA/MetaInfoInterface.cpp +++ b/src/openms/source/METADATA/MetaInfoInterface.cpp @@ -14,12 +14,6 @@ using namespace std; namespace OpenMS { - - MetaInfoInterface::MetaInfoInterface() : - meta_(nullptr) - { - } - /// Copy constructor MetaInfoInterface::MetaInfoInterface(const MetaInfoInterface& rhs) : meta_(nullptr) diff --git a/src/tests/class_tests/openms/source/IMTypes_test.cpp b/src/tests/class_tests/openms/source/IMTypes_test.cpp index 3aa347c25c3..afdc9eceb91 100644 --- a/src/tests/class_tests/openms/source/IMTypes_test.cpp +++ b/src/tests/class_tests/openms/source/IMTypes_test.cpp @@ -126,14 +126,13 @@ START_SECTION(static IMFormat determineIMFormat(const MSExperiment& exp)) } { - // set both ... invalid! + // set both ... is valid (typically concatenated + some average value) auto IMwithFDA2 = IMwithFDA; IMwithFDA2.setDriftTime(123.4); MSExperiment exp; exp.addSpectrum(IMwithDrift); exp.addSpectrum(IMwithFDA); exp.addSpectrum(IMwithFDA2); - TEST_EXCEPTION(Exception::InvalidValue, IMTypes::determineIMFormat(exp)) } END_SECTION @@ -147,11 +146,9 @@ START_SECTION(static IMFormat determineIMFormat(const MSSpectrum& spec)) // convert to IM-Frame with float meta-data array TEST_EQUAL(IMTypes::determineIMFormat(IMwithFDA) == IMFormat::CONCATENATED, true) - // set both ... invalid! + // set both ... is valid (typically concatenated + some average value) auto IMwithFDA2 = IMwithFDA; IMwithFDA2.setDriftTime(123.4); - TEST_EXCEPTION(Exception::InvalidValue, IMTypes::determineIMFormat(IMwithFDA2)) - END_SECTION ///////////////////////////////////////////////////////////// From 60675b1b5eb832657e27139e16300ebcd357e794 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Tue, 14 Jan 2025 16:31:12 +0100 Subject: [PATCH 14/34] fix removed constructor --- src/openms/include/OpenMS/METADATA/MetaInfoInterface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h b/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h index d8ce4c028bc..53099f7a82a 100644 --- a/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h +++ b/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h @@ -36,7 +36,7 @@ namespace OpenMS public: /// Constructor - MetaInfoInterface(); + MetaInfoInterface() = default; /// Copy constructor MetaInfoInterface(const MetaInfoInterface& rhs); /// Move constructor From ba72039851e4db4d09a6cf2eb8cc0a5241987a45 Mon Sep 17 00:00:00 2001 From: Timo Sachsenberg Date: Wed, 15 Jan 2025 10:06:30 +0100 Subject: [PATCH 15/34] add some comments --- src/openms/include/OpenMS/IONMOBILITY/IMTypes.h | 6 +++++- src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h index 206dbb183a8..16321be2a99 100644 --- a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h +++ b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h @@ -40,6 +40,10 @@ namespace OpenMS OPENMS_DLLAPI const std::string& toString(const DriftTimeUnit value); /// Different ways to represent ion mobility data in a spectrum + /// Note: + /// 1. MIXED is only used for MSExperiment, not for MSSpectrum + /// 2. UNKNOWN should be used if the format is not yet determined. + /// FileHandler or e.g. IM peak picker should ideally set the format a known value. enum class IMFormat { NONE, ///< no ion mobility @@ -47,7 +51,7 @@ namespace OpenMS MULTIPLE_SPECTRA,///< ion mobility is recorded as multiple spectra per frame (i.e. has one IM annotation per spectrum) MIXED, ///< an MSExperiment contains both CONCATENATED and MULTIPLE_SPECTRA CENTROIDED, ///< ion mobility of peaks after centroiding in IM dimension. Ion mobility is annotated in a single float data array (i.e., each peak might have a different IM value in the data array); identical to CONCATENATED in terms of data layout. - UNKNOWN, ///< ion mobility format not yet determined + UNKNOWN, ///< ion mobility format not yet determined. SIZE_OF_IMFORMAT }; /// Names of IMFormat diff --git a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp index 5d31edbe5b2..bb6b2006133 100644 --- a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp +++ b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp @@ -26,6 +26,8 @@ namespace OpenMS // set format to centroided spectrum.setIMFormat(IMFormat::CENTROIDED); break; + case IMFormat::UNKNOWN: + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "IMFormat set to UNKNOWN after deterineIMFormat. This should never happen. Please contact the developers.", String(NamesOfIMFormat[(size_t)format])); default: throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Unknown IMFormat", String(NamesOfIMFormat[(size_t)format])); } From b60a9de80e3466ddbb24d05a2ecc9e48349f362c Mon Sep 17 00:00:00 2001 From: Mohammed AlHigaylan Date: Sat, 22 Feb 2025 23:17:52 -0500 Subject: [PATCH 16/34] started to populate peakpickerIM with code --- contrib | 2 +- .../PROCESSING/CENTROIDING/PeakPickerIM.h | 38 +++++-- .../PROCESSING/CENTROIDING/PeakPickerIM.cpp | 66 ++++++++++++- .../class_tests/openms/executables.cmake | 1 + .../openms/source/PeakPickerIM_test_2.cpp | 98 +++++++++++++++++++ 5 files changed, 194 insertions(+), 11 deletions(-) create mode 100644 src/tests/class_tests/openms/source/PeakPickerIM_test_2.cpp diff --git a/contrib b/contrib index 3cdef5c7c7f..e6fde7cfed8 160000 --- a/contrib +++ b/contrib @@ -1 +1 @@ -Subproject commit 3cdef5c7c7f98032f7d43c59ed642ebe5a1d56b1 +Subproject commit e6fde7cfed8cde73c6625cd493ce3f82e21263cc diff --git a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h index faa0a8e7c8a..bd4497a233e 100644 --- a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h +++ b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h @@ -2,25 +2,49 @@ // SPDX-License-Identifier: BSD-3-Clause // // -------------------------------------------------------------------------- -// $Author: Timo Sachsenberg $ +// $Author: Timo Sachsenberg, Mohammed Alhigaylan $ // $Maintainer: Timo Sachsenberg $ // ------------------------------------------------------------------------------------------------------------------------------------------- #pragma once #include +#include namespace OpenMS { /** @brief Peak picking algorithm for ion mobility data - - @ingroup PeakPicking - */ - class OPENMS_DLLAPI PeakPickerIM + + @ingroup PeakPicking + */ + class OPENMS_DLLAPI PeakPickerIM { - public: - static void pickIMTraces(MSSpectrum& spectrum); + public: + /// Default constructor initializing parameters with default values. + PeakPickerIM(); + + /// Destructor. + virtual ~PeakPickerIM(); + + /// Picks ion mobility traces from the given spectrum. + void pickIMTraces(MSSpectrum& spectrum); + + /// Sets the parameters for peak picking. + void setParameters(const Param& param); + + /// Gets the current parameters. + Param getParameters() const; + + protected: + /// Updates internal member variables when parameters are changed. + void updateMembers_(); + + /// Returns the default parameters. + Param getDefaultParameters() const; + + /// Stores the parameters for peak picking. + Param parameters_; }; } // namespace OpenMS \ No newline at end of file diff --git a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp index bb6b2006133..2fb45ab4939 100644 --- a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp +++ b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp @@ -2,14 +2,61 @@ // SPDX-License-Identifier: BSD-3-Clause // // -------------------------------------------------------------------------- -// $Author: Timo Sachsenberg $ +// $Author: Timo Sachsenberg, Mohammed Alhigaylan $ // $Maintainer: Timo Sachsenberg $ // ------------------------------------------------------------------------------------------------------------------------------------------- #include +#include +#include +#include +#include // Updated include +#include namespace OpenMS { + // Constructor: initialize the parameters_ member with default parameters. + PeakPickerIM::PeakPickerIM() : + parameters_(getDefaultParameters()) + { + } + + // Destructor + PeakPickerIM::~PeakPickerIM() + { + } + + // Returns a Param object with the default settings for peak picking. + Param PeakPickerIM::getDefaultParameters() const + { + Param p; + p.setValue("signal_to_noise", 0.0, "Signal to noise threshold for peak picking"); + p.setValue("spacing_difference_gap", 0.0, "The extension of a peak is stopped if the spacing between two subsequent data points exceeds 'spacing_difference_gap * min_spacing'. 'min_spacing' is the smaller of the two spacings from the peak apex to its two neighboring points. '0' to disable the constraint. Not applicable to chromatograms."); + p.setValue("spacing_difference", 0.0, "Maximum allowed difference between points during peak extension, in multiples of the minimal difference between the peak apex and its two neighboring points. If this difference is exceeded a missing point is assumed (see parameter 'missing'). A higher value implies a less stringent peak definition, since individual signals within the peak are allowed to be further apart. '0' to disable the constraint. Not applicable to chromatograms."); + p.setValue("missing", 0, "Maximum number of missing points allowed when extending a peak to the left or to the right. A missing data point occurs if the spacing between two subsequent data points exceeds 'spacing_difference * min_spacing'. 'min_spacing' is the smaller of the two spacings from the peak apex to its two neighboring points. Not applicable to chromatograms."); + p.setValue("signal_to_noise", 0.0, "Signal to noise threshold for peak picking"); + return p; + } + // Update internal members if any parameter changes require it. + void PeakPickerIM::updateMembers_() + { + // For this example, no extra member variables need updating. + // This function is a placeholder for potential future use. + } + + // Sets the parameters and updates internal members accordingly. + void PeakPickerIM::setParameters(const Param& param) + { + parameters_ = param; + updateMembers_(); + } + + // Retrieves the current parameters. + Param PeakPickerIM::getParameters() const + { + return parameters_; + } + void PeakPickerIM::pickIMTraces(MSSpectrum& spectrum) { // determine IM format of spectrum @@ -20,14 +67,27 @@ namespace OpenMS return; // no IM data case IMFormat::CENTROIDED: return; // already centroided - case IMFormat::CONCATENATED: + case IMFormat::CONCATENATED: { // TODO call peak picking algorithm for concatenated IM data + std::cout << "Processing concatenated IM data..." << std::endl; + std::cout << "Size of input spectrum..." << spectrum.size() << std::endl; + + PeakPickerHiRes picker; + // Forward the parameters from this object to the underlying picker + picker.setParameters(parameters_); + MSSpectrum picked_spectrum; + + picker.pick(spectrum, picked_spectrum); + std::cout << "Size of picked_spectrum..." << picked_spectrum.size() << std::endl; + + spectrum = picked_spectrum; // set format to centroided spectrum.setIMFormat(IMFormat::CENTROIDED); break; + } case IMFormat::UNKNOWN: - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "IMFormat set to UNKNOWN after deterineIMFormat. This should never happen. Please contact the developers.", String(NamesOfIMFormat[(size_t)format])); + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "IMFormat set to UNKNOWN after determineIMFormat. This should never happen. Please contact the developers.", String(NamesOfIMFormat[(size_t)format])); default: throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Unknown IMFormat", String(NamesOfIMFormat[(size_t)format])); } diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index ba45be45a0d..69c1973fff1 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -570,6 +570,7 @@ set(transformations_executables_list PeakPickerHiRes_test PeakPickerIterative_test PeakPickerIM_test + PeakPickerIM_test_2 PeakWidthEstimator_test SeedListGenerator_test TraceFitter_test diff --git a/src/tests/class_tests/openms/source/PeakPickerIM_test_2.cpp b/src/tests/class_tests/openms/source/PeakPickerIM_test_2.cpp new file mode 100644 index 00000000000..6a996c030b4 --- /dev/null +++ b/src/tests/class_tests/openms/source/PeakPickerIM_test_2.cpp @@ -0,0 +1,98 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Timo Sachsenberg, Mohammed Alhigaylan $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#include +#include +#include + +/////////////////////////// +#include +/////////////////////////// + +using namespace OpenMS; +using namespace std; + +START_TEST(PeakPickerIM, "$Id$") + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// + +PeakPickerIM* ptr = nullptr; +PeakPickerIM* nullPointer = nullptr; + +START_SECTION((PeakPickerIM())) + ptr = new PeakPickerIM(); + TEST_NOT_EQUAL(ptr, nullPointer) +END_SECTION + +START_SECTION((virtual ~PeakPickerIM())) + delete ptr; +END_SECTION + + +// create dummy ion mobility spectrum +MSSpectrum input; +{ + double start_mz = 0.8; + double end_mz = 1.2; + double step = 0.01; + double max_intensity = 1000.0; // Maximum intensity at m/z = 1.0 + + for (double mz = start_mz; mz <= end_mz; mz += step) + { + double intensity = 0.0; + if (mz <= 1.0) + { + // Linearly increase intensity from start_mz to 1.0 + intensity = ((mz - start_mz) / (1.0 - start_mz)) * max_intensity; + } + else + { + // Linearly decrease intensity from 1.0 to end_mz + intensity = ((end_mz - mz) / (end_mz - 1.0)) * max_intensity; + } + input.emplace_back(mz, intensity); + } + // Set the IM format to CONCATENATED to force the peak picking branch + input.setIMFormat(IMFormat::CONCATENATED); +// input.getFloatDataArrays().resize(1); +// input.getFloatDataArrays()[0].setName("Ion Mobility"); +} + + +START_SECTION(void pickIMTraces(MSSpectrum& spectrum)) +{ + PeakPickerIM pp_im; + // print all peaks in our current input. + std::cout << "start printing dummy spectrum BEFORE picking! " << std::endl; + for (const auto& peak : input) + { + std::cout << "m/z: " << peak.getMZ() + << ", intensity: " << peak.getIntensity() << std::endl; + } + + pp_im.pickIMTraces(input); + std::cout << "start printing dummy spectrum AFTER picking! " << std::endl; + for (const auto& peak : input) + { + std::cout << "m/z: " << peak.getMZ() + << ", intensity: " << peak.getIntensity() << std::endl; + } + + // TODO adapt + TEST_EQUAL(input.size(), 10) + TEST_REAL_SIMILAR(input[0].getIntensity(), 450) + TEST_REAL_SIMILAR(input[0].getMZ(), 100.02) + +// TEST_EQUAL(input.getFloatDataArrays().size(), 1) + // TEST_EQUAL(input.getFloatDataArrays()[0].getName(), "Ion Mobility") + // TEST_REAL_SIMILAR(input.getFloatDataArrays()[0][0], 150.0) +} +END_SECTION + +END_TEST \ No newline at end of file From 48c247b659358b0edf2e2800b5b05d06907d0dd9 Mon Sep 17 00:00:00 2001 From: Mohammed AlHigaylan Date: Sun, 23 Feb 2025 14:59:47 -0500 Subject: [PATCH 17/34] included linear resampler --- .../PROCESSING/CENTROIDING/PeakPickerIM.cpp | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp index 2fb45ab4939..fb6ff2b977e 100644 --- a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp +++ b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp @@ -10,7 +10,8 @@ #include #include #include -#include // Updated include +#include +#include #include namespace OpenMS @@ -72,6 +73,26 @@ namespace OpenMS std::cout << "Processing concatenated IM data..." << std::endl; std::cout << "Size of input spectrum..." << spectrum.size() << std::endl; + // initialize resampling + // Set up custom parameters for the resampler: + double sampling_rate = 0.05; + bool ppm = false; + Param resampler_param; + resampler_param.setValue("spacing", sampling_rate, "Spacing of the resampled output peaks."); + resampler_param.setValue("ppm", ppm ? "true" : "false", "Whether spacing is in ppm or Th"); + + LinearResamplerAlign lin_resampler; + lin_resampler.setParameters(resampler_param); + + lin_resampler.raster(spectrum); + std::cout << "Size of resampled spectrum: " << spectrum.size() << std::endl; + std::cout << "Resampled spectrum printing...: " << std::endl; + for (const auto& peak : spectrum) + { + std::cout << "m/z: " << peak.getMZ() + << ", intensity: " << peak.getIntensity() << std::endl; + } + PeakPickerHiRes picker; // Forward the parameters from this object to the underlying picker picker.setParameters(parameters_); From ee1f326c712895f95a0d9a6b3f7697ce85eb9047 Mon Sep 17 00:00:00 2001 From: Mohammed AlHigaylan Date: Thu, 6 Mar 2025 16:16:42 -0500 Subject: [PATCH 18/34] added topp tool to feed in real mobilograms --- .../PROCESSING/CENTROIDING/PeakPickerIM.h | 6 +- .../PROCESSING/CENTROIDING/PeakPickerIM.cpp | 157 +++++++++++++----- src/topp/PeakPickerIM.cpp | 69 ++++++++ src/topp/executables.cmake | 1 + 4 files changed, 187 insertions(+), 46 deletions(-) create mode 100644 src/topp/PeakPickerIM.cpp diff --git a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h index bd4497a233e..dfbc2380060 100644 --- a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h +++ b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h @@ -18,7 +18,7 @@ namespace OpenMS @ingroup PeakPicking */ - class OPENMS_DLLAPI PeakPickerIM + class OPENMS_DLLAPI PeakPickerIM { public: /// Default constructor initializing parameters with default values. @@ -45,6 +45,10 @@ namespace OpenMS /// Stores the parameters for peak picking. Param parameters_; + + private: + /// determine sampling rate for linear resampler + double computeOptimalSamplingRate(const MSSpectrum& spectrum); }; } // namespace OpenMS \ No newline at end of file diff --git a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp index fb6ff2b977e..59b52afc7b2 100644 --- a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp +++ b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp @@ -16,7 +16,70 @@ namespace OpenMS { - // Constructor: initialize the parameters_ member with default parameters. + double PeakPickerIM::computeOptimalSamplingRate(const MSSpectrum& spectrum) + { + if (spectrum.size() < 2) + { + std::cerr << "Warning: Spectrum has too few points for resampling. Using fixed sampling rate of 0.001 1/k" << std::endl; + return 0.001; // Default fallback value + } + + std::vector mz_diffs; + mz_diffs.reserve(spectrum.size() - 1); + + for (size_t i = 1; i < spectrum.size(); ++i) + { + mz_diffs.push_back(spectrum[i].getMZ() - spectrum[i - 1].getMZ()); + } + + // A mobilogram may have two peaks spaced far apart but in the 'm/z' array, + // the peaks are adjacent to each other and will result in a big mz_dff. + // Take the 75% percentile to remove large m/z gaps. + std::vector filtered_mz_diffs = mz_diffs; + std::sort(filtered_mz_diffs.begin(), filtered_mz_diffs.end()); + double threshold = filtered_mz_diffs[filtered_mz_diffs.size() * 0.75]; + std::cerr << "75% percentile of ion mobility difference is determined to be... " << threshold << std::endl; + + std::vector small_mz_diffs; + for (double diff : mz_diffs) + { + if (diff <= threshold) + { + small_mz_diffs.push_back(diff); + } + } + + if (small_mz_diffs.empty()) + { + std::cerr << "Warning: No valid small m/z differences found. Using default sampling rate. Using fixed sampling rate of 0.001 1/k" << std::endl; + return 0.001; // Default fallback value + } + + // Step 2: Compute mode (most common spacing) + std::map freq_map; + for (double diff : small_mz_diffs) + { + freq_map[diff]++; + } + + double mode_sampling_rate = small_mz_diffs.front(); // Default to first value + int max_count = 0; + for (const auto& [diff, count] : freq_map) + { + if (count > max_count) + { + mode_sampling_rate = diff; + max_count = count; + } + } + + // Compute final sampling rate + double sampling_rate = mode_sampling_rate; + std::cout << "Computed sampling rate: " << sampling_rate << std::endl; + + return sampling_rate; + } + PeakPickerIM::PeakPickerIM() : parameters_(getDefaultParameters()) { @@ -59,8 +122,12 @@ namespace OpenMS } void PeakPickerIM::pickIMTraces(MSSpectrum& spectrum) - { - // determine IM format of spectrum + { + // Debugging: Print the input spectrum size + std::cout << "Processing spectrum with " << spectrum.size() << " peaks." << std::endl; + + /* + // IM format determination (Temporarily commented out) IMFormat format = IMTypes::determineIMFormat(spectrum); switch (format) { @@ -68,50 +135,50 @@ namespace OpenMS return; // no IM data case IMFormat::CENTROIDED: return; // already centroided - case IMFormat::CONCATENATED: { - // TODO call peak picking algorithm for concatenated IM data - std::cout << "Processing concatenated IM data..." << std::endl; - std::cout << "Size of input spectrum..." << spectrum.size() << std::endl; - - // initialize resampling - // Set up custom parameters for the resampler: - double sampling_rate = 0.05; - bool ppm = false; - Param resampler_param; - resampler_param.setValue("spacing", sampling_rate, "Spacing of the resampled output peaks."); - resampler_param.setValue("ppm", ppm ? "true" : "false", "Whether spacing is in ppm or Th"); - - LinearResamplerAlign lin_resampler; - lin_resampler.setParameters(resampler_param); - - lin_resampler.raster(spectrum); - std::cout << "Size of resampled spectrum: " << spectrum.size() << std::endl; - std::cout << "Resampled spectrum printing...: " << std::endl; - for (const auto& peak : spectrum) - { - std::cout << "m/z: " << peak.getMZ() - << ", intensity: " << peak.getIntensity() << std::endl; - } - - PeakPickerHiRes picker; - // Forward the parameters from this object to the underlying picker - picker.setParameters(parameters_); - MSSpectrum picked_spectrum; - - picker.pick(spectrum, picked_spectrum); - std::cout << "Size of picked_spectrum..." << picked_spectrum.size() << std::endl; - - spectrum = picked_spectrum; - - // set format to centroided - spectrum.setIMFormat(IMFormat::CENTROIDED); - break; - } case IMFormat::UNKNOWN: - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "IMFormat set to UNKNOWN after determineIMFormat. This should never happen. Please contact the developers.", String(NamesOfIMFormat[(size_t)format])); - default: - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Unknown IMFormat", String(NamesOfIMFormat[(size_t)format])); + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "IMFormat set to UNKNOWN after determineIMFormat. This should never happen.", + String(NamesOfIMFormat[(size_t)format])); } + */ + + // Initialize resampling + double sampling_rate = computeOptimalSamplingRate(spectrum) * 4; + std::cout << "Using sampling rate: " << sampling_rate << std::endl; + + // Set up custom parameters for the resampler + bool ppm = false; + Param resampler_param; + resampler_param.setValue("spacing", sampling_rate, "Spacing of the resampled output peaks."); + resampler_param.setValue("ppm", ppm ? "true" : "false", "Whether spacing is in ppm or Th"); + + LinearResamplerAlign lin_resampler; + lin_resampler.setParameters(resampler_param); + + lin_resampler.raster(spectrum); + std::cout << "Size of resampled spectrum: " << spectrum.size() << std::endl; + + // Print resampled peaks for debugging + for (const auto& peak : spectrum) + { + std::cout << "m/z: " << peak.getMZ() + << ", intensity: " << peak.getIntensity() << std::endl; + } + + // Apply PeakPickerHiRes + PeakPickerHiRes picker; + picker.setParameters(parameters_); + MSSpectrum picked_spectrum; + + picker.pick(spectrum, picked_spectrum); + std::cout << "Size of picked spectrum: " << picked_spectrum.size() << std::endl; + + // Replace original spectrum with processed version + spectrum = picked_spectrum; + + // Temporarily skipping IM format setting since we commented out the check + // spectrum.setIMFormat(IMFormat::CENTROIDED); } + } // namespace OpenMS \ No newline at end of file diff --git a/src/topp/PeakPickerIM.cpp b/src/topp/PeakPickerIM.cpp new file mode 100644 index 00000000000..41092b802d5 --- /dev/null +++ b/src/topp/PeakPickerIM.cpp @@ -0,0 +1,69 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Mohammed Alhigaylan $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include + +using namespace OpenMS; +using namespace std; + +class TOPPPeakPickerIM : public TOPPBase +{ +public: + TOPPPeakPickerIM() : TOPPBase("PeakPickerIM", "Applies PeakPickerIM to an mzML file", false) {} + +protected: + void registerOptionsAndFlags_() override + { + registerInputFile_("in", "", "", "Input mzML file"); + setValidFormats_("in", ListUtils::create("mzML")); + + registerOutputFile_("out", "", "", "Output mzML file"); + setValidFormats_("out", ListUtils::create("mzML")); + } + + ExitCodes main_(int, const char**) override + { + // Get input and output file paths + String input_file = getStringOption_("in"); + String output_file = getStringOption_("out"); + + // Load input mzML file + PeakMap exp; + MzMLFile mzml; + mzml.load(input_file, exp); + + // Process each spectrum with PeakPickerIM + PeakPickerIM picker; + PeakMap processed_exp; + for (MSSpectrum& spectrum : exp) + { + std::cout << "Processing spectrum with " << spectrum.size() << " peaks." << std::endl; + picker.pickIMTraces(spectrum); + processed_exp.addSpectrum(spectrum); + std::cout << "Processed spectrum has " << spectrum.size() << " peaks." << std::endl; + } +// processed_exp.setExperimentalSettings(exp); + // Save output mzML file + mzml.store(output_file, processed_exp); + + return EXECUTION_OK; + } +}; + +int main(int argc, const char** argv) +{ + TOPPPeakPickerIM tool; + return tool.main(argc, argv); +} + diff --git a/src/topp/executables.cmake b/src/topp/executables.cmake index a56d472c322..2d561b7d2b7 100644 --- a/src/topp/executables.cmake +++ b/src/topp/executables.cmake @@ -100,6 +100,7 @@ OpenSwathFeatureXMLToTSV OpenSwathRTNormalizer PeakPickerHiRes PeakPickerIterative +PeakPickerIM PeptideIndexer PercolatorAdapter PhosphoScoring From 6cb5d4335164500c5c49bd2787c2cb074808dae8 Mon Sep 17 00:00:00 2001 From: Mohammed AlHigaylan Date: Sat, 22 Mar 2025 07:20:45 +0300 Subject: [PATCH 19/34] PeakPickerIM basic skeleton is complete. It accepts raw TimsTOF frame and output centroided mz and im peaks --- .../PROCESSING/CENTROIDING/PeakPickerIM.h | 19 +- .../PROCESSING/CENTROIDING/PeakPickerIM.cpp | 753 ++++++++++++++++-- src/topp/PeakPickerIM.cpp | 4 +- 3 files changed, 703 insertions(+), 73 deletions(-) diff --git a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h index dfbc2380060..79e40114563 100644 --- a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h +++ b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h @@ -48,7 +48,24 @@ namespace OpenMS private: /// determine sampling rate for linear resampler - double computeOptimalSamplingRate(const MSSpectrum& spectrum); + double computeOptimalSamplingRate(const std::vector& spectra); + + /// Sum up the intensity of data points with nearly identical float values + MSSpectrum SumFrame(const MSSpectrum& spectrum, double ppm_tolerance = 0.01); + + /// Compute lower and upper m/z bounds based on ppm + std::pair ppmBounds(double mz, double ppm); + + /// Extract ion mobility traces as MSSpectra from the raw TimsTOF frame + /// Ion mobility is temporarily written in place of m/z inside Peak1D object. + /// raw m/z values are allocated to float data arrays with the label 'raw_mz' + std::vector extractIonMobilityTraces( + const MSSpectrum& picked_spectrum, + const MSSpectrum& raw_spectrum); + + /// compute m/z and ion mobility centers for picked traces. Returns centroided spectrum. + MSSpectrum ComputeCenters(const std::vector& mobilogram_traces, + const std::vector& picked_traces); }; } // namespace OpenMS \ No newline at end of file diff --git a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp index 59b52afc7b2..d876e052fbe 100644 --- a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp +++ b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp @@ -8,38 +8,61 @@ #include #include +#include +#include #include #include #include #include +#include +#include #include namespace OpenMS { - double PeakPickerIM::computeOptimalSamplingRate(const MSSpectrum& spectrum) + double PeakPickerIM::computeOptimalSamplingRate(const std::vector& spectra) { - if (spectrum.size() < 2) + std::vector mz_diffs; + + for (size_t s = 0; s < spectra.size(); ++s) { - std::cerr << "Warning: Spectrum has too few points for resampling. Using fixed sampling rate of 0.001 1/k" << std::endl; - return 0.001; // Default fallback value - } + const MSSpectrum& spectrum = spectra[s]; + // The spectrum could have multiple peaks at the same x position. + // Sum the peak intensity + MSSpectrum summed_trace = SumFrame(spectrum, 7000.0); - std::vector mz_diffs; - mz_diffs.reserve(spectrum.size() - 1); + if (summed_trace.size() < 20) + { + std::cerr << "Skipping trace " << s << " because it has too few points (" + << summed_trace.size() << ")." << std::endl; + continue; // skip this spectrum + } + + for (size_t i = 1; i < summed_trace.size(); ++i) + { + double diff = summed_trace[i].getMZ() - summed_trace[i - 1].getMZ(); + mz_diffs.push_back(diff); + } + } - for (size_t i = 1; i < spectrum.size(); ++i) + // If we found no valid m/z differences + if (mz_diffs.empty()) { - mz_diffs.push_back(spectrum[i].getMZ() - spectrum[i - 1].getMZ()); + std::cerr << "Warning: No valid m/z differences found in any spectra. Using default sampling rate of 0.01" << std::endl; + return 0.01; // Fallback value } - // A mobilogram may have two peaks spaced far apart but in the 'm/z' array, - // the peaks are adjacent to each other and will result in a big mz_dff. - // Take the 75% percentile to remove large m/z gaps. - std::vector filtered_mz_diffs = mz_diffs; - std::sort(filtered_mz_diffs.begin(), filtered_mz_diffs.end()); - double threshold = filtered_mz_diffs[filtered_mz_diffs.size() * 0.75]; - std::cerr << "75% percentile of ion mobility difference is determined to be... " << threshold << std::endl; + // Sort the differences to compute the 75th percentile threshold + // This is needed in case there is a gap in the mobilogram. i+1 peak will skew the computed + // sampling rate. + std::sort(mz_diffs.begin(), mz_diffs.end()); + + size_t percentile_index = static_cast(mz_diffs.size() * 0.75); + double threshold = mz_diffs[percentile_index]; + std::cerr << "75th percentile of position differences is: " << threshold << std::endl; + + // Filter out large differences (keep diffs <= threshold) std::vector small_mz_diffs; for (double diff : mz_diffs) { @@ -51,19 +74,20 @@ namespace OpenMS if (small_mz_diffs.empty()) { - std::cerr << "Warning: No valid small m/z differences found. Using default sampling rate. Using fixed sampling rate of 0.001 1/k" << std::endl; - return 0.001; // Default fallback value + std::cerr << "Warning: No valid small m/z differences found after filtering. Using default sampling rate of 0.01" << std::endl; + return 0.01; } - // Step 2: Compute mode (most common spacing) + // Compute the mode std::map freq_map; for (double diff : small_mz_diffs) { freq_map[diff]++; } - double mode_sampling_rate = small_mz_diffs.front(); // Default to first value + double mode_sampling_rate = small_mz_diffs.front(); int max_count = 0; + for (const auto& [diff, count] : freq_map) { if (count > max_count) @@ -73,13 +97,476 @@ namespace OpenMS } } - // Compute final sampling rate - double sampling_rate = mode_sampling_rate; - std::cout << "Computed sampling rate: " << sampling_rate << std::endl; + std::cout << "Computed optimal sampling rate: " << mode_sampling_rate << std::endl; + + return mode_sampling_rate; + } + + // Function to compute the lower and upper m/z bounds based on ppm tolerance + std::pair PeakPickerIM::ppmBounds(double mz, double ppm) + { + ppm = ppm / 1e6; + double delta_mz = (ppm * mz) / 2.0; + + double low = mz - delta_mz; + double high = mz + delta_mz; + + return std::make_pair(low, high); + } + + // This function sums peaks if they are nearly identical + // OpenMS represents TimsTOF data in MSSpectrum() objects as one-array. + // There could be multiple 500.0 m/z peaks with different ion mobility values. + // Peak picking (such as HiRes) will not work properly if there are multiple y measurements at a given x m/z position. + MSSpectrum PeakPickerIM::SumFrame(const MSSpectrum& input_spectrum, double ppm_tolerance) + { + MSSpectrum export_spectrum; + + if (input_spectrum.empty()) return export_spectrum; + + MSSpectrum spectrum = input_spectrum; + + if (!spectrum.isSorted()) + { + std::cout << "Spectrum not sorted by m/z, sorting now..." << std::endl; + spectrum.sortByPosition(); + } + + double current_mz = spectrum[0].getMZ(); + double current_intensity = spectrum[0].getIntensity(); - return sampling_rate; + for (Size i = 1; i < spectrum.size(); ++i) + { + double next_mz = spectrum[i].getMZ(); + double next_intensity = spectrum[i].getIntensity(); + + double delta_mz = std::abs(next_mz - current_mz); + double ppm_diff = (delta_mz / current_mz) * 1e6; + + if (ppm_diff <= ppm_tolerance) + { + current_intensity += next_intensity; + } + else + { + Peak1D peak; + peak.setMZ(current_mz); + peak.setIntensity(current_intensity); + export_spectrum.push_back(peak); + + current_mz = next_mz; + current_intensity = next_intensity; + } + } + Peak1D last_peak; + last_peak.setMZ(current_mz); + last_peak.setIntensity(current_intensity); + export_spectrum.push_back(last_peak); + + return export_spectrum; } + // We use peak FWHM (from PeakPickerHiRes) to extract ion mobility traces. + // Given a picked m/z peak, we write a temporary MSSpectrum() object with ion mobility measurements + // in place of m/z in Peak1D object. This facilitates peak picking in the ion mobility dimension. + // To enable recomputing of m/z center after ion mobility peak picking, we tack raw m/z peak values + // in FloatDataArrays(). + + std::vector PeakPickerIM::extractIonMobilityTraces( + const MSSpectrum& picked_spectrum, + const MSSpectrum& raw_spectrum) + { + const auto& float_data_arrays = picked_spectrum.getFloatDataArrays(); + + // Find FWHM array in picked_spectrum + const MSSpectrum::FloatDataArray* fwhm_array = nullptr; + + for (const auto& array : float_data_arrays) + { + if (array.getName() == "FWHM_ppm") + { + fwhm_array = &array; + break; + } + } + + if (!fwhm_array) + { + std::cerr << "FWHM data array not found!" << std::endl; + return {}; + } + + if (fwhm_array->size() != picked_spectrum.size()) + { + std::cerr << "Size mismatch between FWHM array and picked peaks!" << std::endl; + return {}; + } + + // Get the Ion Mobility array from raw_spectrum + const auto& raw_float_data_arrays = raw_spectrum.getFloatDataArrays(); + const MSSpectrum::FloatDataArray* ion_mobility_array = nullptr; + + for (const auto& array : raw_float_data_arrays) + { + if (array.getName() == "Ion Mobility") + { + ion_mobility_array = &array; + break; + } + } + + if (!ion_mobility_array) + { + std::cerr << "Ion Mobility data array not found in raw_spectrum!" << std::endl; + return {}; + } + + // Vector of MSSpectra for each picked m/z peak (each spectrum is a mobilogram trace) + std::vector mobility_traces; + + for (size_t i = 0; i < picked_spectrum.size(); ++i) + { + double picked_mz = picked_spectrum[i].getMZ(); + double fwhm_ppm = (*fwhm_array)[i]; + + auto bounds = ppmBounds(picked_mz, fwhm_ppm); + double lower_bound = bounds.first; + double upper_bound = bounds.second; + + SignedSize center_idx = raw_spectrum.findNearest(picked_mz); + + if (center_idx == -1) + { + std::cerr << "No raw peaks found near picked m/z: " << picked_mz << std::endl; + mobility_traces.push_back(MSSpectrum()); + continue; + } + + MSSpectrum trace_spectrum; // A single mobilogram trace + // Prepare FloatDataArray to store raw m/z values + MSSpectrum::FloatDataArray raw_mz_array; + raw_mz_array.setName("raw_mz"); + + // Expand left + SignedSize left_idx = center_idx; + while (left_idx >= 0 && raw_spectrum[left_idx].getMZ() >= lower_bound) + { + Peak1D p; + p.setMZ((*ion_mobility_array)[left_idx]); // Ion Mobility as m/z + p.setIntensity(raw_spectrum[left_idx].getIntensity()); + + trace_spectrum.push_back(p); + + // Store the raw m/z + raw_mz_array.push_back(raw_spectrum[left_idx].getMZ()); + + --left_idx; + } + + // Expand right + SignedSize right_idx = center_idx + 1; + while (right_idx < static_cast(raw_spectrum.size()) && + raw_spectrum[right_idx].getMZ() <= upper_bound) + { + Peak1D p; + p.setMZ((*ion_mobility_array)[right_idx]); // Ion Mobility as m/z + p.setIntensity(raw_spectrum[right_idx].getIntensity()); + + trace_spectrum.push_back(p); + + // Store the raw m/z data in floatDataArrays() + raw_mz_array.push_back(raw_spectrum[right_idx].getMZ()); + + ++right_idx; + } + + // Attach the raw m/z array to trace_spectrum + auto& trace_float_arrays = trace_spectrum.getFloatDataArrays(); + trace_float_arrays.push_back(std::move(raw_mz_array)); + + // Sort the trace_spectrum by ion mobility (m/z), while keeping raw m/z aligned + trace_spectrum.sortByPosition(); + + mobility_traces.push_back(std::move(trace_spectrum)); + } + + return mobility_traces; + } + + // Function to compute m/z centers from mobilogram_traces and picked_traces + MSSpectrum PeakPickerIM::ComputeCenters(const std::vector& mobilogram_traces, + const std::vector& picked_traces) + { + MSSpectrum centroided_frame; + + // Create float data arrays to house ion mobility data and peaks FWHM + MSSpectrum::FloatDataArray ion_mobility_array; + ion_mobility_array.setName("Ion Mobility"); + + MSSpectrum::FloatDataArray ion_mobility_fwhm; + ion_mobility_fwhm.setName("IM FWHM"); + + MSSpectrum::FloatDataArray mz_fwhm_array; + mz_fwhm_array.setName("MZ FWHM"); + + // debug + std::cout << "picked_traces.size(): " << picked_traces.size() << std::endl; + + // Loop over picked traces and their corresponding raw mobilogram traces + for (size_t i = 0; i < picked_traces.size(); ++i) + { + std::cout << "Looping through picked_trace that has .. " << picked_traces[i].size() << std::endl; + const MSSpectrum& picked_trace = picked_traces[i]; + const MSSpectrum& raw_trace = mobilogram_traces[i]; + + const auto& picked_float_arrays = picked_trace.getFloatDataArrays(); + + if (picked_float_arrays.empty()) + { + std::cerr << "No IM FWHM array found for picked_trace " << i << "!" << std::endl; + continue; + } + + // Assuming the first FloatDataArray holds the ion mobility peak FWHM values + const auto& fwhm_array = picked_float_arrays[0]; + + if (fwhm_array.size() != picked_trace.size()) + { + std::cerr << "FWHM array size mismatch with picked_trace size!" << std::endl; + continue; + } + + // Get the FloatDataArrays from raw_trace (assumed to hold the raw m/z values) + const auto& raw_float_arrays = raw_trace.getFloatDataArrays(); + + if (raw_float_arrays.empty()) + { + std::cerr << "No raw m/z peaks found for raw_trace " << i << "!" << std::endl; + continue; + } + + // Assume the first array holds the raw m/z values + const auto& raw_mz_values = raw_float_arrays[0]; + + if (raw_mz_values.size() != raw_trace.size()) + { + std::cerr << "raw_mz_values size mismatch with raw_trace size!" << std::endl; + continue; + } + + std::cout << "\n--- Processing picked_trace " << i << " ---\n"; + + // Iterate through picked peaks in this trace + for (Size j = 0; j < picked_trace.size(); ++j) + { + double centroid_im = picked_trace[j].getMZ(); // Ion mobility centroid (stored as m/z) + double fwhm = fwhm_array[j]; + + + double im_lower = centroid_im - (fwhm / 2.0); + double im_upper = centroid_im + (fwhm / 2.0); + + std::cout << "Picked peak " << j << " IM centroid: " << centroid_im + << " ion mobility FWHM: " << fwhm + << " --> IM bounds: [" << im_lower << ", " << im_upper << "]" << std::endl; + + // Use findNearest() to get the index of the closest peak in the raw mobilogram trace + SignedSize center_idx = raw_trace.findNearest(centroid_im); + + if (center_idx == -1) + { + std::cerr << "Could not find nearest peak to centroid_im in raw_trace!" << std::endl; + continue; + } + + MSSpectrum raw_peaks_within_bounds; + + // --- Expand Left --- + SignedSize left_idx = center_idx; + while (left_idx >= 0 && raw_trace[left_idx].getMZ() >= im_lower) + { + Peak1D new_peak; + new_peak.setMZ(raw_mz_values[left_idx]); // m/z from FloatDataArray + new_peak.setIntensity(raw_trace[left_idx].getIntensity()); // intensity from raw_trace + raw_peaks_within_bounds.push_back(new_peak); + + --left_idx; + } + + // --- Expand Right --- + SignedSize right_idx = center_idx + 1; + while (right_idx < static_cast(raw_trace.size()) && + raw_trace[right_idx].getMZ() <= im_upper) + { + Peak1D new_peak; + new_peak.setMZ(raw_mz_values[right_idx]); + new_peak.setIntensity(raw_trace[right_idx].getIntensity()); + raw_peaks_within_bounds.push_back(new_peak); + + ++right_idx; + } + + std::cout << "Picked IM peak " << j << ": collected " << raw_peaks_within_bounds.size() + << " raw m/z points between IM [" << im_lower << ", " << im_upper << "]" << std::endl; + + // If we only retrieved one raw peak, pass it over to centroided_frame as is + // Resampling and smoothing the raw data distorts the intensity values. + // We recompute the m/z peak maxima and intensity using spline + if (raw_peaks_within_bounds.size() == 1) + { + const Peak1D& single_peak = raw_peaks_within_bounds.front(); + + // Add it directly to centroided_frame + centroided_frame.push_back(single_peak); + + // Push corresponding ion mobility and FWHM arrays + ion_mobility_array.push_back(centroid_im); + ion_mobility_fwhm.push_back(fwhm); + mz_fwhm_array.push_back(0.0); + + std::cout << "[INFO] Only one raw peak found. Added directly to centroided_frame. m/z: " + << single_peak.getMZ() << " intensity: " << single_peak.getIntensity() << std::endl; + + // Skip the rest of the loop and move on to the next picked_trace peak + continue; + } + + + + raw_peaks_within_bounds.sortByPosition(); + + MSSpectrum raw_mz_peaks = SumFrame(raw_peaks_within_bounds, 0.1); + // Prepare data for spline + std::map peak_raw_data; + + for (const auto& peak : raw_mz_peaks) + { + peak_raw_data[peak.getMZ()] = peak.getIntensity(); + } + if (peak_raw_data.empty()) + { + std::cerr << "No data in raw_mz_peaks for picked IM peak " << j << "!" << std::endl; + continue; + } + + // Initialize spline + CubicSpline2d spline(peak_raw_data); + + // Define boundaries + const double left_bound = peak_raw_data.begin()->first; + const double right_bound = peak_raw_data.rbegin()->first; + + // Find maximum via spline bisection + double apex_mz = (left_bound + right_bound) / 2.0; + double apex_intensity = 0.0; + + const double max_search_threshold = 1e-6; + + Math::spline_bisection(spline, left_bound, right_bound, apex_mz, apex_intensity, max_search_threshold); + + std::cout << "Apex m/z: " << apex_mz << std::endl; + std::cout << "Apex intensity: " << apex_intensity << std::endl; + + // FWHM calculation (same binary search as before) + double half_height = apex_intensity / 2.0; + const double fwhm_search_threshold = 0.01 * half_height; + + // ---- Left side search ---- + double mz_left = left_bound; + double mz_center = apex_mz; + double int_mid = 0.0; + double mz_mid = mz_left; + + if (spline.eval(mz_left) > half_height) + { + mz_mid = mz_left; + } + else + { + do + { + mz_mid = (mz_left + mz_center) / 2.0; + int_mid = spline.eval(mz_mid); + + if (int_mid < half_height) + { + mz_left = mz_mid; + } + else + { + mz_center = mz_mid; + } + + } while (std::fabs(int_mid - half_height) > fwhm_search_threshold); + } + double fwhm_left_mz = mz_mid; + + // ---- Right side search ---- + double mz_right = right_bound; + mz_center = apex_mz; + + if (spline.eval(mz_right) > half_height) + { + mz_mid = mz_right; + } + else + { + do + { + mz_mid = (mz_right + mz_center) / 2.0; + int_mid = spline.eval(mz_mid); + + if (int_mid < half_height) + { + mz_right = mz_mid; + } + else + { + mz_center = mz_mid; + } + + } while (std::fabs(int_mid - half_height) > fwhm_search_threshold); + } + double fwhm_right_mz = mz_mid; + + // ---- FWHM result ---- + double mz_fwhm = fwhm_right_mz - fwhm_left_mz; + + std::cout << "Left m/z at half height: " << fwhm_left_mz << std::endl; + std::cout << "Right m/z at half height: " << fwhm_right_mz << std::endl; + std::cout << "m/z FWHM: " << mz_fwhm << std::endl; + + + Peak1D centroided_peak; + centroided_peak.setMZ(apex_mz); + centroided_peak.setIntensity(apex_intensity); + + centroided_frame.push_back(centroided_peak); + ion_mobility_array.push_back(centroid_im); + ion_mobility_fwhm.push_back(fwhm); + mz_fwhm_array.push_back(mz_fwhm); + } + + std::cout << "--- Finished processing picked_trace " << i << " ---\n\n"; + } + + auto& centroided_frame_fda = centroided_frame.getFloatDataArrays(); + centroided_frame_fda.push_back(std::move(ion_mobility_array)); + centroided_frame_fda.push_back(std::move(ion_mobility_fwhm)); + centroided_frame_fda.push_back(std::move(mz_fwhm_array)); + + centroided_frame.sortByPosition(); + + std::cout << "Printing centroided_frame inside ComputerCenters function " << std::endl; + for (const auto& peak : centroided_frame) + { + std::cout << "m/z: " << peak.getMZ() << ", intensity: " << peak.getIntensity() << std::endl; + } + + return centroided_frame; + } + + PeakPickerIM::PeakPickerIM() : parameters_(getDefaultParameters()) { @@ -90,7 +577,6 @@ namespace OpenMS { } - // Returns a Param object with the default settings for peak picking. Param PeakPickerIM::getDefaultParameters() const { Param p; @@ -98,24 +584,21 @@ namespace OpenMS p.setValue("spacing_difference_gap", 0.0, "The extension of a peak is stopped if the spacing between two subsequent data points exceeds 'spacing_difference_gap * min_spacing'. 'min_spacing' is the smaller of the two spacings from the peak apex to its two neighboring points. '0' to disable the constraint. Not applicable to chromatograms."); p.setValue("spacing_difference", 0.0, "Maximum allowed difference between points during peak extension, in multiples of the minimal difference between the peak apex and its two neighboring points. If this difference is exceeded a missing point is assumed (see parameter 'missing'). A higher value implies a less stringent peak definition, since individual signals within the peak are allowed to be further apart. '0' to disable the constraint. Not applicable to chromatograms."); p.setValue("missing", 0, "Maximum number of missing points allowed when extending a peak to the left or to the right. A missing data point occurs if the spacing between two subsequent data points exceeds 'spacing_difference * min_spacing'. 'min_spacing' is the smaller of the two spacings from the peak apex to its two neighboring points. Not applicable to chromatograms."); - p.setValue("signal_to_noise", 0.0, "Signal to noise threshold for peak picking"); + p.setValue("report_FWHM", "true"); + p.setValue("report_FWHM_unit", "absolute"); return p; } - // Update internal members if any parameter changes require it. void PeakPickerIM::updateMembers_() { - // For this example, no extra member variables need updating. // This function is a placeholder for potential future use. } - // Sets the parameters and updates internal members accordingly. void PeakPickerIM::setParameters(const Param& param) { parameters_ = param; updateMembers_(); } - // Retrieves the current parameters. Param PeakPickerIM::getParameters() const { return parameters_; @@ -123,62 +606,192 @@ namespace OpenMS void PeakPickerIM::pickIMTraces(MSSpectrum& spectrum) { - // Debugging: Print the input spectrum size - std::cout << "Processing spectrum with " << spectrum.size() << " peaks." << std::endl; + // Debugging: Print the input spectrum size + std::cout << "Processing spectrum with " << spectrum.size() << " peaks." << std::endl; - /* - // IM format determination (Temporarily commented out) - IMFormat format = IMTypes::determineIMFormat(spectrum); - switch (format) - { - case IMFormat::NONE: - return; // no IM data - case IMFormat::CENTROIDED: - return; // already centroided - case IMFormat::UNKNOWN: - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, - "IMFormat set to UNKNOWN after determineIMFormat. This should never happen.", - String(NamesOfIMFormat[(size_t)format])); - } - */ + /* + // IM format determination (Temporarily commented out) + IMFormat format = IMTypes::determineIMFormat(spectrum); + switch (format) + { + case IMFormat::NONE: + return; // no IM data + case IMFormat::CENTROIDED: + return; // already centroided + case IMFormat::UNKNOWN: + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "IMFormat set to UNKNOWN after determineIMFormat. This should never happen.", + String(NamesOfIMFormat[(size_t)format])); + } + */ + + + // --- Step 1a: Sum m/z peaks + // First, we project all timsTOF peaks into the m/z axis using SumFrame + // The ppm tolerance is a dynamic way of testing m/z floats being almost identical. Set it to 0.1 ppm + MSSpectrum summed_spectrum = SumFrame(spectrum, 0.1); + std::cout << "Spectrum after SumFrame has " << summed_spectrum.size() << " peaks." << std::endl; + + // --- step 2a: smooth the data projected to the m/z axis. + std::cout << "Applying Gaussian smoothing..." << std::endl; + GaussFilter gauss_filter; + + // Set Gaussian filter parameters. 5 ppm m/z is good approximation (make this a user parameter!) + Param gauss_params; + gauss_params.setValue("ppm_tolerance", 5.0); + gauss_params.setValue("use_ppm_tolerance", "true"); + + gauss_filter.setParameters(gauss_params); + gauss_filter.filter(summed_spectrum); + std::cout << "Spectrum after Gaussian smoothing has " << summed_spectrum.size() << " peaks." << std::endl; + + for (const auto& peak : summed_spectrum) + { + std::cout << "m/z: " << peak.getMZ() << ", intensity: " << peak.getIntensity() << std::endl; + } + + // ---step 3a: Apply PeakPickerHiRes and make sure the peak FWHM is reported in ppm. + // (Maybe this should change to absolute FWHM value...?). + PeakPickerHiRes picker; + Param hirez_mz_p; + hirez_mz_p.setValue("signal_to_noise", 0.0); + hirez_mz_p.setValue("report_FWHM", "true"); + hirez_mz_p.setValue("report_FWHM_unit", "relative"); + + picker.setParameters(hirez_mz_p); + MSSpectrum picked_spectrum; + + picker.pick(summed_spectrum, picked_spectrum); + std::cout << "Size of picked spectrum: " << picked_spectrum.size() << std::endl; + + + // ---step 4a: Extract ion mobility traces for each picked m/z peak + auto mobilogram_traces = extractIonMobilityTraces(picked_spectrum, spectrum); - // Initialize resampling - double sampling_rate = computeOptimalSamplingRate(spectrum) * 4; - std::cout << "Using sampling rate: " << sampling_rate << std::endl; + // --- compute optimal sampling rate from well-populated mobilograms in this frame. + // This is currently set to +20 peaks in a mobilogram. (Should this be a user parameter?) - // Set up custom parameters for the resampler - bool ppm = false; - Param resampler_param; - resampler_param.setValue("spacing", sampling_rate, "Spacing of the resampled output peaks."); - resampler_param.setValue("ppm", ppm ? "true" : "false", "Whether spacing is in ppm or Th"); + // Add a parameter to allow user to control sampling). Here we simply multiply by 4. + double sampling_rate = computeOptimalSamplingRate(mobilogram_traces) * 4; + Param resampler_param; + resampler_param.setValue("spacing", sampling_rate, "Spacing of the resampled output peaks."); + resampler_param.setValue("ppm", "false", "Whether spacing is in ppm or Th"); + + std::cout << "Using sampling rate... : " << sampling_rate << std::endl; + + + + // *************** PART II *************** + + // for each ion mobility trace, we process the raw signal, peak pick + // and recompute m/z and ion mobility centroid. + for (size_t i = 0; i < mobilogram_traces.size(); ++i) + { + std::cout << "Trace " << i << " contains " << mobilogram_traces[i].size() << " points in ion mobility space." << std::endl; + } + std::vector picked_traces; + + for (size_t i = 0; i < mobilogram_traces.size(); ++i) + { + MSSpectrum& trace = mobilogram_traces[i]; + + std::cout << "\n--- Processing Trace " << i << " ---\n"; + std::cout << "Original trace has " << trace.size() << " peaks." << std::endl; + + // --- Step 1b: Sum peaks that are too close --- + MSSpectrum summed_trace = SumFrame(trace, 7000.0); // 7000 ppm tolerance (temporary. Change function to accept absolute number) + std::cout << "Trace after SumFrame has " << summed_trace.size() << " peaks." << std::endl; + + // Determine im boundaries of current mobilogram. Add 10 padding points (should this be a parameter?) + // If you do not pad the edges, peaks on the edge will have an odd shape and not be picked by PeakPickerHiRes! + double im_start = summed_trace.front().getMZ(); + double im_end = summed_trace.back().getMZ(); + + std::cout << "Original summed trace ion mobility range: [" << im_start << ", " << im_end << "]" << std::endl; + + Peak1D front_padding; + front_padding.setMZ(im_start - 10 * sampling_rate); + front_padding.setIntensity(0.0); + summed_trace.insert(summed_trace.begin(), front_padding); + + Peak1D back_padding; + back_padding.setMZ(im_end + 10 * sampling_rate); + back_padding.setIntensity(0.0); + summed_trace.push_back(back_padding); + + std::cout << "Padded summed trace im range: [" << summed_trace.front().getMZ() << ", " << summed_trace.back().getMZ() << "]" << std::endl; + + // --- Step 2b: Resample the trace --- LinearResamplerAlign lin_resampler; lin_resampler.setParameters(resampler_param); - lin_resampler.raster(spectrum); - std::cout << "Size of resampled spectrum: " << spectrum.size() << std::endl; + lin_resampler.raster(summed_trace); + std::cout << "Size of resampled trace: " << summed_trace.size() << " peaks." << std::endl; + + // --- Step 3b: Apply Gaussian Smoothing --- + /* + + std::cout << "Applying Gaussian smoothing..." << std::endl; + + GaussFilter gauss_filter; + Param gauss_params; + gauss_params.setValue("gaussian_width", 0.005); + gauss_params.setValue("use_ppm_tolerance", "false"); // or "true" for ppm-based width - // Print resampled peaks for debugging - for (const auto& peak : spectrum) + gauss_filter.setParameters(gauss_params); + gauss_filter.filter(summed_trace); + + std::cout << "Trace after Gaussian smoothing has " << summed_trace.size() << " peaks." << std::endl; + */ + + // --- Step 3b: Apply SGolay Smoothing --- + SavitzkyGolayFilter sgolay_filter; + Param sgolay_params; + + sgolay_params.setValue("frame_length", 15); + sgolay_params.setValue("polynomial_order", 3); + sgolay_filter.setParameters(sgolay_params); + + sgolay_filter.filter(summed_trace); + + std::cout << "Trace after Savitzky-Golay smoothing has " << summed_trace.size() << " peaks." << std::endl; + + for (const auto& peak : summed_trace) { - std::cout << "m/z: " << peak.getMZ() - << ", intensity: " << peak.getIntensity() << std::endl; + std::cout << "m/z: " << peak.getMZ() << ", intensity: " << peak.getIntensity() << std::endl; } - // Apply PeakPickerHiRes + // --- Step 4b: Apply PeakPickerHiRes --- + // We will use ion mobility peak FWHM to define min/max ion mobility boundaries. + // Revisit the raw traces and compute intensity weighted ion mobility centroids. + // The ion mobility traces also contains raw m/z peaks in FloatDataArrays. + // This makes it convenient to re-compute m/z centroid. PeakPickerHiRes picker; picker.setParameters(parameters_); - MSSpectrum picked_spectrum; - picker.pick(spectrum, picked_spectrum); - std::cout << "Size of picked spectrum: " << picked_spectrum.size() << std::endl; + MSSpectrum picked_trace; + picker.pick(summed_trace, picked_trace); + // populated picked_traces vector + picked_traces.push_back(picked_trace); - // Replace original spectrum with processed version - spectrum = picked_spectrum; + std::cout << "Size of picked trace: " << picked_trace.size() << " peaks." << std::endl; - // Temporarily skipping IM format setting since we commented out the check - // spectrum.setIMFormat(IMFormat::CENTROIDED); - } + std::cout << "--- Finished Processing Trace " << i << " ---\n\n"; + } + + // Recompute m/z centers and output centroided frame + MSSpectrum centroided_frame = ComputeCenters(mobilogram_traces, picked_traces); + std::cout << "--- Centroided frame has been generated. It has " << centroided_frame.size() << " --- peaks."; + // Replace the input spectrum with the centroided result + spectrum = centroided_frame; + std::cout << "--- Spectrum final output object has .. " << spectrum.size() << " --- peaks."; + // Print peaks for debugging + for (const auto& peak : spectrum) + { + std::cout << "m/z: " << peak.getMZ() << ", intensity: " << peak.getIntensity() << std::endl; + } + } } // namespace OpenMS \ No newline at end of file diff --git a/src/topp/PeakPickerIM.cpp b/src/topp/PeakPickerIM.cpp index 41092b802d5..e751d1b089a 100644 --- a/src/topp/PeakPickerIM.cpp +++ b/src/topp/PeakPickerIM.cpp @@ -26,10 +26,10 @@ class TOPPPeakPickerIM : public TOPPBase void registerOptionsAndFlags_() override { registerInputFile_("in", "", "", "Input mzML file"); - setValidFormats_("in", ListUtils::create("mzML")); + setValidFormats_("in", { "mzML" }); registerOutputFile_("out", "", "", "Output mzML file"); - setValidFormats_("out", ListUtils::create("mzML")); + setValidFormats_("out", { "mzML" }); } ExitCodes main_(int, const char**) override From 4ddd7a99c0965170d56462597bd3adba9104c0ab Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Wed, 26 Mar 2025 13:21:53 +0100 Subject: [PATCH 20/34] Added Author --- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 2623f026bd3..c60a731c419 100644 --- a/AUTHORS +++ b/AUTHORS @@ -15,6 +15,7 @@ the authors tag in the respective file header. - Anton Kriese - Anton Pervukhin - Bastian Blank + - Bela Pfeffer - Chenghao Zhu - Chris Bauer - Chris Bielow @@ -120,4 +121,4 @@ the authors tag in the respective file header. - Witold Wolski - Xiao Liang - Yasset Perez-Riverol - - Peter Jones \ No newline at end of file + - Peter Jones From d55e173aa8798d9118582ba94ed04e7c792516b0 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Fri, 28 Mar 2025 15:04:53 +0100 Subject: [PATCH 21/34] Potential Runtime Improvement of GaussTraceFilter.cpp^ --- .../FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp | 2 +- src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp index e819450dfcf..9dbe24853a8 100644 --- a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp +++ b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp @@ -1007,7 +1007,7 @@ namespace OpenMS { //store map of abort reasons for failed seeds FeatureMap abort_map; - abort_map.reserve(abort_reasons_.size()); + abort_map.reserve( abort_reasons_.size()); Size counter = 0; for (std::map::iterator it2 = abort_reasons_.begin(); it2 != abort_reasons_.end(); ++it2, ++counter) { diff --git a/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp b/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp index 8c945eac5f0..14abbc9a24e 100644 --- a/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp +++ b/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp @@ -7,7 +7,7 @@ // -------------------------------------------------------------------------- #include - +#include #include #include @@ -171,19 +171,23 @@ namespace OpenMS double sig_sq = pow(sig, 2); double sig_3 = pow(sig, 3); double c_fac = -0.5 / sig_sq; + // std::cout << sig_3 << std::endl; + Size count = 0; for (Size t = 0; t < m_data->traces_ptr->size(); ++t) { const FeatureFinderAlgorithmPickedHelperStructs::MassTrace& trace = (*m_data->traces_ptr)[t]; double weight = m_data->weighted ? trace.theoretical_int : 1.0; + double constant = trace.theoretical_int * weight; for (Size i = 0; i < trace.peaks.size(); ++i) { double rt = trace.peaks[i].first; double e = exp(c_fac * pow(rt - x0, 2)); - J(count, 0) = trace.theoretical_int * e * weight; - J(count, 1) = trace.theoretical_int * height * e * (rt - x0) / sig_sq * weight; - J(count, 2) = 0.125* trace.theoretical_int* height* e* pow(rt - x0, 2) / sig_3 * weight; + double eConstant = constant * e + J(count, 0) = eConstant; + J(count, 1) = eConstant* height * (rt - x0) / sig_sq; + J(count, 2) = 0.125* eConstant * height * pow(rt - x0, 2) / sig_3; ++count; } } From d2d6a19063c156ae9006c76a6f007aa7f533bc73 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Fri, 28 Mar 2025 15:26:16 +0100 Subject: [PATCH 22/34] patched --- src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp b/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp index 14abbc9a24e..656d3eee86e 100644 --- a/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp +++ b/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp @@ -184,7 +184,7 @@ namespace OpenMS { double rt = trace.peaks[i].first; double e = exp(c_fac * pow(rt - x0, 2)); - double eConstant = constant * e + double eConstant = constant * e; J(count, 0) = eConstant; J(count, 1) = eConstant* height * (rt - x0) / sig_sq; J(count, 2) = 0.125* eConstant * height * pow(rt - x0, 2) / sig_3; From 0a64adffda22c6a15d0221c7338800029156fcc9 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Wed, 9 Apr 2025 12:34:56 +0200 Subject: [PATCH 23/34] modified XMLHandler.h/cpp Changed Implementation of appendASCII added some helping functions to class StringManager --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 19 +++- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 101 +++++++++++++++--- 2 files changed, 101 insertions(+), 19 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 9d4f98665ce..38279edbb9f 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -224,8 +224,18 @@ namespace OpenMS // Converts from a wide-character string to a narrow-character string. inline static String toNative_(const XMLCh* str) - { - return String(unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + { + String r; + XMLSize_t l = xercesc::XMLString::stringLen(str); + if(isASCII(str, l)) + { + appendASCII(str,l,r); + } + else + { + r = (unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + } + return r; } // Converts from a wide-character string to a narrow-character string. @@ -283,7 +293,12 @@ namespace OpenMS { return toNative_(str); } + /// Checks if supplied if chars in XMLCh* can be encoded with ASCII + static bool isASCII(const XMLCh * chars, const XMLSize_t length); + /// Compresses eight 8x16bit Chars in XMLCh* to 8x8bit Chars by cutting upper byte + static void compress64 (const XMLCh * input_it, char* output_it); + /** * @brief Transcodes the supplied XMLCh* and appends it to the OpenMS String * diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 8f4b037e829..e9a897959a8 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -315,7 +316,7 @@ namespace OpenMS::Internal } // no value, although there should be a numerical value else if (term.xref_type != ControlledVocabulary::CVTerm::NONE && term.xref_type != ControlledVocabulary::CVTerm::XSD_STRING && // should be numerical - !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the binary data array, not the 'value=' attribute! + !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the bits data array, not the 'value=' attribute! ) { warning(LOAD, String("The CV term '") + accession + " - " + term.name + "' used in tag '" + parent_tag + "' should have a numerical value. The value is '" + value + "'."); @@ -429,33 +430,99 @@ namespace OpenMS::Internal StringManager::~StringManager() = default; + void StringManager::compress64 (const XMLCh* input_it, char* output_it) { + alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); + simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); + simde_mm_storel_epi64((simde__m128i*)output_it, compressed); + } + + bool StringManager::isASCII(const XMLCh * chars, const XMLSize_t length) { + + + std::div_t quotient_and_remainder = std::div(length, 8); + size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient + size_t remainder = quotient_and_remainder.rem; + // std::cout << "Remainer: " << remainder << std::endl; + // std::cout << "Quotient: " << quotient << std::endl; + // cout << "length: " << length << endl; + + const XMLCh* it = chars; + const XMLCh* end = it + (quotient * 8); + simde__m128i mask = simde_mm_set1_epi16(0xFF00); + bool bitmask = true; + while (it != end && bitmask){ + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)it); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i andOP = simde_mm_and_si128(bits, mask); + simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); + bitmask = simde_mm_movemask_epi8(cmp) == 0xFFFF; + // bitmask = simde_mm_testz_si128(bits, mask); + it+=8; + } + + end += remainder; + while (it != end && bitmask){ + bitmask = *it & 0xFF00; + it++; + } + + return bitmask; + } + void StringManager::appendASCII(const XMLCh * chars, const XMLSize_t length, String & result) { - // XMLCh are characters in UTF16 (usually stored as 16bit unsigned - // short but this is not guaranteed). - // We know that the Base64 string here can only contain plain ASCII - // and all bytes except the least significant one will be zero. Thus - // we can convert to char directly (only keeping the least - // significant byte). + // XMLCh are characters in UTF16 (usually stored as 16bit unsigned + // short but this is not guaranteed). + // We know that the Base64 string here can only contain plain ASCII + // and all bytes except the least significant one will be zero. Thus + // we can convert to char directly (only keeping the least + // significant byte). + + + + + std::div_t quotient_and_remainder = std::div(length, 8); + size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient + size_t remainder = quotient_and_remainder.rem; + // std::cout << "Remainer: " << remainder << std::endl; + // std::cout << "Quotient: " << quotient << std::endl; + // cout << "length: " << length << endl; + const XMLCh* it = chars; - const XMLCh* end = it + length; + const XMLCh* end = it + (quotient * 8); + // std::cout << "Anzahl der Elemente zwischen it1 und it2: " + // << std::distance(it, end) << std::endl; size_t curr_size = result.size(); result.resize(curr_size + length); std::string::iterator str_it = result.begin(); std::advance(str_it, curr_size); + // int i = 0; + + //Copy Block of 8 chars at a time. Then jumps to the next eight Blocks while (it!=end) - { - *str_it = (char)*it; - ++str_it; - ++it; + { + // std::cout << "Aktueller Wert: " << *it << std::endl; + + compress64(it, &(*str_it)); + // printf("loop: %d\n", i); + str_it += 8; + it += 8; + // i++; } - // This is ca. 50 % faster than - // for (size_t i = 0; i < length; i++) - // { - // result[curr_size + i] = (char)chars[i]; - // } + + + end = it + remainder; + + while (it!=end) + { + *str_it = static_cast(*it & 0xFF); + // std::cout << "Aktueller Wert: " << *str_it << std::endl; + str_it ++; + it ++; + // i++; + } } From 2dd2bb6bc81b53df0131fd03c4c66b1ab8ef48d7 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Fri, 11 Apr 2025 15:28:52 +0200 Subject: [PATCH 24/34] added test for XMLHeader, Debugged it --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 16 +- .../class_tests/openms/executables.cmake | 1 + .../openms/source/XMLHandler_test.cpp | 154 ++++++++++++++++++ 3 files changed, 166 insertions(+), 5 deletions(-) create mode 100644 src/tests/class_tests/openms/source/XMLHandler_test.cpp diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index e9a897959a8..d660fb6d9b0 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -442,14 +442,20 @@ namespace OpenMS::Internal std::div_t quotient_and_remainder = std::div(length, 8); size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient size_t remainder = quotient_and_remainder.rem; - // std::cout << "Remainer: " << remainder << std::endl; - // std::cout << "Quotient: " << quotient << std::endl; - // cout << "length: " << length << endl; + std::cout << "Remainer: " << remainder << std::endl; + std::cout << "Quotient: " << quotient << std::endl; + std::cout << "length: " << length << endl; const XMLCh* it = chars; const XMLCh* end = it + (quotient * 8); simde__m128i mask = simde_mm_set1_epi16(0xFF00); bool bitmask = true; + + if (length == 0) + { + return false; + } + while (it != end && bitmask){ simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)it); simde__m128i zero = simde_mm_setzero_si128(); @@ -462,10 +468,10 @@ namespace OpenMS::Internal end += remainder; while (it != end && bitmask){ - bitmask = *it & 0xFF00; + bitmask = !(*it & 0xFF00); it++; } - + std::cout << "bitmask: " << bitmask << std::endl; return bitmask; } diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index 69c1973fff1..e8a4112800c 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -256,6 +256,7 @@ set(format_executables_list UnimodXMLFile_test XMassFile_test XMLFile_test + XMLHandler_test XMLValidator_test XQuestResultXMLFile_test XTandemInfile_test diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp new file mode 100644 index 00000000000..41232f755e4 --- /dev/null +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -0,0 +1,154 @@ + +#include +#include +#include +#include +#include + +#include + +using namespace OpenMS::Internal; + + + + + +START_TEST(StringManager, "$Id$") + + +const XMLCh russianHello[] = { + 0x041F, 0x0440, 0x0438, 0x0432, 0x0435, 0x0442, 0x043C, + 0x0438, 0x0440, // "Привет мир" (Hello World in Russian) +}; +XMLSize_t r_length = xercesc::XMLString::stringLen(russianHello); + +const XMLCh ascii[] = { + 0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F, + 0x0072,0x006C,0x0064,0x0021}; +XMLSize_t a_length = xercesc::XMLString::stringLen(ascii); + +const XMLCh mixed[] = { + 0x0048, 0x0065,0x0432, 0x0435, 0x0442, 0x043C, 0x006F, + 0x0072,0x006C,0x0064, 0x0021 }; +XMLSize_t m_length = xercesc::XMLString::stringLen(mixed); + +const XMLCh empty[] = {0}; +XMLSize_t e_length = xercesc::XMLString::stringLen(empty); +std::cout << e_length << std::endl; + +const XMLCh upperBoundary [] = {0x00FF,0x00FF}; +XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); + +bool isAscii = false; + +START_SECTION(if input is ascii) + isAscii = StringManager::isASCII(ascii,a_length); + std::cout << "1 \n"; + TEST_TRUE(isAscii) + isAscii = StringManager::isASCII(russianHello,r_length); + std::cout << "2 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(mixed,m_length); + std::cout << "3 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(empty,e_length); + std::cout << "4 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(upperBoundary,u_length); + std::cout << "5 \n"; + TEST_TRUE(isAscii) +END_SECTION + +const XMLCh eight_block_negative[] = {0xFFFF,0xFFFE,0xFFFB,0xFFF6,0xFFEC,0xFFCE,0xFF9C,0xFE00}; + +const XMLCh eight_block[] = {0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F}; + +const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0xFFFF,0xFFFE,0xFFFB,0xFFF6}; + +const XMLCh eight_block_kadabra[] = { + 0x004B, // K + 0x0041, // A + 0x0044, // D + 0x0041, // A + 0x0042, // B + 0x0052, // R + 0x0041, // A + 0x0021 // ! +}; + +START_SECTION(if Utf16 to Ascii Compression works right) + char* output1 = new char [9]; + output1[8] = '\0'; + StringManager::compress64(eight_block,output1); + std::string res1_str = "Hello,Wo"; + std::string o1_str (output1); + TEST_STRING_EQUAL(o1_str,res1_str); + delete[] output1; + + + char* output2 = new char [9]; + output2 [8] = '\0'; + char res2 [9] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; + res2[8] = '\0'; + StringManager::compress64(eight_block_negative,output2); + std::string res2_str (res2); + std::string o2_str(output2); + TEST_STRING_EQUAL(o2_str, res2_str); + delete[] output2; + + char* output3 = new char [9]; + output3 [8] = '\0'; + char res3 [9] = {0x42,0x45,0x4C,0x41,0x00,0x00,0x00,0x00}; + res3[8] = '\0'; + StringManager::compress64(eight_block_mixed,output3); + std::string res3_str (res3); + std::string o3_str(output3); + TEST_STRING_EQUAL(o3_str, res3_str); + delete[] output3; + + char* output4 = new char [13]; + output4 [0] ='A'; + output4 [1] ='B'; + output4 [2] ='R'; + output4 [3] ='A'; + output3 [12] = '\0'; + + StringManager::compress64(eight_block_kadabra,(output4+4)); + std::string res4_str = "ABRAKADABRA!"; + std::string o4_str(output4); + TEST_STRING_EQUAL(o4_str, res4_str); + delete[] output4; + +END_SECTION + +//Tests Number of Chars not Dividable by 8 +OpenMS::String o5_str; +std::string res5_str = "Hello,World!"; + +//Checks how the Function handles Data thats already stored in Output string +OpenMS::String o6_str = "Gruess Gott und "; +std::string res6_str = "Gruess Gott und Hello,World!"; + +OpenMS::String o7_str; +std::string res7_str = ""; + + +START_SECTION(if appendASCII works) + + StringManager::appendASCII(ascii,a_length,o5_str); + TEST_STRING_EQUAL(o5_str, res5_str); + + StringManager::appendASCII(ascii,a_length,o6_str); + TEST_STRING_EQUAL(o6_str, res6_str); + + StringManager::appendASCII(empty,e_length,o7_str); + TEST_STRING_EQUAL(o7_str, res7_str); + std::cout << o7_str.size() << std::endl; + +END_SECTION + +END_TEST + + + + From ac506f93c57944f49d0d961238d858b6de2058f0 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Mon, 14 Apr 2025 14:35:54 +0200 Subject: [PATCH 25/34] t rid of print statements in function --- src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index d660fb6d9b0..ade4faa1170 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -442,9 +442,9 @@ namespace OpenMS::Internal std::div_t quotient_and_remainder = std::div(length, 8); size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient size_t remainder = quotient_and_remainder.rem; - std::cout << "Remainer: " << remainder << std::endl; - std::cout << "Quotient: " << quotient << std::endl; - std::cout << "length: " << length << endl; + // std::cout << "Remainer: " << remainder << std::endl; + // std::cout << "Quotient: " << quotient << std::endl; + // std::cout << "length: " << length << endl; const XMLCh* it = chars; const XMLCh* end = it + (quotient * 8); @@ -471,7 +471,7 @@ namespace OpenMS::Internal bitmask = !(*it & 0xFF00); it++; } - std::cout << "bitmask: " << bitmask << std::endl; + // std::cout << "bitmask: " << bitmask << std::endl; return bitmask; } From e057e09a0df85cde08383824fb432a6f52398c67 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 15 Apr 2025 13:50:43 +0200 Subject: [PATCH 26/34] Moved the function in the right part of the String Manager Class --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index ade4faa1170..63066053940 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -422,14 +422,6 @@ namespace OpenMS::Internal } } - //******************************************************************************************************************* - - StringManager::StringManager() - = default; - - StringManager::~StringManager() - = default; - void StringManager::compress64 (const XMLCh* input_it, char* output_it) { alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); @@ -471,7 +463,6 @@ namespace OpenMS::Internal bitmask = !(*it & 0xFF00); it++; } - // std::cout << "bitmask: " << bitmask << std::endl; return bitmask; } @@ -529,7 +520,17 @@ namespace OpenMS::Internal it ++; // i++; } - } + //******************************************************************************************************************* + + StringManager::StringManager() + = default; + + StringManager::~StringManager() + = default; + + + + } // namespace OpenMS // namespace Internal From 1383969fbff4247d262410d9455778cc317e10b1 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Thu, 17 Apr 2025 13:42:47 +0200 Subject: [PATCH 27/34] fferent Impelmentation of append/isAscii with potentially silghtly less overhead --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 71 ++++++------------- 1 file changed, 21 insertions(+), 50 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 63066053940..625d9d4512f 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -431,15 +431,10 @@ namespace OpenMS::Internal bool StringManager::isASCII(const XMLCh * chars, const XMLSize_t length) { - std::div_t quotient_and_remainder = std::div(length, 8); - size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient - size_t remainder = quotient_and_remainder.rem; - // std::cout << "Remainer: " << remainder << std::endl; - // std::cout << "Quotient: " << quotient << std::endl; - // std::cout << "length: " << length << endl; - - const XMLCh* it = chars; - const XMLCh* end = it + (quotient * 8); + size_t quotient = length / 8; // Ganzzahliger Quotient + size_t remainder = length % 8; + + const XMLCh* input_ptr = chars; simde__m128i mask = simde_mm_set1_epi16(0xFF00); bool bitmask = true; @@ -448,20 +443,19 @@ namespace OpenMS::Internal return false; } - while (it != end && bitmask){ - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)it); + for (size_t i = 0; i < quotient && bitmask; i++) + { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_ptr); simde__m128i zero = simde_mm_setzero_si128(); simde__m128i andOP = simde_mm_and_si128(bits, mask); simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); bitmask = simde_mm_movemask_epi8(cmp) == 0xFFFF; - // bitmask = simde_mm_testz_si128(bits, mask); - it+=8; + input_ptr+=8; } - end += remainder; - while (it != end && bitmask){ - bitmask = !(*it & 0xFF00); - it++; + for (size_t i = 0; i < remainder && bitmask; i++) + { + bitmask = !(input_ptr[i] & 0xFF00); } return bitmask; } @@ -475,50 +469,27 @@ namespace OpenMS::Internal // we can convert to char directly (only keeping the least // significant byte). + size_t quotient = length / 8; + size_t remainder = length % 8; - - - std::div_t quotient_and_remainder = std::div(length, 8); - size_t quotient = quotient_and_remainder.quot; // Ganzzahliger Quotient - size_t remainder = quotient_and_remainder.rem; - // std::cout << "Remainer: " << remainder << std::endl; - // std::cout << "Quotient: " << quotient << std::endl; - // cout << "length: " << length << endl; - - const XMLCh* it = chars; - const XMLCh* end = it + (quotient * 8); - // std::cout << "Anzahl der Elemente zwischen it1 und it2: " - // << std::distance(it, end) << std::endl; + const XMLCh* input_ptr = chars; size_t curr_size = result.size(); result.resize(curr_size + length); - std::string::iterator str_it = result.begin(); - std::advance(str_it, curr_size); - // int i = 0; + char* output_ptr = &result[curr_size]; //Copy Block of 8 chars at a time. Then jumps to the next eight Blocks - while (it!=end) + for (size_t i = 0; i < quotient; i++) { - // std::cout << "Aktueller Wert: " << *it << std::endl; - - compress64(it, &(*str_it)); - // printf("loop: %d\n", i); - str_it += 8; - it += 8; - // i++; + compress64(input_ptr, output_ptr); + input_ptr += 8; + output_ptr += 8; } - - end = it + remainder; - - while (it!=end) + for (size_t i = 0; i < remainder; i++) { - *str_it = static_cast(*it & 0xFF); - // std::cout << "Aktueller Wert: " << *str_it << std::endl; - str_it ++; - it ++; - // i++; + output_ptr[i] = static_cast(input_ptr[i] & 0xFF); } } From 4c0ca9a31cb0532dbb732c216f1ffae4feda99ac Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Thu, 17 Apr 2025 14:53:22 +0200 Subject: [PATCH 28/34] Test formatted to Conventions --- src/tests/class_tests/openms/source/XMLHandler_test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index 41232f755e4..d3fed30bd98 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -41,7 +41,7 @@ XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); bool isAscii = false; -START_SECTION(if input is ascii) +START_SECTION(isASCII(const XMLCh * chars, const XMLSize_t length)) isAscii = StringManager::isASCII(ascii,a_length); std::cout << "1 \n"; TEST_TRUE(isAscii) @@ -76,7 +76,7 @@ const XMLCh eight_block_kadabra[] = { 0x0021 // ! }; -START_SECTION(if Utf16 to Ascii Compression works right) +START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) char* output1 = new char [9]; output1[8] = '\0'; StringManager::compress64(eight_block,output1); @@ -133,7 +133,7 @@ OpenMS::String o7_str; std::string res7_str = ""; -START_SECTION(if appendASCII works) +START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) StringManager::appendASCII(ascii,a_length,o5_str); TEST_STRING_EQUAL(o5_str, res5_str); From 9f90780f3875788d274dab7ff9549d9ad55f394b Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 22 Apr 2025 16:49:09 +0200 Subject: [PATCH 29/34] Changed the packus_epi16 function with the shuffle function for improved performance inside the compress64 function. Edited the the XMLHandler_test to work with new implemantation and got rid of new/delete: --- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 5 +- .../openms/source/XMLHandler_test.cpp | 62 +++++++------------ 2 files changed, 28 insertions(+), 39 deletions(-) diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 625d9d4512f..8f04a091a70 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -424,7 +424,10 @@ namespace OpenMS::Internal void StringManager::compress64 (const XMLCh* input_it, char* output_it) { alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); - simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); + // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); + const simde__m128i shuffleMask = simde_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, + -1, -1, -1, -1, -1, -1, -1, -1); + simde__m128i compressed = simde_mm_shuffle_epi8(bits,shuffleMask); simde_mm_storel_epi64((simde__m128i*)output_it, compressed); } diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index d3fed30bd98..19e56614144 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -24,19 +24,18 @@ XMLSize_t r_length = xercesc::XMLString::stringLen(russianHello); const XMLCh ascii[] = { 0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F, - 0x0072,0x006C,0x0064,0x0021}; + 0x0072,0x006C,0x0064,0x0021, 0x0000}; XMLSize_t a_length = xercesc::XMLString::stringLen(ascii); const XMLCh mixed[] = { 0x0048, 0x0065,0x0432, 0x0435, 0x0442, 0x043C, 0x006F, - 0x0072,0x006C,0x0064, 0x0021 }; + 0x0072,0x006C,0x0064, 0x0021, 0x0000 }; XMLSize_t m_length = xercesc::XMLString::stringLen(mixed); const XMLCh empty[] = {0}; XMLSize_t e_length = xercesc::XMLString::stringLen(empty); -std::cout << e_length << std::endl; -const XMLCh upperBoundary [] = {0x00FF,0x00FF}; +const XMLCh upperBoundary [] = {0x00FF,0x00FF,0x0000}; XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); bool isAscii = false; @@ -59,11 +58,11 @@ START_SECTION(isASCII(const XMLCh * chars, const XMLSize_t length)) TEST_TRUE(isAscii) END_SECTION -const XMLCh eight_block_negative[] = {0xFFFF,0xFFFE,0xFFFB,0xFFF6,0xFFEC,0xFFCE,0xFF9C,0xFE00}; +const XMLCh eight_block_negative[] = {0x0148,0x0165,0x016C,0x016C,0x016F,0x012C,0x0157,0x016F}; const XMLCh eight_block[] = {0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F}; -const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0xFFFF,0xFFFE,0xFFFB,0xFFF6}; +const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0x0142,0x0145,0x014C,0x0141}; const XMLCh eight_block_kadabra[] = { 0x004B, // K @@ -77,47 +76,34 @@ const XMLCh eight_block_kadabra[] = { }; START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) - char* output1 = new char [9]; - output1[8] = '\0'; - StringManager::compress64(eight_block,output1); + std::string o1_str(8,'\0'); + StringManager::compress64(eight_block,o1_str.data()); std::string res1_str = "Hello,Wo"; - std::string o1_str (output1); TEST_STRING_EQUAL(o1_str,res1_str); - delete[] output1; - char* output2 = new char [9]; - output2 [8] = '\0'; - char res2 [9] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; - res2[8] = '\0'; - StringManager::compress64(eight_block_negative,output2); - std::string res2_str (res2); - std::string o2_str(output2); + std::string o2_str(8,'\0'); + StringManager::compress64(eight_block_negative,o2_str.data()); + std::string res2_str = res1_str; TEST_STRING_EQUAL(o2_str, res2_str); - delete[] output2; - - char* output3 = new char [9]; - output3 [8] = '\0'; - char res3 [9] = {0x42,0x45,0x4C,0x41,0x00,0x00,0x00,0x00}; - res3[8] = '\0'; - StringManager::compress64(eight_block_mixed,output3); - std::string res3_str (res3); - std::string o3_str(output3); + + + std::string o3_str(8,'\0'); + // char res3 [9] = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; + // res3[8] = '\0'; + StringManager::compress64(eight_block_mixed,o3_str.data()); + std::string res3_str = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; TEST_STRING_EQUAL(o3_str, res3_str); - delete[] output3; - - char* output4 = new char [13]; - output4 [0] ='A'; - output4 [1] ='B'; - output4 [2] ='R'; - output4 [3] ='A'; - output3 [12] = '\0'; + + std::string o4_str(12,'\0'); + o4_str [0] ='A'; + o4_str [1] ='B'; + o4_str [2] ='R'; + o4_str [3] ='A'; - StringManager::compress64(eight_block_kadabra,(output4+4)); + StringManager::compress64(eight_block_kadabra,((o4_str.data())+4)); std::string res4_str = "ABRAKADABRA!"; - std::string o4_str(output4); TEST_STRING_EQUAL(o4_str, res4_str); - delete[] output4; END_SECTION From 212c9e7da1dbac461a8bbae6892e1c444d6d4330 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 11:17:14 +0200 Subject: [PATCH 30/34] Implemented strLength function with simde for potential runtime improvement --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 4 ++- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 25 +++++++++++++++++++ .../openms/source/XMLHandler_test.cpp | 9 +++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 38279edbb9f..6124c6cb206 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -226,7 +226,7 @@ namespace OpenMS inline static String toNative_(const XMLCh* str) { String r; - XMLSize_t l = xercesc::XMLString::stringLen(str); + XMLSize_t l = strLength(str); if(isASCII(str, l)) { appendASCII(str,l,r); @@ -252,6 +252,8 @@ namespace OpenMS /// Destructor ~StringManager(); + static int strLength(const XMLCh* input_ptr); + /// Transcode the supplied C string to a xerces string inline static XercesString convert(const char * str) { diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 8f04a091a70..0a8ce6609d3 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -422,6 +422,31 @@ namespace OpenMS::Internal } } + int StringManager::strLength(const XMLCh* input_ptr) { + size_t processedChars = 0; + XMLCh* pos_ptr = const_cast(input_ptr); + + while (true) { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)pos_ptr); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i cmpZero = simde_mm_cmpeq_epi16(bits, zero); + uint16_t zeroMask = simde_mm_movemask_epi8(cmpZero); + + if (zeroMask != 0x0000) { + int bytePosZero = __builtin_ctz(zeroMask); + int charPosZero = bytePosZero / 2; + pos_ptr += charPosZero; + return processedChars + charPosZero; + } + + pos_ptr += 8; + processedChars += 8; + } + + // Reached max length without finding null terminator + return 0; + } + void StringManager::compress64 (const XMLCh* input_it, char* output_it) { alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp index 19e56614144..b55c6ad27d7 100644 --- a/src/tests/class_tests/openms/source/XMLHandler_test.cpp +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -133,6 +133,15 @@ START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & END_SECTION +START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) + int o_length = StringManager::strLength(ascii); + TEST_EQUAL(o_length, a_length); + o_length = StringManager::strLength(empty); + TEST_EQUAL(o_length, e_length); + o_length = StringManager::strLength(upperBoundary); + TEST_EQUAL(o_length, u_length); +END_SECTION + END_TEST From 8ca05624df46d1978a2046c666993a6e9850404b Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 16:11:49 +0200 Subject: [PATCH 31/34] Added strLength Method using simde for Potential runtime improvement. Added a for loop to prevent this method from crossing page Boundaries --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 1 + .../source/FORMAT/HANDLERS/MzMLHandler.cpp | 5 +++- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 28 +++++++++++++++---- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 6124c6cb206..5f4dcc89777 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -21,6 +21,7 @@ #include #include +#include #include #include diff --git a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp index d8377313848..d17d9711b93 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp @@ -18,8 +18,11 @@ #include #include + #include +using namespace std::literals; + namespace OpenMS::Internal { @@ -261,7 +264,7 @@ namespace OpenMS::Internal UInt meta_string_array_index = 0; for (Size i = 0; i < input_data.size(); i++) //loop over all binary data arrays { - if (input_data[i].meta.getName() != "m/z array" && input_data[i].meta.getName() != "intensity array") // is meta data array? + if (input_data[i].meta.getName() != "m/z array"sv && input_data[i].meta.getName() != "intensity array"sv) // is meta data array? { if (input_data[i].data_type == MzMLHandlerHelper::BinaryData::DT_FLOAT) { diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 0a8ce6609d3..0c8f141fefd 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -425,8 +425,20 @@ namespace OpenMS::Internal int StringManager::strLength(const XMLCh* input_ptr) { size_t processedChars = 0; XMLCh* pos_ptr = const_cast(input_ptr); + size_t align = (size_t)pos_ptr % 16; + // Prevents Page boundary crossing + for (size_t i = 0; i < align; i++) + { + if (pos_ptr[i] == 0) + { + return processedChars + i; + } + processedChars++; + pos_ptr++; + }; - while (true) { + while (true) + { simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)pos_ptr); simde__m128i zero = simde_mm_setzero_si128(); simde__m128i cmpZero = simde_mm_cmpeq_epi16(bits, zero); @@ -447,8 +459,9 @@ namespace OpenMS::Internal return 0; } - void StringManager::compress64 (const XMLCh* input_it, char* output_it) { - alignas(16) simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); + void StringManager::compress64 (const XMLCh* input_it, char* output_it) + { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); const simde__m128i shuffleMask = simde_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1); @@ -471,13 +484,18 @@ namespace OpenMS::Internal return false; } - for (size_t i = 0; i < quotient && bitmask; i++) + for (size_t i = 0; i < quotient; i++) { + simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_ptr); simde__m128i zero = simde_mm_setzero_si128(); simde__m128i andOP = simde_mm_and_si128(bits, mask); simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); - bitmask = simde_mm_movemask_epi8(cmp) == 0xFFFF; + + if (simde_mm_movemask_epi8(cmp) != 0xFFFF) + { + bitmask = false; + } input_ptr+=8; } From 83d290af41271ac73a4030e631121f5ad1ef4536 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 16:44:33 +0200 Subject: [PATCH 32/34] Added Description of strLength method for documentation --- src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 5f4dcc89777..51ef962aa9f 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -253,6 +253,7 @@ namespace OpenMS /// Destructor ~StringManager(); + /// Calculates the length of a XMLCh* string using SIMDe static int strLength(const XMLCh* input_ptr); /// Transcode the supplied C string to a xerces string From 32092981233395fc26e3f955a48765c5fad0d364 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Tue, 13 May 2025 23:27:28 +0200 Subject: [PATCH 33/34] code is now up to the coding conventions --- .../OpenMS/FORMAT/HANDLERS/XMLHandler.h | 2 +- .../source/FORMAT/HANDLERS/XMLHandler.cpp | 183 ++++++++++-------- 2 files changed, 99 insertions(+), 86 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 51ef962aa9f..0563d765809 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -11,7 +11,7 @@ #include #include -#include // StringList + #include #include #include diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 0c8f141fefd..3d2cf4affc8 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -422,120 +422,133 @@ namespace OpenMS::Internal } } - int StringManager::strLength(const XMLCh* input_ptr) { - size_t processedChars = 0; + int StringManager::strLength(const XMLCh* input_ptr) + { + size_t processed_chars = 0; XMLCh* pos_ptr = const_cast(input_ptr); size_t align = (size_t)pos_ptr % 16; - // Prevents Page boundary crossing - for (size_t i = 0; i < align; i++) + + // Prevent crossing page boundaries + for (size_t i = 0; i < align; ++i) { if (pos_ptr[i] == 0) { - return processedChars + i; + return processed_chars + i; } - processedChars++; - pos_ptr++; - }; + ++processed_chars; + ++pos_ptr; + } - while (true) + while (true) { - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)pos_ptr); - simde__m128i zero = simde_mm_setzero_si128(); - simde__m128i cmpZero = simde_mm_cmpeq_epi16(bits, zero); - uint16_t zeroMask = simde_mm_movemask_epi8(cmpZero); + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(pos_ptr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i cmp_zero = simde_mm_cmpeq_epi16(bits, zero); + uint16_t zero_mask = simde_mm_movemask_epi8(cmp_zero); - if (zeroMask != 0x0000) { - int bytePosZero = __builtin_ctz(zeroMask); - int charPosZero = bytePosZero / 2; - pos_ptr += charPosZero; - return processedChars + charPosZero; - } + if (zero_mask != 0x0000) + { + int byte_pos_zero = __builtin_ctz(zero_mask); + int char_pos_zero = byte_pos_zero / 2; + pos_ptr += char_pos_zero; + return processed_chars + char_pos_zero; + } - pos_ptr += 8; - processedChars += 8; + pos_ptr += 8; + processed_chars += 8; } + } + + void StringManager::compress64(const XMLCh* inputIt, char* outputIt) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputIt)); + + // Select every second byte (little-endian lower byte of each UTF-16 character) + const simde__m128i shuffleMask = simde_mm_setr_epi8( + 0, 2, 4, 6, 8, 10, 12, 14, + -1, -1, -1, -1, -1, -1, -1, -1 + ); + + simde__m128i compressed = simde_mm_shuffle_epi8(bits, shuffleMask); - // Reached max length without finding null terminator - return 0; + // Store the lower 64 bits (8 ASCII characters) + simde_mm_storel_epi64(reinterpret_cast(outputIt), compressed); } - void StringManager::compress64 (const XMLCh* input_it, char* output_it) + bool StringManager::isASCII(const XMLCh* chars, const XMLSize_t length) + { + if (length == 0) { - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_it); - // simde__m128i compressed = simde_mm_packus_epi16(bits, simde_mm_setzero_si128()); - const simde__m128i shuffleMask = simde_mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, - -1, -1, -1, -1, -1, -1, -1, -1); - simde__m128i compressed = simde_mm_shuffle_epi8(bits,shuffleMask); - simde_mm_storel_epi64((simde__m128i*)output_it, compressed); + return false; } - bool StringManager::isASCII(const XMLCh * chars, const XMLSize_t length) { + Size quotient = length / 8; + Size remainder = length % 8; - - size_t quotient = length / 8; // Ganzzahliger Quotient - size_t remainder = length % 8; + const XMLCh* inputPtr = chars; + simde__m128i mask = simde_mm_set1_epi16(0xFF00); + bool bitmask = true; - const XMLCh* input_ptr = chars; - simde__m128i mask = simde_mm_set1_epi16(0xFF00); - bool bitmask = true; + // Process blocks of 8 UTF-16 characters using SIMD + for (Size i = 0; i < quotient; ++i) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputPtr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i andOp = simde_mm_and_si128(bits, mask); + simde__m128i cmp = simde_mm_cmpeq_epi16(andOp, zero); - if (length == 0) + if (simde_mm_movemask_epi8(cmp) != 0xFFFF) { - return false; + bitmask = false; + break; } - for (size_t i = 0; i < quotient; i++) - { - - simde__m128i bits = simde_mm_loadu_si128((simde__m128i*)input_ptr); - simde__m128i zero = simde_mm_setzero_si128(); - simde__m128i andOP = simde_mm_and_si128(bits, mask); - simde__m128i cmp = simde_mm_cmpeq_epi16(andOP, zero); - - if (simde_mm_movemask_epi8(cmp) != 0xFFFF) - { - bitmask = false; - } - input_ptr+=8; - } - - for (size_t i = 0; i < remainder && bitmask; i++) + inputPtr += 8; + } + + // Check remaining characters individually + for (Size i = 0; i < remainder && bitmask; ++i) + { + if (inputPtr[i] & 0xFF00) { - bitmask = !(input_ptr[i] & 0xFF00); + bitmask = false; + break; } - return bitmask; } - void StringManager::appendASCII(const XMLCh * chars, const XMLSize_t length, String & result) + return bitmask; + } + + void StringManager::appendASCII(const XMLCh* chars, const XMLSize_t length, String& result) { - // XMLCh are characters in UTF16 (usually stored as 16bit unsigned - // short but this is not guaranteed). - // We know that the Base64 string here can only contain plain ASCII - // and all bytes except the least significant one will be zero. Thus - // we can convert to char directly (only keeping the least - // significant byte). - - size_t quotient = length / 8; - size_t remainder = length % 8; - - const XMLCh* input_ptr = chars; - - size_t curr_size = result.size(); - result.resize(curr_size + length); - char* output_ptr = &result[curr_size]; - - //Copy Block of 8 chars at a time. Then jumps to the next eight Blocks - for (size_t i = 0; i < quotient; i++) - { - compress64(input_ptr, output_ptr); - input_ptr += 8; - output_ptr += 8; + // XMLCh are characters in UTF16 (usually stored as 16-bit unsigned + // short but this is not guaranteed). + // We know that the Base64 string here can only contain plain ASCII + // and all bytes except the least significant one will be zero. Thus + // we can convert to char directly (only keeping the least + // significant byte). + + Size quotient = length / 8; + Size remainder = length % 8; + + const XMLCh* inputPtr = chars; + + Size currentSize = result.size(); + result.resize(currentSize + length); + char* outputPtr = &result[currentSize]; + + // Copy blocks of 8 characters at a time + for (Size i = 0; i < quotient; ++i) + { + compress64(inputPtr, outputPtr); + inputPtr += 8; + outputPtr += 8; } - - - for (size_t i = 0; i < remainder; i++) - { - output_ptr[i] = static_cast(input_ptr[i] & 0xFF); + + // Copy any remaining characters individually + for (Size i = 0; i < remainder; ++i) + { + outputPtr[i] = static_cast(inputPtr[i] & 0xFF); } } From d411bf6e5805a7f9ded4e719a156ecfba0d392c8 Mon Sep 17 00:00:00 2001 From: Bela Bennet Pfeffer Date: Wed, 14 May 2025 13:58:12 +0200 Subject: [PATCH 34/34] Neue Stringlenght funktion durch alte ersetzt aus dem MzMLHandler.Cpp sv rausgenommen --- src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h | 2 +- src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 0563d765809..513842ce854 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -227,7 +227,7 @@ namespace OpenMS inline static String toNative_(const XMLCh* str) { String r; - XMLSize_t l = strLength(str); + XMLSize_t l = xercesc::XMLString::stringLen(str); if(isASCII(str, l)) { appendASCII(str,l,r); diff --git a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp index d17d9711b93..eca1a1f82aa 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp @@ -21,7 +21,7 @@ #include -using namespace std::literals; + namespace OpenMS::Internal { @@ -264,7 +264,7 @@ namespace OpenMS::Internal UInt meta_string_array_index = 0; for (Size i = 0; i < input_data.size(); i++) //loop over all binary data arrays { - if (input_data[i].meta.getName() != "m/z array"sv && input_data[i].meta.getName() != "intensity array"sv) // is meta data array? + if (input_data[i].meta.getName() != "m/z array" && input_data[i].meta.getName() != "intensity array") // is meta data array? { if (input_data[i].data_type == MzMLHandlerHelper::BinaryData::DT_FLOAT) {