diff --git a/AUTHORS b/AUTHORS index 2623f026bd3..c60a731c419 100644 --- a/AUTHORS +++ b/AUTHORS @@ -15,6 +15,7 @@ the authors tag in the respective file header. - Anton Kriese - Anton Pervukhin - Bastian Blank + - Bela Pfeffer - Chenghao Zhu - Chris Bauer - Chris Bielow @@ -120,4 +121,4 @@ the authors tag in the respective file header. - Witold Wolski - Xiao Liang - Yasset Perez-Riverol - - Peter Jones \ No newline at end of file + - Peter Jones diff --git a/contrib b/contrib index 3cdef5c7c7f..e6fde7cfed8 160000 --- a/contrib +++ b/contrib @@ -1 +1 @@ -Subproject commit 3cdef5c7c7f98032f7d43c59ed642ebe5a1d56b1 +Subproject commit e6fde7cfed8cde73c6625cd493ce3f82e21263cc diff --git a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h index 9d4f98665ce..513842ce854 100644 --- a/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h +++ b/src/openms/include/OpenMS/FORMAT/HANDLERS/XMLHandler.h @@ -11,7 +11,7 @@ #include #include -#include // StringList + #include #include #include @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -224,8 +225,18 @@ namespace OpenMS // Converts from a wide-character string to a narrow-character string. inline static String toNative_(const XMLCh* str) - { - return String(unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + { + String r; + XMLSize_t l = xercesc::XMLString::stringLen(str); + if(isASCII(str, l)) + { + appendASCII(str,l,r); + } + else + { + r = (unique_xerces_ptr(xercesc::XMLString::transcode(str)).get()); + } + return r; } // Converts from a wide-character string to a narrow-character string. @@ -242,6 +253,9 @@ namespace OpenMS /// Destructor ~StringManager(); + /// Calculates the length of a XMLCh* string using SIMDe + static int strLength(const XMLCh* input_ptr); + /// Transcode the supplied C string to a xerces string inline static XercesString convert(const char * str) { @@ -283,7 +297,12 @@ namespace OpenMS { return toNative_(str); } + /// Checks if supplied if chars in XMLCh* can be encoded with ASCII + static bool isASCII(const XMLCh * chars, const XMLSize_t length); + /// Compresses eight 8x16bit Chars in XMLCh* to 8x8bit Chars by cutting upper byte + static void compress64 (const XMLCh * input_it, char* output_it); + /** * @brief Transcodes the supplied XMLCh* and appends it to the OpenMS String * diff --git a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h index 375453bc609..42a0cd0f576 100644 --- a/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h +++ b/src/openms/include/OpenMS/IONMOBILITY/IMTypes.h @@ -40,12 +40,18 @@ namespace OpenMS OPENMS_DLLAPI const std::string& toString(const DriftTimeUnit value); /// Different ways to represent ion mobility data in a spectrum + /// Note: + /// 1. MIXED is only used for MSExperiment, not for MSSpectrum + /// 2. UNKNOWN should be used if the format is not yet determined. + /// FileHandler or e.g. IM peak picker should ideally set the format a known value. enum class IMFormat { NONE, ///< no ion mobility CONCATENATED, ///< ion mobility frame is stacked in a single spectrum (i.e. has an IM float data array) MULTIPLE_SPECTRA,///< ion mobility is recorded as multiple spectra per frame (i.e. has one IM annotation per spectrum) MIXED, ///< an MSExperiment contains both CONCATENATED and MULTIPLE_SPECTRA + CENTROIDED, ///< ion mobility of peaks after centroiding in IM dimension. Ion mobility is annotated in a single float data array (i.e., each peak might have a different IM value in the data array); identical to CONCATENATED in terms of data layout. + UNKNOWN, ///< ion mobility format not yet determined. SIZE_OF_IMFORMAT }; /// Names of IMFormat diff --git a/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h b/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h index d2fd8947cac..342658072e1 100644 --- a/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h +++ b/src/openms/include/OpenMS/METADATA/MetaInfoInterface.h @@ -36,7 +36,7 @@ namespace OpenMS public: /// Constructor - MetaInfoInterface(); + MetaInfoInterface() = default; /// Copy constructor MetaInfoInterface(const MetaInfoInterface& rhs); /// Move constructor @@ -107,8 +107,8 @@ namespace OpenMS /// Creates the MetaInfo object if it does not exist inline void createIfNotExists_(); - /// Pointer to the MetaInfo object (0 by default) - MetaInfo* meta_; + /// Pointer to the MetaInfo object + MetaInfo* meta_ = nullptr; }; } // namespace OpenMS diff --git a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h index 37c79c137db..3a3bafef045 100644 --- a/src/openms/include/OpenMS/METADATA/SpectrumSettings.h +++ b/src/openms/include/OpenMS/METADATA/SpectrumSettings.h @@ -3,7 +3,7 @@ // // -------------------------------------------------------------------------- // $Maintainer: Timo Sachsenberg $ -// $Authors: Marc Sturm $ +// $Authors: Marc Sturm, Timo Sachsenberg $ // -------------------------------------------------------------------------- #pragma once @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -52,13 +53,13 @@ namespace OpenMS static const std::string NamesOfSpectrumType[SIZE_OF_SPECTRUMTYPE]; /// Constructor - SpectrumSettings(); + SpectrumSettings() = default; /// Copy constructor SpectrumSettings(const SpectrumSettings &) = default; /// Move constructor - SpectrumSettings(SpectrumSettings&&) = default; + SpectrumSettings(SpectrumSettings&&) noexcept = default; /// Destructor - ~SpectrumSettings(); + ~SpectrumSettings() noexcept = default; // Assignment operator SpectrumSettings & operator=(const SpectrumSettings &) = default; @@ -78,6 +79,16 @@ namespace OpenMS ///sets the spectrum type void setType(SpectrumType type); + /// @brief sets the IMFormat of the spectrum + /// @param im_type + void setIMFormat(const IMFormat& im_type); + + /// @brief returns the IMFormat of the spectrum if set. Otherwise UNKNOWN (default). + /// + /// Note: If UNKNOWN, use IMFormat::determineIMFormat to determine the IMFormat based on the data. + /// @return IMFormat of the spectrum + IMFormat getIMFormat() const; + /// returns the native identifier for the spectrum, used by the acquisition software. const String & getNativeID() const; /// sets the native identifier for the spectrum, used by the acquisition software. @@ -141,7 +152,8 @@ namespace OpenMS protected: - SpectrumType type_; + SpectrumType type_ = UNKNOWN; + IMFormat im_type_ = IMFormat::UNKNOWN; String native_id_; String comment_; InstrumentSettings instrument_settings_; diff --git a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h new file mode 100644 index 00000000000..79e40114563 --- /dev/null +++ b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/PeakPickerIM.h @@ -0,0 +1,71 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Timo Sachsenberg, Mohammed Alhigaylan $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#pragma once + +#include +#include + +namespace OpenMS +{ + /** + @brief Peak picking algorithm for ion mobility data + + @ingroup PeakPicking + */ + class OPENMS_DLLAPI PeakPickerIM + { + public: + /// Default constructor initializing parameters with default values. + PeakPickerIM(); + + /// Destructor. + virtual ~PeakPickerIM(); + + /// Picks ion mobility traces from the given spectrum. + void pickIMTraces(MSSpectrum& spectrum); + + /// Sets the parameters for peak picking. + void setParameters(const Param& param); + + /// Gets the current parameters. + Param getParameters() const; + + protected: + /// Updates internal member variables when parameters are changed. + void updateMembers_(); + + /// Returns the default parameters. + Param getDefaultParameters() const; + + /// Stores the parameters for peak picking. + Param parameters_; + + private: + /// determine sampling rate for linear resampler + double computeOptimalSamplingRate(const std::vector& spectra); + + /// Sum up the intensity of data points with nearly identical float values + MSSpectrum SumFrame(const MSSpectrum& spectrum, double ppm_tolerance = 0.01); + + /// Compute lower and upper m/z bounds based on ppm + std::pair ppmBounds(double mz, double ppm); + + /// Extract ion mobility traces as MSSpectra from the raw TimsTOF frame + /// Ion mobility is temporarily written in place of m/z inside Peak1D object. + /// raw m/z values are allocated to float data arrays with the label 'raw_mz' + std::vector extractIonMobilityTraces( + const MSSpectrum& picked_spectrum, + const MSSpectrum& raw_spectrum); + + /// compute m/z and ion mobility centers for picked traces. Returns centroided spectrum. + MSSpectrum ComputeCenters(const std::vector& mobilogram_traces, + const std::vector& picked_traces); + }; + +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/sources.cmake b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/sources.cmake index 9b0eb7b73f3..89c08ace988 100644 --- a/src/openms/include/OpenMS/PROCESSING/CENTROIDING/sources.cmake +++ b/src/openms/include/OpenMS/PROCESSING/CENTROIDING/sources.cmake @@ -5,6 +5,7 @@ set(directory include/OpenMS/PROCESSING/CENTROIDING) set(sources_list_h PeakPickerHiRes.h PeakPickerIterative.h +PeakPickerIM.h ) ### add path to the filenames diff --git a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp index e819450dfcf..9dbe24853a8 100644 --- a/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp +++ b/src/openms/source/FEATUREFINDER/FeatureFinderAlgorithmPicked.cpp @@ -1007,7 +1007,7 @@ namespace OpenMS { //store map of abort reasons for failed seeds FeatureMap abort_map; - abort_map.reserve(abort_reasons_.size()); + abort_map.reserve( abort_reasons_.size()); Size counter = 0; for (std::map::iterator it2 = abort_reasons_.begin(); it2 != abort_reasons_.end(); ++it2, ++counter) { diff --git a/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp b/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp index 8c945eac5f0..656d3eee86e 100644 --- a/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp +++ b/src/openms/source/FEATUREFINDER/GaussTraceFitter.cpp @@ -7,7 +7,7 @@ // -------------------------------------------------------------------------- #include - +#include #include #include @@ -171,19 +171,23 @@ namespace OpenMS double sig_sq = pow(sig, 2); double sig_3 = pow(sig, 3); double c_fac = -0.5 / sig_sq; + // std::cout << sig_3 << std::endl; + Size count = 0; for (Size t = 0; t < m_data->traces_ptr->size(); ++t) { const FeatureFinderAlgorithmPickedHelperStructs::MassTrace& trace = (*m_data->traces_ptr)[t]; double weight = m_data->weighted ? trace.theoretical_int : 1.0; + double constant = trace.theoretical_int * weight; for (Size i = 0; i < trace.peaks.size(); ++i) { double rt = trace.peaks[i].first; double e = exp(c_fac * pow(rt - x0, 2)); - J(count, 0) = trace.theoretical_int * e * weight; - J(count, 1) = trace.theoretical_int * height * e * (rt - x0) / sig_sq * weight; - J(count, 2) = 0.125* trace.theoretical_int* height* e* pow(rt - x0, 2) / sig_3 * weight; + double eConstant = constant * e; + J(count, 0) = eConstant; + J(count, 1) = eConstant* height * (rt - x0) / sig_sq; + J(count, 2) = 0.125* eConstant * height * pow(rt - x0, 2) / sig_3; ++count; } } diff --git a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp index d8377313848..eca1a1f82aa 100644 --- a/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/MzMLHandler.cpp @@ -18,8 +18,11 @@ #include #include + #include + + namespace OpenMS::Internal { diff --git a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp index 8f4b037e829..3d2cf4affc8 100644 --- a/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp +++ b/src/openms/source/FORMAT/HANDLERS/XMLHandler.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -315,7 +316,7 @@ namespace OpenMS::Internal } // no value, although there should be a numerical value else if (term.xref_type != ControlledVocabulary::CVTerm::NONE && term.xref_type != ControlledVocabulary::CVTerm::XSD_STRING && // should be numerical - !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the binary data array, not the 'value=' attribute! + !cv.isChildOf(accession, "MS:1000513") // here the value type relates to the bits data array, not the 'value=' attribute! ) { warning(LOAD, String("The CV term '") + accession + " - " + term.name + "' used in tag '" + parent_tag + "' should have a numerical value. The value is '" + value + "'."); @@ -421,42 +422,145 @@ namespace OpenMS::Internal } } - //******************************************************************************************************************* + int StringManager::strLength(const XMLCh* input_ptr) + { + size_t processed_chars = 0; + XMLCh* pos_ptr = const_cast(input_ptr); + size_t align = (size_t)pos_ptr % 16; - StringManager::StringManager() - = default; + // Prevent crossing page boundaries + for (size_t i = 0; i < align; ++i) + { + if (pos_ptr[i] == 0) + { + return processed_chars + i; + } + ++processed_chars; + ++pos_ptr; + } + + while (true) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(pos_ptr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i cmp_zero = simde_mm_cmpeq_epi16(bits, zero); + uint16_t zero_mask = simde_mm_movemask_epi8(cmp_zero); + + if (zero_mask != 0x0000) + { + int byte_pos_zero = __builtin_ctz(zero_mask); + int char_pos_zero = byte_pos_zero / 2; + pos_ptr += char_pos_zero; + return processed_chars + char_pos_zero; + } + + pos_ptr += 8; + processed_chars += 8; + } + } - StringManager::~StringManager() - = default; + void StringManager::compress64(const XMLCh* inputIt, char* outputIt) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputIt)); + + // Select every second byte (little-endian lower byte of each UTF-16 character) + const simde__m128i shuffleMask = simde_mm_setr_epi8( + 0, 2, 4, 6, 8, 10, 12, 14, + -1, -1, -1, -1, -1, -1, -1, -1 + ); + + simde__m128i compressed = simde_mm_shuffle_epi8(bits, shuffleMask); + + // Store the lower 64 bits (8 ASCII characters) + simde_mm_storel_epi64(reinterpret_cast(outputIt), compressed); + } + + bool StringManager::isASCII(const XMLCh* chars, const XMLSize_t length) + { + if (length == 0) + { + return false; + } + + Size quotient = length / 8; + Size remainder = length % 8; + + const XMLCh* inputPtr = chars; + simde__m128i mask = simde_mm_set1_epi16(0xFF00); + bool bitmask = true; + + // Process blocks of 8 UTF-16 characters using SIMD + for (Size i = 0; i < quotient; ++i) + { + simde__m128i bits = simde_mm_loadu_si128(reinterpret_cast(inputPtr)); + simde__m128i zero = simde_mm_setzero_si128(); + simde__m128i andOp = simde_mm_and_si128(bits, mask); + simde__m128i cmp = simde_mm_cmpeq_epi16(andOp, zero); - void StringManager::appendASCII(const XMLCh * chars, const XMLSize_t length, String & result) + if (simde_mm_movemask_epi8(cmp) != 0xFFFF) + { + bitmask = false; + break; + } + + inputPtr += 8; + } + + // Check remaining characters individually + for (Size i = 0; i < remainder && bitmask; ++i) { - // XMLCh are characters in UTF16 (usually stored as 16bit unsigned + if (inputPtr[i] & 0xFF00) + { + bitmask = false; + break; + } + } + + return bitmask; + } + + void StringManager::appendASCII(const XMLCh* chars, const XMLSize_t length, String& result) + { + // XMLCh are characters in UTF16 (usually stored as 16-bit unsigned // short but this is not guaranteed). // We know that the Base64 string here can only contain plain ASCII // and all bytes except the least significant one will be zero. Thus // we can convert to char directly (only keeping the least // significant byte). - const XMLCh* it = chars; - const XMLCh* end = it + length; - - size_t curr_size = result.size(); - result.resize(curr_size + length); - std::string::iterator str_it = result.begin(); - std::advance(str_it, curr_size); - while (it!=end) - { - *str_it = (char)*it; - ++str_it; - ++it; + + Size quotient = length / 8; + Size remainder = length % 8; + + const XMLCh* inputPtr = chars; + + Size currentSize = result.size(); + result.resize(currentSize + length); + char* outputPtr = &result[currentSize]; + + // Copy blocks of 8 characters at a time + for (Size i = 0; i < quotient; ++i) + { + compress64(inputPtr, outputPtr); + inputPtr += 8; + outputPtr += 8; + } + + // Copy any remaining characters individually + for (Size i = 0; i < remainder; ++i) + { + outputPtr[i] = static_cast(inputPtr[i] & 0xFF); } + } - // This is ca. 50 % faster than - // for (size_t i = 0; i < length; i++) - // { - // result[curr_size + i] = (char)chars[i]; - // } + //******************************************************************************************************************* + + StringManager::StringManager() + = default; + + StringManager::~StringManager() + = default; + + - } } // namespace OpenMS // namespace Internal diff --git a/src/openms/source/IONMOBILITY/IMTypes.cpp b/src/openms/source/IONMOBILITY/IMTypes.cpp index b351cf0a3dc..65da3f2f769 100644 --- a/src/openms/source/IONMOBILITY/IMTypes.cpp +++ b/src/openms/source/IONMOBILITY/IMTypes.cpp @@ -16,7 +16,7 @@ namespace OpenMS { const std::string NamesOfDriftTimeUnit[] = {"", "ms", "1/K0", "FAIMS_CV"}; - const std::string NamesOfIMFormat[] = {"none", "concatenated", "multiple_spectra", "mixed"}; + const std::string NamesOfIMFormat[] = {"none", "concatenated", "multiple_spectra", "mixed", "centroided", "unknown"}; DriftTimeUnit toDriftTimeUnit(const std::string& dtu_string) @@ -73,33 +73,45 @@ namespace OpenMS if (occs.empty()) { return IMFormat::NONE; - } - if (occs.size() == 1 && (occs.find(IMFormat::CONCATENATED) != occs.end() || occs.find(IMFormat::MULTIPLE_SPECTRA) != occs.end())) + } + + if (occs.size() == 1) { - return *occs.begin(); + auto format = *occs.begin(); + if (format != IMFormat::CONCATENATED + && format != IMFormat::MULTIPLE_SPECTRA + && format != IMFormat::CENTROIDED) + { + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "subfunction returned invalid value(s)", "Number of different values: " + String(occs.size())); + } + return format; } - if (occs.size() == 2 && occs.find(IMFormat::CONCATENATED) != occs.end() && occs.find(IMFormat::MULTIPLE_SPECTRA) != occs.end()) + else { return IMFormat::MIXED; - } - - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "subfunction returned invalid value(s)", "Number of different values: " + String(occs.size())); + } } IMFormat IMTypes::determineIMFormat(const MSSpectrum& spec) { - bool has_float_data = spec.containsIMData(); // cache value; query is 'expensive' - bool has_drift_time = spec.getDriftTime() != DRIFTTIME_NOT_SET; - if (has_float_data && has_drift_time) + // First check if format is already set and not UNKNOWN + IMFormat current_format = spec.getIMFormat(); + if (current_format != IMFormat::UNKNOWN) { - const auto& fda = spec.getFloatDataArrays()[spec.getIMData().first]; - String array_val = fda.empty() ? "[empty]" : String(fda[0]); - throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "MSSpectrum contains both an float-data-array and a single drift time. At most one is allowed per spectrum!", String("Array: ") + array_val + ", ... <> Spec: " + spec.getDriftTime()); + return current_format; } + // If format is UNKNOWN, determine it + bool has_float_data = spec.containsIMData(); // cache value; query is 'expensive' + bool has_drift_time = spec.getDriftTime() != DRIFTTIME_NOT_SET; + if (has_float_data) { - return IMFormat::CONCATENATED; + if (has_drift_time) + { + OPENMS_LOG_DEBUG << "both drift time and IM data array found in spectrum " << spec.getNativeID() << "\n. Support for both is experimental." << std::endl; + } + return IMFormat::CONCATENATED; // TODO: or CENTROIDED. for now assume that no picking was done (otherwise we would have annotated it) } else if (has_drift_time) { diff --git a/src/openms/source/METADATA/MetaInfoInterface.cpp b/src/openms/source/METADATA/MetaInfoInterface.cpp index 9f3bf777733..18b82f689eb 100644 --- a/src/openms/source/METADATA/MetaInfoInterface.cpp +++ b/src/openms/source/METADATA/MetaInfoInterface.cpp @@ -14,12 +14,6 @@ using namespace std; namespace OpenMS { - - MetaInfoInterface::MetaInfoInterface() : - meta_(nullptr) - { - } - /// Copy constructor MetaInfoInterface::MetaInfoInterface(const MetaInfoInterface& rhs) : meta_(nullptr) diff --git a/src/openms/source/METADATA/SpectrumSettings.cpp b/src/openms/source/METADATA/SpectrumSettings.cpp index d9a5b045359..a605f025a2f 100644 --- a/src/openms/source/METADATA/SpectrumSettings.cpp +++ b/src/openms/source/METADATA/SpectrumSettings.cpp @@ -9,7 +9,6 @@ #include #include -#include // for equality using namespace std; @@ -18,23 +17,6 @@ namespace OpenMS const std::string SpectrumSettings::NamesOfSpectrumType[] = {"Unknown", "Centroid", "Profile"}; - SpectrumSettings::SpectrumSettings() : - MetaInfoInterface(), - type_(UNKNOWN), - native_id_(), - comment_(), - instrument_settings_(), - source_file_(), - acquisition_info_(), - precursors_(), - products_(), - identification_(), - data_processing_() - { - } - - SpectrumSettings::~SpectrumSettings() = default; - bool SpectrumSettings::operator==(const SpectrumSettings & rhs) const { return MetaInfoInterface::operator==(rhs) && @@ -94,6 +76,20 @@ namespace OpenMS type_ = type; } + void SpectrumSettings::setIMFormat(const IMFormat& im_type) + { + if (im_type == IMFormat::MIXED) + { + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Single spectrum can't have MIXED ion mobility format.", "SIZE_OF_IMFORMAT"); + } + im_type_ = im_type; + } + + IMFormat SpectrumSettings::getIMFormat() const + { + return im_type_; + } + const String & SpectrumSettings::getComment() const { return comment_; diff --git a/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp new file mode 100644 index 00000000000..d876e052fbe --- /dev/null +++ b/src/openms/source/PROCESSING/CENTROIDING/PeakPickerIM.cpp @@ -0,0 +1,797 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Timo Sachsenberg, Mohammed Alhigaylan $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace OpenMS +{ + double PeakPickerIM::computeOptimalSamplingRate(const std::vector& spectra) + { + std::vector mz_diffs; + + for (size_t s = 0; s < spectra.size(); ++s) + { + const MSSpectrum& spectrum = spectra[s]; + // The spectrum could have multiple peaks at the same x position. + // Sum the peak intensity + MSSpectrum summed_trace = SumFrame(spectrum, 7000.0); + + if (summed_trace.size() < 20) + { + std::cerr << "Skipping trace " << s << " because it has too few points (" + << summed_trace.size() << ")." << std::endl; + continue; // skip this spectrum + } + + for (size_t i = 1; i < summed_trace.size(); ++i) + { + double diff = summed_trace[i].getMZ() - summed_trace[i - 1].getMZ(); + mz_diffs.push_back(diff); + } + } + + // If we found no valid m/z differences + if (mz_diffs.empty()) + { + std::cerr << "Warning: No valid m/z differences found in any spectra. Using default sampling rate of 0.01" << std::endl; + return 0.01; // Fallback value + } + + // Sort the differences to compute the 75th percentile threshold + // This is needed in case there is a gap in the mobilogram. i+1 peak will skew the computed + // sampling rate. + std::sort(mz_diffs.begin(), mz_diffs.end()); + + size_t percentile_index = static_cast(mz_diffs.size() * 0.75); + double threshold = mz_diffs[percentile_index]; + + std::cerr << "75th percentile of position differences is: " << threshold << std::endl; + + // Filter out large differences (keep diffs <= threshold) + std::vector small_mz_diffs; + for (double diff : mz_diffs) + { + if (diff <= threshold) + { + small_mz_diffs.push_back(diff); + } + } + + if (small_mz_diffs.empty()) + { + std::cerr << "Warning: No valid small m/z differences found after filtering. Using default sampling rate of 0.01" << std::endl; + return 0.01; + } + + // Compute the mode + std::map freq_map; + for (double diff : small_mz_diffs) + { + freq_map[diff]++; + } + + double mode_sampling_rate = small_mz_diffs.front(); + int max_count = 0; + + for (const auto& [diff, count] : freq_map) + { + if (count > max_count) + { + mode_sampling_rate = diff; + max_count = count; + } + } + + std::cout << "Computed optimal sampling rate: " << mode_sampling_rate << std::endl; + + return mode_sampling_rate; + } + + // Function to compute the lower and upper m/z bounds based on ppm tolerance + std::pair PeakPickerIM::ppmBounds(double mz, double ppm) + { + ppm = ppm / 1e6; + double delta_mz = (ppm * mz) / 2.0; + + double low = mz - delta_mz; + double high = mz + delta_mz; + + return std::make_pair(low, high); + } + + // This function sums peaks if they are nearly identical + // OpenMS represents TimsTOF data in MSSpectrum() objects as one-array. + // There could be multiple 500.0 m/z peaks with different ion mobility values. + // Peak picking (such as HiRes) will not work properly if there are multiple y measurements at a given x m/z position. + MSSpectrum PeakPickerIM::SumFrame(const MSSpectrum& input_spectrum, double ppm_tolerance) + { + MSSpectrum export_spectrum; + + if (input_spectrum.empty()) return export_spectrum; + + MSSpectrum spectrum = input_spectrum; + + if (!spectrum.isSorted()) + { + std::cout << "Spectrum not sorted by m/z, sorting now..." << std::endl; + spectrum.sortByPosition(); + } + + double current_mz = spectrum[0].getMZ(); + double current_intensity = spectrum[0].getIntensity(); + + for (Size i = 1; i < spectrum.size(); ++i) + { + double next_mz = spectrum[i].getMZ(); + double next_intensity = spectrum[i].getIntensity(); + + double delta_mz = std::abs(next_mz - current_mz); + double ppm_diff = (delta_mz / current_mz) * 1e6; + + if (ppm_diff <= ppm_tolerance) + { + current_intensity += next_intensity; + } + else + { + Peak1D peak; + peak.setMZ(current_mz); + peak.setIntensity(current_intensity); + export_spectrum.push_back(peak); + + current_mz = next_mz; + current_intensity = next_intensity; + } + } + Peak1D last_peak; + last_peak.setMZ(current_mz); + last_peak.setIntensity(current_intensity); + export_spectrum.push_back(last_peak); + + return export_spectrum; + } + + // We use peak FWHM (from PeakPickerHiRes) to extract ion mobility traces. + // Given a picked m/z peak, we write a temporary MSSpectrum() object with ion mobility measurements + // in place of m/z in Peak1D object. This facilitates peak picking in the ion mobility dimension. + // To enable recomputing of m/z center after ion mobility peak picking, we tack raw m/z peak values + // in FloatDataArrays(). + + std::vector PeakPickerIM::extractIonMobilityTraces( + const MSSpectrum& picked_spectrum, + const MSSpectrum& raw_spectrum) + { + const auto& float_data_arrays = picked_spectrum.getFloatDataArrays(); + + // Find FWHM array in picked_spectrum + const MSSpectrum::FloatDataArray* fwhm_array = nullptr; + + for (const auto& array : float_data_arrays) + { + if (array.getName() == "FWHM_ppm") + { + fwhm_array = &array; + break; + } + } + + if (!fwhm_array) + { + std::cerr << "FWHM data array not found!" << std::endl; + return {}; + } + + if (fwhm_array->size() != picked_spectrum.size()) + { + std::cerr << "Size mismatch between FWHM array and picked peaks!" << std::endl; + return {}; + } + + // Get the Ion Mobility array from raw_spectrum + const auto& raw_float_data_arrays = raw_spectrum.getFloatDataArrays(); + const MSSpectrum::FloatDataArray* ion_mobility_array = nullptr; + + for (const auto& array : raw_float_data_arrays) + { + if (array.getName() == "Ion Mobility") + { + ion_mobility_array = &array; + break; + } + } + + if (!ion_mobility_array) + { + std::cerr << "Ion Mobility data array not found in raw_spectrum!" << std::endl; + return {}; + } + + // Vector of MSSpectra for each picked m/z peak (each spectrum is a mobilogram trace) + std::vector mobility_traces; + + for (size_t i = 0; i < picked_spectrum.size(); ++i) + { + double picked_mz = picked_spectrum[i].getMZ(); + double fwhm_ppm = (*fwhm_array)[i]; + + auto bounds = ppmBounds(picked_mz, fwhm_ppm); + double lower_bound = bounds.first; + double upper_bound = bounds.second; + + SignedSize center_idx = raw_spectrum.findNearest(picked_mz); + + if (center_idx == -1) + { + std::cerr << "No raw peaks found near picked m/z: " << picked_mz << std::endl; + mobility_traces.push_back(MSSpectrum()); + continue; + } + + MSSpectrum trace_spectrum; // A single mobilogram trace + // Prepare FloatDataArray to store raw m/z values + MSSpectrum::FloatDataArray raw_mz_array; + raw_mz_array.setName("raw_mz"); + + // Expand left + SignedSize left_idx = center_idx; + while (left_idx >= 0 && raw_spectrum[left_idx].getMZ() >= lower_bound) + { + Peak1D p; + p.setMZ((*ion_mobility_array)[left_idx]); // Ion Mobility as m/z + p.setIntensity(raw_spectrum[left_idx].getIntensity()); + + trace_spectrum.push_back(p); + + // Store the raw m/z + raw_mz_array.push_back(raw_spectrum[left_idx].getMZ()); + + --left_idx; + } + + // Expand right + SignedSize right_idx = center_idx + 1; + while (right_idx < static_cast(raw_spectrum.size()) && + raw_spectrum[right_idx].getMZ() <= upper_bound) + { + Peak1D p; + p.setMZ((*ion_mobility_array)[right_idx]); // Ion Mobility as m/z + p.setIntensity(raw_spectrum[right_idx].getIntensity()); + + trace_spectrum.push_back(p); + + // Store the raw m/z data in floatDataArrays() + raw_mz_array.push_back(raw_spectrum[right_idx].getMZ()); + + ++right_idx; + } + + // Attach the raw m/z array to trace_spectrum + auto& trace_float_arrays = trace_spectrum.getFloatDataArrays(); + trace_float_arrays.push_back(std::move(raw_mz_array)); + + // Sort the trace_spectrum by ion mobility (m/z), while keeping raw m/z aligned + trace_spectrum.sortByPosition(); + + mobility_traces.push_back(std::move(trace_spectrum)); + } + + return mobility_traces; + } + + // Function to compute m/z centers from mobilogram_traces and picked_traces + MSSpectrum PeakPickerIM::ComputeCenters(const std::vector& mobilogram_traces, + const std::vector& picked_traces) + { + MSSpectrum centroided_frame; + + // Create float data arrays to house ion mobility data and peaks FWHM + MSSpectrum::FloatDataArray ion_mobility_array; + ion_mobility_array.setName("Ion Mobility"); + + MSSpectrum::FloatDataArray ion_mobility_fwhm; + ion_mobility_fwhm.setName("IM FWHM"); + + MSSpectrum::FloatDataArray mz_fwhm_array; + mz_fwhm_array.setName("MZ FWHM"); + + // debug + std::cout << "picked_traces.size(): " << picked_traces.size() << std::endl; + + // Loop over picked traces and their corresponding raw mobilogram traces + for (size_t i = 0; i < picked_traces.size(); ++i) + { + std::cout << "Looping through picked_trace that has .. " << picked_traces[i].size() << std::endl; + const MSSpectrum& picked_trace = picked_traces[i]; + const MSSpectrum& raw_trace = mobilogram_traces[i]; + + const auto& picked_float_arrays = picked_trace.getFloatDataArrays(); + + if (picked_float_arrays.empty()) + { + std::cerr << "No IM FWHM array found for picked_trace " << i << "!" << std::endl; + continue; + } + + // Assuming the first FloatDataArray holds the ion mobility peak FWHM values + const auto& fwhm_array = picked_float_arrays[0]; + + if (fwhm_array.size() != picked_trace.size()) + { + std::cerr << "FWHM array size mismatch with picked_trace size!" << std::endl; + continue; + } + + // Get the FloatDataArrays from raw_trace (assumed to hold the raw m/z values) + const auto& raw_float_arrays = raw_trace.getFloatDataArrays(); + + if (raw_float_arrays.empty()) + { + std::cerr << "No raw m/z peaks found for raw_trace " << i << "!" << std::endl; + continue; + } + + // Assume the first array holds the raw m/z values + const auto& raw_mz_values = raw_float_arrays[0]; + + if (raw_mz_values.size() != raw_trace.size()) + { + std::cerr << "raw_mz_values size mismatch with raw_trace size!" << std::endl; + continue; + } + + std::cout << "\n--- Processing picked_trace " << i << " ---\n"; + + // Iterate through picked peaks in this trace + for (Size j = 0; j < picked_trace.size(); ++j) + { + double centroid_im = picked_trace[j].getMZ(); // Ion mobility centroid (stored as m/z) + double fwhm = fwhm_array[j]; + + + double im_lower = centroid_im - (fwhm / 2.0); + double im_upper = centroid_im + (fwhm / 2.0); + + std::cout << "Picked peak " << j << " IM centroid: " << centroid_im + << " ion mobility FWHM: " << fwhm + << " --> IM bounds: [" << im_lower << ", " << im_upper << "]" << std::endl; + + // Use findNearest() to get the index of the closest peak in the raw mobilogram trace + SignedSize center_idx = raw_trace.findNearest(centroid_im); + + if (center_idx == -1) + { + std::cerr << "Could not find nearest peak to centroid_im in raw_trace!" << std::endl; + continue; + } + + MSSpectrum raw_peaks_within_bounds; + + // --- Expand Left --- + SignedSize left_idx = center_idx; + while (left_idx >= 0 && raw_trace[left_idx].getMZ() >= im_lower) + { + Peak1D new_peak; + new_peak.setMZ(raw_mz_values[left_idx]); // m/z from FloatDataArray + new_peak.setIntensity(raw_trace[left_idx].getIntensity()); // intensity from raw_trace + raw_peaks_within_bounds.push_back(new_peak); + + --left_idx; + } + + // --- Expand Right --- + SignedSize right_idx = center_idx + 1; + while (right_idx < static_cast(raw_trace.size()) && + raw_trace[right_idx].getMZ() <= im_upper) + { + Peak1D new_peak; + new_peak.setMZ(raw_mz_values[right_idx]); + new_peak.setIntensity(raw_trace[right_idx].getIntensity()); + raw_peaks_within_bounds.push_back(new_peak); + + ++right_idx; + } + + std::cout << "Picked IM peak " << j << ": collected " << raw_peaks_within_bounds.size() + << " raw m/z points between IM [" << im_lower << ", " << im_upper << "]" << std::endl; + + // If we only retrieved one raw peak, pass it over to centroided_frame as is + // Resampling and smoothing the raw data distorts the intensity values. + // We recompute the m/z peak maxima and intensity using spline + if (raw_peaks_within_bounds.size() == 1) + { + const Peak1D& single_peak = raw_peaks_within_bounds.front(); + + // Add it directly to centroided_frame + centroided_frame.push_back(single_peak); + + // Push corresponding ion mobility and FWHM arrays + ion_mobility_array.push_back(centroid_im); + ion_mobility_fwhm.push_back(fwhm); + mz_fwhm_array.push_back(0.0); + + std::cout << "[INFO] Only one raw peak found. Added directly to centroided_frame. m/z: " + << single_peak.getMZ() << " intensity: " << single_peak.getIntensity() << std::endl; + + // Skip the rest of the loop and move on to the next picked_trace peak + continue; + } + + + + raw_peaks_within_bounds.sortByPosition(); + + MSSpectrum raw_mz_peaks = SumFrame(raw_peaks_within_bounds, 0.1); + // Prepare data for spline + std::map peak_raw_data; + + for (const auto& peak : raw_mz_peaks) + { + peak_raw_data[peak.getMZ()] = peak.getIntensity(); + } + if (peak_raw_data.empty()) + { + std::cerr << "No data in raw_mz_peaks for picked IM peak " << j << "!" << std::endl; + continue; + } + + // Initialize spline + CubicSpline2d spline(peak_raw_data); + + // Define boundaries + const double left_bound = peak_raw_data.begin()->first; + const double right_bound = peak_raw_data.rbegin()->first; + + // Find maximum via spline bisection + double apex_mz = (left_bound + right_bound) / 2.0; + double apex_intensity = 0.0; + + const double max_search_threshold = 1e-6; + + Math::spline_bisection(spline, left_bound, right_bound, apex_mz, apex_intensity, max_search_threshold); + + std::cout << "Apex m/z: " << apex_mz << std::endl; + std::cout << "Apex intensity: " << apex_intensity << std::endl; + + // FWHM calculation (same binary search as before) + double half_height = apex_intensity / 2.0; + const double fwhm_search_threshold = 0.01 * half_height; + + // ---- Left side search ---- + double mz_left = left_bound; + double mz_center = apex_mz; + double int_mid = 0.0; + double mz_mid = mz_left; + + if (spline.eval(mz_left) > half_height) + { + mz_mid = mz_left; + } + else + { + do + { + mz_mid = (mz_left + mz_center) / 2.0; + int_mid = spline.eval(mz_mid); + + if (int_mid < half_height) + { + mz_left = mz_mid; + } + else + { + mz_center = mz_mid; + } + + } while (std::fabs(int_mid - half_height) > fwhm_search_threshold); + } + double fwhm_left_mz = mz_mid; + + // ---- Right side search ---- + double mz_right = right_bound; + mz_center = apex_mz; + + if (spline.eval(mz_right) > half_height) + { + mz_mid = mz_right; + } + else + { + do + { + mz_mid = (mz_right + mz_center) / 2.0; + int_mid = spline.eval(mz_mid); + + if (int_mid < half_height) + { + mz_right = mz_mid; + } + else + { + mz_center = mz_mid; + } + + } while (std::fabs(int_mid - half_height) > fwhm_search_threshold); + } + double fwhm_right_mz = mz_mid; + + // ---- FWHM result ---- + double mz_fwhm = fwhm_right_mz - fwhm_left_mz; + + std::cout << "Left m/z at half height: " << fwhm_left_mz << std::endl; + std::cout << "Right m/z at half height: " << fwhm_right_mz << std::endl; + std::cout << "m/z FWHM: " << mz_fwhm << std::endl; + + + Peak1D centroided_peak; + centroided_peak.setMZ(apex_mz); + centroided_peak.setIntensity(apex_intensity); + + centroided_frame.push_back(centroided_peak); + ion_mobility_array.push_back(centroid_im); + ion_mobility_fwhm.push_back(fwhm); + mz_fwhm_array.push_back(mz_fwhm); + } + + std::cout << "--- Finished processing picked_trace " << i << " ---\n\n"; + } + + auto& centroided_frame_fda = centroided_frame.getFloatDataArrays(); + centroided_frame_fda.push_back(std::move(ion_mobility_array)); + centroided_frame_fda.push_back(std::move(ion_mobility_fwhm)); + centroided_frame_fda.push_back(std::move(mz_fwhm_array)); + + centroided_frame.sortByPosition(); + + std::cout << "Printing centroided_frame inside ComputerCenters function " << std::endl; + for (const auto& peak : centroided_frame) + { + std::cout << "m/z: " << peak.getMZ() << ", intensity: " << peak.getIntensity() << std::endl; + } + + return centroided_frame; + } + + + PeakPickerIM::PeakPickerIM() : + parameters_(getDefaultParameters()) + { + } + + // Destructor + PeakPickerIM::~PeakPickerIM() + { + } + + Param PeakPickerIM::getDefaultParameters() const + { + Param p; + p.setValue("signal_to_noise", 0.0, "Signal to noise threshold for peak picking"); + p.setValue("spacing_difference_gap", 0.0, "The extension of a peak is stopped if the spacing between two subsequent data points exceeds 'spacing_difference_gap * min_spacing'. 'min_spacing' is the smaller of the two spacings from the peak apex to its two neighboring points. '0' to disable the constraint. Not applicable to chromatograms."); + p.setValue("spacing_difference", 0.0, "Maximum allowed difference between points during peak extension, in multiples of the minimal difference between the peak apex and its two neighboring points. If this difference is exceeded a missing point is assumed (see parameter 'missing'). A higher value implies a less stringent peak definition, since individual signals within the peak are allowed to be further apart. '0' to disable the constraint. Not applicable to chromatograms."); + p.setValue("missing", 0, "Maximum number of missing points allowed when extending a peak to the left or to the right. A missing data point occurs if the spacing between two subsequent data points exceeds 'spacing_difference * min_spacing'. 'min_spacing' is the smaller of the two spacings from the peak apex to its two neighboring points. Not applicable to chromatograms."); + p.setValue("report_FWHM", "true"); + p.setValue("report_FWHM_unit", "absolute"); + return p; + } + void PeakPickerIM::updateMembers_() + { + // This function is a placeholder for potential future use. + } + + void PeakPickerIM::setParameters(const Param& param) + { + parameters_ = param; + updateMembers_(); + } + + Param PeakPickerIM::getParameters() const + { + return parameters_; + } + + void PeakPickerIM::pickIMTraces(MSSpectrum& spectrum) + { + // Debugging: Print the input spectrum size + std::cout << "Processing spectrum with " << spectrum.size() << " peaks." << std::endl; + + /* + // IM format determination (Temporarily commented out) + IMFormat format = IMTypes::determineIMFormat(spectrum); + switch (format) + { + case IMFormat::NONE: + return; // no IM data + case IMFormat::CENTROIDED: + return; // already centroided + case IMFormat::UNKNOWN: + throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + "IMFormat set to UNKNOWN after determineIMFormat. This should never happen.", + String(NamesOfIMFormat[(size_t)format])); + } + */ + + + // --- Step 1a: Sum m/z peaks + // First, we project all timsTOF peaks into the m/z axis using SumFrame + // The ppm tolerance is a dynamic way of testing m/z floats being almost identical. Set it to 0.1 ppm + MSSpectrum summed_spectrum = SumFrame(spectrum, 0.1); + std::cout << "Spectrum after SumFrame has " << summed_spectrum.size() << " peaks." << std::endl; + + // --- step 2a: smooth the data projected to the m/z axis. + std::cout << "Applying Gaussian smoothing..." << std::endl; + GaussFilter gauss_filter; + + // Set Gaussian filter parameters. 5 ppm m/z is good approximation (make this a user parameter!) + Param gauss_params; + gauss_params.setValue("ppm_tolerance", 5.0); + gauss_params.setValue("use_ppm_tolerance", "true"); + + gauss_filter.setParameters(gauss_params); + gauss_filter.filter(summed_spectrum); + std::cout << "Spectrum after Gaussian smoothing has " << summed_spectrum.size() << " peaks." << std::endl; + + for (const auto& peak : summed_spectrum) + { + std::cout << "m/z: " << peak.getMZ() << ", intensity: " << peak.getIntensity() << std::endl; + } + + // ---step 3a: Apply PeakPickerHiRes and make sure the peak FWHM is reported in ppm. + // (Maybe this should change to absolute FWHM value...?). + PeakPickerHiRes picker; + Param hirez_mz_p; + hirez_mz_p.setValue("signal_to_noise", 0.0); + hirez_mz_p.setValue("report_FWHM", "true"); + hirez_mz_p.setValue("report_FWHM_unit", "relative"); + + picker.setParameters(hirez_mz_p); + MSSpectrum picked_spectrum; + + picker.pick(summed_spectrum, picked_spectrum); + std::cout << "Size of picked spectrum: " << picked_spectrum.size() << std::endl; + + + // ---step 4a: Extract ion mobility traces for each picked m/z peak + auto mobilogram_traces = extractIonMobilityTraces(picked_spectrum, spectrum); + + // --- compute optimal sampling rate from well-populated mobilograms in this frame. + // This is currently set to +20 peaks in a mobilogram. (Should this be a user parameter?) + + // Add a parameter to allow user to control sampling). Here we simply multiply by 4. + double sampling_rate = computeOptimalSamplingRate(mobilogram_traces) * 4; + Param resampler_param; + resampler_param.setValue("spacing", sampling_rate, "Spacing of the resampled output peaks."); + resampler_param.setValue("ppm", "false", "Whether spacing is in ppm or Th"); + + std::cout << "Using sampling rate... : " << sampling_rate << std::endl; + + + + // *************** PART II *************** + + // for each ion mobility trace, we process the raw signal, peak pick + // and recompute m/z and ion mobility centroid. + for (size_t i = 0; i < mobilogram_traces.size(); ++i) + { + std::cout << "Trace " << i << " contains " << mobilogram_traces[i].size() << " points in ion mobility space." << std::endl; + } + + std::vector picked_traces; + + for (size_t i = 0; i < mobilogram_traces.size(); ++i) + { + MSSpectrum& trace = mobilogram_traces[i]; + + std::cout << "\n--- Processing Trace " << i << " ---\n"; + std::cout << "Original trace has " << trace.size() << " peaks." << std::endl; + + // --- Step 1b: Sum peaks that are too close --- + MSSpectrum summed_trace = SumFrame(trace, 7000.0); // 7000 ppm tolerance (temporary. Change function to accept absolute number) + std::cout << "Trace after SumFrame has " << summed_trace.size() << " peaks." << std::endl; + + // Determine im boundaries of current mobilogram. Add 10 padding points (should this be a parameter?) + // If you do not pad the edges, peaks on the edge will have an odd shape and not be picked by PeakPickerHiRes! + double im_start = summed_trace.front().getMZ(); + double im_end = summed_trace.back().getMZ(); + + std::cout << "Original summed trace ion mobility range: [" << im_start << ", " << im_end << "]" << std::endl; + + Peak1D front_padding; + front_padding.setMZ(im_start - 10 * sampling_rate); + front_padding.setIntensity(0.0); + summed_trace.insert(summed_trace.begin(), front_padding); + + Peak1D back_padding; + back_padding.setMZ(im_end + 10 * sampling_rate); + back_padding.setIntensity(0.0); + summed_trace.push_back(back_padding); + + std::cout << "Padded summed trace im range: [" << summed_trace.front().getMZ() << ", " << summed_trace.back().getMZ() << "]" << std::endl; + + // --- Step 2b: Resample the trace --- + LinearResamplerAlign lin_resampler; + lin_resampler.setParameters(resampler_param); + + lin_resampler.raster(summed_trace); + std::cout << "Size of resampled trace: " << summed_trace.size() << " peaks." << std::endl; + + // --- Step 3b: Apply Gaussian Smoothing --- + /* + + std::cout << "Applying Gaussian smoothing..." << std::endl; + + GaussFilter gauss_filter; + Param gauss_params; + gauss_params.setValue("gaussian_width", 0.005); + gauss_params.setValue("use_ppm_tolerance", "false"); // or "true" for ppm-based width + + gauss_filter.setParameters(gauss_params); + gauss_filter.filter(summed_trace); + + std::cout << "Trace after Gaussian smoothing has " << summed_trace.size() << " peaks." << std::endl; + */ + + // --- Step 3b: Apply SGolay Smoothing --- + SavitzkyGolayFilter sgolay_filter; + Param sgolay_params; + + sgolay_params.setValue("frame_length", 15); + sgolay_params.setValue("polynomial_order", 3); + sgolay_filter.setParameters(sgolay_params); + + sgolay_filter.filter(summed_trace); + + std::cout << "Trace after Savitzky-Golay smoothing has " << summed_trace.size() << " peaks." << std::endl; + + for (const auto& peak : summed_trace) + { + std::cout << "m/z: " << peak.getMZ() << ", intensity: " << peak.getIntensity() << std::endl; + } + + // --- Step 4b: Apply PeakPickerHiRes --- + // We will use ion mobility peak FWHM to define min/max ion mobility boundaries. + // Revisit the raw traces and compute intensity weighted ion mobility centroids. + // The ion mobility traces also contains raw m/z peaks in FloatDataArrays. + // This makes it convenient to re-compute m/z centroid. + PeakPickerHiRes picker; + picker.setParameters(parameters_); + + MSSpectrum picked_trace; + picker.pick(summed_trace, picked_trace); + // populated picked_traces vector + picked_traces.push_back(picked_trace); + + std::cout << "Size of picked trace: " << picked_trace.size() << " peaks." << std::endl; + + std::cout << "--- Finished Processing Trace " << i << " ---\n\n"; + } + + // Recompute m/z centers and output centroided frame + MSSpectrum centroided_frame = ComputeCenters(mobilogram_traces, picked_traces); + std::cout << "--- Centroided frame has been generated. It has " << centroided_frame.size() << " --- peaks."; + + // Replace the input spectrum with the centroided result + spectrum = centroided_frame; + std::cout << "--- Spectrum final output object has .. " << spectrum.size() << " --- peaks."; + // Print peaks for debugging + for (const auto& peak : spectrum) + { + std::cout << "m/z: " << peak.getMZ() << ", intensity: " << peak.getIntensity() << std::endl; + } + + } +} // namespace OpenMS \ No newline at end of file diff --git a/src/openms/source/PROCESSING/CENTROIDING/sources.cmake b/src/openms/source/PROCESSING/CENTROIDING/sources.cmake index 2b711b6d653..128169e8225 100644 --- a/src/openms/source/PROCESSING/CENTROIDING/sources.cmake +++ b/src/openms/source/PROCESSING/CENTROIDING/sources.cmake @@ -5,6 +5,7 @@ set(directory source/PROCESSING/CENTROIDING) set(sources_list PeakPickerHiRes.cpp PeakPickerIterative.cpp +PeakPickerIM.cpp ) ### add path to the filenames diff --git a/src/pyOpenMS/pxds/MSSpectrum.pxd b/src/pyOpenMS/pxds/MSSpectrum.pxd index 7797164d224..801d457e9cd 100644 --- a/src/pyOpenMS/pxds/MSSpectrum.pxd +++ b/src/pyOpenMS/pxds/MSSpectrum.pxd @@ -85,9 +85,12 @@ cdef extern from "" namespace "OpenMS": void setDriftTime(double) except + nogil # wrap-doc:Sets the drift time (-1 if not set) DriftTimeUnit getDriftTimeUnit() except + nogil String getDriftTimeUnitAsString() except + nogil - void setDriftTimeUnit(DriftTimeUnit dt) except + nogil + void setDriftTimeUnit(DriftTimeUnit dt) except + nogil - bool containsIMData() except + nogil + IMFormat getIMType() except + nogil # wrap-doc:Returns the ion mobility format + void setIMType(IMFormat im_type) except + nogil # wrap-doc:Sets the ion mobility format + + bool containsIMData() except + nogil libcpp_pair[Size, DriftTimeUnit] getIMData() except + nogil # wrap-ignore wrap-doc:Returns position of ion mobility float data array and drift time unit unsigned int getMSLevel() except + nogil # wrap-doc:Returns the MS level diff --git a/src/pyOpenMS/pxds/PeakPickerIM.pxd b/src/pyOpenMS/pxds/PeakPickerIM.pxd new file mode 100644 index 00000000000..7600a0de9d6 --- /dev/null +++ b/src/pyOpenMS/pxds/PeakPickerIM.pxd @@ -0,0 +1,17 @@ +from libcpp cimport bool +from Types cimport * +from MSSpectrum cimport * + +cdef extern from "" namespace "OpenMS": + + cdef cppclass PeakPickerIM(DefaultParamHandler): + # wrap-doc: + # Peak picking algorithm for ion mobility data + + PeakPickerIM() except + nogil + PeakPickerIM(PeakPickerIM &) except + nogil + +cdef extern from "" namespace "OpenMS::PeakPickerIM": + + # static members + void pickIMTraces(MSSpectrum& s) except + nogil # wrap-attach:PeakPickerIM wrap-doc:use trace detection for IM peak picking. diff --git a/src/tests/class_tests/openms/executables.cmake b/src/tests/class_tests/openms/executables.cmake index ee14ae722f4..e8a4112800c 100644 --- a/src/tests/class_tests/openms/executables.cmake +++ b/src/tests/class_tests/openms/executables.cmake @@ -256,6 +256,7 @@ set(format_executables_list UnimodXMLFile_test XMassFile_test XMLFile_test + XMLHandler_test XMLValidator_test XQuestResultXMLFile_test XTandemInfile_test @@ -569,6 +570,8 @@ set(transformations_executables_list ModelDescription_test PeakPickerHiRes_test PeakPickerIterative_test + PeakPickerIM_test + PeakPickerIM_test_2 PeakWidthEstimator_test SeedListGenerator_test TraceFitter_test diff --git a/src/tests/class_tests/openms/source/IMTypes_test.cpp b/src/tests/class_tests/openms/source/IMTypes_test.cpp index 86069f396e1..5258279d4de 100644 --- a/src/tests/class_tests/openms/source/IMTypes_test.cpp +++ b/src/tests/class_tests/openms/source/IMTypes_test.cpp @@ -126,14 +126,13 @@ START_SECTION(static IMFormat determineIMFormat(const MSExperiment& exp)) } { - // set both ... invalid! + // set both ... is valid (typically concatenated + some average value) auto IMwithFDA2 = IMwithFDA; IMwithFDA2.setDriftTime(123.4); MSExperiment exp; exp.addSpectrum(IMwithDrift); exp.addSpectrum(IMwithFDA); exp.addSpectrum(IMwithFDA2); - TEST_EXCEPTION(Exception::InvalidValue, IMTypes::determineIMFormat(exp)) } END_SECTION @@ -147,11 +146,9 @@ START_SECTION(static IMFormat determineIMFormat(const MSSpectrum& spec)) // convert to IM-Frame with float meta-data array TEST_EQUAL(IMTypes::determineIMFormat(IMwithFDA) == IMFormat::CONCATENATED, true) - // set both ... invalid! + // set both ... is valid (typically concatenated + some average value) auto IMwithFDA2 = IMwithFDA; IMwithFDA2.setDriftTime(123.4); - TEST_EXCEPTION(Exception::InvalidValue, IMTypes::determineIMFormat(IMwithFDA2)) - END_SECTION ///////////////////////////////////////////////////////////// diff --git a/src/tests/class_tests/openms/source/MSSpectrum_test.cpp b/src/tests/class_tests/openms/source/MSSpectrum_test.cpp index 7739259ce17..567af33010d 100644 --- a/src/tests/class_tests/openms/source/MSSpectrum_test.cpp +++ b/src/tests/class_tests/openms/source/MSSpectrum_test.cpp @@ -1391,6 +1391,14 @@ START_SECTION(PeakType::IntensityType calculateTIC() const) END_SECTION +START_SECTION(void setIMFormat(IMFormat imf)) +{ + // test invalid format validation + MSSpectrum spec; + TEST_EXCEPTION(Exception::InvalidValue, spec.setIMFormat(IMFormat::MIXED)); // this should trigger the validation check because a single spectrum can't be mixed +} +END_SECTION + START_SECTION(void clear(bool clear_meta_data)) { MSSpectrum edit; diff --git a/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp b/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp new file mode 100644 index 00000000000..39afa73d7a0 --- /dev/null +++ b/src/tests/class_tests/openms/source/PeakPickerIM_test.cpp @@ -0,0 +1,88 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Timo Sachsenberg $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#include +#include +#include + +/////////////////////////// +#include +/////////////////////////// + +using namespace OpenMS; +using namespace std; + +START_TEST(PeakPickerIM, "$Id$") + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// + +PeakPickerIM* ptr = nullptr; +PeakPickerIM* nullPointer = nullptr; + +START_SECTION((PeakPickerIM())) + ptr = new PeakPickerIM(); + TEST_NOT_EQUAL(ptr, nullPointer) +END_SECTION + +START_SECTION((virtual ~PeakPickerIM())) + delete ptr; +END_SECTION + + +// create dummy ion mobility spectrum +MSSpectrum input; +{ + const size_t points_per_im = 10; + + // For each ion mobility value (1.0 to 10.0) + for (double im = 1.0; im <= 10.0; im += 1.0) + { + double baseIntensity = (im <= 5.0) ? + 200.0 + ((im - 1.0) * 200.0) : // Increasing intensity from IM 1.0 to 5.0 + 1000.0 - ((im - 5.0) * 200.0); // Decreasing intensity from IM 6.0 to 10.0 + + // Create 10 data points for each ion mobility value + for (size_t i = 0; i < points_per_im; i++) + { + double mz = 100.0 + (i * 0.01); + double intensity = baseIntensity + (i * 20.0); + input.emplace_back(mz, intensity); + } + } + + // Set up the ion mobility array + input.getFloatDataArrays().resize(1); + input.getFloatDataArrays()[0].setName("Ion Mobility"); + + // Add ion mobility values (each repeated 10 times) + for (double im = 1.0; im <= 10.0; im += 1.0) + { + for (size_t i = 0; i < points_per_im; i++) + { + input.getFloatDataArrays()[0].push_back(im); + } + } +} + +START_SECTION(void pickIMTraces(MSSpectrum& spectrum)) +{ + PeakPickerIM pp_im; + pp_im.pickIMTraces(input); + // TODO adapt + TEST_EQUAL(input.size(), 10) + TEST_REAL_SIMILAR(input[0].getIntensity(), 450) + TEST_REAL_SIMILAR(input[0].getMZ(), 100.02) + + TEST_EQUAL(input.getFloatDataArrays().size(), 1) + TEST_EQUAL(input.getFloatDataArrays()[0].getName(), "Ion Mobility") + TEST_REAL_SIMILAR(input.getFloatDataArrays()[0][0], 150.0) +} +END_SECTION + +END_TEST \ No newline at end of file diff --git a/src/tests/class_tests/openms/source/PeakPickerIM_test_2.cpp b/src/tests/class_tests/openms/source/PeakPickerIM_test_2.cpp new file mode 100644 index 00000000000..6a996c030b4 --- /dev/null +++ b/src/tests/class_tests/openms/source/PeakPickerIM_test_2.cpp @@ -0,0 +1,98 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Timo Sachsenberg, Mohammed Alhigaylan $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#include +#include +#include + +/////////////////////////// +#include +/////////////////////////// + +using namespace OpenMS; +using namespace std; + +START_TEST(PeakPickerIM, "$Id$") + +///////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////// + +PeakPickerIM* ptr = nullptr; +PeakPickerIM* nullPointer = nullptr; + +START_SECTION((PeakPickerIM())) + ptr = new PeakPickerIM(); + TEST_NOT_EQUAL(ptr, nullPointer) +END_SECTION + +START_SECTION((virtual ~PeakPickerIM())) + delete ptr; +END_SECTION + + +// create dummy ion mobility spectrum +MSSpectrum input; +{ + double start_mz = 0.8; + double end_mz = 1.2; + double step = 0.01; + double max_intensity = 1000.0; // Maximum intensity at m/z = 1.0 + + for (double mz = start_mz; mz <= end_mz; mz += step) + { + double intensity = 0.0; + if (mz <= 1.0) + { + // Linearly increase intensity from start_mz to 1.0 + intensity = ((mz - start_mz) / (1.0 - start_mz)) * max_intensity; + } + else + { + // Linearly decrease intensity from 1.0 to end_mz + intensity = ((end_mz - mz) / (end_mz - 1.0)) * max_intensity; + } + input.emplace_back(mz, intensity); + } + // Set the IM format to CONCATENATED to force the peak picking branch + input.setIMFormat(IMFormat::CONCATENATED); +// input.getFloatDataArrays().resize(1); +// input.getFloatDataArrays()[0].setName("Ion Mobility"); +} + + +START_SECTION(void pickIMTraces(MSSpectrum& spectrum)) +{ + PeakPickerIM pp_im; + // print all peaks in our current input. + std::cout << "start printing dummy spectrum BEFORE picking! " << std::endl; + for (const auto& peak : input) + { + std::cout << "m/z: " << peak.getMZ() + << ", intensity: " << peak.getIntensity() << std::endl; + } + + pp_im.pickIMTraces(input); + std::cout << "start printing dummy spectrum AFTER picking! " << std::endl; + for (const auto& peak : input) + { + std::cout << "m/z: " << peak.getMZ() + << ", intensity: " << peak.getIntensity() << std::endl; + } + + // TODO adapt + TEST_EQUAL(input.size(), 10) + TEST_REAL_SIMILAR(input[0].getIntensity(), 450) + TEST_REAL_SIMILAR(input[0].getMZ(), 100.02) + +// TEST_EQUAL(input.getFloatDataArrays().size(), 1) + // TEST_EQUAL(input.getFloatDataArrays()[0].getName(), "Ion Mobility") + // TEST_REAL_SIMILAR(input.getFloatDataArrays()[0][0], 150.0) +} +END_SECTION + +END_TEST \ No newline at end of file diff --git a/src/tests/class_tests/openms/source/XMLHandler_test.cpp b/src/tests/class_tests/openms/source/XMLHandler_test.cpp new file mode 100644 index 00000000000..b55c6ad27d7 --- /dev/null +++ b/src/tests/class_tests/openms/source/XMLHandler_test.cpp @@ -0,0 +1,149 @@ + +#include +#include +#include +#include +#include + +#include + +using namespace OpenMS::Internal; + + + + + +START_TEST(StringManager, "$Id$") + + +const XMLCh russianHello[] = { + 0x041F, 0x0440, 0x0438, 0x0432, 0x0435, 0x0442, 0x043C, + 0x0438, 0x0440, // "Привет мир" (Hello World in Russian) +}; +XMLSize_t r_length = xercesc::XMLString::stringLen(russianHello); + +const XMLCh ascii[] = { + 0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F, + 0x0072,0x006C,0x0064,0x0021, 0x0000}; +XMLSize_t a_length = xercesc::XMLString::stringLen(ascii); + +const XMLCh mixed[] = { + 0x0048, 0x0065,0x0432, 0x0435, 0x0442, 0x043C, 0x006F, + 0x0072,0x006C,0x0064, 0x0021, 0x0000 }; +XMLSize_t m_length = xercesc::XMLString::stringLen(mixed); + +const XMLCh empty[] = {0}; +XMLSize_t e_length = xercesc::XMLString::stringLen(empty); + +const XMLCh upperBoundary [] = {0x00FF,0x00FF,0x0000}; +XMLSize_t u_length = xercesc::XMLString::stringLen(upperBoundary); + +bool isAscii = false; + +START_SECTION(isASCII(const XMLCh * chars, const XMLSize_t length)) + isAscii = StringManager::isASCII(ascii,a_length); + std::cout << "1 \n"; + TEST_TRUE(isAscii) + isAscii = StringManager::isASCII(russianHello,r_length); + std::cout << "2 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(mixed,m_length); + std::cout << "3 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(empty,e_length); + std::cout << "4 \n"; + TEST_FALSE(isAscii) + isAscii = StringManager::isASCII(upperBoundary,u_length); + std::cout << "5 \n"; + TEST_TRUE(isAscii) +END_SECTION + +const XMLCh eight_block_negative[] = {0x0148,0x0165,0x016C,0x016C,0x016F,0x012C,0x0157,0x016F}; + +const XMLCh eight_block[] = {0x0048,0x0065,0x006C,0x006C,0x006F,0x002C,0x0057,0x006F}; + +const XMLCh eight_block_mixed[] ={0x0042,0x0045,0x004C,0x0041,0x0142,0x0145,0x014C,0x0141}; + +const XMLCh eight_block_kadabra[] = { + 0x004B, // K + 0x0041, // A + 0x0044, // D + 0x0041, // A + 0x0042, // B + 0x0052, // R + 0x0041, // A + 0x0021 // ! +}; + +START_SECTION(compress64 (const XMLCh* input_it, char* output_it)) + std::string o1_str(8,'\0'); + StringManager::compress64(eight_block,o1_str.data()); + std::string res1_str = "Hello,Wo"; + TEST_STRING_EQUAL(o1_str,res1_str); + + + std::string o2_str(8,'\0'); + StringManager::compress64(eight_block_negative,o2_str.data()); + std::string res2_str = res1_str; + TEST_STRING_EQUAL(o2_str, res2_str); + + + std::string o3_str(8,'\0'); + // char res3 [9] = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; + // res3[8] = '\0'; + StringManager::compress64(eight_block_mixed,o3_str.data()); + std::string res3_str = {0x42,0x45,0x4C,0x41,0x42,0x45,0x4C,0x41}; + TEST_STRING_EQUAL(o3_str, res3_str); + + std::string o4_str(12,'\0'); + o4_str [0] ='A'; + o4_str [1] ='B'; + o4_str [2] ='R'; + o4_str [3] ='A'; + + StringManager::compress64(eight_block_kadabra,((o4_str.data())+4)); + std::string res4_str = "ABRAKADABRA!"; + TEST_STRING_EQUAL(o4_str, res4_str); + +END_SECTION + +//Tests Number of Chars not Dividable by 8 +OpenMS::String o5_str; +std::string res5_str = "Hello,World!"; + +//Checks how the Function handles Data thats already stored in Output string +OpenMS::String o6_str = "Gruess Gott und "; +std::string res6_str = "Gruess Gott und Hello,World!"; + +OpenMS::String o7_str; +std::string res7_str = ""; + + +START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) + + StringManager::appendASCII(ascii,a_length,o5_str); + TEST_STRING_EQUAL(o5_str, res5_str); + + StringManager::appendASCII(ascii,a_length,o6_str); + TEST_STRING_EQUAL(o6_str, res6_str); + + StringManager::appendASCII(empty,e_length,o7_str); + TEST_STRING_EQUAL(o7_str, res7_str); + std::cout << o7_str.size() << std::endl; + +END_SECTION + +START_SECTION(appendASCII(const XMLCh * chars, const XMLSize_t length, String & result)) + int o_length = StringManager::strLength(ascii); + TEST_EQUAL(o_length, a_length); + o_length = StringManager::strLength(empty); + TEST_EQUAL(o_length, e_length); + o_length = StringManager::strLength(upperBoundary); + TEST_EQUAL(o_length, u_length); +END_SECTION + +END_TEST + + + + diff --git a/src/topp/PeakPickerIM.cpp b/src/topp/PeakPickerIM.cpp new file mode 100644 index 00000000000..e751d1b089a --- /dev/null +++ b/src/topp/PeakPickerIM.cpp @@ -0,0 +1,69 @@ +// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin +// SPDX-License-Identifier: BSD-3-Clause +// +// -------------------------------------------------------------------------- +// $Author: Mohammed Alhigaylan $ +// $Maintainer: Timo Sachsenberg $ +// ------------------------------------------------------------------------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include + +using namespace OpenMS; +using namespace std; + +class TOPPPeakPickerIM : public TOPPBase +{ +public: + TOPPPeakPickerIM() : TOPPBase("PeakPickerIM", "Applies PeakPickerIM to an mzML file", false) {} + +protected: + void registerOptionsAndFlags_() override + { + registerInputFile_("in", "", "", "Input mzML file"); + setValidFormats_("in", { "mzML" }); + + registerOutputFile_("out", "", "", "Output mzML file"); + setValidFormats_("out", { "mzML" }); + } + + ExitCodes main_(int, const char**) override + { + // Get input and output file paths + String input_file = getStringOption_("in"); + String output_file = getStringOption_("out"); + + // Load input mzML file + PeakMap exp; + MzMLFile mzml; + mzml.load(input_file, exp); + + // Process each spectrum with PeakPickerIM + PeakPickerIM picker; + PeakMap processed_exp; + for (MSSpectrum& spectrum : exp) + { + std::cout << "Processing spectrum with " << spectrum.size() << " peaks." << std::endl; + picker.pickIMTraces(spectrum); + processed_exp.addSpectrum(spectrum); + std::cout << "Processed spectrum has " << spectrum.size() << " peaks." << std::endl; + } +// processed_exp.setExperimentalSettings(exp); + // Save output mzML file + mzml.store(output_file, processed_exp); + + return EXECUTION_OK; + } +}; + +int main(int argc, const char** argv) +{ + TOPPPeakPickerIM tool; + return tool.main(argc, argv); +} + diff --git a/src/topp/executables.cmake b/src/topp/executables.cmake index a56d472c322..2d561b7d2b7 100644 --- a/src/topp/executables.cmake +++ b/src/topp/executables.cmake @@ -100,6 +100,7 @@ OpenSwathFeatureXMLToTSV OpenSwathRTNormalizer PeakPickerHiRes PeakPickerIterative +PeakPickerIM PeptideIndexer PercolatorAdapter PhosphoScoring