From 2c283323671afa61734e219641250ed50de16487 Mon Sep 17 00:00:00 2001 From: Tristan Date: Thu, 4 May 2023 22:17:30 +0200 Subject: [PATCH 01/24] Push MTD --- .../DATAREDUCTION/MassTraceDetection.h | 11 +- .../DATAREDUCTION/MassTraceDetection.cpp | 548 +++++++++++------- 2 files changed, 350 insertions(+), 209 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index 65f004ac46a..dce1b3e9a6f 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -118,6 +118,15 @@ namespace OpenMS std::vector & found_masstraces, const Size max_traces = 0); + // Find Offset for Peak + double find_offset_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec); + + // calc_right_border_ + Size calc_right_border_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec); + + // calc_left_border_ + Size calc_left_border_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec); + // parameter stuff double mass_error_ppm_; double mass_error_da_; @@ -133,4 +142,4 @@ namespace OpenMS bool reestimate_mt_sd_; }; -} +} \ No newline at end of file diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index dc841ae99f5..ba51bc56d5e 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -38,6 +38,13 @@ #include +#include +// #include +// #include +#include + +#include + namespace OpenMS { MassTraceDetection::MassTraceDetection() : @@ -193,6 +200,7 @@ namespace OpenMS void MassTraceDetection::run(const PeakMap& input_exp, std::vector& found_masstraces, const Size max_traces) { + // make sure the output vector is empty found_masstraces.clear(); @@ -251,11 +259,12 @@ namespace OpenMS // discard last spectrum's offset spec_offsets.pop_back(); + //hatten wir mal parallelisiert std::sort(chrom_apices.begin(), chrom_apices.end(), - [](const Apex & a, + [&work_exp](const Apex & a, const Apex & b) -> bool - { - return a.intensity < b.intensity; + { + return (work_exp[a.scan_idx][a.peak_idx].getMZ()) < (work_exp[b.scan_idx][b.peak_idx].getMZ()); }); // ********************************************************************* @@ -267,6 +276,40 @@ namespace OpenMS return; } // end of MassTraceDetection::run + double MassTraceDetection::find_offset_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec) + { + double centroid_mz = input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ(); //create function + // double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); + double ftl_sd = Math::ppmToMass(mass_error_ppm_,centroid_mz); // mit formel ergänzen header ppm to dalton + double offset = 3 * ftl_sd; + return offset; + } + + Size MassTraceDetection::calc_right_border_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec) + { + double right_bound = (input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ()) + (3 /* border gerade 3 mal so groß wie "noetig" */ * find_offset_(peak_index_in_apices_vec, mass_error_ppm_, input_exp, apices_vec)); + Size j{}; + while (input_exp[apices_vec[peak_index_in_apices_vec + j].scan_idx][apices_vec[peak_index_in_apices_vec + j].peak_idx].getMZ() <= right_bound) + { + if(peak_index_in_apices_vec + j >= apices_vec.size()) break; + ++j; + } + return peak_index_in_apices_vec + j; + } + + Size MassTraceDetection::calc_left_border_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec) + { + double left_bound = (input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ()) -( 3 /* border gerade 3 mal so groß wie "noetig" */ * find_offset_(peak_index_in_apices_vec, mass_error_ppm_, input_exp, apices_vec)); + Size j{}; + while (input_exp[apices_vec[peak_index_in_apices_vec - j].scan_idx][apices_vec[peak_index_in_apices_vec - j].peak_idx].getMZ() >= left_bound) + { + if(peak_index_in_apices_vec - j == 0) break; + ++j; + } + return peak_index_in_apices_vec - j; + } + + void MassTraceDetection::run_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, @@ -274,7 +317,7 @@ namespace OpenMS std::vector& found_masstraces, const Size max_traces) { - boost::dynamic_bitset<> peak_visited(total_peak_count); + // boost::dynamic_bitset<> peak_visited_1(total_peak_count); Size trace_number(1); // check presence of FWHM meta data @@ -303,271 +346,360 @@ namespace OpenMS this->startProgress(0, total_peak_count, "mass trace detection"); Size peaks_detected(0); - for (auto m_it = chrom_apices.crbegin(); m_it != chrom_apices.crend(); ++m_it) + + // Size binnumber{1}; + Size binnumber = omp_get_max_threads(); + + Size binsize = chrom_apices.size()/binnumber; + Size bin_tmp_start{0}; + std::vector binstarts; + std::vector binends; + std::vector cutoff_mz; + + if(binnumber > 1) { - Size apex_scan_idx(m_it->scan_idx); - Size apex_peak_idx(m_it->peak_idx); + binstarts.push_back(0); + cutoff_mz.push_back(0); + bin_tmp_start += binsize; + binends.push_back(calc_right_border_(bin_tmp_start-1, mass_error_ppm_, work_exp, chrom_apices)); + cutoff_mz.push_back(bin_tmp_start-1); - if (peak_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) + for(Size i = 1; i < binnumber - 1; ++i) { - continue; + + binstarts.push_back(calc_left_border_(bin_tmp_start-1, mass_error_ppm_, work_exp, chrom_apices)); + bin_tmp_start += binsize; + cutoff_mz.push_back(bin_tmp_start-1); + // std::cout << "Index: " << bin_tmp_start << '\n'; + binends.push_back(calc_right_border_(bin_tmp_start-1, mass_error_ppm_, work_exp, chrom_apices)); + } - Peak2D apex_peak; - apex_peak.setRT(work_exp[apex_scan_idx].getRT()); - apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); - apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); + binstarts.push_back(calc_left_border_(bin_tmp_start-1, mass_error_ppm_, work_exp, chrom_apices)); + binends.push_back(chrom_apices.size()); + cutoff_mz.push_back(chrom_apices.size()-1); + } + else + { + binstarts.push_back(0); + binends.push_back(chrom_apices.size()); + cutoff_mz.push_back(0); + cutoff_mz.push_back(chrom_apices.size()-1); + } - Size trace_up_idx(apex_scan_idx); - Size trace_down_idx(apex_scan_idx); + // for(int i=0; i < binstarts.size(); ++i) + // { + // std::cout << "Start: " << binstarts[i] << " End: " << binends[i] << '\n'; + // } - std::list current_trace; - current_trace.push_back(apex_peak); - std::vector fwhms_mz; // peak-FWHM meta values of collected peaks - // Initialization for the iterative version of weighted m/z mean calculation - double centroid_mz(apex_peak.getMZ()); - double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ()); - double prev_denom(apex_peak.getIntensity()); + #pragma omp parallel for num_threads(binnumber) + for (Size i = 0; i < binnumber; ++i) + { + boost::dynamic_bitset<> peak_visited_1(total_peak_count); + std::vector chrom_apices_2 = {chrom_apices.cbegin() + binstarts[i], chrom_apices.cbegin() + binends[i]}; // copies because of bin overlap + std::sort(chrom_apices_2.begin(), chrom_apices_2.end(), // sort by intensity + [](const Apex & a, const Apex & b) -> bool + { + return a.intensity > b.intensity; + }); + for (auto m_it = chrom_apices_2.cbegin(); m_it != chrom_apices_2.cend(); ++m_it) // iterate reverse from high intensity to low intensity + { + // bool outer_loop = false; - updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); + // auto endpeak = chrom_apices.cbegin() + binends[i] - 1; + // Size end_scan_idx(endpeak->scan_idx); + // Size end_peak_idx(endpeak->peak_idx); + // double end_peak_mz = work_exp[end_scan_idx][end_peak_idx].getMZ(); - std::vector > gathered_idx; - gathered_idx.emplace_back(apex_scan_idx, apex_peak_idx); - if (fwhm_meta_idx != -1) - { - fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); - } + // auto firstpeak = chrom_apices.cbegin() + binstarts[i]; + // Size first_scan_idx(firstpeak->scan_idx); + // Size first_peak_idx(firstpeak->peak_idx); + // double first_peak_mz = work_exp[first_scan_idx][first_peak_idx].getMZ(); - Size up_hitting_peak(0), down_hitting_peak(0); - Size up_scan_counter(0), down_scan_counter(0); - bool toggle_up = true, toggle_down = true; - Size conseq_missed_peak_up(0), conseq_missed_peak_down(0); - Size max_consecutive_missing(trace_termination_outliers_); + Size apex_scan_idx(m_it->scan_idx); + Size apex_peak_idx(m_it->peak_idx); - double current_sample_rate(1.0); - // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); - Size min_scans_to_consider(5); + if (peak_visited_1[spec_offsets[apex_scan_idx] + apex_peak_idx]) + { + continue; + } - // double outlier_ratio(0.3); + Peak2D apex_peak; + apex_peak.setRT(work_exp[apex_scan_idx].getRT()); + apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); + apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); - // double ftl_mean(centroid_mz); - double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); - double intensity_so_far(apex_peak.getIntensity()); + Size trace_up_idx(apex_scan_idx); + Size trace_down_idx(apex_scan_idx); - while (((trace_down_idx > 0) && toggle_down) || - ((trace_up_idx < work_exp.size() - 1) && toggle_up) - ) - { - // *********************************************************** // - // Step 2.1 MOVE DOWN in RT dim - // *********************************************************** // - if ((trace_down_idx > 0) && toggle_down) + std::list current_trace; + double startint = work_exp[apex_scan_idx][apex_peak_idx].getIntensity(); + current_trace.push_back(apex_peak); + std::vector fwhms_mz; // peak-FWHM meta values of collected peaks + + // Initialization for the iterative version of weighted m/z mean calculation + double centroid_mz(apex_peak.getMZ()); + double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ()); + double prev_denom(apex_peak.getIntensity()); + + updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); + + std::vector > gathered_idx; + gathered_idx.emplace_back(apex_scan_idx, apex_peak_idx); + if (fwhm_meta_idx != -1) { - const MSSpectrum& spec_trace_down = work_exp[trace_down_idx - 1]; - if (!spec_trace_down.empty()) - { - Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); - double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ(); - double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); + fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); + } + + Size up_hitting_peak(0), down_hitting_peak(0); + Size up_scan_counter(0), down_scan_counter(0); + + bool toggle_up = true, toggle_down = true; + + Size conseq_missed_peak_up(0), conseq_missed_peak_down(0); + Size max_consecutive_missing(trace_termination_outliers_); - double right_bound = centroid_mz + 3 * ftl_sd; - double left_bound = centroid_mz - 3 * ftl_sd; + double current_sample_rate(1.0); + // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); + Size min_scans_to_consider(5); - if ((next_down_peak_mz <= right_bound) && - (next_down_peak_mz >= left_bound) && - !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx] - ) + // double outlier_ratio(0.3); + + // double ftl_mean(centroid_mz); + // double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); + double ftl_sd(Math::ppmToMass(mass_error_ppm_, centroid_mz)); + double intensity_so_far(apex_peak.getIntensity()); + + while (((trace_down_idx > 0) && toggle_down) || + ((trace_up_idx < work_exp.size() - 1) && toggle_up) + ) + { + // *********************************************************** // + // Step 2.1 MOVE DOWN in RT dim + // *********************************************************** // + if ((trace_down_idx > 0) && toggle_down) + { + const MSSpectrum& spec_trace_down = work_exp[trace_down_idx - 1]; + if (!spec_trace_down.empty()) { - Peak2D next_peak; - next_peak.setRT(spec_trace_down.getRT()); - next_peak.setMZ(next_down_peak_mz); - next_peak.setIntensity(next_down_peak_int); - - current_trace.push_front(next_peak); - // FWHM average - if (fwhm_meta_idx != -1) + Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); + double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ(); + double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); + double right_bound = centroid_mz + 3 * ftl_sd; + double left_bound = centroid_mz - 3 * ftl_sd; + + if ((next_down_peak_mz <= right_bound) && + (next_down_peak_mz >= left_bound) && + !peak_visited_1[spec_offsets[trace_down_idx - 1] + next_down_peak_idx] + ) { - fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); - } - // Update the m/z mean of the current trace as we added a new peak - updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); - gathered_idx.emplace_back(trace_down_idx - 1, next_down_peak_idx); + if(next_down_peak_int > startint) + { + std::cout << "Down Bigger than start\n"; + } - // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) - { - // if (ftl_t > min_fwhm_scans) + Peak2D next_peak; + next_peak.setRT(spec_trace_down.getRT()); + next_peak.setMZ(next_down_peak_mz); + next_peak.setIntensity(next_down_peak_int); + + current_trace.push_front(next_peak); + // FWHM average + if (fwhm_meta_idx != -1) + { + fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); + } + // Update the m/z mean of the current trace as we added a new peak + updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); + gathered_idx.emplace_back(trace_down_idx - 1, next_down_peak_idx); + + // Update the m/z variance dynamically + if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) { - updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); + // if (ftl_t > min_fwhm_scans) + { + updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); + } } + + ++down_hitting_peak; + conseq_missed_peak_down = 0; + } + else + { + ++conseq_missed_peak_down; } - ++down_hitting_peak; - conseq_missed_peak_down = 0; } - else - { - ++conseq_missed_peak_down; - } - - } - --trace_down_idx; - ++down_scan_counter; + --trace_down_idx; + ++down_scan_counter; - // trace termination criterion: max allowed number of - // consecutive outliers reached OR cancel extension if - // sampling_rate falls below min_sample_rate_ - if (trace_termination_criterion_ == "outlier") - { - if (conseq_missed_peak_down > max_consecutive_missing) + // trace termination criterion: max allowed number of + // consecutive outliers reached OR cancel extension if + // sampling_rate falls below min_sample_rate_ + if (trace_termination_criterion_ == "outlier") { - toggle_down = false; + if (conseq_missed_peak_down > max_consecutive_missing) + { + toggle_down = false; + } } - } - else if (trace_termination_criterion_ == "sample_rate") - { - current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / - (double)(down_scan_counter + up_scan_counter + 1); - if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) + else if (trace_termination_criterion_ == "sample_rate") { - // std::cout << "stopping down..." << std::endl; - toggle_down = false; + current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / + (double)(down_scan_counter + up_scan_counter + 1); + if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) + { + // std::cout << "stopping down..." << std::endl; + toggle_down = false; + } } } - } - // *********************************************************** // - // Step 2.2 MOVE UP in RT dim - // *********************************************************** // - if ((trace_up_idx < work_exp.size() - 1) && toggle_up) - { - const MSSpectrum& spec_trace_up = work_exp[trace_up_idx + 1]; - if (!spec_trace_up.empty()) + // *********************************************************** // + // Step 2.2 MOVE UP in RT dim + // *********************************************************** // + if ((trace_up_idx < work_exp.size() - 1) && toggle_up) { - Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); - double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ(); - double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity(); - - double right_bound = centroid_mz + 3 * ftl_sd; - double left_bound = centroid_mz - 3 * ftl_sd; - - if ((next_up_peak_mz <= right_bound) && - (next_up_peak_mz >= left_bound) && - !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) + const MSSpectrum& spec_trace_up = work_exp[trace_up_idx + 1]; + if (!spec_trace_up.empty()) { - Peak2D next_peak; - next_peak.setRT(spec_trace_up.getRT()); - next_peak.setMZ(next_up_peak_mz); - next_peak.setIntensity(next_up_peak_int); + Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); + double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ(); + double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity(); - current_trace.push_back(next_peak); - if (fwhm_meta_idx != -1) - { - fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); - } - // Update the m/z mean of the current trace as we added a new peak - updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); - gathered_idx.emplace_back(trace_up_idx + 1, next_up_peak_idx); + double right_bound = centroid_mz + 3 * ftl_sd; + double left_bound = centroid_mz - 3 * ftl_sd; - // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) + if ((next_up_peak_mz <= right_bound) && + (next_up_peak_mz >= left_bound) && + !peak_visited_1[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { - // if (ftl_t > min_fwhm_scans) + if(next_up_peak_int > startint) { - updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); + std::cout << "Up Bigger than start\n"; } - } + Peak2D next_peak; + next_peak.setRT(spec_trace_up.getRT()); + next_peak.setMZ(next_up_peak_mz); + next_peak.setIntensity(next_up_peak_int); - ++up_hitting_peak; - conseq_missed_peak_up = 0; + current_trace.push_back(next_peak); + if (fwhm_meta_idx != -1) + { + fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); + } + // Update the m/z mean of the current trace as we added a new peak + updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); + gathered_idx.emplace_back(trace_up_idx + 1, next_up_peak_idx); - } - else - { - ++conseq_missed_peak_up; - } + // Update the m/z variance dynamically + if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) + { + // if (ftl_t > min_fwhm_scans) + { + updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); + } + } - } + ++up_hitting_peak; + conseq_missed_peak_up = 0; - ++trace_up_idx; - ++up_scan_counter; + } + else + { + ++conseq_missed_peak_up; + } - if (trace_termination_criterion_ == "outlier") - { - if (conseq_missed_peak_up > max_consecutive_missing) - { - toggle_up = false; } - } - else if (trace_termination_criterion_ == "sample_rate") - { - current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); - if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) + ++trace_up_idx; + ++up_scan_counter; + + if (trace_termination_criterion_ == "outlier") { - // std::cout << "stopping up" << std::endl; - toggle_up = false; + if (conseq_missed_peak_up > max_consecutive_missing) + { + toggle_up = false; + } } - } - + else if (trace_termination_criterion_ == "sample_rate") + { + current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); + if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) + { + // std::cout << "stopping up" << std::endl; + toggle_up = false; + } + } + } } - } + // if(outer_loop) continue; + double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); - // std::cout << "current sr: " << current_sample_rate << std::endl; - double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); + double mt_quality((double)current_trace.size() / (double)num_scans); + // std::cout << "mt quality: " << mt_quality << std::endl; + double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); - double mt_quality((double)current_trace.size() / (double)num_scans); - // std::cout << "mt quality: " << mt_quality << std::endl; - double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); - - // *********************************************************** // - // Step 2.3 check if minimum length and quality of mass trace criteria are met - // *********************************************************** // - bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); - if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) - { - // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl; - - // mark all peaks as visited - for (Size i = 0; i < gathered_idx.size(); ++i) - { - peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; - } - - // create new MassTrace object and store collected peaks from list current_trace - MassTrace new_trace(current_trace); - new_trace.updateWeightedMeanRT(); - new_trace.updateWeightedMeanMZ(); - if (!fwhms_mz.empty()) + // *********************************************************** // + // Step 2.3 check if minimum length and quality of mass trace criteria are met + // *********************************************************** // + bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); + if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) { - new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); - } - new_trace.setQuantMethod(quant_method_); - //new_trace.setCentroidSD(ftl_sd); - new_trace.updateWeightedMZsd(); - new_trace.setLabel("T" + String(trace_number)); - ++trace_number; + // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl; - found_masstraces.push_back(new_trace); - - peaks_detected += new_trace.getSize(); - this->setProgress(peaks_detected); + // mark all peaks as visited + for (Size i = 0; i < gathered_idx.size(); ++i) + { + peak_visited_1[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + } - // check if we already reached the (optional) maximum number of traces - if (max_traces > 0 && found_masstraces.size() == max_traces) - { - break; + // create new MassTrace object and store collected peaks from list current_trace + MassTrace new_trace(current_trace); + new_trace.updateWeightedMeanRT(); + new_trace.updateWeightedMeanMZ(); + if (!fwhms_mz.empty()) + { + new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); + } + new_trace.setQuantMethod(quant_method_); + //new_trace.setCentroidSD(ftl_sd); + new_trace.updateWeightedMZsd(); + + + #pragma omp critical (add_trace) + { + // Size leftbound_index = binsize * i; + // Size rightbound_index = binsize * (i+1); + Size leftbound_index = cutoff_mz[i]; + Size rightbound_index = cutoff_mz[i+1]; + double leftbound_mz = work_exp[chrom_apices[leftbound_index].scan_idx][chrom_apices[leftbound_index].peak_idx].getMZ(); + double rightbound_mz = work_exp[chrom_apices[rightbound_index].scan_idx][chrom_apices[rightbound_index].peak_idx].getMZ(); + if(new_trace.getCentroidMZ() >= leftbound_mz && + new_trace.getCentroidMZ() < rightbound_mz) + { + new_trace.setLabel("T" + String(trace_number)); + ++trace_number; + found_masstraces.push_back(new_trace); + peaks_detected += new_trace.getSize(); + this->setProgress(peaks_detected); + } + } + // check if we already reached the (optional) maximum number of traces + if (max_traces > 0 && found_masstraces.size() == max_traces) + { + break; + } } - } + } } - this->endProgress(); - } void MassTraceDetection::updateMembers_() @@ -585,4 +717,4 @@ namespace OpenMS reestimate_mt_sd_ = param_.getValue("reestimate_mt_sd").toBool(); } -} +} \ No newline at end of file From 54fcc16c84bcae42fe99e2d0fd21d4ac1497f611 Mon Sep 17 00:00:00 2001 From: Tristan Date: Mon, 15 May 2023 15:51:56 +0200 Subject: [PATCH 02/24] Deque for MassTrace --- src/openms/include/OpenMS/KERNEL/MassTrace.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/openms/include/OpenMS/KERNEL/MassTrace.h b/src/openms/include/OpenMS/KERNEL/MassTrace.h index 9110e93e277..510d6410f4d 100644 --- a/src/openms/include/OpenMS/KERNEL/MassTrace.h +++ b/src/openms/include/OpenMS/KERNEL/MassTrace.h @@ -88,6 +88,15 @@ namespace OpenMS /// Detailed constructor for vector MassTrace(const std::vector& trace_peaks); + /// Constructor for typr T with move iterator + template + MassTrace(InputIterator begin, InputIterator end) : + trace_peaks_(std::make_move_iterator(begin), std::make_move_iterator(end)), + label_(), + smoothed_intensities_() + { + } + /// Destructor ~MassTrace() = default; From 97ab6d2feb8d9ffa3bd70fb4e86a995d7ada679a Mon Sep 17 00:00:00 2001 From: Tristan Date: Mon, 15 May 2023 16:12:18 +0200 Subject: [PATCH 03/24] Works but not pretty or finished, but works --- .../DATAREDUCTION/MassTraceDetection.h | 6 - .../DATAREDUCTION/MassTraceDetection.cpp | 168 +++++------------- 2 files changed, 48 insertions(+), 126 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index dce1b3e9a6f..2e6eee4b519 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -121,12 +121,6 @@ namespace OpenMS // Find Offset for Peak double find_offset_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec); - // calc_right_border_ - Size calc_right_border_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec); - - // calc_left_border_ - Size calc_left_border_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec); - // parameter stuff double mass_error_ppm_; double mass_error_da_; diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index ba51bc56d5e..f0f2f3edee3 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -34,16 +34,11 @@ #include +#include #include #include - #include -// #include -// #include -#include - -#include namespace OpenMS { @@ -278,38 +273,12 @@ namespace OpenMS double MassTraceDetection::find_offset_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec) { - double centroid_mz = input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ(); //create function - // double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); - double ftl_sd = Math::ppmToMass(mass_error_ppm_,centroid_mz); // mit formel ergänzen header ppm to dalton + double centroid_mz = input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ(); + double ftl_sd = Math::ppmToMass(mass_error_ppm_,centroid_mz); double offset = 3 * ftl_sd; return offset; } - Size MassTraceDetection::calc_right_border_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec) - { - double right_bound = (input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ()) + (3 /* border gerade 3 mal so groß wie "noetig" */ * find_offset_(peak_index_in_apices_vec, mass_error_ppm_, input_exp, apices_vec)); - Size j{}; - while (input_exp[apices_vec[peak_index_in_apices_vec + j].scan_idx][apices_vec[peak_index_in_apices_vec + j].peak_idx].getMZ() <= right_bound) - { - if(peak_index_in_apices_vec + j >= apices_vec.size()) break; - ++j; - } - return peak_index_in_apices_vec + j; - } - - Size MassTraceDetection::calc_left_border_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec) - { - double left_bound = (input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ()) -( 3 /* border gerade 3 mal so groß wie "noetig" */ * find_offset_(peak_index_in_apices_vec, mass_error_ppm_, input_exp, apices_vec)); - Size j{}; - while (input_exp[apices_vec[peak_index_in_apices_vec - j].scan_idx][apices_vec[peak_index_in_apices_vec - j].peak_idx].getMZ() >= left_bound) - { - if(peak_index_in_apices_vec - j == 0) break; - ++j; - } - return peak_index_in_apices_vec - j; - } - - void MassTraceDetection::run_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, @@ -317,7 +286,6 @@ namespace OpenMS std::vector& found_masstraces, const Size max_traces) { - // boost::dynamic_bitset<> peak_visited_1(total_peak_count); Size trace_number(1); // check presence of FWHM meta data @@ -347,57 +315,55 @@ namespace OpenMS Size peaks_detected(0); - // Size binnumber{1}; - Size binnumber = omp_get_max_threads(); + std::vector gaps_detected; + for(Size i = 0; i < chrom_apices.size() - 1; ++i) + { + if((work_exp[chrom_apices[i+1].scan_idx][chrom_apices[i+1].peak_idx].getMZ() - work_exp[chrom_apices[i].scan_idx][chrom_apices[i].peak_idx].getMZ()) > find_offset_(i, mass_error_ppm_, work_exp, chrom_apices)) + { + gaps_detected.push_back(i); + // std::cout << "Gap\n"; + } + } - Size binsize = chrom_apices.size()/binnumber; - Size bin_tmp_start{0}; - std::vector binstarts; - std::vector binends; - std::vector cutoff_mz; + std::cout << "gaps_detected: " << gaps_detected.size() << '\n'; - if(binnumber > 1) - { - binstarts.push_back(0); - cutoff_mz.push_back(0); - bin_tmp_start += binsize; - binends.push_back(calc_right_border_(bin_tmp_start-1, mass_error_ppm_, work_exp, chrom_apices)); - cutoff_mz.push_back(bin_tmp_start-1); + // for(auto gap : gaps_detected) + // { + // std::cout << "gap index: " << gap << '\n'; + // } - for(Size i = 1; i < binnumber - 1; ++i) - { - - binstarts.push_back(calc_left_border_(bin_tmp_start-1, mass_error_ppm_, work_exp, chrom_apices)); - bin_tmp_start += binsize; - cutoff_mz.push_back(bin_tmp_start-1); - // std::cout << "Index: " << bin_tmp_start << '\n'; - binends.push_back(calc_right_border_(bin_tmp_start-1, mass_error_ppm_, work_exp, chrom_apices)); + std::vector gaps_filtered; + gaps_filtered.push_back(0); - } + Size border{}; - binstarts.push_back(calc_left_border_(bin_tmp_start-1, mass_error_ppm_, work_exp, chrom_apices)); - binends.push_back(chrom_apices.size()); - cutoff_mz.push_back(chrom_apices.size()-1); - } - else + for(Size i = 0; i < gaps_detected.size() - 1; ++i) { - binstarts.push_back(0); - binends.push_back(chrom_apices.size()); - cutoff_mz.push_back(0); - cutoff_mz.push_back(chrom_apices.size()-1); + if(gaps_detected[i+1] - gaps_detected[i - border] > 30000) + { + gaps_filtered.push_back(gaps_detected[i] + 1); + border = 0; + } + else + { + ++border; + } } + gaps_filtered.push_back(chrom_apices.size() - 1); - // for(int i=0; i < binstarts.size(); ++i) + std::cout << "gaps_detected filtered: " << gaps_filtered.size()-2 << '\n'; + + // for(auto gap : gaps_filtered) // { - // std::cout << "Start: " << binstarts[i] << " End: " << binends[i] << '\n'; + // std::cout << "filtered gap index: " << gap << '\n'; // } + boost::dynamic_bitset<> peak_visited(total_peak_count); + Size threads_number = gaps_filtered.size()-1; - - #pragma omp parallel for num_threads(binnumber) - for (Size i = 0; i < binnumber; ++i) + #pragma omp parallel for num_threads(threads_number) + for (Size i = 0; i < gaps_filtered.size() - 1; ++i) { - boost::dynamic_bitset<> peak_visited_1(total_peak_count); - std::vector chrom_apices_2 = {chrom_apices.cbegin() + binstarts[i], chrom_apices.cbegin() + binends[i]}; // copies because of bin overlap + std::vector chrom_apices_2 = {chrom_apices.cbegin() + gaps_filtered[i], chrom_apices.cbegin() + gaps_filtered[i+1]}; // copies because of bin overlap std::sort(chrom_apices_2.begin(), chrom_apices_2.end(), // sort by intensity [](const Apex & a, const Apex & b) -> bool { @@ -405,24 +371,11 @@ namespace OpenMS }); for (auto m_it = chrom_apices_2.cbegin(); m_it != chrom_apices_2.cend(); ++m_it) // iterate reverse from high intensity to low intensity { - // bool outer_loop = false; - - // auto endpeak = chrom_apices.cbegin() + binends[i] - 1; - // Size end_scan_idx(endpeak->scan_idx); - // Size end_peak_idx(endpeak->peak_idx); - // double end_peak_mz = work_exp[end_scan_idx][end_peak_idx].getMZ(); - - // auto firstpeak = chrom_apices.cbegin() + binstarts[i]; - // Size first_scan_idx(firstpeak->scan_idx); - // Size first_peak_idx(firstpeak->peak_idx); - // double first_peak_mz = work_exp[first_scan_idx][first_peak_idx].getMZ(); - - Size apex_scan_idx(m_it->scan_idx); Size apex_peak_idx(m_it->peak_idx); - if (peak_visited_1[spec_offsets[apex_scan_idx] + apex_peak_idx]) + if (peak_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) { continue; } @@ -435,8 +388,7 @@ namespace OpenMS Size trace_up_idx(apex_scan_idx); Size trace_down_idx(apex_scan_idx); - std::list current_trace; - double startint = work_exp[apex_scan_idx][apex_peak_idx].getIntensity(); + std::deque current_trace; current_trace.push_back(apex_peak); std::vector fwhms_mz; // peak-FWHM meta values of collected peaks @@ -493,13 +445,9 @@ namespace OpenMS if ((next_down_peak_mz <= right_bound) && (next_down_peak_mz >= left_bound) && - !peak_visited_1[spec_offsets[trace_down_idx - 1] + next_down_peak_idx] + !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx] ) { - if(next_down_peak_int > startint) - { - std::cout << "Down Bigger than start\n"; - } Peak2D next_peak; next_peak.setRT(spec_trace_down.getRT()); @@ -576,12 +524,8 @@ namespace OpenMS if ((next_up_peak_mz <= right_bound) && (next_up_peak_mz >= left_bound) && - !peak_visited_1[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) + !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { - if(next_up_peak_int > startint) - { - std::cout << "Up Bigger than start\n"; - } Peak2D next_peak; next_peak.setRT(spec_trace_up.getRT()); next_peak.setMZ(next_up_peak_mz); @@ -639,7 +583,6 @@ namespace OpenMS } } - // if(outer_loop) continue; double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); double mt_quality((double)current_trace.size() / (double)num_scans); @@ -650,18 +593,14 @@ namespace OpenMS // Step 2.3 check if minimum length and quality of mass trace criteria are met // *********************************************************** // bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); - if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) + if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_ ) { - // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl; - - // mark all peaks as visited for (Size i = 0; i < gathered_idx.size(); ++i) { - peak_visited_1[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; } - // create new MassTrace object and store collected peaks from list current_trace - MassTrace new_trace(current_trace); + MassTrace new_trace(current_trace.begin(), current_trace.end()); new_trace.updateWeightedMeanRT(); new_trace.updateWeightedMeanMZ(); if (!fwhms_mz.empty()) @@ -672,24 +611,13 @@ namespace OpenMS //new_trace.setCentroidSD(ftl_sd); new_trace.updateWeightedMZsd(); - #pragma omp critical (add_trace) - { - // Size leftbound_index = binsize * i; - // Size rightbound_index = binsize * (i+1); - Size leftbound_index = cutoff_mz[i]; - Size rightbound_index = cutoff_mz[i+1]; - double leftbound_mz = work_exp[chrom_apices[leftbound_index].scan_idx][chrom_apices[leftbound_index].peak_idx].getMZ(); - double rightbound_mz = work_exp[chrom_apices[rightbound_index].scan_idx][chrom_apices[rightbound_index].peak_idx].getMZ(); - if(new_trace.getCentroidMZ() >= leftbound_mz && - new_trace.getCentroidMZ() < rightbound_mz) - { + { new_trace.setLabel("T" + String(trace_number)); ++trace_number; found_masstraces.push_back(new_trace); peaks_detected += new_trace.getSize(); this->setProgress(peaks_detected); - } } // check if we already reached the (optional) maximum number of traces if (max_traces > 0 && found_masstraces.size() == max_traces) From 314ee530805bc8bb36b82bd172f3aa5d3e4e074b Mon Sep 17 00:00:00 2001 From: Tristan Date: Mon, 15 May 2023 23:33:06 +0200 Subject: [PATCH 04/24] Sort In Place, no copies, less memory --- .../FILTERING/DATAREDUCTION/MassTraceDetection.h | 2 +- .../FILTERING/DATAREDUCTION/MassTraceDetection.cpp | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index 2e6eee4b519..9af27599e75 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -111,7 +111,7 @@ namespace OpenMS }; /// The internal run method - void run_(const std::vector& chrom_apices, + void run_(std::vector& chrom_apices, const Size peak_count, const PeakMap & work_exp, const std::vector& spec_offsets, diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index f0f2f3edee3..7bcfe63f0c1 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -279,7 +279,7 @@ namespace OpenMS return offset; } - void MassTraceDetection::run_(const std::vector& chrom_apices, + void MassTraceDetection::run_(std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, @@ -357,21 +357,20 @@ namespace OpenMS // { // std::cout << "filtered gap index: " << gap << '\n'; // } - boost::dynamic_bitset<> peak_visited(total_peak_count); Size threads_number = gaps_filtered.size()-1; #pragma omp parallel for num_threads(threads_number) for (Size i = 0; i < gaps_filtered.size() - 1; ++i) { - std::vector chrom_apices_2 = {chrom_apices.cbegin() + gaps_filtered[i], chrom_apices.cbegin() + gaps_filtered[i+1]}; // copies because of bin overlap - std::sort(chrom_apices_2.begin(), chrom_apices_2.end(), // sort by intensity + // std::vector chrom_apices_2 = {chrom_apices.cbegin() + gaps_filtered[i], chrom_apices.cbegin() + gaps_filtered[i+1]}; // copies because of bin overlap + std::sort(chrom_apices.begin() + gaps_filtered[i], chrom_apices.begin() + gaps_filtered[i+1], // sort by intensity [](const Apex & a, const Apex & b) -> bool { return a.intensity > b.intensity; }); - for (auto m_it = chrom_apices_2.cbegin(); m_it != chrom_apices_2.cend(); ++m_it) // iterate reverse from high intensity to low intensity + boost::dynamic_bitset<> peak_visited(total_peak_count); + for (auto m_it = chrom_apices.cbegin() + gaps_filtered[i]; m_it != chrom_apices.cbegin() + gaps_filtered[i+1]; ++m_it) // iterate reverse from high intensity to low intensity { - Size apex_scan_idx(m_it->scan_idx); Size apex_peak_idx(m_it->peak_idx); From 3704886bbe14347e9b65e556c853bb40fff80091 Mon Sep 17 00:00:00 2001 From: Tristan Date: Wed, 17 May 2023 09:45:59 +0200 Subject: [PATCH 05/24] First Steps in queue try --- .../DATAREDUCTION/MassTraceDetection.cpp | 134 +++++++++++------- 1 file changed, 79 insertions(+), 55 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index 7bcfe63f0c1..192230958d2 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -259,7 +259,7 @@ namespace OpenMS [&work_exp](const Apex & a, const Apex & b) -> bool { - return (work_exp[a.scan_idx][a.peak_idx].getMZ()) < (work_exp[b.scan_idx][b.peak_idx].getMZ()); + return a.intensity > b.intensity; }); // ********************************************************************* @@ -275,7 +275,14 @@ namespace OpenMS { double centroid_mz = input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ(); double ftl_sd = Math::ppmToMass(mass_error_ppm_,centroid_mz); - double offset = 3 * ftl_sd; + double offset = 3 * ftl_sd; // ftl_sd == full trace length standard deviation + return offset; + } + + double MassTraceDetection::findOffset_(double centroid_mz, double mass_error_ppm_) + { + double ftl_sd = Math::ppmToMass(mass_error_ppm_,centroid_mz); + double offset = 3 * ftl_sd; // ftl_sd == full trace length standard deviation return offset; } @@ -314,71 +321,80 @@ namespace OpenMS this->startProgress(0, total_peak_count, "mass trace detection"); Size peaks_detected(0); + std::vector mz_locked; - std::vector gaps_detected; - for(Size i = 0; i < chrom_apices.size() - 1; ++i) - { - if((work_exp[chrom_apices[i+1].scan_idx][chrom_apices[i+1].peak_idx].getMZ() - work_exp[chrom_apices[i].scan_idx][chrom_apices[i].peak_idx].getMZ()) > find_offset_(i, mass_error_ppm_, work_exp, chrom_apices)) - { - gaps_detected.push_back(i); - // std::cout << "Gap\n"; - } - } - - std::cout << "gaps_detected: " << gaps_detected.size() << '\n'; - // for(auto gap : gaps_detected) + // #pragma omp parallel for schedule(static) + // for (Size i = 0; i < chrom_apices.size(); ++i) // { - // std::cout << "gap index: " << gap << '\n'; - // } + // // std::vector chrom_apices_2 = {chrom_apices.cbegin() + gaps_filtered[i], chrom_apices.cbegin() + gaps_filtered[i+1]}; // copies because of bin overlap + // std::sort(chrom_apices.begin() + gaps_filtered[i], chrom_apices.begin() + gaps_filtered[i+1], // sort by intensity + // [](const Apex & a, const Apex & b) -> bool + // { + // return a.intensity > b.intensity; + // }); + boost::dynamic_bitset<> peak_visited(total_peak_count); + // // boost::dynamic_bitset<> peak_visited(gaps_filtered[i+1]-gaps_filtered[i]); + // for (auto m_it = chrom_apices.cbegin() + gaps_filtered[i]; m_it != chrom_apices.cbegin() + gaps_filtered[i+1]; ++m_it) // iterate reverse from high intensity to low intensity + // #pragma omp parallel + // for (Size i = 0; i < chrom_apices.size(); ++i) + Size index = 0; + #pragma omp parallel + while(index != chrom_apices.size()) + { - std::vector gaps_filtered; - gaps_filtered.push_back(0); + // std::cout << "start\n"; + auto m_it = chrom_apices[index]; + // ++index; + // Size apex_scan_idx(m_it->scan_idx); + // Size apex_peak_idx(m_it->peak_idx); + Size apex_scan_idx(m_it.scan_idx); + Size apex_peak_idx(m_it.peak_idx); - Size border{}; + double currentApex_mz = work_exp[apex_scan_idx][apex_peak_idx].getMZ(); - for(Size i = 0; i < gaps_detected.size() - 1; ++i) - { - if(gaps_detected[i+1] - gaps_detected[i - border] > 30000) - { - gaps_filtered.push_back(gaps_detected[i] + 1); - border = 0; - } - else - { - ++border; - } - } - gaps_filtered.push_back(chrom_apices.size() - 1); + // std::cout << "index: " << index << '\n'; - std::cout << "gaps_detected filtered: " << gaps_filtered.size()-2 << '\n'; + if (peak_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) + { + // std::cout << "Continue peak visited\n"; + #pragma omp atomic + ++index; + continue; + } - // for(auto gap : gaps_filtered) - // { - // std::cout << "filtered gap index: " << gap << '\n'; - // } - Size threads_number = gaps_filtered.size()-1; + bool lock_found = false; + #pragma omp critical (check_lock) + { + // std::cout << "mz_locked Size: " << mz_locked.size() << '\n'; + for (const auto& lockedApex : mz_locked) + { + if ((currentApex_mz + findOffset_(currentApex_mz, mass_error_ppm_) > lockedApex) || (currentApex_mz - findOffset_(currentApex_mz, mass_error_ppm_) < lockedApex)) + { + lock_found = true; + } + } + if(lock_found) + { + // std::cout << "Lock found\n"; + } + else + { + // std::cout << "pusg back and next\n"; + mz_locked.push_back(currentApex_mz); + ++index; + } + } - #pragma omp parallel for num_threads(threads_number) - for (Size i = 0; i < gaps_filtered.size() - 1; ++i) - { - // std::vector chrom_apices_2 = {chrom_apices.cbegin() + gaps_filtered[i], chrom_apices.cbegin() + gaps_filtered[i+1]}; // copies because of bin overlap - std::sort(chrom_apices.begin() + gaps_filtered[i], chrom_apices.begin() + gaps_filtered[i+1], // sort by intensity - [](const Apex & a, const Apex & b) -> bool - { - return a.intensity > b.intensity; - }); - boost::dynamic_bitset<> peak_visited(total_peak_count); - for (auto m_it = chrom_apices.cbegin() + gaps_filtered[i]; m_it != chrom_apices.cbegin() + gaps_filtered[i+1]; ++m_it) // iterate reverse from high intensity to low intensity - { - Size apex_scan_idx(m_it->scan_idx); - Size apex_peak_idx(m_it->peak_idx); + // std::cout << "Are we here?\n"; - if (peak_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) + if(lock_found) { + // std::cout << "continue\n"; continue; } + Peak2D apex_peak; apex_peak.setRT(work_exp[apex_scan_idx].getRT()); apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); @@ -624,8 +640,16 @@ namespace OpenMS break; } } - } - } + #pragma omp critical + { + auto it = std::find(mz_locked.begin(), mz_locked.end(), currentApex_mz); + if (it != mz_locked.end()) { + mz_locked.erase(it); + // std::cout << "erase\n"; + } + // std::cout << "index: " << index << '\n'; + } + } this->endProgress(); } From b3d8a9c8118ab66a0e32c7dcc3730969290c563c Mon Sep 17 00:00:00 2001 From: Tristan Date: Wed, 17 May 2023 09:59:04 +0200 Subject: [PATCH 06/24] works but not really parallel, why? --- .../source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index 192230958d2..93953494393 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -339,10 +339,10 @@ namespace OpenMS // #pragma omp parallel // for (Size i = 0; i < chrom_apices.size(); ++i) Size index = 0; - #pragma omp parallel - while(index != chrom_apices.size()) + #pragma omp parallels + while(true) { - + if(index == chrom_apices.size()) break; // std::cout << "start\n"; auto m_it = chrom_apices[index]; // ++index; @@ -383,6 +383,7 @@ namespace OpenMS // std::cout << "pusg back and next\n"; mz_locked.push_back(currentApex_mz); ++index; + // if(index == chrom_apices.size()) break; } } From a9dbb3a66a758ddc96da7130b9f1291466dd6d42 Mon Sep 17 00:00:00 2001 From: Tristan Date: Thu, 18 May 2023 10:15:18 +0200 Subject: [PATCH 07/24] Different outcome with different threads and slow --- .../DATAREDUCTION/MassTraceDetection.cpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index 93953494393..a2079825c8d 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -339,9 +339,13 @@ namespace OpenMS // #pragma omp parallel // for (Size i = 0; i < chrom_apices.size(); ++i) Size index = 0; - #pragma omp parallels - while(true) + #pragma omp parallel { + // Size index = 0; + while(index < chrom_apices.size()) + { + + // std::cout << chrom_apices.size(); if(index == chrom_apices.size()) break; // std::cout << "start\n"; auto m_it = chrom_apices[index]; @@ -374,11 +378,7 @@ namespace OpenMS lock_found = true; } } - if(lock_found) - { - // std::cout << "Lock found\n"; - } - else + if(!lock_found) { // std::cout << "pusg back and next\n"; mz_locked.push_back(currentApex_mz); @@ -636,12 +636,12 @@ namespace OpenMS this->setProgress(peaks_detected); } // check if we already reached the (optional) maximum number of traces - if (max_traces > 0 && found_masstraces.size() == max_traces) - { - break; - } + // if (max_traces > 0 && found_masstraces.size() == max_traces) + // { + // break; + // } } - #pragma omp critical + #pragma omp critical (erase_lock) { auto it = std::find(mz_locked.begin(), mz_locked.end(), currentApex_mz); if (it != mz_locked.end()) { @@ -651,6 +651,7 @@ namespace OpenMS // std::cout << "index: " << index << '\n'; } } + } this->endProgress(); } From 4f03fd63d9a5c543da1356d2a37fc88abd5dad2c Mon Sep 17 00:00:00 2001 From: Tristan Date: Thu, 18 May 2023 10:15:56 +0200 Subject: [PATCH 08/24] gitignore add smaple file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 396e9a49436..9485db60de1 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,4 @@ cmake-*-build *.code-workspace .vs .clion.* +src/tests/class_tests/openms/data/SA1_subset_verysmall.mzML From 82a6c42482560ec925ebb8d16189ece4049cd897 Mon Sep 17 00:00:00 2001 From: Tristan Date: Thu, 18 May 2023 13:21:12 +0200 Subject: [PATCH 09/24] Another Approach and search_traces function --- .../DATAREDUCTION/MassTraceDetection.h | 11 +- .../DATAREDUCTION/MassTraceDetection.cpp | 570 +++++++++--------- 2 files changed, 279 insertions(+), 302 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index 9af27599e75..1eedcabd8e9 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -40,6 +40,8 @@ #include #include +#include + namespace OpenMS { @@ -108,10 +110,13 @@ namespace OpenMS double intensity; Size scan_idx; Size peak_idx; + + // double upperbound_(); + // double lowerbound_(); }; /// The internal run method - void run_(std::vector& chrom_apices, + void run_(const std::vector& chrom_apices, const Size peak_count, const PeakMap & work_exp, const std::vector& spec_offsets, @@ -120,6 +125,10 @@ namespace OpenMS // Find Offset for Peak double find_offset_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec); + double findOffset_(double centroid_mz, double mass_error_ppm_); + + + boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<> allowed_peaks, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx); // parameter stuff double mass_error_ppm_; diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index a2079825c8d..1f755327e37 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -34,11 +34,9 @@ #include -#include #include #include -#include namespace OpenMS { @@ -195,7 +193,6 @@ namespace OpenMS void MassTraceDetection::run(const PeakMap& input_exp, std::vector& found_masstraces, const Size max_traces) { - // make sure the output vector is empty found_masstraces.clear(); @@ -254,12 +251,11 @@ namespace OpenMS // discard last spectrum's offset spec_offsets.pop_back(); - //hatten wir mal parallelisiert std::sort(chrom_apices.begin(), chrom_apices.end(), - [&work_exp](const Apex & a, + [](const Apex & a, const Apex & b) -> bool - { - return a.intensity > b.intensity; + { + return a.intensity < b.intensity; }); // ********************************************************************* @@ -271,28 +267,15 @@ namespace OpenMS return; } // end of MassTraceDetection::run - double MassTraceDetection::find_offset_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec) - { - double centroid_mz = input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ(); - double ftl_sd = Math::ppmToMass(mass_error_ppm_,centroid_mz); - double offset = 3 * ftl_sd; // ftl_sd == full trace length standard deviation - return offset; - } - - double MassTraceDetection::findOffset_(double centroid_mz, double mass_error_ppm_) - { - double ftl_sd = Math::ppmToMass(mass_error_ppm_,centroid_mz); - double offset = 3 * ftl_sd; // ftl_sd == full trace length standard deviation - return offset; - } - - void MassTraceDetection::run_(std::vector& chrom_apices, + void MassTraceDetection::run_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces) { + boost::dynamic_bitset<> peak_visited(total_peak_count); + boost::dynamic_bitset<> apex_visited(total_peak_count); Size trace_number(1); // check presence of FWHM meta data @@ -321,353 +304,338 @@ namespace OpenMS this->startProgress(0, total_peak_count, "mass trace detection"); Size peaks_detected(0); - std::vector mz_locked; - - - // #pragma omp parallel for schedule(static) - // for (Size i = 0; i < chrom_apices.size(); ++i) - // { - // // std::vector chrom_apices_2 = {chrom_apices.cbegin() + gaps_filtered[i], chrom_apices.cbegin() + gaps_filtered[i+1]}; // copies because of bin overlap - // std::sort(chrom_apices.begin() + gaps_filtered[i], chrom_apices.begin() + gaps_filtered[i+1], // sort by intensity - // [](const Apex & a, const Apex & b) -> bool - // { - // return a.intensity > b.intensity; - // }); - boost::dynamic_bitset<> peak_visited(total_peak_count); - // // boost::dynamic_bitset<> peak_visited(gaps_filtered[i+1]-gaps_filtered[i]); - // for (auto m_it = chrom_apices.cbegin() + gaps_filtered[i]; m_it != chrom_apices.cbegin() + gaps_filtered[i+1]; ++m_it) // iterate reverse from high intensity to low intensity - // #pragma omp parallel - // for (Size i = 0; i < chrom_apices.size(); ++i) - Size index = 0; - #pragma omp parallel + + bool trace_added = false; + Size trace_count{}; + boost::dynamic_bitset<> allowed_peaks{total_peak_count}; + while(true){ + Size current_trace_number{}; + boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); + if(trace_count == (trace_count + current_trace_number)) { - // Size index = 0; - while(index < chrom_apices.size()) + // std::cout << "break while\n"; + break; + } else { + allowed_peaks |= new_found; + // std::cout << "trace count plus\n"; + trace_count += current_trace_number; + } + } + // std::cout << "peaks detected: " << chrom_apices.size() - peaks_detected << '\n'; + this->endProgress(); - // std::cout << chrom_apices.size(); - if(index == chrom_apices.size()) break; - // std::cout << "start\n"; - auto m_it = chrom_apices[index]; - // ++index; - // Size apex_scan_idx(m_it->scan_idx); - // Size apex_peak_idx(m_it->peak_idx); - Size apex_scan_idx(m_it.scan_idx); - Size apex_peak_idx(m_it.peak_idx); + } - double currentApex_mz = work_exp[apex_scan_idx][apex_peak_idx].getMZ(); + void MassTraceDetection::updateMembers_() + { + mass_error_ppm_ = (double)param_.getValue("mass_error_ppm"); + noise_threshold_int_ = (double)param_.getValue("noise_threshold_int"); + chrom_peak_snr_ = (double)param_.getValue("chrom_peak_snr"); + quant_method_ = MassTrace::getQuantMethod((String)param_.getValue("quant_method").toString()); - // std::cout << "index: " << index << '\n'; + trace_termination_criterion_ = (String)param_.getValue("trace_termination_criterion").toString(); + trace_termination_outliers_ = (Size)param_.getValue("trace_termination_outliers"); + min_sample_rate_ = (double)param_.getValue("min_sample_rate"); + min_trace_length_ = (double)param_.getValue("min_trace_length"); + max_trace_length_ = (double)param_.getValue("max_trace_length"); + reestimate_mt_sd_ = param_.getValue("reestimate_mt_sd").toBool(); + } - if (peak_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) - { - // std::cout << "Continue peak visited\n"; - #pragma omp atomic - ++index; - continue; - } - bool lock_found = false; - #pragma omp critical (check_lock) - { - // std::cout << "mz_locked Size: " << mz_locked.size() << '\n'; - for (const auto& lockedApex : mz_locked) - { - if ((currentApex_mz + findOffset_(currentApex_mz, mass_error_ppm_) > lockedApex) || (currentApex_mz - findOffset_(currentApex_mz, mass_error_ppm_) < lockedApex)) - { - lock_found = true; - } - } - if(!lock_found) - { - // std::cout << "pusg back and next\n"; - mz_locked.push_back(currentApex_mz); - ++index; - // if(index == chrom_apices.size()) break; - } - } - // std::cout << "Are we here?\n"; - if(lock_found) - { - // std::cout << "continue\n"; - continue; - } + boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<> allowed_peaks, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx) + { + boost::dynamic_bitset<> apex_visited{total_peak_count}; + + #pragma omp parallel for + for (auto m_it = chrom_apices.crbegin(); m_it != chrom_apices.crend(); ++m_it) + { + bool outer_loop = false; - Peak2D apex_peak; - apex_peak.setRT(work_exp[apex_scan_idx].getRT()); - apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); - apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); + Size apex_scan_idx(m_it->scan_idx); + Size apex_peak_idx(m_it->peak_idx); - Size trace_up_idx(apex_scan_idx); - Size trace_down_idx(apex_scan_idx); + if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx]) + { + continue; + } - std::deque current_trace; - current_trace.push_back(apex_peak); - std::vector fwhms_mz; // peak-FWHM meta values of collected peaks + Peak2D apex_peak; + apex_peak.setRT(work_exp[apex_scan_idx].getRT()); + apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); + apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); - // Initialization for the iterative version of weighted m/z mean calculation - double centroid_mz(apex_peak.getMZ()); - double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ()); - double prev_denom(apex_peak.getIntensity()); + double start_int = work_exp[apex_scan_idx][apex_peak_idx].getIntensity(); - updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); + Size trace_up_idx(apex_scan_idx); + Size trace_down_idx(apex_scan_idx); - std::vector > gathered_idx; - gathered_idx.emplace_back(apex_scan_idx, apex_peak_idx); - if (fwhm_meta_idx != -1) - { - fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); - } + std::list current_trace; + current_trace.push_back(apex_peak); + std::vector fwhms_mz; // peak-FWHM meta values of collected peaks + + // Initialization for the iterative version of weighted m/z mean calculation + double centroid_mz(apex_peak.getMZ()); + double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ()); + double prev_denom(apex_peak.getIntensity()); + + updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); + + std::vector> gathered_idx; + gathered_idx.emplace_back(apex_scan_idx, apex_peak_idx); + if (fwhm_meta_idx != -1) + { + fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); + } - Size up_hitting_peak(0), down_hitting_peak(0); - Size up_scan_counter(0), down_scan_counter(0); + Size up_hitting_peak(0), down_hitting_peak(0); + Size up_scan_counter(0), down_scan_counter(0); - bool toggle_up = true, toggle_down = true; + bool toggle_up = true, toggle_down = true; - Size conseq_missed_peak_up(0), conseq_missed_peak_down(0); - Size max_consecutive_missing(trace_termination_outliers_); + Size conseq_missed_peak_up(0), conseq_missed_peak_down(0); + Size max_consecutive_missing(trace_termination_outliers_); - double current_sample_rate(1.0); - // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); - Size min_scans_to_consider(5); + double current_sample_rate(1.0); + // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); + Size min_scans_to_consider(5); - // double outlier_ratio(0.3); + // double outlier_ratio(0.3); - // double ftl_mean(centroid_mz); - // double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); - double ftl_sd(Math::ppmToMass(mass_error_ppm_, centroid_mz)); - double intensity_so_far(apex_peak.getIntensity()); + // double ftl_mean(centroid_mz); + double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); + double intensity_so_far(apex_peak.getIntensity()); - while (((trace_down_idx > 0) && toggle_down) || - ((trace_up_idx < work_exp.size() - 1) && toggle_up) - ) + while (((trace_down_idx > 0) && toggle_down) || + ((trace_up_idx < work_exp.size() - 1) && toggle_up)) + { + // *********************************************************** // + // Step 2.1 MOVE DOWN in RT dim + // *********************************************************** // + if ((trace_down_idx > 0) && toggle_down) { - // *********************************************************** // - // Step 2.1 MOVE DOWN in RT dim - // *********************************************************** // - if ((trace_down_idx > 0) && toggle_down) + const MSSpectrum &spec_trace_down = work_exp[trace_down_idx - 1]; + if (!spec_trace_down.empty()) { - const MSSpectrum& spec_trace_down = work_exp[trace_down_idx - 1]; - if (!spec_trace_down.empty()) + Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); + double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ(); + double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); + + double right_bound = centroid_mz + 3 * ftl_sd; + double left_bound = centroid_mz - 3 * ftl_sd; + + if ((next_down_peak_mz <= right_bound) && + (next_down_peak_mz >= left_bound) && + !allowed_peaks[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) { - Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); - double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ(); - double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); - double right_bound = centroid_mz + 3 * ftl_sd; - double left_bound = centroid_mz - 3 * ftl_sd; - - if ((next_down_peak_mz <= right_bound) && - (next_down_peak_mz >= left_bound) && - !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx] - ) + + if (start_int < next_down_peak_int) { + outer_loop = true; + // std::cout << "Down\n"; + break; + } - Peak2D next_peak; - next_peak.setRT(spec_trace_down.getRT()); - next_peak.setMZ(next_down_peak_mz); - next_peak.setIntensity(next_down_peak_int); + Peak2D next_peak; + next_peak.setRT(spec_trace_down.getRT()); + next_peak.setMZ(next_down_peak_mz); + next_peak.setIntensity(next_down_peak_int); - current_trace.push_front(next_peak); - // FWHM average - if (fwhm_meta_idx != -1) - { - fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); - } - // Update the m/z mean of the current trace as we added a new peak - updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); - gathered_idx.emplace_back(trace_down_idx - 1, next_down_peak_idx); + current_trace.push_front(next_peak); + // FWHM average + if (fwhm_meta_idx != -1) + { + fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); + } + // Update the m/z mean of the current trace as we added a new peak + updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); + gathered_idx.emplace_back(trace_down_idx - 1, next_down_peak_idx); - // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) + // Update the m/z variance dynamically + if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) + { + // if (ftl_t > min_fwhm_scans) { - // if (ftl_t > min_fwhm_scans) - { - updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); - } + updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); } - - ++down_hitting_peak; - conseq_missed_peak_down = 0; - } - else - { - ++conseq_missed_peak_down; } + ++down_hitting_peak; + conseq_missed_peak_down = 0; } - --trace_down_idx; - ++down_scan_counter; - - // trace termination criterion: max allowed number of - // consecutive outliers reached OR cancel extension if - // sampling_rate falls below min_sample_rate_ - if (trace_termination_criterion_ == "outlier") + else { - if (conseq_missed_peak_down > max_consecutive_missing) - { - toggle_down = false; - } + ++conseq_missed_peak_down; } - else if (trace_termination_criterion_ == "sample_rate") + } + --trace_down_idx; + ++down_scan_counter; + + // trace termination criterion: max allowed number of + // consecutive outliers reached OR cancel extension if + // sampling_rate falls below min_sample_rate_ + if (trace_termination_criterion_ == "outlier") + { + if (conseq_missed_peak_down > max_consecutive_missing) { - current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / - (double)(down_scan_counter + up_scan_counter + 1); - if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) - { - // std::cout << "stopping down..." << std::endl; - toggle_down = false; - } + toggle_down = false; } } - - // *********************************************************** // - // Step 2.2 MOVE UP in RT dim - // *********************************************************** // - if ((trace_up_idx < work_exp.size() - 1) && toggle_up) + else if (trace_termination_criterion_ == "sample_rate") { - const MSSpectrum& spec_trace_up = work_exp[trace_up_idx + 1]; - if (!spec_trace_up.empty()) + current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / + (double)(down_scan_counter + up_scan_counter + 1); + if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) { - Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); - double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ(); - double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity(); + // std::cout << "stopping down..." << std::endl; + toggle_down = false; + } + } + } - double right_bound = centroid_mz + 3 * ftl_sd; - double left_bound = centroid_mz - 3 * ftl_sd; + // *********************************************************** // + // Step 2.2 MOVE UP in RT dim + // *********************************************************** // + if ((trace_up_idx < work_exp.size() - 1) && toggle_up) + { + const MSSpectrum &spec_trace_up = work_exp[trace_up_idx + 1]; + if (!spec_trace_up.empty()) + { + Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); + double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ(); + double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity(); - if ((next_up_peak_mz <= right_bound) && - (next_up_peak_mz >= left_bound) && - !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) - { - Peak2D next_peak; - next_peak.setRT(spec_trace_up.getRT()); - next_peak.setMZ(next_up_peak_mz); - next_peak.setIntensity(next_up_peak_int); + double right_bound = centroid_mz + 3 * ftl_sd; + double left_bound = centroid_mz - 3 * ftl_sd; - current_trace.push_back(next_peak); - if (fwhm_meta_idx != -1) - { - fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); - } - // Update the m/z mean of the current trace as we added a new peak - updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); - gathered_idx.emplace_back(trace_up_idx + 1, next_up_peak_idx); + if ((next_up_peak_mz <= right_bound) && + (next_up_peak_mz >= left_bound) && + !allowed_peaks[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) + { - // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) - { - // if (ftl_t > min_fwhm_scans) - { - updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); - } - } + if (start_int < next_up_peak_int) + { + outer_loop = true; + // std::cout << "Up\n"; + break; + } - ++up_hitting_peak; - conseq_missed_peak_up = 0; + Peak2D next_peak; + next_peak.setRT(spec_trace_up.getRT()); + next_peak.setMZ(next_up_peak_mz); + next_peak.setIntensity(next_up_peak_int); + current_trace.push_back(next_peak); + if (fwhm_meta_idx != -1) + { + fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); } - else + // Update the m/z mean of the current trace as we added a new peak + updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); + gathered_idx.emplace_back(trace_up_idx + 1, next_up_peak_idx); + + // Update the m/z variance dynamically + if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) { - ++conseq_missed_peak_up; + // if (ftl_t > min_fwhm_scans) + { + updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); + } } + ++up_hitting_peak; + conseq_missed_peak_up = 0; + } + else + { + ++conseq_missed_peak_up; } + } - ++trace_up_idx; - ++up_scan_counter; + ++trace_up_idx; + ++up_scan_counter; - if (trace_termination_criterion_ == "outlier") + if (trace_termination_criterion_ == "outlier") + { + if (conseq_missed_peak_up > max_consecutive_missing) { - if (conseq_missed_peak_up > max_consecutive_missing) - { - toggle_up = false; - } + toggle_up = false; } - else if (trace_termination_criterion_ == "sample_rate") - { - current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); + } + else if (trace_termination_criterion_ == "sample_rate") + { + current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); - if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) - { - // std::cout << "stopping up" << std::endl; - toggle_up = false; - } + if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) + { + // std::cout << "stopping up" << std::endl; + toggle_up = false; } } } + } + + if (outer_loop) + { + // std::cout << "Continue\n"; + continue; + } - double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); + // std::cout << "After\n"; + // std::cout << "current sr: " << current_sample_rate << std::endl; + double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); - double mt_quality((double)current_trace.size() / (double)num_scans); - // std::cout << "mt quality: " << mt_quality << std::endl; - double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); + double mt_quality((double)current_trace.size() / (double)num_scans); + // std::cout << "mt quality: " << mt_quality << std::endl; + double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); - // *********************************************************** // - // Step 2.3 check if minimum length and quality of mass trace criteria are met - // *********************************************************** // - bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); - if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_ ) + // *********************************************************** // + // Step 2.3 check if minimum length and quality of mass trace criteria are met + // *********************************************************** // + bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); + if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) + { + // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl; + + // mark all peaks as visited + for (Size i = 0; i < gathered_idx.size(); ++i) { - for (Size i = 0; i < gathered_idx.size(); ++i) - { - peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; - } - // create new MassTrace object and store collected peaks from list current_trace - MassTrace new_trace(current_trace.begin(), current_trace.end()); - new_trace.updateWeightedMeanRT(); - new_trace.updateWeightedMeanMZ(); - if (!fwhms_mz.empty()) - { - new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); - } - new_trace.setQuantMethod(quant_method_); - //new_trace.setCentroidSD(ftl_sd); - new_trace.updateWeightedMZsd(); + apex_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + } - #pragma omp critical (add_trace) - { - new_trace.setLabel("T" + String(trace_number)); - ++trace_number; - found_masstraces.push_back(new_trace); - peaks_detected += new_trace.getSize(); - this->setProgress(peaks_detected); - } - // check if we already reached the (optional) maximum number of traces - // if (max_traces > 0 && found_masstraces.size() == max_traces) - // { - // break; - // } + // create new MassTrace object and store collected peaks from list current_tracekm + MassTrace new_trace(current_trace); + new_trace.updateWeightedMeanRT(); + new_trace.updateWeightedMeanMZ(); + if (!fwhms_mz.empty()) + { + new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); } - #pragma omp critical (erase_lock) - { - auto it = std::find(mz_locked.begin(), mz_locked.end(), currentApex_mz); - if (it != mz_locked.end()) { - mz_locked.erase(it); - // std::cout << "erase\n"; - } - // std::cout << "index: " << index << '\n'; - } - } - } - this->endProgress(); - } + new_trace.setQuantMethod(quant_method_); + // new_trace.setCentroidSD(ftl_sd); + new_trace.updateWeightedMZsd(); - void MassTraceDetection::updateMembers_() - { - mass_error_ppm_ = (double)param_.getValue("mass_error_ppm"); - noise_threshold_int_ = (double)param_.getValue("noise_threshold_int"); - chrom_peak_snr_ = (double)param_.getValue("chrom_peak_snr"); - quant_method_ = MassTrace::getQuantMethod((String)param_.getValue("quant_method").toString()); + #pragma omp critical(add_trace) + { + new_trace.setLabel("T" + String(trace_number)); + ++trace_number; + ++current_trace_number; + found_masstraces.push_back(new_trace); + peaks_detected += new_trace.getSize(); + this->setProgress(peaks_detected); + } - trace_termination_criterion_ = (String)param_.getValue("trace_termination_criterion").toString(); - trace_termination_outliers_ = (Size)param_.getValue("trace_termination_outliers"); - min_sample_rate_ = (double)param_.getValue("min_sample_rate"); - min_trace_length_ = (double)param_.getValue("min_trace_length"); - max_trace_length_ = (double)param_.getValue("max_trace_length"); - reestimate_mt_sd_ = param_.getValue("reestimate_mt_sd").toBool(); + // check if we already reached the (optional) maximum number of traces + // if (max_traces > 0 && found_masstraces.size() == max_traces) + // { + // break; + // } + } + } + return apex_visited; } } \ No newline at end of file From fbe1d4f8ebc5b0cd70e355513a0c30c98497dbda Mon Sep 17 00:00:00 2001 From: Tristan Date: Thu, 18 May 2023 14:15:39 +0200 Subject: [PATCH 10/24] noch mit bug, fehler noch nicht gefunden --- .../DATAREDUCTION/MassTraceDetection.h | 2 +- .../DATAREDUCTION/MassTraceDetection.cpp | 30 ++++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index 1eedcabd8e9..ab1c62d98ba 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -128,7 +128,7 @@ namespace OpenMS double findOffset_(double centroid_mz, double mass_error_ppm_); - boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<> allowed_peaks, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx); + boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx); // parameter stuff double mass_error_ppm_; diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index 1f755327e37..ad93a4f8f07 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -274,8 +274,9 @@ namespace OpenMS std::vector& found_masstraces, const Size max_traces) { - boost::dynamic_bitset<> peak_visited(total_peak_count); - boost::dynamic_bitset<> apex_visited(total_peak_count); + // boost::dynamic_bitset<> peak_visited(total_peak_count); + // boost::dynamic_bitset<> apex_started(total_peak_count); + Size trace_number(1); // check presence of FWHM meta data @@ -308,10 +309,17 @@ namespace OpenMS bool trace_added = false; Size trace_count{}; boost::dynamic_bitset<> allowed_peaks{total_peak_count}; - while(true){ + boost::dynamic_bitset<> apex_started{total_peak_count}; + + while(true) + // for(Size i{}; i < 1; ++i) + { Size current_trace_number{}; - boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); + boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, apex_started, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); + + apex_started |= new_found; // verodern der bitsets um auch apexes aus gefundenen traces zu ueberspringen if(trace_count == (trace_count + current_trace_number)) + // if(apex_started.all()) { // std::cout << "break while\n"; break; @@ -346,7 +354,7 @@ namespace OpenMS - boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<> allowed_peaks, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx) + boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx) { boost::dynamic_bitset<> apex_visited{total_peak_count}; @@ -358,6 +366,11 @@ namespace OpenMS Size apex_scan_idx(m_it->scan_idx); Size apex_peak_idx(m_it->peak_idx); + // if (apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx]) + // { + // continue; + // } // damit klappt es nicht warum, der müsste die Peaks mit denen wir schonmal angefangen haben überspringen koennen ? + if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx]) { continue; @@ -432,7 +445,7 @@ namespace OpenMS !allowed_peaks[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) { - if (start_int < next_down_peak_int) + if (start_int < next_down_peak_int /* && !apex_started[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]*/) // Hier geht das auch nicht aber selbe sache, wenn mit dem Peak schonmal gestartet wurde faellt er aus dem Kriterium raus { outer_loop = true; // std::cout << "Down\n"; @@ -516,7 +529,7 @@ namespace OpenMS !allowed_peaks[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { - if (start_int < next_up_peak_int) + if (start_int < next_up_peak_int /* && !apex_started[spec_offsets[trace_up_idx + 1] + next_up_peak_idx] */ ) { outer_loop = true; // std::cout << "Up\n"; @@ -578,6 +591,9 @@ namespace OpenMS } } + apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; // kommt man hier ueberhaubt zwanghaft an wenn man mit einem Peak startet + // eigentlich schon es sei denn man überspringt durch allowed peaks, dafuer wird aber in zeile 320 das bitset verodert + if (outer_loop) { // std::cout << "Continue\n"; From e4fc8abc523c18f13be23b233193e30b526e4cb3 Mon Sep 17 00:00:00 2001 From: Tristan Date: Thu, 18 May 2023 17:33:09 +0200 Subject: [PATCH 11/24] Some changes --- .../DATAREDUCTION/MassTraceDetection.cpp | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index ad93a4f8f07..4687efcfaae 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -318,15 +318,15 @@ namespace OpenMS boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, apex_started, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); apex_started |= new_found; // verodern der bitsets um auch apexes aus gefundenen traces zu ueberspringen - if(trace_count == (trace_count + current_trace_number)) - // if(apex_started.all()) + // if(trace_count == (trace_count + current_trace_number)) + if(apex_started.all()) { // std::cout << "break while\n"; break; } else { allowed_peaks |= new_found; - // std::cout << "trace count plus\n"; + std::cout << "trace count plus\n"; trace_count += current_trace_number; } } @@ -366,12 +366,12 @@ namespace OpenMS Size apex_scan_idx(m_it->scan_idx); Size apex_peak_idx(m_it->peak_idx); - // if (apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx]) - // { - // continue; - // } // damit klappt es nicht warum, der müsste die Peaks mit denen wir schonmal angefangen haben überspringen koennen ? + if (apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx]) + { + continue; + } // damit klappt es nicht warum, der müsste die Peaks mit denen wir schonmal angefangen haben überspringen koennen ? - if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx]) + if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx] || apex_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) { continue; } @@ -445,7 +445,7 @@ namespace OpenMS !allowed_peaks[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) { - if (start_int < next_down_peak_int /* && !apex_started[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]*/) // Hier geht das auch nicht aber selbe sache, wenn mit dem Peak schonmal gestartet wurde faellt er aus dem Kriterium raus + if (start_int < next_down_peak_int && !apex_started[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) // Hier geht das auch nicht aber selbe sache, wenn mit dem Peak schonmal gestartet wurde faellt er aus dem Kriterium raus { outer_loop = true; // std::cout << "Down\n"; @@ -529,7 +529,7 @@ namespace OpenMS !allowed_peaks[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { - if (start_int < next_up_peak_int /* && !apex_started[spec_offsets[trace_up_idx + 1] + next_up_peak_idx] */ ) + if (start_int < next_up_peak_int && !apex_started[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { outer_loop = true; // std::cout << "Up\n"; @@ -597,6 +597,7 @@ namespace OpenMS if (outer_loop) { // std::cout << "Continue\n"; + apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; continue; } From 2a9a523db4cb48968d175b6a238c25be4b0ad4fd Mon Sep 17 00:00:00 2001 From: Tristan Date: Fri, 19 May 2023 11:59:33 +0200 Subject: [PATCH 12/24] Searches in Funtions, for trying pragma omp tasks --- .../DATAREDUCTION/MassTraceDetection.h | 23 +- .../DATAREDUCTION/MassTraceDetection.cpp | 626 ++++++++++-------- 2 files changed, 369 insertions(+), 280 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index ab1c62d98ba..ee76bd2679e 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -116,7 +116,7 @@ namespace OpenMS }; /// The internal run method - void run_(const std::vector& chrom_apices, + void run_(std::vector& chrom_apices, const Size peak_count, const PeakMap & work_exp, const std::vector& spec_offsets, @@ -128,7 +128,26 @@ namespace OpenMS double findOffset_(double centroid_mz, double mass_error_ppm_); - boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx); + // boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx); + + void searchDownInRT(Size& trace_down_idx, bool& toggle_down, const PeakMap& work_exp, const double start_int, const std::vector& spec_offsets, const Size max_traces, + boost::dynamic_bitset<>& peak_visited, std::deque& current_trace, double& centroid_mz, double& ftl_sd, const int& fwhm_meta_idx, + std::vector>& gathered_idx, std::vector& fwhms_mz, double& prev_counter, double& prev_denom, Size& down_hitting_peak, + Size& down_scan_counter, Size& conseq_missed_peak_down, const Size max_consecutive_missing, double& current_sample_rate, + const Size min_scans_to_consider, double& intensity_so_far); + + void searchUpInRT(Size& trace_up_idx, bool& toggle_up, const PeakMap& work_exp, const double start_int, const std::vector& spec_offsets, const Size max_traces, + boost::dynamic_bitset<>& peak_visited, std::deque& current_trace, double& centroid_mz, double& ftl_sd, const int& fwhm_meta_idx, + std::vector>& gathered_idx, std::vector& fwhms_mz, double& prev_counter, double& prev_denom, Size& up_hitting_peak, + Size& up_scan_counter, Size& conseq_missed_peak_up, const Size max_consecutive_missing, double& current_sample_rate, + const Size min_scans_to_consider, double& intensity_so_far); + + + void checkAndAddTrace(const double & rt_range, const bool max_trace_criteria, const double & mt_quality, Size & trace_number, + boost::dynamic_bitset<>& peak_visited, const std::vector& spec_offsets, const std::vector>& gathered_idx, + const std::deque& current_trace, std::vector& found_masstraces, Size & peaks_detected, const Size max_traces, + const std::vector& fwhms_mz); + // parameter stuff double mass_error_ppm_; diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index 4687efcfaae..b59d20b8d68 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -34,9 +34,11 @@ #include +#include #include #include +#include namespace OpenMS { @@ -193,6 +195,7 @@ namespace OpenMS void MassTraceDetection::run(const PeakMap& input_exp, std::vector& found_masstraces, const Size max_traces) { + // make sure the output vector is empty found_masstraces.clear(); @@ -251,11 +254,12 @@ namespace OpenMS // discard last spectrum's offset spec_offsets.pop_back(); + //hatten wir mal parallelisiert std::sort(chrom_apices.begin(), chrom_apices.end(), - [](const Apex & a, + [&work_exp](const Apex & a, const Apex & b) -> bool - { - return a.intensity < b.intensity; + { + return a.intensity > b.intensity; }); // ********************************************************************* @@ -267,16 +271,27 @@ namespace OpenMS return; } // end of MassTraceDetection::run - void MassTraceDetection::run_(const std::vector& chrom_apices, + double MassTraceDetection::find_offset_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec) + { + double centroid_mz = input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ(); + double offset = 3 * Math::ppmToMass(mass_error_ppm_,centroid_mz); // full trace length standard deviation + return offset; + } + + double MassTraceDetection::findOffset_(double centroid_mz, double mass_error_ppm_) + { + double ftl_sd = Math::ppmToMass(mass_error_ppm_,centroid_mz); + double offset = 3 * ftl_sd; // ftl_sd == full trace length standard deviation + return offset; + } + + void MassTraceDetection::run_(std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces) { - // boost::dynamic_bitset<> peak_visited(total_peak_count); - // boost::dynamic_bitset<> apex_started(total_peak_count); - Size trace_number(1); // check presence of FWHM meta data @@ -305,184 +320,272 @@ namespace OpenMS this->startProgress(0, total_peak_count, "mass trace detection"); Size peaks_detected(0); + std::vector mz_locked; + + + // #pragma omp parallel for schedule(static) + // for (Size i = 0; i < chrom_apices.size(); ++i) + // { + // // std::vector chrom_apices_2 = {chrom_apices.cbegin() + gaps_filtered[i], chrom_apices.cbegin() + gaps_filtered[i+1]}; // copies because of bin overlap + // std::sort(chrom_apices.begin() + gaps_filtered[i], chrom_apices.begin() + gaps_filtered[i+1], // sort by intensity + // [](const Apex & a, const Apex & b) -> bool + // { + // return a.intensity > b.intensity; + // }); + boost::dynamic_bitset<> peak_visited(total_peak_count); + // // boost::dynamic_bitset<> peak_visited(gaps_filtered[i+1]-gaps_filtered[i]); + // for (auto m_it = chrom_apices.cbegin() + gaps_filtered[i]; m_it != chrom_apices.cbegin() + gaps_filtered[i+1]; ++m_it) // iterate reverse from high intensity to low intensity + // #pragma omp parallel + // for (Size i = 0; i < chrom_apices.size(); ++i) + Size index = 0; + #pragma omp parallel shared (index) + while(true) + { + if(index >= chrom_apices.size()) break; + // std::cout << "start\n"; + auto m_it = chrom_apices[index]; + // ++index; + // Size apex_scan_idx(m_it->scan_idx); + // Size apex_peak_idx(m_it->peak_idx); + Size apex_scan_idx(m_it.scan_idx); + Size apex_peak_idx(m_it.peak_idx); - bool trace_added = false; - Size trace_count{}; - boost::dynamic_bitset<> allowed_peaks{total_peak_count}; - boost::dynamic_bitset<> apex_started{total_peak_count}; + double currentApex_mz = work_exp[apex_scan_idx][apex_peak_idx].getMZ(); - while(true) - // for(Size i{}; i < 1; ++i) - { - Size current_trace_number{}; - boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, apex_started, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); - - apex_started |= new_found; // verodern der bitsets um auch apexes aus gefundenen traces zu ueberspringen - // if(trace_count == (trace_count + current_trace_number)) - if(apex_started.all()) - { - // std::cout << "break while\n"; - break; - } else - { - allowed_peaks |= new_found; - std::cout << "trace count plus\n"; - trace_count += current_trace_number; - } - } - // std::cout << "peaks detected: " << chrom_apices.size() - peaks_detected << '\n'; - this->endProgress(); + // std::cout << "index: " << index << '\n'; - } + if (peak_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) + { + // std::cout << "Continue peak visited\n"; + #pragma omp atomic + ++index; + continue; + } - void MassTraceDetection::updateMembers_() - { - mass_error_ppm_ = (double)param_.getValue("mass_error_ppm"); - noise_threshold_int_ = (double)param_.getValue("noise_threshold_int"); - chrom_peak_snr_ = (double)param_.getValue("chrom_peak_snr"); - quant_method_ = MassTrace::getQuantMethod((String)param_.getValue("quant_method").toString()); + bool lock_found = false; + #pragma omp critical (check_lock) + { + // std::cout << "mz_locked Size: " << mz_locked.size() << '\n'; + for (const auto& lockedApex : mz_locked) + { + if ((currentApex_mz + findOffset_(currentApex_mz, mass_error_ppm_) > lockedApex) || (currentApex_mz - findOffset_(currentApex_mz, mass_error_ppm_) < lockedApex)) + { + lock_found = true; + } + } + if(!lock_found) + { + // std::cout << "pusg back and next\n"; + mz_locked.push_back(currentApex_mz); + ++index; + // if(index == chrom_apices.size()) break; + } + } - trace_termination_criterion_ = (String)param_.getValue("trace_termination_criterion").toString(); - trace_termination_outliers_ = (Size)param_.getValue("trace_termination_outliers"); - min_sample_rate_ = (double)param_.getValue("min_sample_rate"); - min_trace_length_ = (double)param_.getValue("min_trace_length"); - max_trace_length_ = (double)param_.getValue("max_trace_length"); - reestimate_mt_sd_ = param_.getValue("reestimate_mt_sd").toBool(); - } + // std::cout << "Are we here?\n"; + if(lock_found) + { + // std::cout << "continue\n"; + continue; + } + Peak2D apex_peak; + apex_peak.setRT(work_exp[apex_scan_idx].getRT()); + apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); + apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); + Size trace_up_idx(apex_scan_idx); + Size trace_down_idx(apex_scan_idx); - boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx) - { - boost::dynamic_bitset<> apex_visited{total_peak_count}; + std::deque current_trace; + current_trace.push_back(apex_peak); + std::vector fwhms_mz; // peak-FWHM meta values of collected peaks - #pragma omp parallel for - for (auto m_it = chrom_apices.crbegin(); m_it != chrom_apices.crend(); ++m_it) - { - bool outer_loop = false; + // Initialization for the iterative version of weighted m/z mean calculation + double centroid_mz(apex_peak.getMZ()); + double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ()); + double prev_denom(apex_peak.getIntensity()); - Size apex_scan_idx(m_it->scan_idx); - Size apex_peak_idx(m_it->peak_idx); + updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); - if (apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx]) - { - continue; - } // damit klappt es nicht warum, der müsste die Peaks mit denen wir schonmal angefangen haben überspringen koennen ? + std::vector > gathered_idx; + gathered_idx.emplace_back(apex_scan_idx, apex_peak_idx); + if (fwhm_meta_idx != -1) + { + fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); + } - if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx] || apex_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) - { - continue; - } + Size up_hitting_peak(0), down_hitting_peak(0); + Size up_scan_counter(0), down_scan_counter(0); - Peak2D apex_peak; - apex_peak.setRT(work_exp[apex_scan_idx].getRT()); - apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); - apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); + bool toggle_up = true, toggle_down = true; - double start_int = work_exp[apex_scan_idx][apex_peak_idx].getIntensity(); + Size conseq_missed_peak_up(0), conseq_missed_peak_down(0); + Size max_consecutive_missing(trace_termination_outliers_); - Size trace_up_idx(apex_scan_idx); - Size trace_down_idx(apex_scan_idx); + double current_sample_rate(1.0); + // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); + Size min_scans_to_consider(5); - std::list current_trace; - current_trace.push_back(apex_peak); - std::vector fwhms_mz; // peak-FWHM meta values of collected peaks + // double outlier_ratio(0.3); - // Initialization for the iterative version of weighted m/z mean calculation - double centroid_mz(apex_peak.getMZ()); - double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ()); - double prev_denom(apex_peak.getIntensity()); + // double ftl_mean(centroid_mz); + // double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); + double ftl_sd(Math::ppmToMass(mass_error_ppm_, centroid_mz)); + double intensity_so_far(apex_peak.getIntensity()); - updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); + double start_int{}; - std::vector> gathered_idx; - gathered_idx.emplace_back(apex_scan_idx, apex_peak_idx); - if (fwhm_meta_idx != -1) - { - fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); - } + while (((trace_down_idx > 0) && toggle_down) || + ((trace_up_idx < work_exp.size() - 1) && toggle_up) + ) + { + // *********************************************************** // + // Step 2.1 MOVE DOWN in RT dim + // *********************************************************** // - Size up_hitting_peak(0), down_hitting_peak(0); - Size up_scan_counter(0), down_scan_counter(0); + searchDownInRT(trace_down_idx, toggle_down, work_exp, start_int, spec_offsets, max_traces, peak_visited, current_trace, centroid_mz, ftl_sd, fwhm_meta_idx, gathered_idx, fwhms_mz, + prev_counter, prev_denom, down_hitting_peak, down_scan_counter, conseq_missed_peak_down, max_consecutive_missing, current_sample_rate, min_scans_to_consider, + intensity_so_far); - bool toggle_up = true, toggle_down = true; + // *********************************************************** // + // Step 2.2 MOVE UP in RT dim + // *********************************************************** // - Size conseq_missed_peak_up(0), conseq_missed_peak_down(0); - Size max_consecutive_missing(trace_termination_outliers_); + searchUpInRT(trace_up_idx, toggle_up, work_exp, start_int, spec_offsets, max_traces, peak_visited, current_trace, centroid_mz, ftl_sd, fwhm_meta_idx, gathered_idx, fwhms_mz, + prev_counter, prev_denom, up_hitting_peak, up_scan_counter, conseq_missed_peak_up, max_consecutive_missing, current_sample_rate, min_scans_to_consider, + intensity_so_far); - double current_sample_rate(1.0); - // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); - Size min_scans_to_consider(5); + } - // double outlier_ratio(0.3); + double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); - // double ftl_mean(centroid_mz); - double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); - double intensity_so_far(apex_peak.getIntensity()); + double mt_quality((double)current_trace.size() / (double)num_scans); + // std::cout << "mt quality: " << mt_quality << std::endl; + double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); - while (((trace_down_idx > 0) && toggle_down) || - ((trace_up_idx < work_exp.size() - 1) && toggle_up)) - { // *********************************************************** // - // Step 2.1 MOVE DOWN in RT dim + // Step 2.3 check if minimum length and quality of mass trace criteria are met // *********************************************************** // - if ((trace_down_idx > 0) && toggle_down) + bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); + if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_ ) { - const MSSpectrum &spec_trace_down = work_exp[trace_down_idx - 1]; - if (!spec_trace_down.empty()) + for (Size i = 0; i < gathered_idx.size(); ++i) + { + peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + } + // create new MassTrace object and store collected peaks from list current_trace + MassTrace new_trace(current_trace.begin(), current_trace.end()); + new_trace.updateWeightedMeanRT(); + new_trace.updateWeightedMeanMZ(); + if (!fwhms_mz.empty()) + { + new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); + } + new_trace.setQuantMethod(quant_method_); + //new_trace.setCentroidSD(ftl_sd); + new_trace.updateWeightedMZsd(); + + #pragma omp critical (add_trace) + { + new_trace.setLabel("T" + String(trace_number)); + ++trace_number; + found_masstraces.push_back(new_trace); + peaks_detected += new_trace.getSize(); + this->setProgress(peaks_detected); + } + // check if we already reached the (optional) maximum number of traces + if (max_traces > 0 && found_masstraces.size() == max_traces) { - Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); - double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ(); - double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); + break; + } + } - double right_bound = centroid_mz + 3 * ftl_sd; - double left_bound = centroid_mz - 3 * ftl_sd; + // checkAndAddTrace(rt_range, max_trace_criteria, mt_quality, trace_number, peak_visited, spec_offsets, gathered_idx, + // current_trace,found_masstraces, peaks_detected, max_traces, fwhms_mz); - if ((next_down_peak_mz <= right_bound) && - (next_down_peak_mz >= left_bound) && - !allowed_peaks[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) - { - if (start_int < next_down_peak_int && !apex_started[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) // Hier geht das auch nicht aber selbe sache, wenn mit dem Peak schonmal gestartet wurde faellt er aus dem Kriterium raus - { - outer_loop = true; - // std::cout << "Down\n"; - break; + #pragma omp critical + { + auto it = std::find(mz_locked.begin(), mz_locked.end(), currentApex_mz); + if (it != mz_locked.end()) { + mz_locked.erase(it); + // std::cout << "erase\n"; } + // std::cout << "index: " << index << '\n'; + } + } + this->endProgress(); + } - Peak2D next_peak; - next_peak.setRT(spec_trace_down.getRT()); - next_peak.setMZ(next_down_peak_mz); - next_peak.setIntensity(next_down_peak_int); + void MassTraceDetection::updateMembers_() + { + mass_error_ppm_ = (double)param_.getValue("mass_error_ppm"); + noise_threshold_int_ = (double)param_.getValue("noise_threshold_int"); + chrom_peak_snr_ = (double)param_.getValue("chrom_peak_snr"); + quant_method_ = MassTrace::getQuantMethod((String)param_.getValue("quant_method").toString()); - current_trace.push_front(next_peak); - // FWHM average - if (fwhm_meta_idx != -1) - { - fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); - } - // Update the m/z mean of the current trace as we added a new peak - updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); - gathered_idx.emplace_back(trace_down_idx - 1, next_down_peak_idx); + trace_termination_criterion_ = (String)param_.getValue("trace_termination_criterion").toString(); + trace_termination_outliers_ = (Size)param_.getValue("trace_termination_outliers"); + min_sample_rate_ = (double)param_.getValue("min_sample_rate"); + min_trace_length_ = (double)param_.getValue("min_trace_length"); + max_trace_length_ = (double)param_.getValue("max_trace_length"); + reestimate_mt_sd_ = param_.getValue("reestimate_mt_sd").toBool(); + } + + + void MassTraceDetection::searchDownInRT(Size& trace_down_idx, bool& toggle_down, const PeakMap& work_exp, const double start_int, const std::vector& spec_offsets, const Size max_traces, + boost::dynamic_bitset<>& peak_visited, std::deque& current_trace, double& centroid_mz, double& ftl_sd, const int& fwhm_meta_idx, + std::vector>& gathered_idx, std::vector& fwhms_mz, double& prev_counter, double& prev_denom, Size& down_hitting_peak, + Size& down_scan_counter, Size& conseq_missed_peak_down, const Size max_consecutive_missing, double& current_sample_rate, + const Size min_scans_to_consider, double& intensity_so_far) + { + if ((trace_down_idx > 0) && toggle_down) + { + const MSSpectrum& spec_trace_down = work_exp[trace_down_idx - 1]; + if (!spec_trace_down.empty()) + { + Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); + double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ(); + double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); + double right_bound = centroid_mz + 3 * ftl_sd; + double left_bound = centroid_mz - 3 * ftl_sd; - // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) + if ((next_down_peak_mz <= right_bound) && (next_down_peak_mz >= left_bound) && !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) { - // if (ftl_t > min_fwhm_scans) + Peak2D next_peak; + next_peak.setRT(spec_trace_down.getRT()); + next_peak.setMZ(next_down_peak_mz); + next_peak.setIntensity(next_down_peak_int); + + current_trace.push_front(next_peak); + // FWHM average + if (fwhm_meta_idx != -1) { - updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); + fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); } + // Update the m/z mean of the current trace as we added a new peak + updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); + gathered_idx.emplace_back(trace_down_idx - 1, next_down_peak_idx); + + // Update the m/z variance dynamically + if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) + { + // if (ftl_t > min_fwhm_scans) + { + updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); } + } - ++down_hitting_peak; - conseq_missed_peak_down = 0; - } - else - { - ++conseq_missed_peak_down; - } + ++down_hitting_peak; + conseq_missed_peak_down = 0; + } + else + { + ++conseq_missed_peak_down; + } } --trace_down_idx; ++down_scan_counter; @@ -492,80 +595,73 @@ namespace OpenMS // sampling_rate falls below min_sample_rate_ if (trace_termination_criterion_ == "outlier") { - if (conseq_missed_peak_down > max_consecutive_missing) - { - toggle_down = false; - } - } - else if (trace_termination_criterion_ == "sample_rate") - { - current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / - (double)(down_scan_counter + up_scan_counter + 1); - if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) - { - // std::cout << "stopping down..." << std::endl; - toggle_down = false; - } + if (conseq_missed_peak_down > max_consecutive_missing) + { + toggle_down = false; + } } - } + // else if (trace_termination_criterion_ == "sample_rate") + // { + // current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); + // if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) + // { + // // std::cout << "stopping down..." << std::endl; + // toggle_down = false; + // } + // } + } + } - // *********************************************************** // - // Step 2.2 MOVE UP in RT dim - // *********************************************************** // - if ((trace_up_idx < work_exp.size() - 1) && toggle_up) - { - const MSSpectrum &spec_trace_up = work_exp[trace_up_idx + 1]; + void MassTraceDetection::searchUpInRT(Size& trace_up_idx, bool& toggle_up, const PeakMap& work_exp, const double start_int, const std::vector& spec_offsets, const Size max_traces, + boost::dynamic_bitset<>& peak_visited, std::deque& current_trace, double& centroid_mz, double& ftl_sd, const int& fwhm_meta_idx, + std::vector>& gathered_idx, std::vector& fwhms_mz, double& prev_counter, double& prev_denom, Size& up_hitting_peak, + Size& up_scan_counter, Size& conseq_missed_peak_up, const Size max_consecutive_missing, double& current_sample_rate, const Size min_scans_to_consider, + double& intensity_so_far) + { + if ((trace_up_idx < work_exp.size() - 1) && toggle_up) + { + const MSSpectrum& spec_trace_up = work_exp[trace_up_idx + 1]; if (!spec_trace_up.empty()) { - Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); - double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ(); - double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity(); - - double right_bound = centroid_mz + 3 * ftl_sd; - double left_bound = centroid_mz - 3 * ftl_sd; + Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); + double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ(); + double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity(); - if ((next_up_peak_mz <= right_bound) && - (next_up_peak_mz >= left_bound) && - !allowed_peaks[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) - { + double right_bound = centroid_mz + 3 * ftl_sd; + double left_bound = centroid_mz - 3 * ftl_sd; - if (start_int < next_up_peak_int && !apex_started[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) + if ((next_up_peak_mz <= right_bound) && (next_up_peak_mz >= left_bound) && !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { - outer_loop = true; - // std::cout << "Up\n"; - break; - } + Peak2D next_peak; + next_peak.setRT(spec_trace_up.getRT()); + next_peak.setMZ(next_up_peak_mz); + next_peak.setIntensity(next_up_peak_int); - Peak2D next_peak; - next_peak.setRT(spec_trace_up.getRT()); - next_peak.setMZ(next_up_peak_mz); - next_peak.setIntensity(next_up_peak_int); + current_trace.push_back(next_peak); + if (fwhm_meta_idx != -1) + { + fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); + } + // Update the m/z mean of the current trace as we added a new peak + updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); + gathered_idx.emplace_back(trace_up_idx + 1, next_up_peak_idx); - current_trace.push_back(next_peak); - if (fwhm_meta_idx != -1) + // Update the m/z variance dynamically + if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) + { + // if (ftl_t > min_fwhm_scans) { - fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); + updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); } - // Update the m/z mean of the current trace as we added a new peak - updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); - gathered_idx.emplace_back(trace_up_idx + 1, next_up_peak_idx); + } - // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) + ++up_hitting_peak; + conseq_missed_peak_up = 0; + } + else { - // if (ftl_t > min_fwhm_scans) - { - updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); - } + ++conseq_missed_peak_up; } - - ++up_hitting_peak; - conseq_missed_peak_up = 0; - } - else - { - ++conseq_missed_peak_up; - } } ++trace_up_idx; @@ -573,86 +669,60 @@ namespace OpenMS if (trace_termination_criterion_ == "outlier") { - if (conseq_missed_peak_up > max_consecutive_missing) - { - toggle_up = false; - } - } - else if (trace_termination_criterion_ == "sample_rate") - { - current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); - - if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) - { - // std::cout << "stopping up" << std::endl; - toggle_up = false; - } + if (conseq_missed_peak_up > max_consecutive_missing) + { + toggle_up = false; + } } - } - } - - apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; // kommt man hier ueberhaubt zwanghaft an wenn man mit einem Peak startet - // eigentlich schon es sei denn man überspringt durch allowed peaks, dafuer wird aber in zeile 320 das bitset verodert - - if (outer_loop) - { - // std::cout << "Continue\n"; - apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; - continue; - } - - // std::cout << "After\n"; - // std::cout << "current sr: " << current_sample_rate << std::endl; - double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); - - double mt_quality((double)current_trace.size() / (double)num_scans); - // std::cout << "mt quality: " << mt_quality << std::endl; - double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); - - // *********************************************************** // - // Step 2.3 check if minimum length and quality of mass trace criteria are met - // *********************************************************** // - bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); - if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) - { - // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl; - - // mark all peaks as visited - for (Size i = 0; i < gathered_idx.size(); ++i) - { - apex_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; - } + // else if (trace_termination_criterion_ == "sample_rate") + // { + // current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); + + // if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) + // { + // // std::cout << "stopping up" << std::endl; + // toggle_up = false; + // } + // } + } + } - // create new MassTrace object and store collected peaks from list current_tracekm - MassTrace new_trace(current_trace); - new_trace.updateWeightedMeanRT(); - new_trace.updateWeightedMeanMZ(); - if (!fwhms_mz.empty()) - { - new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); - } - new_trace.setQuantMethod(quant_method_); - // new_trace.setCentroidSD(ftl_sd); - new_trace.updateWeightedMZsd(); - #pragma omp critical(add_trace) - { - new_trace.setLabel("T" + String(trace_number)); - ++trace_number; - ++current_trace_number; - found_masstraces.push_back(new_trace); - peaks_detected += new_trace.getSize(); - this->setProgress(peaks_detected); - } + void MassTraceDetection::checkAndAddTrace(const double & rt_range, const bool max_trace_criteria, const double & mt_quality, Size & trace_number, + boost::dynamic_bitset<>& peak_visited, const std::vector& spec_offsets, const std::vector>& gathered_idx, + const std::deque& current_trace, std::vector& found_masstraces, Size & peaks_detected, const Size max_traces, const std::vector& fwhms_mz) + { + if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) + { + for (Size i = 0; i < gathered_idx.size(); ++i) + { + peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + } + // create new MassTrace object and store collected peaks from list current_trace + MassTrace new_trace(current_trace.begin(), current_trace.end()); + new_trace.updateWeightedMeanRT(); + new_trace.updateWeightedMeanMZ(); + if (!fwhms_mz.empty()) + { + new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); + } + new_trace.setQuantMethod(quant_method_); + // new_trace.setCentroidSD(ftl_sd); + new_trace.updateWeightedMZsd(); - // check if we already reached the (optional) maximum number of traces - // if (max_traces > 0 && found_masstraces.size() == max_traces) - // { - // break; - // } - } + #pragma omp critical(add_trace) + { + new_trace.setLabel("T" + String(trace_number)); + ++trace_number; + found_masstraces.push_back(new_trace); + peaks_detected += new_trace.getSize(); + this->setProgress(peaks_detected); + } + // check if we already reached the (optional) maximum number of traces + // if (max_traces > 0 && found_masstraces.size() == max_traces) + // { + // break; + // } } - return apex_visited; } - -} \ No newline at end of file +} // namespace OpenMS \ No newline at end of file From b5711d4428a8d40da1a2d5503d7d57fa23716f55 Mon Sep 17 00:00:00 2001 From: Tristan Date: Fri, 19 May 2023 21:45:07 +0200 Subject: [PATCH 13/24] Intensity Version --- .../DATAREDUCTION/MassTraceDetection.h | 8 +- .../DATAREDUCTION/MassTraceDetection.cpp | 633 ++++++++---------- 2 files changed, 290 insertions(+), 351 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index ee76bd2679e..0db2b6ac8c4 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -116,7 +116,7 @@ namespace OpenMS }; /// The internal run method - void run_(std::vector& chrom_apices, + void run_(const std::vector& chrom_apices, const Size peak_count, const PeakMap & work_exp, const std::vector& spec_offsets, @@ -129,7 +129,6 @@ namespace OpenMS // boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx); - void searchDownInRT(Size& trace_down_idx, bool& toggle_down, const PeakMap& work_exp, const double start_int, const std::vector& spec_offsets, const Size max_traces, boost::dynamic_bitset<>& peak_visited, std::deque& current_trace, double& centroid_mz, double& ftl_sd, const int& fwhm_meta_idx, std::vector>& gathered_idx, std::vector& fwhms_mz, double& prev_counter, double& prev_denom, Size& down_hitting_peak, @@ -147,7 +146,10 @@ namespace OpenMS boost::dynamic_bitset<>& peak_visited, const std::vector& spec_offsets, const std::vector>& gathered_idx, const std::deque& current_trace, std::vector& found_masstraces, Size & peaks_detected, const Size max_traces, const std::vector& fwhms_mz); - + + boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, + std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, + boost::dynamic_bitset<>& apex_started, Size& trace_number, Size& peaks_detected, Size& current_trace_number, int fwhm_meta_idx); // parameter stuff double mass_error_ppm_; diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index b59d20b8d68..4bcc0b35f16 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -34,11 +34,9 @@ #include -#include #include #include -#include namespace OpenMS { @@ -195,7 +193,6 @@ namespace OpenMS void MassTraceDetection::run(const PeakMap& input_exp, std::vector& found_masstraces, const Size max_traces) { - // make sure the output vector is empty found_masstraces.clear(); @@ -254,12 +251,11 @@ namespace OpenMS // discard last spectrum's offset spec_offsets.pop_back(); - //hatten wir mal parallelisiert std::sort(chrom_apices.begin(), chrom_apices.end(), - [&work_exp](const Apex & a, + [](const Apex & a, const Apex & b) -> bool - { - return a.intensity > b.intensity; + { + return a.intensity < b.intensity; }); // ********************************************************************* @@ -271,27 +267,16 @@ namespace OpenMS return; } // end of MassTraceDetection::run - double MassTraceDetection::find_offset_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec) - { - double centroid_mz = input_exp[apices_vec[peak_index_in_apices_vec].scan_idx][apices_vec[peak_index_in_apices_vec].peak_idx].getMZ(); - double offset = 3 * Math::ppmToMass(mass_error_ppm_,centroid_mz); // full trace length standard deviation - return offset; - } - - double MassTraceDetection::findOffset_(double centroid_mz, double mass_error_ppm_) - { - double ftl_sd = Math::ppmToMass(mass_error_ppm_,centroid_mz); - double offset = 3 * ftl_sd; // ftl_sd == full trace length standard deviation - return offset; - } - - void MassTraceDetection::run_(std::vector& chrom_apices, + void MassTraceDetection::run_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces) { + // boost::dynamic_bitset<> peak_visited(total_peak_count); + // boost::dynamic_bitset<> apex_started(total_peak_count); + Size trace_number(1); // check presence of FWHM meta data @@ -320,272 +305,185 @@ namespace OpenMS this->startProgress(0, total_peak_count, "mass trace detection"); Size peaks_detected(0); - std::vector mz_locked; - - - // #pragma omp parallel for schedule(static) - // for (Size i = 0; i < chrom_apices.size(); ++i) - // { - // // std::vector chrom_apices_2 = {chrom_apices.cbegin() + gaps_filtered[i], chrom_apices.cbegin() + gaps_filtered[i+1]}; // copies because of bin overlap - // std::sort(chrom_apices.begin() + gaps_filtered[i], chrom_apices.begin() + gaps_filtered[i+1], // sort by intensity - // [](const Apex & a, const Apex & b) -> bool - // { - // return a.intensity > b.intensity; - // }); - boost::dynamic_bitset<> peak_visited(total_peak_count); - // // boost::dynamic_bitset<> peak_visited(gaps_filtered[i+1]-gaps_filtered[i]); - // for (auto m_it = chrom_apices.cbegin() + gaps_filtered[i]; m_it != chrom_apices.cbegin() + gaps_filtered[i+1]; ++m_it) // iterate reverse from high intensity to low intensity - // #pragma omp parallel - // for (Size i = 0; i < chrom_apices.size(); ++i) - Size index = 0; - #pragma omp parallel shared (index) - while(true) - { - if(index >= chrom_apices.size()) break; - // std::cout << "start\n"; - auto m_it = chrom_apices[index]; - // ++index; - // Size apex_scan_idx(m_it->scan_idx); - // Size apex_peak_idx(m_it->peak_idx); - Size apex_scan_idx(m_it.scan_idx); - Size apex_peak_idx(m_it.peak_idx); - double currentApex_mz = work_exp[apex_scan_idx][apex_peak_idx].getMZ(); + bool trace_added = false; + Size trace_count{}; + boost::dynamic_bitset<> allowed_peaks{total_peak_count}; + boost::dynamic_bitset<> apex_started{total_peak_count}; - // std::cout << "index: " << index << '\n'; + while(true) + // for(Size i{}; i < 1; ++i) + { + Size current_trace_number{}; + boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, apex_started, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); + + // apex_started; // verodern der bitsets um auch apexes aus gefundenen traces zu ueberspringen + if(trace_count == (trace_count + current_trace_number)) + // if((apex_started | allowed_peaks).all()) + { + // std::cout << "break while\n"; + break; + } else + { + allowed_peaks |= new_found; + std::cout << "trace count plus\n"; + trace_count += current_trace_number; + + } + } + // std::cout << "peaks detected: " << chrom_apices.size() - peaks_detected << '\n'; + this->endProgress(); - if (peak_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) - { - // std::cout << "Continue peak visited\n"; - #pragma omp atomic - ++index; - continue; - } + } - bool lock_found = false; - #pragma omp critical (check_lock) - { - // std::cout << "mz_locked Size: " << mz_locked.size() << '\n'; - for (const auto& lockedApex : mz_locked) - { - if ((currentApex_mz + findOffset_(currentApex_mz, mass_error_ppm_) > lockedApex) || (currentApex_mz - findOffset_(currentApex_mz, mass_error_ppm_) < lockedApex)) - { - lock_found = true; - } - } - if(!lock_found) - { - // std::cout << "pusg back and next\n"; - mz_locked.push_back(currentApex_mz); - ++index; - // if(index == chrom_apices.size()) break; - } - } + void MassTraceDetection::updateMembers_() + { + mass_error_ppm_ = (double)param_.getValue("mass_error_ppm"); + noise_threshold_int_ = (double)param_.getValue("noise_threshold_int"); + chrom_peak_snr_ = (double)param_.getValue("chrom_peak_snr"); + quant_method_ = MassTrace::getQuantMethod((String)param_.getValue("quant_method").toString()); - // std::cout << "Are we here?\n"; + trace_termination_criterion_ = (String)param_.getValue("trace_termination_criterion").toString(); + trace_termination_outliers_ = (Size)param_.getValue("trace_termination_outliers"); + min_sample_rate_ = (double)param_.getValue("min_sample_rate"); + min_trace_length_ = (double)param_.getValue("min_trace_length"); + max_trace_length_ = (double)param_.getValue("max_trace_length"); + reestimate_mt_sd_ = param_.getValue("reestimate_mt_sd").toBool(); + } - if(lock_found) - { - // std::cout << "continue\n"; - continue; - } - Peak2D apex_peak; - apex_peak.setRT(work_exp[apex_scan_idx].getRT()); - apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); - apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); - Size trace_up_idx(apex_scan_idx); - Size trace_down_idx(apex_scan_idx); - std::deque current_trace; - current_trace.push_back(apex_peak); - std::vector fwhms_mz; // peak-FWHM meta values of collected peaks + boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx) + { + boost::dynamic_bitset<> apex_visited{total_peak_count}; - // Initialization for the iterative version of weighted m/z mean calculation - double centroid_mz(apex_peak.getMZ()); - double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ()); - double prev_denom(apex_peak.getIntensity()); + #pragma omp parallel for + for (auto m_it = chrom_apices.crbegin(); m_it != chrom_apices.crend(); ++m_it) + { + bool outer_loop = false; - updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); + Size apex_scan_idx(m_it->scan_idx); + Size apex_peak_idx(m_it->peak_idx); - std::vector > gathered_idx; - gathered_idx.emplace_back(apex_scan_idx, apex_peak_idx); - if (fwhm_meta_idx != -1) - { - fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); - } + if (apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx]) + { + continue; + } // damit klappt es nicht warum, der müsste die Peaks mit denen wir schonmal angefangen haben überspringen koennen ? - Size up_hitting_peak(0), down_hitting_peak(0); - Size up_scan_counter(0), down_scan_counter(0); + if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx] || apex_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) + { + continue; + } - bool toggle_up = true, toggle_down = true; + Peak2D apex_peak; + apex_peak.setRT(work_exp[apex_scan_idx].getRT()); + apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); + apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); - Size conseq_missed_peak_up(0), conseq_missed_peak_down(0); - Size max_consecutive_missing(trace_termination_outliers_); + double start_int = work_exp[apex_scan_idx][apex_peak_idx].getIntensity(); - double current_sample_rate(1.0); - // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); - Size min_scans_to_consider(5); + Size trace_up_idx(apex_scan_idx); + Size trace_down_idx(apex_scan_idx); - // double outlier_ratio(0.3); + std::list current_trace; + current_trace.push_back(apex_peak); + std::vector fwhms_mz; // peak-FWHM meta values of collected peaks - // double ftl_mean(centroid_mz); - // double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); - double ftl_sd(Math::ppmToMass(mass_error_ppm_, centroid_mz)); - double intensity_so_far(apex_peak.getIntensity()); + // Initialization for the iterative version of weighted m/z mean calculation + double centroid_mz(apex_peak.getMZ()); + double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ()); + double prev_denom(apex_peak.getIntensity()); - double start_int{}; + updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); - while (((trace_down_idx > 0) && toggle_down) || - ((trace_up_idx < work_exp.size() - 1) && toggle_up) - ) - { - // *********************************************************** // - // Step 2.1 MOVE DOWN in RT dim - // *********************************************************** // + std::vector> gathered_idx; + gathered_idx.emplace_back(apex_scan_idx, apex_peak_idx); + if (fwhm_meta_idx != -1) + { + fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); + } - searchDownInRT(trace_down_idx, toggle_down, work_exp, start_int, spec_offsets, max_traces, peak_visited, current_trace, centroid_mz, ftl_sd, fwhm_meta_idx, gathered_idx, fwhms_mz, - prev_counter, prev_denom, down_hitting_peak, down_scan_counter, conseq_missed_peak_down, max_consecutive_missing, current_sample_rate, min_scans_to_consider, - intensity_so_far); + Size up_hitting_peak(0), down_hitting_peak(0); + Size up_scan_counter(0), down_scan_counter(0); - // *********************************************************** // - // Step 2.2 MOVE UP in RT dim - // *********************************************************** // + bool toggle_up = true, toggle_down = true; - searchUpInRT(trace_up_idx, toggle_up, work_exp, start_int, spec_offsets, max_traces, peak_visited, current_trace, centroid_mz, ftl_sd, fwhm_meta_idx, gathered_idx, fwhms_mz, - prev_counter, prev_denom, up_hitting_peak, up_scan_counter, conseq_missed_peak_up, max_consecutive_missing, current_sample_rate, min_scans_to_consider, - intensity_so_far); + Size conseq_missed_peak_up(0), conseq_missed_peak_down(0); + Size max_consecutive_missing(trace_termination_outliers_); - } + double current_sample_rate(1.0); + // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); + Size min_scans_to_consider(5); - double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); + // double outlier_ratio(0.3); - double mt_quality((double)current_trace.size() / (double)num_scans); - // std::cout << "mt quality: " << mt_quality << std::endl; - double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); + // double ftl_mean(centroid_mz); + double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); + double intensity_so_far(apex_peak.getIntensity()); + while (((trace_down_idx > 0) && toggle_down) || + ((trace_up_idx < work_exp.size() - 1) && toggle_up)) + { // *********************************************************** // - // Step 2.3 check if minimum length and quality of mass trace criteria are met + // Step 2.1 MOVE DOWN in RT dim // *********************************************************** // - bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); - if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_ ) + if ((trace_down_idx > 0) && toggle_down) { - for (Size i = 0; i < gathered_idx.size(); ++i) - { - peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; - } - // create new MassTrace object and store collected peaks from list current_trace - MassTrace new_trace(current_trace.begin(), current_trace.end()); - new_trace.updateWeightedMeanRT(); - new_trace.updateWeightedMeanMZ(); - if (!fwhms_mz.empty()) - { - new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); - } - new_trace.setQuantMethod(quant_method_); - //new_trace.setCentroidSD(ftl_sd); - new_trace.updateWeightedMZsd(); - - #pragma omp critical (add_trace) - { - new_trace.setLabel("T" + String(trace_number)); - ++trace_number; - found_masstraces.push_back(new_trace); - peaks_detected += new_trace.getSize(); - this->setProgress(peaks_detected); - } - // check if we already reached the (optional) maximum number of traces - if (max_traces > 0 && found_masstraces.size() == max_traces) + const MSSpectrum &spec_trace_down = work_exp[trace_down_idx - 1]; + if (!spec_trace_down.empty()) { - break; - } - } + Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); + double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ(); + double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); - // checkAndAddTrace(rt_range, max_trace_criteria, mt_quality, trace_number, peak_visited, spec_offsets, gathered_idx, - // current_trace,found_masstraces, peaks_detected, max_traces, fwhms_mz); + double right_bound = centroid_mz + 3 * ftl_sd; + double left_bound = centroid_mz - 3 * ftl_sd; + if ((next_down_peak_mz <= right_bound) && + (next_down_peak_mz >= left_bound) && + !allowed_peaks[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) + { - #pragma omp critical - { - auto it = std::find(mz_locked.begin(), mz_locked.end(), currentApex_mz); - if (it != mz_locked.end()) { - mz_locked.erase(it); - // std::cout << "erase\n"; + if (start_int < next_down_peak_int && !apex_started[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) // Hier geht das auch nicht aber selbe sache, wenn mit dem Peak schonmal gestartet wurde faellt er aus dem Kriterium raus + { + outer_loop = true; + // std::cout << "Down\n"; + break; } - // std::cout << "index: " << index << '\n'; - } - } - this->endProgress(); - } - - void MassTraceDetection::updateMembers_() - { - mass_error_ppm_ = (double)param_.getValue("mass_error_ppm"); - noise_threshold_int_ = (double)param_.getValue("noise_threshold_int"); - chrom_peak_snr_ = (double)param_.getValue("chrom_peak_snr"); - quant_method_ = MassTrace::getQuantMethod((String)param_.getValue("quant_method").toString()); - - trace_termination_criterion_ = (String)param_.getValue("trace_termination_criterion").toString(); - trace_termination_outliers_ = (Size)param_.getValue("trace_termination_outliers"); - min_sample_rate_ = (double)param_.getValue("min_sample_rate"); - min_trace_length_ = (double)param_.getValue("min_trace_length"); - max_trace_length_ = (double)param_.getValue("max_trace_length"); - reestimate_mt_sd_ = param_.getValue("reestimate_mt_sd").toBool(); - } + Peak2D next_peak; + next_peak.setRT(spec_trace_down.getRT()); + next_peak.setMZ(next_down_peak_mz); + next_peak.setIntensity(next_down_peak_int); - void MassTraceDetection::searchDownInRT(Size& trace_down_idx, bool& toggle_down, const PeakMap& work_exp, const double start_int, const std::vector& spec_offsets, const Size max_traces, - boost::dynamic_bitset<>& peak_visited, std::deque& current_trace, double& centroid_mz, double& ftl_sd, const int& fwhm_meta_idx, - std::vector>& gathered_idx, std::vector& fwhms_mz, double& prev_counter, double& prev_denom, Size& down_hitting_peak, - Size& down_scan_counter, Size& conseq_missed_peak_down, const Size max_consecutive_missing, double& current_sample_rate, - const Size min_scans_to_consider, double& intensity_so_far) - { - if ((trace_down_idx > 0) && toggle_down) - { - const MSSpectrum& spec_trace_down = work_exp[trace_down_idx - 1]; - if (!spec_trace_down.empty()) - { - Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); - double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ(); - double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); - double right_bound = centroid_mz + 3 * ftl_sd; - double left_bound = centroid_mz - 3 * ftl_sd; + current_trace.push_front(next_peak); + // FWHM average + if (fwhm_meta_idx != -1) + { + fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); + } + // Update the m/z mean of the current trace as we added a new peak + updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); + gathered_idx.emplace_back(trace_down_idx - 1, next_down_peak_idx); - if ((next_down_peak_mz <= right_bound) && (next_down_peak_mz >= left_bound) && !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) + // Update the m/z variance dynamically + if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) { - Peak2D next_peak; - next_peak.setRT(spec_trace_down.getRT()); - next_peak.setMZ(next_down_peak_mz); - next_peak.setIntensity(next_down_peak_int); - - current_trace.push_front(next_peak); - // FWHM average - if (fwhm_meta_idx != -1) + // if (ftl_t > min_fwhm_scans) { - fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); + updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); } - // Update the m/z mean of the current trace as we added a new peak - updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); - gathered_idx.emplace_back(trace_down_idx - 1, next_down_peak_idx); - - // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) - { - // if (ftl_t > min_fwhm_scans) - { - updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); } - } - ++down_hitting_peak; - conseq_missed_peak_down = 0; - } - else - { - ++conseq_missed_peak_down; - } + ++down_hitting_peak; + conseq_missed_peak_down = 0; + } + else + { + ++conseq_missed_peak_down; + } } --trace_down_idx; ++down_scan_counter; @@ -595,73 +493,80 @@ namespace OpenMS // sampling_rate falls below min_sample_rate_ if (trace_termination_criterion_ == "outlier") { - if (conseq_missed_peak_down > max_consecutive_missing) - { - toggle_down = false; - } + if (conseq_missed_peak_down > max_consecutive_missing) + { + toggle_down = false; + } } - // else if (trace_termination_criterion_ == "sample_rate") - // { - // current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); - // if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) - // { - // // std::cout << "stopping down..." << std::endl; - // toggle_down = false; - // } - // } - } - } + else if (trace_termination_criterion_ == "sample_rate") + { + current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / + (double)(down_scan_counter + up_scan_counter + 1); + if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) + { + // std::cout << "stopping down..." << std::endl; + toggle_down = false; + } + } + } - void MassTraceDetection::searchUpInRT(Size& trace_up_idx, bool& toggle_up, const PeakMap& work_exp, const double start_int, const std::vector& spec_offsets, const Size max_traces, - boost::dynamic_bitset<>& peak_visited, std::deque& current_trace, double& centroid_mz, double& ftl_sd, const int& fwhm_meta_idx, - std::vector>& gathered_idx, std::vector& fwhms_mz, double& prev_counter, double& prev_denom, Size& up_hitting_peak, - Size& up_scan_counter, Size& conseq_missed_peak_up, const Size max_consecutive_missing, double& current_sample_rate, const Size min_scans_to_consider, - double& intensity_so_far) - { - if ((trace_up_idx < work_exp.size() - 1) && toggle_up) - { - const MSSpectrum& spec_trace_up = work_exp[trace_up_idx + 1]; + // *********************************************************** // + // Step 2.2 MOVE UP in RT dim + // *********************************************************** // + if ((trace_up_idx < work_exp.size() - 1) && toggle_up) + { + const MSSpectrum &spec_trace_up = work_exp[trace_up_idx + 1]; if (!spec_trace_up.empty()) { - Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); - double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ(); - double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity(); + Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); + double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ(); + double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity(); - double right_bound = centroid_mz + 3 * ftl_sd; - double left_bound = centroid_mz - 3 * ftl_sd; + double right_bound = centroid_mz + 3 * ftl_sd; + double left_bound = centroid_mz - 3 * ftl_sd; - if ((next_up_peak_mz <= right_bound) && (next_up_peak_mz >= left_bound) && !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) + if ((next_up_peak_mz <= right_bound) && + (next_up_peak_mz >= left_bound) && + !allowed_peaks[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) + { + + if (start_int < next_up_peak_int && !apex_started[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { - Peak2D next_peak; - next_peak.setRT(spec_trace_up.getRT()); - next_peak.setMZ(next_up_peak_mz); - next_peak.setIntensity(next_up_peak_int); + outer_loop = true; + // std::cout << "Up\n"; + break; + } - current_trace.push_back(next_peak); - if (fwhm_meta_idx != -1) - { - fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); - } - // Update the m/z mean of the current trace as we added a new peak - updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); - gathered_idx.emplace_back(trace_up_idx + 1, next_up_peak_idx); + Peak2D next_peak; + next_peak.setRT(spec_trace_up.getRT()); + next_peak.setMZ(next_up_peak_mz); + next_peak.setIntensity(next_up_peak_int); - // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) - { - // if (ftl_t > min_fwhm_scans) + current_trace.push_back(next_peak); + if (fwhm_meta_idx != -1) { - updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); + fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); } - } + // Update the m/z mean of the current trace as we added a new peak + updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); + gathered_idx.emplace_back(trace_up_idx + 1, next_up_peak_idx); - ++up_hitting_peak; - conseq_missed_peak_up = 0; - } - else + // Update the m/z variance dynamically + if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) { - ++conseq_missed_peak_up; + // if (ftl_t > min_fwhm_scans) + { + updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); + } } + + ++up_hitting_peak; + conseq_missed_peak_up = 0; + } + else + { + ++conseq_missed_peak_up; + } } ++trace_up_idx; @@ -669,60 +574,92 @@ namespace OpenMS if (trace_termination_criterion_ == "outlier") { - if (conseq_missed_peak_up > max_consecutive_missing) - { - toggle_up = false; - } - } - // else if (trace_termination_criterion_ == "sample_rate") - // { - // current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); - - // if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) - // { - // // std::cout << "stopping up" << std::endl; - // toggle_up = false; - // } - // } - } - } - - - void MassTraceDetection::checkAndAddTrace(const double & rt_range, const bool max_trace_criteria, const double & mt_quality, Size & trace_number, - boost::dynamic_bitset<>& peak_visited, const std::vector& spec_offsets, const std::vector>& gathered_idx, - const std::deque& current_trace, std::vector& found_masstraces, Size & peaks_detected, const Size max_traces, const std::vector& fwhms_mz) - { - if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) - { - for (Size i = 0; i < gathered_idx.size(); ++i) - { - peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + if (conseq_missed_peak_up > max_consecutive_missing) + { + toggle_up = false; + } } - // create new MassTrace object and store collected peaks from list current_trace - MassTrace new_trace(current_trace.begin(), current_trace.end()); - new_trace.updateWeightedMeanRT(); - new_trace.updateWeightedMeanMZ(); - if (!fwhms_mz.empty()) + else if (trace_termination_criterion_ == "sample_rate") { - new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); - } - new_trace.setQuantMethod(quant_method_); - // new_trace.setCentroidSD(ftl_sd); - new_trace.updateWeightedMZsd(); + current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); - #pragma omp critical(add_trace) - { - new_trace.setLabel("T" + String(trace_number)); - ++trace_number; - found_masstraces.push_back(new_trace); - peaks_detected += new_trace.getSize(); - this->setProgress(peaks_detected); + if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) + { + // std::cout << "stopping up" << std::endl; + toggle_up = false; + } } - // check if we already reached the (optional) maximum number of traces - // if (max_traces > 0 && found_masstraces.size() == max_traces) - // { - // break; - // } + } + } + + // apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; // kommt man hier ueberhaubt zwanghaft an wenn man mit einem Peak startet + // eigentlich schon es sei denn man überspringt durch allowed peaks, dafuer wird aber in zeile 320 das bitset verodert + + if (outer_loop) + { + // std::cout << "Continue\n"; + // apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; + continue; + } + + // std::cout << "After\n"; + // std::cout << "current sr: " << current_sample_rate << std::endl; + double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); + + double mt_quality((double)current_trace.size() / (double)num_scans); + // std::cout << "mt quality: " << mt_quality << std::endl; + double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); + + apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; + + // *********************************************************** // + // Step 2.3 check if minimum length and quality of mass trace criteria are met + // *********************************************************** // + bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); + if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) + { + // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl; + + // mark all peaks as visited + for (Size i = 0; i < gathered_idx.size(); ++i) + { + allowed_peaks[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + } + + // create new MassTrace object and store collected peaks from list current_tracekm + MassTrace new_trace(current_trace); + new_trace.updateWeightedMeanRT(); + new_trace.updateWeightedMeanMZ(); + if (!fwhms_mz.empty()) + { + new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); + } + new_trace.setQuantMethod(quant_method_); + // new_trace.setCentroidSD(ftl_sd); + new_trace.updateWeightedMZsd(); + + #pragma omp critical(add_trace) + { + new_trace.setLabel("T" + String(trace_number)); + ++trace_number; + ++current_trace_number; + found_masstraces.push_back(new_trace); + peaks_detected += new_trace.getSize(); + this->setProgress(peaks_detected); + } + + // check if we already reached the (optional) maximum number of traces + // if (max_traces > 0 && found_masstraces.size() == max_traces) + // { + // break; + // } + } } + return apex_visited; } -} // namespace OpenMS \ No newline at end of file +} + + + + + From dc9847455df35367f477b5e93cffe34530cc2bf7 Mon Sep 17 00:00:00 2001 From: Tristan Date: Sat, 20 May 2023 10:10:36 +0200 Subject: [PATCH 14/24] Same result with different threads --- .../DATAREDUCTION/MassTraceDetection.h | 5 +++-- .../DATAREDUCTION/MassTraceDetection.cpp | 18 +++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index 0db2b6ac8c4..6ec4d42246b 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -148,8 +148,9 @@ namespace OpenMS const std::vector& fwhms_mz); boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, - std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, - boost::dynamic_bitset<>& apex_started, Size& trace_number, Size& peaks_detected, Size& current_trace_number, int fwhm_meta_idx); + std::vector& found_masstraces, const Size max_traces, const boost::dynamic_bitset<>& allowed_peaks, + const boost::dynamic_bitset<> apex_started_given, boost::dynamic_bitset<>& apex_started, Size& trace_number, Size& peaks_detected, + Size& current_trace_number, int fwhm_meta_idx); // parameter stuff double mass_error_ppm_; diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index 4bcc0b35f16..9383f3b505a 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -306,7 +306,7 @@ namespace OpenMS Size peaks_detected(0); - bool trace_added = false; + // bool trace_added = false; Size trace_count{}; boost::dynamic_bitset<> allowed_peaks{total_peak_count}; boost::dynamic_bitset<> apex_started{total_peak_count}; @@ -315,7 +315,7 @@ namespace OpenMS // for(Size i{}; i < 1; ++i) { Size current_trace_number{}; - boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, apex_started, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); + boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, apex_started, apex_started, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); // apex_started; // verodern der bitsets um auch apexes aus gefundenen traces zu ueberspringen if(trace_count == (trace_count + current_trace_number)) @@ -355,7 +355,7 @@ namespace OpenMS - boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx) + boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, const boost::dynamic_bitset<>& allowed_peaks, const boost::dynamic_bitset<> apex_started_given, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx) { boost::dynamic_bitset<> apex_visited{total_peak_count}; @@ -367,12 +367,12 @@ namespace OpenMS Size apex_scan_idx(m_it->scan_idx); Size apex_peak_idx(m_it->peak_idx); - if (apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx]) + if (apex_started_given[spec_offsets[apex_scan_idx] + apex_peak_idx]) { continue; } // damit klappt es nicht warum, der müsste die Peaks mit denen wir schonmal angefangen haben überspringen koennen ? - if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx] || apex_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) + if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx]) { continue; } @@ -446,7 +446,7 @@ namespace OpenMS !allowed_peaks[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) { - if (start_int < next_down_peak_int && !apex_started[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) // Hier geht das auch nicht aber selbe sache, wenn mit dem Peak schonmal gestartet wurde faellt er aus dem Kriterium raus + if (start_int < next_down_peak_int && !apex_started_given[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) // Hier geht das auch nicht aber selbe sache, wenn mit dem Peak schonmal gestartet wurde faellt er aus dem Kriterium raus { outer_loop = true; // std::cout << "Down\n"; @@ -530,7 +530,7 @@ namespace OpenMS !allowed_peaks[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { - if (start_int < next_up_peak_int && !apex_started[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) + if (start_int < next_up_peak_int && !apex_started_given[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { outer_loop = true; // std::cout << "Up\n"; @@ -610,7 +610,7 @@ namespace OpenMS // std::cout << "mt quality: " << mt_quality << std::endl; double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); - apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; + apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; // maybe critical // *********************************************************** // // Step 2.3 check if minimum length and quality of mass trace criteria are met @@ -623,7 +623,7 @@ namespace OpenMS // mark all peaks as visited for (Size i = 0; i < gathered_idx.size(); ++i) { - allowed_peaks[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + apex_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; } // create new MassTrace object and store collected peaks from list current_tracekm From 0ad144dd583d80c48c7b8993762545a19e160fb2 Mon Sep 17 00:00:00 2001 From: Tristan Date: Sat, 20 May 2023 10:47:44 +0200 Subject: [PATCH 15/24] Other while loop break --- .../source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index 9383f3b505a..e944c1cc3df 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -318,15 +318,15 @@ namespace OpenMS boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, apex_started, apex_started, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); // apex_started; // verodern der bitsets um auch apexes aus gefundenen traces zu ueberspringen - if(trace_count == (trace_count + current_trace_number)) - // if((apex_started | allowed_peaks).all()) + // if(trace_count == (trace_count + current_trace_number)) + if((apex_started | allowed_peaks).all()) { // std::cout << "break while\n"; break; } else { allowed_peaks |= new_found; - std::cout << "trace count plus\n"; + // std::cout << "trace count plus\n"; trace_count += current_trace_number; } From f9b4c367e92dec581dff8b5d92ae3d5e8d998a42 Mon Sep 17 00:00:00 2001 From: Tristan Date: Sun, 21 May 2023 19:58:02 +0200 Subject: [PATCH 16/24] Fix small bug, added (add_to_peaks) critical --- .../DATAREDUCTION/MassTraceDetection.cpp | 48 ++++++++++++------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index e944c1cc3df..f62cb34f687 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -310,7 +310,8 @@ namespace OpenMS Size trace_count{}; boost::dynamic_bitset<> allowed_peaks{total_peak_count}; boost::dynamic_bitset<> apex_started{total_peak_count}; - + // Size round{0}; + // Size roundbefore{}; while(true) // for(Size i{}; i < 1; ++i) { @@ -328,8 +329,14 @@ namespace OpenMS allowed_peaks |= new_found; // std::cout << "trace count plus\n"; trace_count += current_trace_number; + // Size falseBitsCount = (allowed_peaks).count(); + // std::cout << "Number of false bits: " << falseBitsCount << std::endl; } + // std::cout << round << "\t" << found_masstraces.size() - roundbefore << '\n'; + // ++round; + // roundbefore = found_masstraces.size(); + // std::cout << allowed_peaks << '\n'; } // std::cout << "peaks detected: " << chrom_apices.size() - peaks_detected << '\n'; this->endProgress(); @@ -352,11 +359,11 @@ namespace OpenMS } - - - - boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, const boost::dynamic_bitset<>& allowed_peaks, const boost::dynamic_bitset<> apex_started_given, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx) - { + boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, + std::vector& found_masstraces, const Size max_traces, const boost::dynamic_bitset<>& allowed_peaks, + const boost::dynamic_bitset<> apex_started_given, boost::dynamic_bitset<>& apex_started, Size& trace_number, Size& peaks_detected, + Size& current_trace_number, int fwhm_meta_idx) + { boost::dynamic_bitset<> apex_visited{total_peak_count}; #pragma omp parallel for @@ -369,11 +376,13 @@ namespace OpenMS if (apex_started_given[spec_offsets[apex_scan_idx] + apex_peak_idx]) { + // std::cout << "apex_started_given\n"; continue; } // damit klappt es nicht warum, der müsste die Peaks mit denen wir schonmal angefangen haben überspringen koennen ? if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx]) { + // std::cout << "allowed_peaks\n"; continue; } @@ -621,11 +630,13 @@ namespace OpenMS // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl; // mark all peaks as visited - for (Size i = 0; i < gathered_idx.size(); ++i) + #pragma omp critical (add_to_peaks_visited) { - apex_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + for (Size i = 0; i < gathered_idx.size(); ++i) + { + apex_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + } } - // create new MassTrace object and store collected peaks from list current_tracekm MassTrace new_trace(current_trace); new_trace.updateWeightedMeanRT(); @@ -638,7 +649,12 @@ namespace OpenMS // new_trace.setCentroidSD(ftl_sd); new_trace.updateWeightedMZsd(); - #pragma omp critical(add_trace) + // #pragma omp critical (cout) + // { + // std::cout << apex_scan_idx << "\t" << apex_peak_idx << '\t' << new_trace.getCentroidMZ() << '\t' << new_trace.getCentroidRT() << '\n'; + // } + + #pragma omp critical (add_trace) { new_trace.setLabel("T" + String(trace_number)); ++trace_number; @@ -646,13 +662,13 @@ namespace OpenMS found_masstraces.push_back(new_trace); peaks_detected += new_trace.getSize(); this->setProgress(peaks_detected); - } - // check if we already reached the (optional) maximum number of traces - // if (max_traces > 0 && found_masstraces.size() == max_traces) - // { - // break; - // } + // check if we already reached the (optional) maximum number of traces + // if (max_traces > 0 && found_masstraces.size() == max_traces) + // { + // apex_visited.set(); + // } + } } } return apex_visited; From 17d5809d34df0f85bdefaf73c74ad75e148208aa Mon Sep 17 00:00:00 2001 From: Tristan Date: Tue, 23 May 2023 08:41:57 +0200 Subject: [PATCH 17/24] Deadlock ? --- .../DATAREDUCTION/MassTraceDetection.h | 111 ++- .../DATAREDUCTION/MassTraceDetection.cpp | 717 +++++++++++------- 2 files changed, 510 insertions(+), 318 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index 6ec4d42246b..b1c09e90f45 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -34,14 +34,13 @@ #pragma once +#include #include #include #include #include #include -#include - namespace OpenMS { @@ -86,7 +85,12 @@ namespace OpenMS */ /// Allows the iterative computation of the intensity-weighted mean of a mass trace's centroid m/z. - void updateIterativeWeightedMeanMZ(const double &, const double &, double &, double &, double &); + void updateIterativeWeightedMeanMZ(const double added_mz, + const double added_int, + double& centroid_mz, + double& prev_counter, + double& prev_denom + ); /** @name Main computation methods */ @@ -99,6 +103,8 @@ namespace OpenMS /** @name Private methods and members */ + + protected: void updateMembers_() override; @@ -106,17 +112,72 @@ namespace OpenMS struct Apex { - Apex(double intensity, Size scan_idx, Size peak_idx); - double intensity; - Size scan_idx; - Size peak_idx; + Apex(PeakMap& map, const Size scan_idx, const Size peak_idx); + // Default constructors + Apex() = default; + Apex(const Apex& other) = default; + Apex(Apex&& other) = default; + + // Move assignment operator + Apex& operator=(Apex&& other) + { + if (this != &other) { + map_ = other.map_; + scan_idx_ = other.scan_idx_; + peak_idx_ = other.peak_idx_; + } + return *this; + } + + PeakMap& map_; + Size scan_idx_; + Size peak_idx_; + + ///get's the corresponding values + double getMZ() const; + double getRT() const; + double getIntensity() const; + }; + + struct NextIndex + { + /// C'tor: init with number of threads in parallel region + NextIndex(const std::vector& data, const Size total_peak_count, const std::vector& spec_offsets, const double mass_error_ppm); + + /// Get the next free apex index which is not in the neighbourhood of a currently processing apex (in another thread) + /// (Internally adds the apex's m/z to a blacklist which prevents other threads from obtaining an apex nearby) + /// This function blocks until the next free apex is not conflicting anymore - i.e. another thread called setApexAsProcessed() + Size getNextFreeIndex(); + + /// If an apex was processed call this function to remove the apex from the blacklist and increase the current_apex_ + /// ... doesn't create a feature + void setApexAsProcessed(); + /// ... does create a feature + void setApexAsProcessed(const std::vector >& gathered_idx); + + bool isConflictingApex(const Apex a) const; + + bool isVisited(const Size scan_idx, const Size peak_idx) const; - // double upperbound_(); - // double lowerbound_(); + void setNumberOfThreads(const Size thread_num); + + + /// reference for usage + const std::vector& data_; + const std::vector& spec_offsets_; + + /// own datastructure + boost::dynamic_bitset<> peak_visited_; + Size current_Apex_; + std::vector> lock_list_; + double mass_error_ppm_; }; + /// internal check for FWHM meta data + bool checkFWHMMetaData_(const PeakMap& work_exp); + /// The internal run method - void run_(const std::vector& chrom_apices, + void run_(std::vector& chrom_apices, const Size peak_count, const PeakMap & work_exp, const std::vector& spec_offsets, @@ -124,34 +185,10 @@ namespace OpenMS const Size max_traces = 0); // Find Offset for Peak - double find_offset_(Size peak_index_in_apices_vec, double mass_error_ppm_, const PeakMap& input_exp, const std::vector& apices_vec); - double findOffset_(double centroid_mz, double mass_error_ppm_); + static double findOffset_(const double centroid_mz, const double mass_error_ppm_); + Size calc_right_border_(Size peak_index_in_apices_vec, const PeakMap& input_exp, const std::vector& apices_vec); + Size calc_left_border_(Size peak_index_in_apices_vec, const PeakMap& input_exp, const std::vector& apices_vec); - - // boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces, boost::dynamic_bitset<>& allowed_peaks, boost::dynamic_bitset<>& apex_started, Size & trace_number, Size & peaks_detected, Size & current_trace_number, int fwhm_meta_idx); - void searchDownInRT(Size& trace_down_idx, bool& toggle_down, const PeakMap& work_exp, const double start_int, const std::vector& spec_offsets, const Size max_traces, - boost::dynamic_bitset<>& peak_visited, std::deque& current_trace, double& centroid_mz, double& ftl_sd, const int& fwhm_meta_idx, - std::vector>& gathered_idx, std::vector& fwhms_mz, double& prev_counter, double& prev_denom, Size& down_hitting_peak, - Size& down_scan_counter, Size& conseq_missed_peak_down, const Size max_consecutive_missing, double& current_sample_rate, - const Size min_scans_to_consider, double& intensity_so_far); - - void searchUpInRT(Size& trace_up_idx, bool& toggle_up, const PeakMap& work_exp, const double start_int, const std::vector& spec_offsets, const Size max_traces, - boost::dynamic_bitset<>& peak_visited, std::deque& current_trace, double& centroid_mz, double& ftl_sd, const int& fwhm_meta_idx, - std::vector>& gathered_idx, std::vector& fwhms_mz, double& prev_counter, double& prev_denom, Size& up_hitting_peak, - Size& up_scan_counter, Size& conseq_missed_peak_up, const Size max_consecutive_missing, double& current_sample_rate, - const Size min_scans_to_consider, double& intensity_so_far); - - - void checkAndAddTrace(const double & rt_range, const bool max_trace_criteria, const double & mt_quality, Size & trace_number, - boost::dynamic_bitset<>& peak_visited, const std::vector& spec_offsets, const std::vector>& gathered_idx, - const std::deque& current_trace, std::vector& found_masstraces, Size & peaks_detected, const Size max_traces, - const std::vector& fwhms_mz); - - boost::dynamic_bitset<> searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, - std::vector& found_masstraces, const Size max_traces, const boost::dynamic_bitset<>& allowed_peaks, - const boost::dynamic_bitset<> apex_started_given, boost::dynamic_bitset<>& apex_started, Size& trace_number, Size& peaks_detected, - Size& current_trace_number, int fwhm_meta_idx); - // parameter stuff double mass_error_ppm_; double mass_error_da_; diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index f62cb34f687..17b2c6f9b4c 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -29,15 +29,16 @@ // // -------------------------------------------------------------------------- // $Maintainer: Timo Sachsenberg $ -// $Authors: Erhan Kenar, Holger Franken $ +// $Authors: Erhan Kenar, Holger Franken, Tristan Aretz, Manuel Zschaebitz $ // -------------------------------------------------------------------------- -#include +#include +#include +#include +#include #include -#include - namespace OpenMS { MassTraceDetection::MassTraceDetection() : @@ -67,43 +68,150 @@ namespace OpenMS this->setLogType(CMD); } + MassTraceDetection::~MassTraceDetection() = default; - MassTraceDetection::Apex::Apex(double intensity, Size scan_idx, Size peak_idx): - intensity(intensity), - scan_idx(scan_idx), - peak_idx(peak_idx) + + MassTraceDetection::Apex::Apex(PeakMap& map, const Size scan_idx, const Size peak_idx): + map_(map), + scan_idx_(scan_idx), + peak_idx_(peak_idx) {} - void MassTraceDetection::updateIterativeWeightedMeanMZ(const double& added_mz, - const double& added_int, double& centroid_mz, double& prev_counter, + double MassTraceDetection::Apex::getMZ() const + { + return map_[scan_idx_][peak_idx_].getMZ(); + } + + double MassTraceDetection::Apex::getRT() const + { + return map_[scan_idx_].getRT(); + } + + double MassTraceDetection::Apex::getIntensity() const + { + return map_[scan_idx_][peak_idx_].getIntensity(); + } + + MassTraceDetection::NextIndex::NextIndex(const std::vector& data, const Size total_peak_count, const std::vector& spec_offsets, const double mass_error_ppm): + data_(data), + spec_offsets_(spec_offsets), + peak_visited_(total_peak_count), + current_Apex_(0), + mass_error_ppm_(mass_error_ppm), + lock_list_(0) + { + } + + void MassTraceDetection::NextIndex::setNumberOfThreads(const Size thread_num) + { + lock_list_.resize(thread_num); + } + + bool MassTraceDetection::NextIndex::isConflictingApex(const MassTraceDetection::Apex a) const + { + // std::cout << "Here1?\n"; + for (const std::pair& i : lock_list_) + { + // std::cout << "Here2?\n"; + if (i.first.containsMZ(a.getMZ()) && i.second.containsRT(a.getRT())) + { + // std::cout << "Here3?\n"; + return true; + } + } + return false; + } + + bool MassTraceDetection::NextIndex::isVisited(const Size scan_idx, const Size peak_idx) const + { + return peak_visited_[spec_offsets_[scan_idx] + peak_idx]; + } + + Size MassTraceDetection::NextIndex::getNextFreeIndex() + { + //isConflictingApex muss anders behandelt werden als isVisited + Size result{}; + #pragma omp critical (look_for_lock) + { + while((current_Apex_ < data_.size()) && + (*this).isVisited(data_[current_Apex_].scan_idx_, data_[current_Apex_].peak_idx_)) + { + // std::cout << "Is Visited\n"; + ++current_Apex_; + } + if (current_Apex_ == data_.size()) + { + result = current_Apex_; + } + else + { + while((*this).isConflictingApex(data_[current_Apex_])) + { + // std::cout << "Is Conflicting\n"; + // for(auto i : lock_list_) + // { + // std::cout << i.first << " " << i.second << "\n"; + // } + // std::cout << '\n'; + } + // std::cout << "After Conflicting\n"; + ++current_Apex_; + const double current_mz = data_[current_Apex_].getMZ(); + double offset_mz = findOffset_(current_mz, mass_error_ppm_); + double offset_rt = 2*5*60; + RangeMZ tmp_mz(data_[current_Apex_].getMZ() - offset_mz, data_[current_Apex_].getMZ() + offset_mz); + RangeRT tmp_rt(data_[current_Apex_].getRT() - offset_rt, data_[current_Apex_].getMZ() + offset_rt); + lock_list_[omp_get_thread_num()] = std::make_pair(tmp_mz, tmp_rt); + } + } + return current_Apex_; //next apex + } + + void MassTraceDetection::NextIndex::setApexAsProcessed() + { + lock_list_[omp_get_thread_num()] = std::make_pair(RangeMZ{0,0},RangeRT{0,0}); + } + + void MassTraceDetection::NextIndex::setApexAsProcessed(const std::vector >& gathered_idx) + { + (*this).setApexAsProcessed(); + for (Size i = 0; i < gathered_idx.size(); ++i) + { + peak_visited_[spec_offsets_[gathered_idx[i].first] + gathered_idx[i].second] = true; + } + } // only set this to true + + void MassTraceDetection::updateIterativeWeightedMeanMZ(const double added_mz, + const double added_int, + double& centroid_mz, + double& prev_counter, double& prev_denom) { - double new_weight(added_int); - double new_mz(added_mz); + const double nominater = 1 + (added_int * added_mz) / prev_counter; + const double denominator = 1 + (added_int) / prev_denom; - double counter_tmp(1 + (new_weight * new_mz) / prev_counter); - double denom_tmp(1 + (new_weight) / prev_denom); - centroid_mz *= (counter_tmp / denom_tmp); - prev_counter *= counter_tmp; - prev_denom *= denom_tmp; + centroid_mz *= nominater / denominator; + prev_counter *= nominater; + prev_denom *= denominator; return; } + void MassTraceDetection::run(PeakMap::ConstAreaIterator& begin, PeakMap::ConstAreaIterator& end, std::vector& found_masstraces) { - PeakMap map; - MSSpectrum current_spectrum; - if (begin == end) { return; } - for (; begin != end; ++begin) + PeakMap map; + MSSpectrum current_spectrum; + + while (begin != end) { // AreaIterator points on novel spectrum? if (begin.getRT() != current_spectrum.getRT()) @@ -116,47 +224,55 @@ namespace OpenMS current_spectrum.clear(false); current_spectrum.setRT(begin.getRT()); } + current_spectrum.push_back(*begin); + ++begin; } - map.addSpectrum(current_spectrum); + + map.addSpectrum(std::move(current_spectrum)); run(map, found_masstraces); } // update function for FTL method +//not used + // void updateMeanEstimate(const double& x_t, double& mean_t, Size t) + // { + // mean_t += (1.0 / ((double)t + 1.0)) * (x_t - mean_t); + // } - void updateMeanEstimate(const double& x_t, double& mean_t, Size t) - { - mean_t += (1.0 / ((double)t + 1.0)) * (x_t - mean_t); - } +//not used + // void updateSDEstimate(const double& x_t, const double& mean_t, double& sd_t, Size t) + // { + // double i(t); + // sd_t = (i / (i + 1)) * sd_t + (i / (i + 1) * (i + 1)) * (x_t - mean_t) * (x_t - mean_t); + // // std::cerr << "func: " << tmp << " " << i << std::endl; + // } - void updateSDEstimate(const double& x_t, const double& mean_t, double& sd_t, Size t) - { - double i(t); - sd_t = (i / (i + 1)) * sd_t + (i / (i + 1) * (i + 1)) * (x_t - mean_t) * (x_t - mean_t); - // std::cerr << "func: " << tmp << " " << i << std::endl; - } +//not used + // void updateWeightedSDEstimate(const PeakType& p, const double& mean_t1, double& sd_t, double& last_weights_sum) + // { + // double denom = last_weights_sum * sd_t * sd_t + p.getIntensity() * (p.getMZ() - mean_t1) * (p.getMZ() - mean_t1); + // double weights_sum = last_weights_sum + p.getIntensity(); - void updateWeightedSDEstimate(const PeakType& p, const double& mean_t1, double& sd_t, double& last_weights_sum) - { - double denom = last_weights_sum * sd_t * sd_t + p.getIntensity() * (p.getMZ() - mean_t1) * (p.getMZ() - mean_t1); - double weights_sum = last_weights_sum + p.getIntensity(); + // double tmp_sd = std::sqrt(denom / weights_sum); - double tmp_sd = std::sqrt(denom / weights_sum); + // if (tmp_sd > std::numeric_limits::epsilon()) + // { + // sd_t = tmp_sd; + // } - if (tmp_sd > std::numeric_limits::epsilon()) - { - sd_t = tmp_sd; - } + // last_weights_sum = weights_sum; + // } - last_weights_sum = weights_sum; - } void updateWeightedSDEstimateRobust(const PeakType& p, const double& mean_t1, double& sd_t, double& last_weights_sum) { - double denom1 = std::log(last_weights_sum) + 2 * std::log(sd_t); - double denom2 = std::log(p.getIntensity()) + 2 * std::log(std::abs(p.getMZ() - mean_t1)); - double denom = std::sqrt(std::exp(denom1) + std::exp(denom2)); + double denom = std::sqrt + ( + std::exp(std::log(last_weights_sum) + 2 * std::log(sd_t)) + + std::exp(std::log(p.getIntensity()) + 2 * std::log(std::abs(p.getMZ() - mean_t1))) + ); double weights_sum = last_weights_sum + p.getIntensity(); double tmp_sd = denom / std::sqrt(weights_sum); @@ -164,32 +280,29 @@ namespace OpenMS { sd_t = tmp_sd; } - last_weights_sum = weights_sum; } - void computeWeightedSDEstimate(std::list tmp, const double& mean_t, double& sd_t, const double& /* lower_sd_bound */) - { - double denom(0.0), weights_sum(0.0); - - for (std::list::const_iterator l_it = tmp.begin(); l_it != tmp.end(); ++l_it) - { - denom += l_it->getIntensity() * (l_it->getMZ() - mean_t) * (l_it->getMZ() - mean_t); - weights_sum += l_it->getIntensity(); - } - - double tmp_sd = std::sqrt(denom / (weights_sum)); +//not used + // void computeWeightedSDEstimate(std::list tmp, const double& mean_t, double& sd_t, const double& /* lower_sd_bound */) + // { + // double denom(0.0), weights_sum(0.0); - // std::cout << "tmp_sd" << tmp_sd << std::endl; + // for (std::list::const_iterator l_it = tmp.begin(); l_it != tmp.end(); ++l_it) + // { + // denom += l_it->getIntensity() * (l_it->getMZ() - mean_t) * (l_it->getMZ() - mean_t); + // weights_sum += l_it->getIntensity(); + // } - if (tmp_sd > std::numeric_limits::epsilon()) - { - sd_t = tmp_sd; - } + // double tmp_sd = std::sqrt(denom / (weights_sum)); - return; - } + // if (tmp_sd > std::numeric_limits::epsilon()) + // { + // sd_t = tmp_sd; + // } + // return; + // } void MassTraceDetection::run(const PeakMap& input_exp, std::vector& found_masstraces, const Size max_traces) { @@ -201,16 +314,17 @@ namespace OpenMS // - store potential apices in chrom_apices PeakMap work_exp; std::vector chrom_apices; - - Size total_peak_count(0); std::vector spec_offsets; - spec_offsets.push_back(0); + Size total_peak_count(0); Size spectra_count(0); + spec_offsets.push_back(0); + // *********************************************************** // // Step 1: Detecting potential chromatographic apices // *********************************************************** // + for (const MSSpectrum& it : input_exp) { // check if this is a MS1 survey scan @@ -219,21 +333,21 @@ namespace OpenMS continue; } std::vector indices_passing; - for (Size peak_idx = 0; peak_idx < it.size(); ++peak_idx) + + for(Size peak_idx = 0; + (it[peak_idx].getIntensity() > noise_threshold_int_) && + (peak_idx < it.size()); + ++peak_idx) { - double tmp_peak_int((it)[peak_idx].getIntensity()); - if (tmp_peak_int > noise_threshold_int_) + // Assume that noise_threshold_int_ contains the noise level of the + // data and we want to be chrom_peak_snr times above the noise level + // --> add this peak as possible chromatographic apex + if (it[peak_idx].getIntensity() > chrom_peak_snr_ * noise_threshold_int_) { - // Assume that noise_threshold_int_ contains the noise level of the - // data and we want to be chrom_peak_snr times above the noise level - // --> add this peak as possible chromatographic apex - if (tmp_peak_int > chrom_peak_snr_ * noise_threshold_int_) - { - chrom_apices.emplace_back(tmp_peak_int, spectra_count, indices_passing.size()); - } - indices_passing.push_back(peak_idx); - ++total_peak_count; + chrom_apices.emplace_back(work_exp, spectra_count, indices_passing.size()); } + indices_passing.push_back(peak_idx); + ++total_peak_count; } PeakMap::SpectrumType tmp_spec(it); tmp_spec.select(indices_passing); @@ -252,10 +366,10 @@ namespace OpenMS spec_offsets.pop_back(); std::sort(chrom_apices.begin(), chrom_apices.end(), - [](const Apex & a, + [&chrom_apices](const Apex & a, const Apex & b) -> bool - { - return a.intensity < b.intensity; + { + return a.getIntensity() > b.getIntensity(); }); // ********************************************************************* @@ -267,136 +381,107 @@ namespace OpenMS return; } // end of MassTraceDetection::run - void MassTraceDetection::run_(const std::vector& chrom_apices, - const Size total_peak_count, - const PeakMap& work_exp, - const std::vector& spec_offsets, - std::vector& found_masstraces, - const Size max_traces) + + double MassTraceDetection::findOffset_(const double centroid_mz, const double mass_error_ppm_) { - // boost::dynamic_bitset<> peak_visited(total_peak_count); - // boost::dynamic_bitset<> apex_started(total_peak_count); - - Size trace_number(1); + return (3 * Math::ppmToMass(mass_error_ppm_,centroid_mz)); + } - // check presence of FWHM meta data - int fwhm_meta_idx(-1); + + bool MassTraceDetection::checkFWHMMetaData_(const PeakMap& work_exp) + { Size fwhm_meta_count(0); - for (Size i = 0; i < work_exp.size(); ++i) + + for (Size i = 0; + (i < work_exp.size()) && + (!work_exp[i].getFloatDataArrays().empty()) && + (work_exp[i].getFloatDataArrays()[0].getName() == "FWHM_ppm"); + ++i) { - if (!work_exp[i].getFloatDataArrays().empty() && - work_exp[i].getFloatDataArrays()[0].getName() == "FWHM_ppm") - { - if (work_exp[i].getFloatDataArrays()[0].size() != work_exp[i].size()) - { // float data should always have the same size as the corresponding array - throw Exception::InvalidSize(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, work_exp[i].size()); - } - fwhm_meta_idx = 0; - ++fwhm_meta_count; + if (work_exp[i].getFloatDataArrays()[0].size() != work_exp[i].size()) + { // float data should always have the same size as the corresponding array + throw Exception::InvalidSize(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, work_exp[i].size()); } - } - if (fwhm_meta_count > 0 && fwhm_meta_count != work_exp.size()) - { - throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, - String("FWHM meta arrays are expected to be missing or present for all MS spectra [") + fwhm_meta_count + "/" + work_exp.size() + "]."); + ++fwhm_meta_count; } - - this->startProgress(0, total_peak_count, "mass trace detection"); - Size peaks_detected(0); - - - // bool trace_added = false; - Size trace_count{}; - boost::dynamic_bitset<> allowed_peaks{total_peak_count}; - boost::dynamic_bitset<> apex_started{total_peak_count}; - // Size round{0}; - // Size roundbefore{}; - while(true) - // for(Size i{}; i < 1; ++i) + if (fwhm_meta_count > 0) { - Size current_trace_number{}; - boost::dynamic_bitset<> new_found = searchTraces_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces, max_traces, allowed_peaks, apex_started, apex_started, trace_number, peaks_detected, current_trace_number, fwhm_meta_idx); - - // apex_started; // verodern der bitsets um auch apexes aus gefundenen traces zu ueberspringen - // if(trace_count == (trace_count + current_trace_number)) - if((apex_started | allowed_peaks).all()) - { - // std::cout << "break while\n"; - break; - } else + if (fwhm_meta_count == work_exp.size()) return true; + else { - allowed_peaks |= new_found; - // std::cout << "trace count plus\n"; - trace_count += current_trace_number; - // Size falseBitsCount = (allowed_peaks).count(); - // std::cout << "Number of false bits: " << falseBitsCount << std::endl; - + throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, + String("FWHM meta arrays are expected to be missing or present for all MS spectra [") + fwhm_meta_count + "/" + work_exp.size() + "]."); } - // std::cout << round << "\t" << found_masstraces.size() - roundbefore << '\n'; - // ++round; - // roundbefore = found_masstraces.size(); - // std::cout << allowed_peaks << '\n'; } - // std::cout << "peaks detected: " << chrom_apices.size() - peaks_detected << '\n'; - this->endProgress(); - + else return false; } - void MassTraceDetection::updateMembers_() + void MassTraceDetection::run_(std::vector& chrom_apices, + const Size total_peak_count, + const PeakMap& work_exp, + const std::vector& spec_offsets, + std::vector& found_masstraces, + const Size max_traces) { - mass_error_ppm_ = (double)param_.getValue("mass_error_ppm"); - noise_threshold_int_ = (double)param_.getValue("noise_threshold_int"); - chrom_peak_snr_ = (double)param_.getValue("chrom_peak_snr"); - quant_method_ = MassTrace::getQuantMethod((String)param_.getValue("quant_method").toString()); + ///check for FWHM meta data & corrupted data + bool fwhm_meta_idx = checkFWHMMetaData_(work_exp); - trace_termination_criterion_ = (String)param_.getValue("trace_termination_criterion").toString(); - trace_termination_outliers_ = (Size)param_.getValue("trace_termination_outliers"); - min_sample_rate_ = (double)param_.getValue("min_sample_rate"); - min_trace_length_ = (double)param_.getValue("min_trace_length"); - max_trace_length_ = (double)param_.getValue("max_trace_length"); - reestimate_mt_sd_ = param_.getValue("reestimate_mt_sd").toBool(); - } + ///used variables + boost::dynamic_bitset<> peak_visited(total_peak_count); + Size trace_number(1); + Size peaks_detected(0); + + this->startProgress(0, total_peak_count, "mass trace detection"); + + NextIndex relevant(chrom_apices, total_peak_count, spec_offsets, mass_error_ppm_); // I think relevant is a weird name + #pragma omp parallel + { + #pragma omp single + relevant.setNumberOfThreads(omp_get_num_threads()); // todo: function 'setNumberOfThreads()' implementieren + } - boost::dynamic_bitset<> MassTraceDetection::searchTraces_(const std::vector& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector& spec_offsets, - std::vector& found_masstraces, const Size max_traces, const boost::dynamic_bitset<>& allowed_peaks, - const boost::dynamic_bitset<> apex_started_given, boost::dynamic_bitset<>& apex_started, Size& trace_number, Size& peaks_detected, - Size& current_trace_number, int fwhm_meta_idx) - { - boost::dynamic_bitset<> apex_visited{total_peak_count}; + for(Size i{}; i < relevant.lock_list_.size(); ++i) + { + relevant.lock_list_[i] = std::make_pair(RangeMZ{0,0},RangeRT{0,0}); + } + Size index{}; - #pragma omp parallel for - for (auto m_it = chrom_apices.crbegin(); m_it != chrom_apices.crend(); ++m_it) + #pragma omp parallel + while(true) { - bool outer_loop = false; + + index = relevant.getNextFreeIndex(); + std::cout << index << '\n'; - Size apex_scan_idx(m_it->scan_idx); - Size apex_peak_idx(m_it->peak_idx); + if(index >= chrom_apices.size()) break; - if (apex_started_given[spec_offsets[apex_scan_idx] + apex_peak_idx]) - { - // std::cout << "apex_started_given\n"; - continue; - } // damit klappt es nicht warum, der müsste die Peaks mit denen wir schonmal angefangen haben überspringen koennen ? + std::cout << "Start run\n"; - if (allowed_peaks[spec_offsets[apex_scan_idx] + apex_peak_idx]) - { - // std::cout << "allowed_peaks\n"; - continue; - } + // if (max_traces > 0 && found_masstraces.size() == max_traces) + // { + // continue; + // } // I think there is not max_traces - Peak2D apex_peak; - apex_peak.setRT(work_exp[apex_scan_idx].getRT()); - apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); - apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); + // index = relevant.getNextFreeIndex(); - double start_int = work_exp[apex_scan_idx][apex_peak_idx].getIntensity(); + // if (i == chrom_apices.size()) + // { + // continue; + // } - Size trace_up_idx(apex_scan_idx); - Size trace_down_idx(apex_scan_idx); + // std::cout << "After continues\n"; + + Peak2D apex_peak; + apex_peak.setRT(chrom_apices[index].getRT()); + apex_peak.setMZ(chrom_apices[index].getMZ()); + apex_peak.setIntensity(chrom_apices[index].getIntensity()); + + Size trace_up_idx(chrom_apices[index].scan_idx_); + Size trace_down_idx(chrom_apices[index].scan_idx_); - std::list current_trace; + std::deque current_trace; current_trace.push_back(apex_peak); std::vector fwhms_mz; // peak-FWHM meta values of collected peaks @@ -407,11 +492,11 @@ namespace OpenMS updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); - std::vector> gathered_idx; - gathered_idx.emplace_back(apex_scan_idx, apex_peak_idx); - if (fwhm_meta_idx != -1) + std::vector > gathered_idx; + gathered_idx.emplace_back(chrom_apices[index].scan_idx_,chrom_apices[index].scan_idx_); + if (fwhm_meta_idx) { - fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); + fwhms_mz.push_back(work_exp[chrom_apices[index].scan_idx_].getFloatDataArrays()[0][chrom_apices[index].peak_idx_]); } Size up_hitting_peak(0), down_hitting_peak(0); @@ -426,42 +511,36 @@ namespace OpenMS // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); Size min_scans_to_consider(5); - // double outlier_ratio(0.3); - - // double ftl_mean(centroid_mz); - double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); + double ftl_sd(Math::ppmToMass(mass_error_ppm_, centroid_mz)); double intensity_so_far(apex_peak.getIntensity()); while (((trace_down_idx > 0) && toggle_down) || - ((trace_up_idx < work_exp.size() - 1) && toggle_up)) + ((trace_up_idx < work_exp.size() - 1) && toggle_up) + ) { // *********************************************************** // // Step 2.1 MOVE DOWN in RT dim // *********************************************************** // - if ((trace_down_idx > 0) && toggle_down) + if ((trace_down_idx > 0) && + toggle_down && + (!peak_visited[spec_offsets[chrom_apices[index].scan_idx_] + chrom_apices[index].peak_idx_]) + ) { - const MSSpectrum &spec_trace_down = work_exp[trace_down_idx - 1]; + const MSSpectrum& spec_trace_down = work_exp[trace_down_idx - 1]; if (!spec_trace_down.empty()) { Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ(); double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); - double right_bound = centroid_mz + 3 * ftl_sd; double left_bound = centroid_mz - 3 * ftl_sd; if ((next_down_peak_mz <= right_bound) && (next_down_peak_mz >= left_bound) && - !allowed_peaks[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) + !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx] + ) { - if (start_int < next_down_peak_int && !apex_started_given[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]) // Hier geht das auch nicht aber selbe sache, wenn mit dem Peak schonmal gestartet wurde faellt er aus dem Kriterium raus - { - outer_loop = true; - // std::cout << "Down\n"; - break; - } - Peak2D next_peak; next_peak.setRT(spec_trace_down.getRT()); next_peak.setMZ(next_down_peak_mz); @@ -469,16 +548,16 @@ namespace OpenMS current_trace.push_front(next_peak); // FWHM average - if (fwhm_meta_idx != -1) + if (fwhm_meta_idx) { - fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); + fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[0][next_down_peak_idx]); } // Update the m/z mean of the current trace as we added a new peak updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); gathered_idx.emplace_back(trace_down_idx - 1, next_down_peak_idx); // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) + if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) { // if (ftl_t > min_fwhm_scans) { @@ -493,6 +572,7 @@ namespace OpenMS { ++conseq_missed_peak_down; } + } --trace_down_idx; ++down_scan_counter; @@ -522,9 +602,12 @@ namespace OpenMS // *********************************************************** // // Step 2.2 MOVE UP in RT dim // *********************************************************** // - if ((trace_up_idx < work_exp.size() - 1) && toggle_up) + if ((trace_up_idx < work_exp.size() - 1) && + toggle_up && + (!peak_visited[spec_offsets[chrom_apices[index].scan_idx_] + chrom_apices[index].peak_idx_]) + ) { - const MSSpectrum &spec_trace_up = work_exp[trace_up_idx + 1]; + const MSSpectrum& spec_trace_up = work_exp[trace_up_idx + 1]; if (!spec_trace_up.empty()) { Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); @@ -536,32 +619,24 @@ namespace OpenMS if ((next_up_peak_mz <= right_bound) && (next_up_peak_mz >= left_bound) && - !allowed_peaks[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) + !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { - - if (start_int < next_up_peak_int && !apex_started_given[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) - { - outer_loop = true; - // std::cout << "Up\n"; - break; - } - Peak2D next_peak; next_peak.setRT(spec_trace_up.getRT()); next_peak.setMZ(next_up_peak_mz); next_peak.setIntensity(next_up_peak_int); current_trace.push_back(next_peak); - if (fwhm_meta_idx != -1) + if (fwhm_meta_idx) { - fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); + fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[0][next_up_peak_idx]); } // Update the m/z mean of the current trace as we added a new peak updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); gathered_idx.emplace_back(trace_up_idx + 1, next_up_peak_idx); // Update the m/z variance dynamically - if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) + if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) { // if (ftl_t > min_fwhm_scans) { @@ -571,11 +646,13 @@ namespace OpenMS ++up_hitting_peak; conseq_missed_peak_up = 0; + } else { ++conseq_missed_peak_up; } + } ++trace_up_idx; @@ -601,81 +678,159 @@ namespace OpenMS } } - // apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; // kommt man hier ueberhaubt zwanghaft an wenn man mit einem Peak startet - // eigentlich schon es sei denn man überspringt durch allowed peaks, dafuer wird aber in zeile 320 das bitset verodert - - if (outer_loop) - { - // std::cout << "Continue\n"; - // apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; - continue; - } - - // std::cout << "After\n"; - // std::cout << "current sr: " << current_sample_rate << std::endl; double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); double mt_quality((double)current_trace.size() / (double)num_scans); // std::cout << "mt quality: " << mt_quality << std::endl; double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); - apex_started[spec_offsets[apex_scan_idx] + apex_peak_idx] = true; // maybe critical - // *********************************************************** // // Step 2.3 check if minimum length and quality of mass trace criteria are met // *********************************************************** // bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); - if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) + if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_ ) { - // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl; - - // mark all peaks as visited - #pragma omp critical (add_to_peaks_visited) - { - for (Size i = 0; i < gathered_idx.size(); ++i) - { - apex_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; - } - } - // create new MassTrace object and store collected peaks from list current_tracekm - MassTrace new_trace(current_trace); - new_trace.updateWeightedMeanRT(); - new_trace.updateWeightedMeanMZ(); - if (!fwhms_mz.empty()) - { - new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); + //create new MassTrace object and store collected peaks from list current_trace + // for (Size i = 0; i < gathered_idx.size(); ++i) + // { + // peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; + // } + relevant.setApexAsProcessed(gathered_idx); + MassTrace new_trace(current_trace.begin(), current_trace.end()); + new_trace.updateWeightedMeanRT(); + new_trace.updateWeightedMeanMZ(); + if (!fwhms_mz.empty()) + { + new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); + } + new_trace.setQuantMethod(quant_method_); + //new_trace.setCentroidSD(ftl_sd); + new_trace.updateWeightedMZsd(); + + new_trace.setLabel("T" + String(trace_number)); + ++trace_number; + found_masstraces.push_back(new_trace); + peaks_detected += new_trace.getSize(); + this->setProgress(peaks_detected); + } - new_trace.setQuantMethod(quant_method_); - // new_trace.setCentroidSD(ftl_sd); - new_trace.updateWeightedMZsd(); - // #pragma omp critical (cout) - // { - // std::cout << apex_scan_idx << "\t" << apex_peak_idx << '\t' << new_trace.getCentroidMZ() << '\t' << new_trace.getCentroidRT() << '\n'; - // } - - #pragma omp critical (add_trace) + // check if we already reached the (optional) maximum number of traces + if (max_traces > 0 && found_masstraces.size() == max_traces) { - new_trace.setLabel("T" + String(trace_number)); - ++trace_number; - ++current_trace_number; - found_masstraces.push_back(new_trace); - peaks_detected += new_trace.getSize(); - this->setProgress(peaks_detected); - - // check if we already reached the (optional) maximum number of traces - // if (max_traces > 0 && found_masstraces.size() == max_traces) - // { - // apex_visited.set(); - // } + continue; } - } + + relevant.setApexAsProcessed(); } - return apex_visited; + this->endProgress(); } -} - + void MassTraceDetection::updateMembers_() + { + mass_error_ppm_ = (double)param_.getValue("mass_error_ppm"); + noise_threshold_int_ = (double)param_.getValue("noise_threshold_int"); + chrom_peak_snr_ = (double)param_.getValue("chrom_peak_snr"); + quant_method_ = MassTrace::getQuantMethod((String)param_.getValue("quant_method").toString()); + trace_termination_criterion_ = (String)param_.getValue("trace_termination_criterion").toString(); + trace_termination_outliers_ = (Size)param_.getValue("trace_termination_outliers"); + min_sample_rate_ = (double)param_.getValue("min_sample_rate"); + min_trace_length_ = (double)param_.getValue("min_trace_length"); + max_trace_length_ = (double)param_.getValue("max_trace_length"); + reestimate_mt_sd_ = param_.getValue("reestimate_mt_sd").toBool(); + } +} +//notes for modularising: +//Funktion for up, down can be the same + //Apex is just used once and can be way shorter done + // get rid of intensity and but a pointer to the map there done + //also needs a few member functions for easier handling done + // get rid of variables that are called and used just once i.p. + +// function for check max traces and already visited +// function for building the trace and adding it +// function for check quality, because that is just wasted memory +// a lot of size variables are initialized that are a waste of space + + +// void MassTraceDetection::histogramm (std::vector chrom_apices, uint8_t c) +// { +// std::unordered_map histo_mz; +// std::unordered_map histo_rt; + +// for (uint i = 0; i < 60; ++i) +// { +// histo_mz[i] = 0; +// histo_rt[i] = 0; +// } + +// double offset_rt = 2 * 60 * 5; +// for (uint i = 0; i < chrom_apices.size(); ++i) +// { +// OpenMS::MassTraceDetection::Apex m_it = chrom_apices[i]; +// Size apex_scan_idx(m_it.scan_idx); +// Size apex_peak_idx(m_it.peak_idx); +// double currentApex_mz = work_exp[apex_scan_idx][apex_peak_idx].getMZ(); +// double currentApex_rt = work_exp[apex_scan_idx].getRT(); +// double offset_mz = 2 * findOffset_(currentApex_mz, mass_error_ppm_); + +// RangeRT search_rt{currentApex_rt - offset_rt, currentApex_rt + offset_rt}; +// RangeMZ search_mz{currentApex_mz - offset_mz, currentApex_mz + offset_mz}; + +// // checking for next left apex, and how far that is +// for (uint j = 1; (j<60) && (i+j Date: Tue, 23 May 2023 10:25:05 +0200 Subject: [PATCH 18/24] with peak_visited right --- .../DATAREDUCTION/MassTraceDetection.cpp | 90 ++++++++++--------- 1 file changed, 50 insertions(+), 40 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index 17b2c6f9b4c..f74b0e0d21c 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -39,6 +39,10 @@ #include #include +bool isInRange(int value, int lowerBound, int upperBound) { + return value >= lowerBound && value <= upperBound; +} + namespace OpenMS { MassTraceDetection::MassTraceDetection() : @@ -99,22 +103,22 @@ namespace OpenMS peak_visited_(total_peak_count), current_Apex_(0), mass_error_ppm_(mass_error_ppm), - lock_list_(0) + lock_list_2_(0) { } void MassTraceDetection::NextIndex::setNumberOfThreads(const Size thread_num) { - lock_list_.resize(thread_num); + // lock_list_2_.resize(thread_num); } bool MassTraceDetection::NextIndex::isConflictingApex(const MassTraceDetection::Apex a) const { // std::cout << "Here1?\n"; - for (const std::pair& i : lock_list_) + for (const double & i : lock_list_2_) { // std::cout << "Here2?\n"; - if (i.first.containsMZ(a.getMZ()) && i.second.containsRT(a.getRT())) + if (isInRange(i, a.getMZ() - findOffset_(a.getMZ(), mass_error_ppm_), a.getMZ() + findOffset_(a.getMZ(), mass_error_ppm_))) { // std::cout << "Here3?\n"; return true; @@ -135,7 +139,7 @@ namespace OpenMS #pragma omp critical (look_for_lock) { while((current_Apex_ < data_.size()) && - (*this).isVisited(data_[current_Apex_].scan_idx_, data_[current_Apex_].peak_idx_)) + (*this).isVisited(data_[current_Apex_+1].scan_idx_, data_[current_Apex_+1].peak_idx_)) { // std::cout << "Is Visited\n"; ++current_Apex_; @@ -147,39 +151,40 @@ namespace OpenMS else { while((*this).isConflictingApex(data_[current_Apex_])) - { - // std::cout << "Is Conflicting\n"; - // for(auto i : lock_list_) - // { - // std::cout << i.first << " " << i.second << "\n"; - // } - // std::cout << '\n'; - } + {} // std::cout << "After Conflicting\n"; - ++current_Apex_; const double current_mz = data_[current_Apex_].getMZ(); - double offset_mz = findOffset_(current_mz, mass_error_ppm_); - double offset_rt = 2*5*60; - RangeMZ tmp_mz(data_[current_Apex_].getMZ() - offset_mz, data_[current_Apex_].getMZ() + offset_mz); - RangeRT tmp_rt(data_[current_Apex_].getRT() - offset_rt, data_[current_Apex_].getMZ() + offset_rt); - lock_list_[omp_get_thread_num()] = std::make_pair(tmp_mz, tmp_rt); + ++current_Apex_; + // double offset_mz = findOffset_(current_mz, mass_error_ppm_); + // double offset_rt = 2*5*60; + // std::pair tmp_mz(data_[current_Apex_].getMZ() - offset_mz, data_[current_Apex_].getMZ() + offset_mz); + // std::pair tmp_rt(data_[current_Apex_].getRT() - offset_rt, data_[current_Apex_].getMZ() + offset_rt); + + // lock_list_[omp_get_thread_num()] = std::make_pair(tmp_mz, tmp_rt); + lock_list_2_.push_back(current_mz); } } return current_Apex_; //next apex } - void MassTraceDetection::NextIndex::setApexAsProcessed() + void MassTraceDetection::NextIndex::setApexAsProcessed(const double mz) { - lock_list_[omp_get_thread_num()] = std::make_pair(RangeMZ{0,0},RangeRT{0,0}); + #pragma omp critical (remove_lock_from_vec) + { + lock_list_2_.erase(std::remove(lock_list_2_.begin(), lock_list_2_.end(), mz), lock_list_2_.end()); + } } - void MassTraceDetection::NextIndex::setApexAsProcessed(const std::vector >& gathered_idx) + void MassTraceDetection::NextIndex::setApexAsProcessed(const double mz, const std::vector >& gathered_idx) { - (*this).setApexAsProcessed(); - for (Size i = 0; i < gathered_idx.size(); ++i) - { - peak_visited_[spec_offsets_[gathered_idx[i].first] + gathered_idx[i].second] = true; - } + // #pragma omp critical (remove_lock_from_vec) + // { + (*this).setApexAsProcessed(mz); + for (Size i = 0; i < gathered_idx.size(); ++i) + { + peak_visited_[spec_offsets_[gathered_idx[i].first] + gathered_idx[i].second] = true; + } + // } } // only set this to true void MassTraceDetection::updateIterativeWeightedMeanMZ(const double added_mz, @@ -428,7 +433,7 @@ namespace OpenMS bool fwhm_meta_idx = checkFWHMMetaData_(work_exp); ///used variables - boost::dynamic_bitset<> peak_visited(total_peak_count); + // boost::dynamic_bitset<> peak_visited(total_peak_count); Size trace_number(1); Size peaks_detected(0); @@ -442,22 +447,22 @@ namespace OpenMS relevant.setNumberOfThreads(omp_get_num_threads()); // todo: function 'setNumberOfThreads()' implementieren } - for(Size i{}; i < relevant.lock_list_.size(); ++i) - { - relevant.lock_list_[i] = std::make_pair(RangeMZ{0,0},RangeRT{0,0}); - } + // for(Size i{}; i < relevant.lock_list_2_.size(); ++i) + // { + // relevant.lock_list_2_[i] = std::make_pair(std::pair{0,0},std::pair{0,0}); + // } Size index{}; #pragma omp parallel while(true) { - index = relevant.getNextFreeIndex(); + index = relevant.getNextFreeIndex() - 1; std::cout << index << '\n'; if(index >= chrom_apices.size()) break; - std::cout << "Start run\n"; + // std::cout << "Start run\n"; // if (max_traces > 0 && found_masstraces.size() == max_traces) // { @@ -523,7 +528,7 @@ namespace OpenMS // *********************************************************** // if ((trace_down_idx > 0) && toggle_down && - (!peak_visited[spec_offsets[chrom_apices[index].scan_idx_] + chrom_apices[index].peak_idx_]) + (!relevant.isVisited(chrom_apices[index].scan_idx_, chrom_apices[index].peak_idx_)) ) { const MSSpectrum& spec_trace_down = work_exp[trace_down_idx - 1]; @@ -537,7 +542,8 @@ namespace OpenMS if ((next_down_peak_mz <= right_bound) && (next_down_peak_mz >= left_bound) && - !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx] + (!relevant.isVisited(trace_down_idx - 1, next_down_peak_idx)) + // !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx] ) { @@ -604,7 +610,7 @@ namespace OpenMS // *********************************************************** // if ((trace_up_idx < work_exp.size() - 1) && toggle_up && - (!peak_visited[spec_offsets[chrom_apices[index].scan_idx_] + chrom_apices[index].peak_idx_]) + (!relevant.isVisited(chrom_apices[index].scan_idx_, chrom_apices[index].peak_idx_)) ) { const MSSpectrum& spec_trace_up = work_exp[trace_up_idx + 1]; @@ -619,7 +625,8 @@ namespace OpenMS if ((next_up_peak_mz <= right_bound) && (next_up_peak_mz >= left_bound) && - !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) + (!relevant.isVisited(trace_up_idx + 1, next_up_peak_idx))) + // !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { Peak2D next_peak; next_peak.setRT(spec_trace_up.getRT()); @@ -695,7 +702,7 @@ namespace OpenMS // { // peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; // } - relevant.setApexAsProcessed(gathered_idx); + relevant.setApexAsProcessed(chrom_apices[index].getMZ(), gathered_idx); MassTrace new_trace(current_trace.begin(), current_trace.end()); new_trace.updateWeightedMeanRT(); new_trace.updateWeightedMeanMZ(); @@ -713,6 +720,10 @@ namespace OpenMS peaks_detected += new_trace.getSize(); this->setProgress(peaks_detected); + } + else + { + relevant.setApexAsProcessed(chrom_apices[index].getMZ()); } // check if we already reached the (optional) maximum number of traces @@ -721,7 +732,6 @@ namespace OpenMS continue; } - relevant.setApexAsProcessed(); } this->endProgress(); } From 5db5572f33979e30b4d0e376c220f933961009e4 Mon Sep 17 00:00:00 2001 From: Tristan Date: Tue, 23 May 2023 11:56:19 +0200 Subject: [PATCH 19/24] one thread too many traces, more threads deadlock --- .../DATAREDUCTION/MassTraceDetection.cpp | 58 +++++++++++-------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index f74b0e0d21c..22228de132b 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -135,51 +135,50 @@ namespace OpenMS Size MassTraceDetection::NextIndex::getNextFreeIndex() { //isConflictingApex muss anders behandelt werden als isVisited - Size result{}; #pragma omp critical (look_for_lock) { while((current_Apex_ < data_.size()) && - (*this).isVisited(data_[current_Apex_+1].scan_idx_, data_[current_Apex_+1].peak_idx_)) + (*this).isVisited(data_[current_Apex_].scan_idx_, data_[current_Apex_].peak_idx_)) { - // std::cout << "Is Visited\n"; ++current_Apex_; + if (current_Apex_ == data_.size()) break; } if (current_Apex_ == data_.size()) { - result = current_Apex_; + ++current_Apex_; } else { while((*this).isConflictingApex(data_[current_Apex_])) - {} - // std::cout << "After Conflicting\n"; - const double current_mz = data_[current_Apex_].getMZ(); + { + std::cout << "conflicting\n"; + } + + lock_list_2_.push_back(data_[current_Apex_].getMZ()); ++current_Apex_; - // double offset_mz = findOffset_(current_mz, mass_error_ppm_); - // double offset_rt = 2*5*60; - // std::pair tmp_mz(data_[current_Apex_].getMZ() - offset_mz, data_[current_Apex_].getMZ() + offset_mz); - // std::pair tmp_rt(data_[current_Apex_].getRT() - offset_rt, data_[current_Apex_].getMZ() + offset_rt); - - // lock_list_[omp_get_thread_num()] = std::make_pair(tmp_mz, tmp_rt); - lock_list_2_.push_back(current_mz); } } - return current_Apex_; //next apex + return current_Apex_ - 1; //next apex } - void MassTraceDetection::NextIndex::setApexAsProcessed(const double mz) + void MassTraceDetection::NextIndex::setApexAsProcessed(Size index) { #pragma omp critical (remove_lock_from_vec) { - lock_list_2_.erase(std::remove(lock_list_2_.begin(), lock_list_2_.end(), mz), lock_list_2_.end()); + auto it = std::find(lock_list_2_.begin(), lock_list_2_.end(), data_[index].getMZ()); + if (it != lock_list_2_.end()) + { + // Remove the element + lock_list_2_.erase(it); + } } } - void MassTraceDetection::NextIndex::setApexAsProcessed(const double mz, const std::vector >& gathered_idx) + void MassTraceDetection::NextIndex::setApexAsProcessed(Size index, const std::vector >& gathered_idx) { // #pragma omp critical (remove_lock_from_vec) // { - (*this).setApexAsProcessed(mz); + (*this).setApexAsProcessed(index); for (Size i = 0; i < gathered_idx.size(); ++i) { peak_visited_[spec_offsets_[gathered_idx[i].first] + gathered_idx[i].second] = true; @@ -457,11 +456,19 @@ namespace OpenMS while(true) { - index = relevant.getNextFreeIndex() - 1; - std::cout << index << '\n'; + index = relevant.getNextFreeIndex(); + // std::cout << "Index: " << index << '\n'; if(index >= chrom_apices.size()) break; + if(relevant.isVisited(chrom_apices[index].scan_idx_, chrom_apices[index].peak_idx_)) + { + relevant.setApexAsProcessed(index); + continue; + } + + // std::cout << "Get here index\n"; + // std::cout << "Start run\n"; // if (max_traces > 0 && found_masstraces.size() == max_traces) @@ -476,7 +483,6 @@ namespace OpenMS // continue; // } - // std::cout << "After continues\n"; Peak2D apex_peak; apex_peak.setRT(chrom_apices[index].getRT()); @@ -523,6 +529,7 @@ namespace OpenMS ((trace_up_idx < work_exp.size() - 1) && toggle_up) ) { + // std::cout << "Get here0\n"; // *********************************************************** // // Step 2.1 MOVE DOWN in RT dim // *********************************************************** // @@ -531,6 +538,7 @@ namespace OpenMS (!relevant.isVisited(chrom_apices[index].scan_idx_, chrom_apices[index].peak_idx_)) ) { + // std::cout << "Get here1\n"; const MSSpectrum& spec_trace_down = work_exp[trace_down_idx - 1]; if (!spec_trace_down.empty()) { @@ -685,6 +693,8 @@ namespace OpenMS } } + // std::cout << "Get here\n"; + double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); double mt_quality((double)current_trace.size() / (double)num_scans); @@ -702,7 +712,7 @@ namespace OpenMS // { // peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; // } - relevant.setApexAsProcessed(chrom_apices[index].getMZ(), gathered_idx); + relevant.setApexAsProcessed(index, gathered_idx); MassTrace new_trace(current_trace.begin(), current_trace.end()); new_trace.updateWeightedMeanRT(); new_trace.updateWeightedMeanMZ(); @@ -723,7 +733,7 @@ namespace OpenMS } else { - relevant.setApexAsProcessed(chrom_apices[index].getMZ()); + relevant.setApexAsProcessed(index); } // check if we already reached the (optional) maximum number of traces From 6b5d2cc907e40b12444ee25d54e62e79fc1568f0 Mon Sep 17 00:00:00 2001 From: Tristan Date: Tue, 23 May 2023 11:56:34 +0200 Subject: [PATCH 20/24] Changed Interface --- .../OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index b1c09e90f45..c7e283b864f 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -151,9 +151,9 @@ namespace OpenMS /// If an apex was processed call this function to remove the apex from the blacklist and increase the current_apex_ /// ... doesn't create a feature - void setApexAsProcessed(); + void setApexAsProcessed(Size index); /// ... does create a feature - void setApexAsProcessed(const std::vector >& gathered_idx); + void setApexAsProcessed(Size index, const std::vector >& gathered_idx); bool isConflictingApex(const Apex a) const; @@ -170,6 +170,7 @@ namespace OpenMS boost::dynamic_bitset<> peak_visited_; Size current_Apex_; std::vector> lock_list_; + std::vector lock_list_2_; double mass_error_ppm_; }; From c4341538c420de29d623ada09d4c4732dfac7a87 Mon Sep 17 00:00:00 2001 From: Tristan Date: Tue, 23 May 2023 12:59:44 +0200 Subject: [PATCH 21/24] more threads lead to memory errors --- .../DATAREDUCTION/MassTraceDetection.cpp | 51 ++++++++++--------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index 22228de132b..e8b514d35f0 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -102,14 +102,13 @@ namespace OpenMS spec_offsets_(spec_offsets), peak_visited_(total_peak_count), current_Apex_(0), - mass_error_ppm_(mass_error_ppm), - lock_list_2_(0) + mass_error_ppm_(mass_error_ppm) { } void MassTraceDetection::NextIndex::setNumberOfThreads(const Size thread_num) { - // lock_list_2_.resize(thread_num); + lock_list_2_.resize(thread_num); } bool MassTraceDetection::NextIndex::isConflictingApex(const MassTraceDetection::Apex a) const @@ -151,10 +150,13 @@ namespace OpenMS { while((*this).isConflictingApex(data_[current_Apex_])) { - std::cout << "conflicting\n"; + std::cout << "Found conflict\n"; + usleep(10); + // std::cout << lock_list_2_.size() << '\n'; } - - lock_list_2_.push_back(data_[current_Apex_].getMZ()); + // std::cout << "After while\n"; + int threadnum = omp_get_thread_num(); + lock_list_2_[threadnum] = data_[current_Apex_].getMZ(); ++current_Apex_; } } @@ -165,25 +167,27 @@ namespace OpenMS { #pragma omp critical (remove_lock_from_vec) { - auto it = std::find(lock_list_2_.begin(), lock_list_2_.end(), data_[index].getMZ()); - if (it != lock_list_2_.end()) - { - // Remove the element - lock_list_2_.erase(it); - } + // auto it = std::find(lock_list_2_.begin(), lock_list_2_.end(), data_[index].getMZ()); + // if (it != lock_list_2_.end()) + // { + // // Remove the element + // lock_list_2_.erase(it); + // std::cout << "Removed conflict\n"; + // } + lock_list_2_[omp_get_thread_num()] = 0; } } void MassTraceDetection::NextIndex::setApexAsProcessed(Size index, const std::vector >& gathered_idx) { - // #pragma omp critical (remove_lock_from_vec) - // { + #pragma omp critical (remove_lock_from_vec_2) + { (*this).setApexAsProcessed(index); for (Size i = 0; i < gathered_idx.size(); ++i) { peak_visited_[spec_offsets_[gathered_idx[i].first] + gathered_idx[i].second] = true; } - // } + } } // only set this to true void MassTraceDetection::updateIterativeWeightedMeanMZ(const double added_mz, @@ -446,10 +450,10 @@ namespace OpenMS relevant.setNumberOfThreads(omp_get_num_threads()); // todo: function 'setNumberOfThreads()' implementieren } - // for(Size i{}; i < relevant.lock_list_2_.size(); ++i) - // { - // relevant.lock_list_2_[i] = std::make_pair(std::pair{0,0},std::pair{0,0}); - // } + for(Size i{}; i < relevant.lock_list_2_.size(); ++i) + { + relevant.lock_list_2_[i] = 0; + } Size index{}; #pragma omp parallel @@ -467,6 +471,7 @@ namespace OpenMS continue; } + std::cout << omp_get_thread_num() << '\n'; // std::cout << "Get here index\n"; // std::cout << "Start run\n"; @@ -737,10 +742,10 @@ namespace OpenMS } // check if we already reached the (optional) maximum number of traces - if (max_traces > 0 && found_masstraces.size() == max_traces) - { - continue; - } + // if (max_traces > 0 && found_masstraces.size() == max_traces) + // { + // continue; + // } } this->endProgress(); From 6e676a8aa999fc7cd840c42b8f37de6650d7616f Mon Sep 17 00:00:00 2001 From: Tristan Date: Wed, 24 May 2023 16:26:43 +0200 Subject: [PATCH 22/24] =?UTF-8?q?L=C3=A4uft,=20zu=20viele=20Traces=20noch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../DATAREDUCTION/MassTraceDetection.h | 6 +- .../DATAREDUCTION/MassTraceDetection.cpp | 146 +++++++----------- 2 files changed, 55 insertions(+), 97 deletions(-) diff --git a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h index c7e283b864f..1f7ce1b020f 100644 --- a/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h +++ b/src/openms/include/OpenMS/FILTERING/DATAREDUCTION/MassTraceDetection.h @@ -151,9 +151,9 @@ namespace OpenMS /// If an apex was processed call this function to remove the apex from the blacklist and increase the current_apex_ /// ... doesn't create a feature - void setApexAsProcessed(Size index); + void setApexAsProcessed(); /// ... does create a feature - void setApexAsProcessed(Size index, const std::vector >& gathered_idx); + void setApexAsProcessed(const std::vector >& gathered_idx); bool isConflictingApex(const Apex a) const; @@ -167,7 +167,7 @@ namespace OpenMS const std::vector& spec_offsets_; /// own datastructure - boost::dynamic_bitset<> peak_visited_; + std::vector peak_visited_; Size current_Apex_; std::vector> lock_list_; std::vector lock_list_2_; diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index e8b514d35f0..a7782c8b074 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -117,7 +117,7 @@ namespace OpenMS for (const double & i : lock_list_2_) { // std::cout << "Here2?\n"; - if (isInRange(i, a.getMZ() - findOffset_(a.getMZ(), mass_error_ppm_), a.getMZ() + findOffset_(a.getMZ(), mass_error_ppm_))) + if (isInRange(i, a.getMZ() - (5 * findOffset_(a.getMZ(), mass_error_ppm_)), a.getMZ() + (5 * findOffset_(a.getMZ(), mass_error_ppm_)))) { // std::cout << "Here3?\n"; return true; @@ -133,6 +133,7 @@ namespace OpenMS Size MassTraceDetection::NextIndex::getNextFreeIndex() { + Size result{}; //isConflictingApex muss anders behandelt werden als isVisited #pragma omp critical (look_for_lock) { @@ -140,33 +141,32 @@ namespace OpenMS (*this).isVisited(data_[current_Apex_].scan_idx_, data_[current_Apex_].peak_idx_)) { ++current_Apex_; - if (current_Apex_ == data_.size()) break; } - if (current_Apex_ == data_.size()) - { - ++current_Apex_; - } - else + if (current_Apex_ < data_.size()) { while((*this).isConflictingApex(data_[current_Apex_])) { - std::cout << "Found conflict\n"; - usleep(10); + // std::cout << "Found conflict\n"; + // usleep(1000); // std::cout << lock_list_2_.size() << '\n'; } + // std::cout << "After while\n"; int threadnum = omp_get_thread_num(); lock_list_2_[threadnum] = data_[current_Apex_].getMZ(); - ++current_Apex_; + // result = current_Apex_; + // ++current_Apex_; } + result = current_Apex_; + ++current_Apex_; } - return current_Apex_ - 1; //next apex + return result; //next apex } - void MassTraceDetection::NextIndex::setApexAsProcessed(Size index) + void MassTraceDetection::NextIndex::setApexAsProcessed() { - #pragma omp critical (remove_lock_from_vec) - { + // #pragma omp critical (remove_lock_from_vec) + // { // auto it = std::find(lock_list_2_.begin(), lock_list_2_.end(), data_[index].getMZ()); // if (it != lock_list_2_.end()) // { @@ -175,20 +175,23 @@ namespace OpenMS // std::cout << "Removed conflict\n"; // } lock_list_2_[omp_get_thread_num()] = 0; - } + // std::cout << lock_list_2_[0] << ' ' << lock_list_2_[1] << '\n'; + // } } - void MassTraceDetection::NextIndex::setApexAsProcessed(Size index, const std::vector >& gathered_idx) + void MassTraceDetection::NextIndex::setApexAsProcessed(const std::vector >& gathered_idx) { #pragma omp critical (remove_lock_from_vec_2) { - (*this).setApexAsProcessed(index); for (Size i = 0; i < gathered_idx.size(); ++i) { + // Size peaks_index = spec_offsets_[gathered_idx[i].first] + gathered_idx[i].second; + // peak_visited_.at(spec_offsets_[ gathered_idx[i].first ] + gathered_idx[i].second) = true; peak_visited_[spec_offsets_[gathered_idx[i].first] + gathered_idx[i].second] = true; - } + } // reihenfolge wichtig + (*this).setApexAsProcessed(); } - } // only set this to true + } void MassTraceDetection::updateIterativeWeightedMeanMZ(const double added_mz, const double added_int, @@ -242,37 +245,6 @@ namespace OpenMS run(map, found_masstraces); } -// update function for FTL method -//not used - // void updateMeanEstimate(const double& x_t, double& mean_t, Size t) - // { - // mean_t += (1.0 / ((double)t + 1.0)) * (x_t - mean_t); - // } - -//not used - // void updateSDEstimate(const double& x_t, const double& mean_t, double& sd_t, Size t) - // { - // double i(t); - // sd_t = (i / (i + 1)) * sd_t + (i / (i + 1) * (i + 1)) * (x_t - mean_t) * (x_t - mean_t); - // // std::cerr << "func: " << tmp << " " << i << std::endl; - // } - -//not used - // void updateWeightedSDEstimate(const PeakType& p, const double& mean_t1, double& sd_t, double& last_weights_sum) - // { - // double denom = last_weights_sum * sd_t * sd_t + p.getIntensity() * (p.getMZ() - mean_t1) * (p.getMZ() - mean_t1); - // double weights_sum = last_weights_sum + p.getIntensity(); - - // double tmp_sd = std::sqrt(denom / weights_sum); - - // if (tmp_sd > std::numeric_limits::epsilon()) - // { - // sd_t = tmp_sd; - // } - - // last_weights_sum = weights_sum; - // } - void updateWeightedSDEstimateRobust(const PeakType& p, const double& mean_t1, double& sd_t, double& last_weights_sum) { @@ -291,27 +263,6 @@ namespace OpenMS last_weights_sum = weights_sum; } -//not used - // void computeWeightedSDEstimate(std::list tmp, const double& mean_t, double& sd_t, const double& /* lower_sd_bound */) - // { - // double denom(0.0), weights_sum(0.0); - - // for (std::list::const_iterator l_it = tmp.begin(); l_it != tmp.end(); ++l_it) - // { - // denom += l_it->getIntensity() * (l_it->getMZ() - mean_t) * (l_it->getMZ() - mean_t); - // weights_sum += l_it->getIntensity(); - // } - - // double tmp_sd = std::sqrt(denom / (weights_sum)); - - // if (tmp_sd > std::numeric_limits::epsilon()) - // { - // sd_t = tmp_sd; - // } - - // return; - // } - void MassTraceDetection::run(const PeakMap& input_exp, std::vector& found_masstraces, const Size max_traces) { // make sure the output vector is empty @@ -336,15 +287,15 @@ namespace OpenMS for (const MSSpectrum& it : input_exp) { // check if this is a MS1 survey scan - if (it.getMSLevel() != 1) + if (it.getMSLevel() != 1 || it.empty()) { continue; } std::vector indices_passing; for(Size peak_idx = 0; - (it[peak_idx].getIntensity() > noise_threshold_int_) && - (peak_idx < it.size()); + (peak_idx < it.size()) && + (it[peak_idx].getIntensity() > noise_threshold_int_); ++peak_idx) { // Assume that noise_threshold_int_ contains the noise level of the @@ -431,7 +382,7 @@ namespace OpenMS const std::vector& spec_offsets, std::vector& found_masstraces, const Size max_traces) - { + { ///check for FWHM meta data & corrupted data bool fwhm_meta_idx = checkFWHMMetaData_(work_exp); @@ -450,28 +401,32 @@ namespace OpenMS relevant.setNumberOfThreads(omp_get_num_threads()); // todo: function 'setNumberOfThreads()' implementieren } - for(Size i{}; i < relevant.lock_list_2_.size(); ++i) - { - relevant.lock_list_2_[i] = 0; - } - Size index{}; + // for(Size i{}; i < relevant.lock_list_2_.size(); ++i) + // { + // relevant.lock_list_2_[i] = 0; + // } + + // Size index{}; #pragma omp parallel while(true) { - + Size index{}; + index = relevant.getNextFreeIndex(); // std::cout << "Index: " << index << '\n'; if(index >= chrom_apices.size()) break; + // #pragma omp critical (visited) + // { if(relevant.isVisited(chrom_apices[index].scan_idx_, chrom_apices[index].peak_idx_)) { - relevant.setApexAsProcessed(index); + relevant.setApexAsProcessed(); continue; } - - std::cout << omp_get_thread_num() << '\n'; + // } + // std::cout << omp_get_thread_num() << '\n'; // std::cout << "Get here index\n"; // std::cout << "Start run\n"; @@ -540,7 +495,7 @@ namespace OpenMS // *********************************************************** // if ((trace_down_idx > 0) && toggle_down && - (!relevant.isVisited(chrom_apices[index].scan_idx_, chrom_apices[index].peak_idx_)) + (!relevant.isVisited(relevant.data_[index].scan_idx_, relevant.data_[index].peak_idx_)) ) { // std::cout << "Get here1\n"; @@ -717,7 +672,7 @@ namespace OpenMS // { // peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; // } - relevant.setApexAsProcessed(index, gathered_idx); + relevant.setApexAsProcessed(gathered_idx); MassTrace new_trace(current_trace.begin(), current_trace.end()); new_trace.updateWeightedMeanRT(); new_trace.updateWeightedMeanMZ(); @@ -729,18 +684,21 @@ namespace OpenMS //new_trace.setCentroidSD(ftl_sd); new_trace.updateWeightedMZsd(); - new_trace.setLabel("T" + String(trace_number)); - ++trace_number; - found_masstraces.push_back(new_trace); - peaks_detected += new_trace.getSize(); - this->setProgress(peaks_detected); - + #pragma omp critical (add_trace) + { + new_trace.setLabel("T" + String(trace_number)); + ++trace_number; + found_masstraces.push_back(new_trace); + peaks_detected += new_trace.getSize(); + // std::cout << new_trace.getSize() << " " << trace_number << '\n'; + this->setProgress(peaks_detected); + } } else { - relevant.setApexAsProcessed(index); + relevant.setApexAsProcessed(); } - + // if(peaks_detected > chrom_apices.size()) break; // check if we already reached the (optional) maximum number of traces // if (max_traces > 0 && found_masstraces.size() == max_traces) // { From 7d5e4e874ee91c39784e36348091f64205816a1c Mon Sep 17 00:00:00 2001 From: Tristan Date: Wed, 24 May 2023 16:49:30 +0200 Subject: [PATCH 23/24] klappt --- .../DATAREDUCTION/MassTraceDetection.cpp | 105 ++---------------- 1 file changed, 8 insertions(+), 97 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index a7782c8b074..cbdb1465ca3 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -117,11 +117,12 @@ namespace OpenMS for (const double & i : lock_list_2_) { // std::cout << "Here2?\n"; - if (isInRange(i, a.getMZ() - (5 * findOffset_(a.getMZ(), mass_error_ppm_)), a.getMZ() + (5 * findOffset_(a.getMZ(), mass_error_ppm_)))) - { - // std::cout << "Here3?\n"; - return true; - } + // if (isInRange(i, a.getMZ() - (2 * findOffset_(a.getMZ(), mass_error_ppm_)), a.getMZ() + (2 * findOffset_(a.getMZ(), mass_error_ppm_)))) + // { + // // std::cout << "Here3?\n"; + // return true; + // } + if(i != 0) return true; } return false; } @@ -464,7 +465,7 @@ namespace OpenMS updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); std::vector > gathered_idx; - gathered_idx.emplace_back(chrom_apices[index].scan_idx_,chrom_apices[index].scan_idx_); + gathered_idx.emplace_back(chrom_apices[index].scan_idx_,chrom_apices[index].peak_idx_); if (fwhm_meta_idx) { fwhms_mz.push_back(work_exp[chrom_apices[index].scan_idx_].getFloatDataArrays()[0][chrom_apices[index].peak_idx_]); @@ -726,94 +727,4 @@ namespace OpenMS } -//notes for modularising: -//Funktion for up, down can be the same - //Apex is just used once and can be way shorter done - // get rid of intensity and but a pointer to the map there done - //also needs a few member functions for easier handling done - // get rid of variables that are called and used just once i.p. - -// function for check max traces and already visited -// function for building the trace and adding it -// function for check quality, because that is just wasted memory -// a lot of size variables are initialized that are a waste of space - - -// void MassTraceDetection::histogramm (std::vector chrom_apices, uint8_t c) -// { -// std::unordered_map histo_mz; -// std::unordered_map histo_rt; - -// for (uint i = 0; i < 60; ++i) -// { -// histo_mz[i] = 0; -// histo_rt[i] = 0; -// } - -// double offset_rt = 2 * 60 * 5; -// for (uint i = 0; i < chrom_apices.size(); ++i) -// { -// OpenMS::MassTraceDetection::Apex m_it = chrom_apices[i]; -// Size apex_scan_idx(m_it.scan_idx); -// Size apex_peak_idx(m_it.peak_idx); -// double currentApex_mz = work_exp[apex_scan_idx][apex_peak_idx].getMZ(); -// double currentApex_rt = work_exp[apex_scan_idx].getRT(); -// double offset_mz = 2 * findOffset_(currentApex_mz, mass_error_ppm_); - -// RangeRT search_rt{currentApex_rt - offset_rt, currentApex_rt + offset_rt}; -// RangeMZ search_mz{currentApex_mz - offset_mz, currentApex_mz + offset_mz}; - -// // checking for next left apex, and how far that is -// for (uint j = 1; (j<60) && (i+j Date: Wed, 24 May 2023 16:50:51 +0200 Subject: [PATCH 24/24] klappt --- .../FILTERING/DATAREDUCTION/MassTraceDetection.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp index cbdb1465ca3..101c82f3650 100644 --- a/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp +++ b/src/openms/source/FILTERING/DATAREDUCTION/MassTraceDetection.cpp @@ -117,12 +117,12 @@ namespace OpenMS for (const double & i : lock_list_2_) { // std::cout << "Here2?\n"; - // if (isInRange(i, a.getMZ() - (2 * findOffset_(a.getMZ(), mass_error_ppm_)), a.getMZ() + (2 * findOffset_(a.getMZ(), mass_error_ppm_)))) - // { - // // std::cout << "Here3?\n"; - // return true; - // } - if(i != 0) return true; + if (isInRange(i, a.getMZ() - (2 * findOffset_(a.getMZ(), mass_error_ppm_)), a.getMZ() + (2 * findOffset_(a.getMZ(), mass_error_ppm_)))) + { + // std::cout << "Here3?\n"; + return true; + } + // if(i != 0) return true; } return false; }