From d2f3da2a8960c11c647ca852702c0f959a3282e3 Mon Sep 17 00:00:00 2001
From: wangp97 <wangp97@zedat.fu-berlin.de>
Date: Tue, 16 Jul 2024 20:22:05 +0200
Subject: [PATCH 1/6] test wird nachgereicht

---
 .../include/OpenMS/ANALYSIS/ID/NeighborSeq.h  |  78 ++++++++
 .../include/OpenMS/ANALYSIS/ID/sources.cmake  |   1 +
 src/openms/source/ANALYSIS/ID/NeighborSeq.cpp | 137 +++++++++++++
 src/openms/source/ANALYSIS/ID/sources.cmake   |   1 +
 .../openms/source/NeighborSeq_test.cpp        | 180 ++++++++++++++++++
 5 files changed, 397 insertions(+)
 create mode 100644 src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
 create mode 100644 src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
 create mode 100644 src/tests/class_tests/openms/source/NeighborSeq_test.cpp
diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
new file mode 100644
index 00000000000..9db2d319893
--- /dev/null
+++ b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// --------------------------------------------------------------------------
+// $Maintainer: Sven Nahnsen $
+// $Authors: Sven Nahnsen, Andreas Bertsch, Chris Bielow, Philipp Wang $
+// --------------------------------------------------------------------------
+
+// #define NEIGHBORSEQ_H
+#pragma once
+#include <OpenMS/ANALYSIS/OPENSWATH/MRMDecoy.h>
+#include <OpenMS/APPLICATIONS/TOPPBase.h>
+#include <OpenMS/CHEMISTRY/DigestionEnzyme.h>
+#include <OpenMS/CHEMISTRY/ProteaseDB.h>
+#include <OpenMS/CHEMISTRY/ProteaseDigestion.h>
+#include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
+#include <OpenMS/DATASTRUCTURES/FASTAContainer.h>
+#include <OpenMS/FORMAT/FASTAFile.h>
+#include <OpenMS/KERNEL/MSSpectrum.h>
+#include <OpenMS/MATH/MathFunctions.h>
+#include <OpenMS/METADATA/ProteinIdentification.h>
+#include <cmath>
+#include <regex>
+#include <set>
+
+using namespace OpenMS;
+
+/**
+ * @brief The Neighbor Peptide functionality in the TOPPDecoyDatabase class is designed to find peptides from a given set of sequences (FASTA file)
+ * that are similar to a target peptide based on mass and spectral characteristics. This section will detail the methods and functionalities
+ * specifically related to the Neighbor Peptide search.
+ *
+ * The paper on subset neighbor search is www.ncbi.nlm.nih.gov/pmc/articles/PMC8489664/
+ * PMID: 34236864 PMCID: PMC8489664 DOI: 10.1021/acs.jproteome.1c00483
+ */
+class OPENMS_DLLAPI NeighborSeq
+{
+
+public:
+  /**
+   * @brief Calculates the monoisotopic mass of a peptide.
+   * @param peptide The peptide sequence.
+   * @return The monoisotopic mass of the peptide.
+   */
+  static double calculateMass(const AASequence& peptide);
+
+  /**
+   * @brief Generates a theoretical spectrum for a given peptide sequence.
+   * @param peptide_sequence The peptide sequence for which to generate the spectrum.
+   * @return The generated theoretical spectrum.
+   */
+  static MSSpectrum generateSpectrum(const String& peptide_sequence);
+
+  /**
+   * @brief Compares two spectra to determine if they share a sufficient number of ions.
+   * @param spec1 The first spectrum.
+   * @param spec2 The second spectrum.
+   * @param ion_tolerance The tolerance for the proportion of shared ions.
+   * @param resolution The resolution setting for the comparison ("high" or "low").
+   * @return True if the spectra share a sufficient number of ions, false otherwise.
+   */
+  static bool compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& ion_tolerance, const std::string& resolution);
+
+  /**
+   * @brief Finds neighbor peptides that are similar to a given peptide.
+   * @param peptides The peptide sequence to find neighbors for.
+   * @param neighbor_file The list of neighbor sequences to search in.
+   * @param mass_tolerance The mass tolerance for neighbor peptides.
+   * @param ion_tolerance The ion tolerance for neighbor peptides.
+   * @param resolution The resolution setting for the comparison ("high" or "low").
+   * @return A vector of FASTA entries that are considered neighbor peptides.
+   */
+  static std::vector<FASTAFile::FASTAEntry> findNeighborPeptides(const AASequence& peptides,
+                                                                 const std::vector<FASTAFile::FASTAEntry>& neighbor_file,
+                                                                 const double& mass_tolerance,
+                                                                 const double& ion_tolerance,
+                                                                 const String& resolution);
+};
\ No newline at end of file
diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake b/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake
index 9484ece68a7..823a3cfb83d 100644
--- a/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake
+++ b/src/openms/include/OpenMS/ANALYSIS/ID/sources.cmake
@@ -34,6 +34,7 @@ IonIdentityMolecularNetworking.h
 MessagePasserFactory.h
 MetaboliteSpectralMatching.h
 MorpheusScore.h
+NeighborSeq.h
 PeptideIndexing.h
 PeptideProteinResolution.h
 PercolatorFeatureSetHelper.h
diff --git a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
new file mode 100644
index 00000000000..ff6a2ab8e3b
--- /dev/null
+++ b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
@@ -0,0 +1,137 @@
+// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// --------------------------------------------------------------------------
+// $Maintainer: Sven Nahnsen $
+// $Authors: Sven Nahnsen, Andreas Bertsch, Chris Bielow, Philipp Wang $
+// --------------------------------------------------------------------------
+
+#include <OpenMS/FORMAT/FASTAFile.h>
+#include <OpenMS/METADATA/ProteinIdentification.h>
+#include <OpenMS/CHEMISTRY/ProteaseDB.h>
+#include <OpenMS/CHEMISTRY/ProteaseDigestion.h>
+#include <OpenMS/ANALYSIS/OPENSWATH/MRMDecoy.h>
+#include <OpenMS/CHEMISTRY/DigestionEnzyme.h>
+#include <OpenMS/APPLICATIONS/TOPPBase.h>
+#include <OpenMS/MATH/MathFunctions.h>
+#include <OpenMS/DATASTRUCTURES/FASTAContainer.h>
+#include <regex>
+#include <OpenMS/ANALYSIS/ID/NeighborSeq.h>
+#include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
+#include <OpenMS/KERNEL/MSSpectrum.h>
+#include <cmath>
+#include <set>
+
+
+using namespace OpenMS;
+using namespace std;
+
+
+// Function to calculate the mass of a peptide
+double NeighborSeq::calculateMass(const AASequence& peptide)
+{
+return peptide.getMonoWeight();
+}
+
+
+// Function to generate the theoretical spectrum for a given peptide sequence
+MSSpectrum NeighborSeq::generateSpectrum(const String& peptide_sequence)
+{
+AASequence peptide = AASequence::fromString(peptide_sequence);
+TheoreticalSpectrumGenerator spec_gen;
+// Set parameters for the spectrum generator
+Param params;
+params.setValue("add_b_ions", "true");
+params.setValue("add_y_ions", "true");
+spec_gen.setParameters(params);
+// Generate the spectrum and store it in an MSSpectrum object
+MSSpectrum spectrum;
+spec_gen.getSpectrum(spectrum, peptide, 1, 1);
+return spectrum;
+}
+
+// Function to compare two spectra and determine if they are similar
+bool NeighborSeq::compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& ion_tolerance, const string& resolution)
+{
+// Determine the bin size based on resolution
+double bin_size;
+if (resolution == "high") { bin_size = 1.0005079; }
+else { bin_size = 0.05; }
+
+// Maps to hold the binned spectra
+map<int, int> bins1, bins2;
+
+// Lambda function to bin a spectrum
+auto bin_spectrum = [&](const MSSpectrum& spec, map<int, int>& bins) {
+    for (const auto& peak : spec)
+    {
+    int bin = static_cast<int>(peak.getMZ() / bin_size);
+    bins[bin]++;
+    }
+};
+
+// Bin both spectra
+bin_spectrum(spec1, bins1);
+bin_spectrum(spec2, bins2);
+
+// Calculate the size of the spectra and the number of shared bins
+int B1 = spec1.size();
+int B2 = spec2.size();
+int B12 = 0;
+
+for (const auto& bin : bins1)
+{
+    if (bins2.find(bin.first) != bins2.end()) { B12 += min(bin.second, bins2[bin.first]); }
+}
+
+// Calculate the fraction of shared bins
+double fraction_shared = static_cast<double>(2 * B12) / (B1 + B2);
+// cout << fraction_shared << endl;
+return fraction_shared > ion_tolerance;
+}
+
+// Method to find neighbor peptides in a given FASTA file
+vector<FASTAFile::FASTAEntry> NeighborSeq::findNeighborPeptides(const AASequence& peptides,
+                                                                const vector<FASTAFile::FASTAEntry>& neighbor_file,
+                                                                const double& mass_tolerance,
+                                                                const double& ion_tolerance,
+                                                                const String& resolution)
+
+{
+vector<FASTAFile::FASTAEntry> result_entries;
+
+// Calculate the mass and generate the spectrum for the input peptide
+size_t size = peptides.size();
+double mass = calculateMass(peptides);
+String peptide_sequence = peptides.toString();
+MSSpectrum spec = generateSpectrum(peptide_sequence);
+// Iterate through the neighbor file entries
+for (const auto& entry : neighbor_file)
+{
+    for (size_t offset = 0; offset + size <= entry.sequence.size(); ++offset)
+    {
+    String neighbor_sequence = entry.sequence.substr(offset, size);
+    // Check if the sequence contains an 'X'
+    if (neighbor_sequence.find('X') == String::npos)
+    {
+        double neighbor_mass = calculateMass(AASequence::fromString(neighbor_sequence));
+        double mass_diff = std::abs(mass - neighbor_mass) / ((mass + neighbor_mass) / 2);
+        // Check if the mass difference is within the tolerance
+        if (mass_diff <= mass_tolerance)
+        {
+        // cout << mass_diff << endl;
+        MSSpectrum neighbor_spec = generateSpectrum(neighbor_sequence);
+        // Compare the spectra and add to results if they are similar
+        if (compareSpectra(spec, neighbor_spec, ion_tolerance, resolution))
+        {
+            // Construct FASTAFile::FASTAEntry and add to results
+            FASTAFile::FASTAEntry neighbor_entry(entry.identifier, entry.description, neighbor_sequence);
+            result_entries.push_back(neighbor_entry);
+        }
+        }
+    }
+    }
+}
+return result_entries;
+}
+
diff --git a/src/openms/source/ANALYSIS/ID/sources.cmake b/src/openms/source/ANALYSIS/ID/sources.cmake
index 41cc05a6d59..de1522abfb9 100644
--- a/src/openms/source/ANALYSIS/ID/sources.cmake
+++ b/src/openms/source/ANALYSIS/ID/sources.cmake
@@ -34,6 +34,7 @@ IonIdentityMolecularNetworking.cpp
 MessagePasserFactory.cpp
 MetaboliteSpectralMatching.cpp
 MorpheusScore.cpp
+NeighborSeq.cpp
 PeptideProteinResolution.cpp
 PeptideIndexing.cpp
 PercolatorFeatureSetHelper.cpp
diff --git a/src/tests/class_tests/openms/source/NeighborSeq_test.cpp b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
new file mode 100644
index 00000000000..a53a8ebe691
--- /dev/null
+++ b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
@@ -0,0 +1,180 @@
+// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
+// SPDX-License-Identifier: BSD-3-Clause
+// 
+// --------------------------------------------------------------------------
+// $Maintainer: Chris Bielow $
+// $Authors: Andreas Bertsch $
+// --------------------------------------------------------------------------
+
+#include <OpenMS/CONCEPT/ClassTest.h>
+#include <OpenMS/test_config.h>
+#include <OpenMS/ANALYSIS/ID/NeighborSeq.h>
+
+using namespace OpenMS;
+using namespace std;
+
+START_TEST(NeighborSeq, "$Id$")
+
+//NS()=delete;
+
+
+START_SECTION(double calculateMass(const AASequence& peptide))
+{
+  double mass_1 = NeighborSeq::calculateMass("PEPTIDE");
+  double mass_2 = NeighborSeq::calculateMass("PPPPPPP");
+  double mass_3 = NeighborSeq::calculateMass("PEPEP");
+  // Compare the calculated masses with expected values
+  TEST_REAL_SIMILAR(mass_1, 799.815);
+  TEST_REAL_SIMILAR(mass_2, 697.814);
+  TEST_REAL_SIMILAR(mass_3, 567.579);
+}
+END_SECTION
+
+// Test section for the generateSpectrum function
+START_SECTION(MSSpectrum generateSpectrum(const String& peptide_sequence))
+{
+  MSSpectrum spec_1 = NeighborSeq::generateSpectrum("PEPT");
+  MSSpectrum spec_2 = NeighborSeq::generateSpectrum("PPP");
+  MSSpectrum spec_3 = NeighborSeq::generateSpectrum("AR");
+  
+  // Define the expected spectrum for "PEPT"
+  MSSpectrum spec_1_test;
+ 
+  Peak1D peak1;
+  peak1.setMZ(120.06552075);
+  peak1.setIntensity(1);
+  spec_1_test.push_back(peak1);
+
+  Peak1D peak2;
+  peak2.setMZ(217.11828498);
+  peak2.setIntensity(1);
+  spec_1_test.push_back(peak2);
+
+  Peak1D peak3;
+  peak3.setMZ(227.10263491);
+  peak3.setIntensity(1);
+  spec_1_test.push_back(peak3);
+
+  Peak1D peak4;
+  peak4.setMZ(324.15539914);
+  peak4.setIntensity(1);
+  spec_1_test.push_back(peak4);
+
+  Peak1D peak5;
+  peak5.setMZ(346.16087920);
+  peak5.setIntensity(1);
+  spec_1_test.push_back(peak5);
+
+// Define the expected spectrum for "PPP"
+  MSSpectrum spec_2_test;
+  Peak1D peak1;
+  peak1.setMZ(116.07060575);
+  peak1.setIntensity(1);
+  spec_2_test.push_back(peak1);
+
+  Peak1D peak2;
+  peak2.setMZ(195.11280491);
+  peak2.setIntensity(1);
+  spec_2_test.push_back(peak2);
+
+  Peak1D peak3;
+  peak3.setMZ(213.12336998);
+  peak3.setIntensity(1);
+  spec_3_test.push_back(peak3);
+
+// Define the expected spectrum for "AR"
+  MSSpectrum spec_3_test;
+  Peak1D peak1;
+  peak1.setMZ(175.11895291);
+  peak1.setIntensity(1);
+  spec_3_test.push_back(peak1);
+
+// Compare the generated spectra with the expected spectra
+  TEST_EQUAL(spec_1, spec_1_test);
+  TEST_EQUAL(spec_1, spec_2_test);
+  TEST_EQUAL(spec_1, spec_3_test);
+  TEST_EQUAL(spec_2, spec_1_test);
+  TEST_EQUAL(spec_2, spec_2_test);
+  TEST_EQUAL(spec_2, spec_3_test);
+  TEST_EQUAL(spec_3, spec_1_test);
+  TEST_EQUAL(spec_3, spec_2_test);
+  TEST_EQUAL(spec_3, spec_3_test);
+}
+END_SECTION
+
+// Test section for the compareSpectra function
+START_SECTION(bool compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& ion_tolerance, const string& resolution))
+{
+  MSSpectrum spec1;
+  spec1.push_back(Peak1D(100.0, 1.0));
+  spec1.push_back(Peak1D(200.0, 1.0));
+  spec1.push_back(Peak1D(300.0, 1.0));
+  spec1.push_back(Peak1D(400.0, 1.0));
+
+
+  MSSpectrum spec2;
+  spec2.push_back(Peak1D(100.05, 1.0));
+  spec2.push_back(Peak1D(200.05, 1.0));
+  spec2.push_back(Peak1D(300.05, 1.0));
+  spec2.push_back(Peak1D(400.06, 1.0));
+
+
+  MSSpectrum spec3;
+  spec3.push_back(Peak1D(101.05, 1.0));
+  spec3.push_back(Peak1D(201.05, 1.0));
+  spec3.push_back(Peak1D(301.05, 1.0));
+  spec3.push_back(Peak1D(301.06, 1.0));
+
+
+  double ion_tolerance = 0.25;
+  
+  bool compare_1_2_low = NeighborSeq::compareSpectra(spec1, spec2, ion_tolerance, "low");
+  bool compare_1_3_low = NeighborSeq::compareSpectra(spec1, spec3, ion_tolerance, "low");
+  bool compare_2_3_low = NeighborSeq::compareSpectra(spec2, spec3, ion_tolerance, "low");
+
+  bool compare_1_2_high = NeighborSeq::compareSpectra(spec1, spec2, ion_tolerance, "high");
+  bool compare_1_3_high = NeighborSeq::compareSpectra(spec1, spec3, ion_tolerance, "high");
+  bool compare_2_3_high = NeighborSeq::compareSpectra(spec2, spec3, ion_tolerance, "high");
+
+// Verify the results of the comparisons
+  TEST_EQUAL(compare_1_2_low, true)
+  TEST_EQUAL(compare_1_3_low, false)
+  TEST_EQUAL(compare_1_3_low, false)
+  TEST_EQUAL(compare_1_2_high, true)
+  TEST_EQUAL(compare_1_3_high, false)
+  TEST_EQUAL(compare_2_3_high, true)
+}
+END_SECTION
+
+
+// Test section for the findNeighborPeptides_ function
+START_SECTION(vector<FASTAFile::FASTAEntry> findNeighborPeptides(const AASequence& peptides,
+                             const vector<FASTAFile::FASTAEntry>& neighbor_file,
+                             const double& mass_tolerance,
+                             const double& ion_tolerance,
+                             const String& resolution))
+{
+  AASequence peptide = AASequence::fromString("VGLPINQR")
+
+  vector<FASTAFile::FASTAEntry> neighbor_file;
+  neighbor_file.push_back(FASTAFile::FASTAEntry("protein1", "RIPLANGR"));
+  neighbor_file.push_back(FASTAFile::FASTAEntry("protein2", "PPPPPPPP"));
+  neighbor_file.push_back(FASTAFile::FASTAEntry("protein3", "VGLPINQR"));
+  neighbor_file.push_back(FASTAFile::FASTAEntry("protein4", "RIPLANGR"));
+
+  double mass_tolerance = 0.00004;
+  double ion_tolerance = 0.25;
+  String resolution = "low";
+
+  vector<FASTAFile::FASTAEntry> neighbors = findNeighborPeptides(peptide, neighbor_file, mass_tolerance, ion_tolerance, resolution);
+  vector<FASTAFile::FASTAEntry> neighbors_test;
+  neighbor_test.push_back(FASTAFile::FASTAEntry("RIPLANGR"));
+  neighbor_test.push_back(FASTAFile::FASTAEntry("VGLPINQRgit clone git@github.com:_YOURUSERNAME_/OpenMS.git"));
+  
+  // Verify the found neighbors match the expected neighbors
+  TEST_EQUAL(neighbor, neighbor_test);
+
+}
+END_SECTION
+
+END_TEST

From 25a840eaabbc1caa7387b35fe4477fc69ee5bb63 Mon Sep 17 00:00:00 2001
From: wangp97 <wangp97@zedat.fu-berlin.de>
Date: Tue, 16 Jul 2024 20:26:38 +0200
Subject: [PATCH 2/6] test wird nachgereicht

---
 src/topp/DecoyDatabase.cpp | 322 ++++++++++++++++++++++---------------
 1 file changed, 193 insertions(+), 129 deletions(-)

diff --git a/src/topp/DecoyDatabase.cpp b/src/topp/DecoyDatabase.cpp
index c67f7caf6d0..9cb3792f5b9 100644
--- a/src/topp/DecoyDatabase.cpp
+++ b/src/topp/DecoyDatabase.cpp
@@ -16,6 +16,13 @@
 #include <OpenMS/MATH/MathFunctions.h>
 #include <OpenMS/DATASTRUCTURES/FASTAContainer.h>
 #include <regex>
+#include <OpenMS/ANALYSIS/ID/NeighborSeq.h>
+#include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
+#include <OpenMS/KERNEL/MSSpectrum.h>
+#include <cmath>
+#include <set>
+
+
 
 using namespace OpenMS;
 using namespace std;
@@ -54,6 +61,11 @@ and terminates the program if decoys are found.
 @verbinclude TOPP_DecoyDatabase.cli
 <B>INI file documentation of this tool:</B>
 @htmlinclude TOPP_DecoyDatabase.html
+
+
+The Neighbor Peptide functionality in the TOPPDecoyDatabase class is designed to find peptides from a given set of sequences (FASTA file) that are
+similar to a target peptide based on mass and spectral characteristics. This section will detail the methods and functionalities specifically related
+to the Neighbor Peptide search.
 */
 
 // We do not want this class to show up in the docu:
@@ -73,7 +85,7 @@ class TOPPDecoyDatabase :
   {
     registerInputFileList_("in", "<file(s)>", ListUtils::create<String>(""), "Input FASTA file(s), each containing a database. It is recommended to include a contaminant database as well.");
     setValidFormats_("in", ListUtils::create<String>("fasta"));
-    registerOutputFile_("out", "<file>", "", "Output FASTA file where the decoy database will be written to.");
+    registerOutputFile_("out", "<file>", "", "Output FASTA file where the decoy database or neighbor peptide will be written to.");
     setValidFormats_("out", ListUtils::create<String>("fasta"));
     registerStringOption_("decoy_string", "<string>", "DECOY_", "String that is combined with the accession of the protein identifier to indicate a decoy protein.", false);
     registerStringOption_("decoy_string_position", "<choice>", "prefix", "Should the 'decoy_string' be prepended (prefix) or appended (suffix) to the protein accession?", false);
@@ -96,6 +108,15 @@ class TOPPDecoyDatabase :
     setValidStrings_("enzyme", all_enzymes);
 
     registerSubsection_("Decoy", "Decoy parameters section");
+
+    // New options for neighbor peptide search
+    registerTOPPSubsection_("Neighbor_Search", "Parameters for neighbor peptide search");
+    registerInputFile_("Neighbor_Search:neighbor_in", "<file>","", "Input FASTA file for neighbor peptide search", false);
+    setValidFormats_("Neighbor_Search:neighbor_in", {"fasta"});
+    registerStringOption_("Neighbor_Search:resolution", "<choice>", "low", "Sets the resolution of the neighbor peptide search. Options are 'low'(0.05 Da) and 'high'(1.0005079 Da)",false);
+    setValidStrings_("Neighbor_Search:resolution", ListUtils::create<String>("low,high"));
+    registerDoubleOption_("Neighbor_Search:mass_tolerance", "<double>", 0.00004, "Tolerance of the mass of the neighbor peptide m1-m2 > tm", false, true);
+    registerDoubleOption_("Neighbor_Search:ion_tolerance", "<double>", 0.25, "Tolerance of the proportion of b and y ions shared by the neighbor peptide 2*B12/B1+B2 > ti", false, true);
   }
 
   Param getSubsectionDefaults_(const String& /* name */) const override
@@ -111,6 +132,7 @@ class TOPPDecoyDatabase :
     if (as_prefix) return decoy_string + identifier;
     else return identifier + decoy_string;
   }
+  
 
   ExitCodes main_(int, const char**) override
   {
@@ -165,173 +187,215 @@ class TOPPDecoyDatabase :
     f.writeStart(out);
     FASTAFile::FASTAEntry entry;
 
-    // Configure Enzymatic digestion
-    // TODO: allow user-specified regex
-    ProteaseDigestion digestion;
-    String enzyme = getStringOption_("enzyme").trim();
-    if ((input_type == SeqType::protein) && !enzyme.empty())
-    {
-      digestion.setEnzyme(enzyme);
-    }
-    // check if decoy_string is common decoy string (e.g. decoy, rev, ...)
-    String decoy_string_lower = decoy_string;
-    decoy_string_lower.toLower();
-    bool is_common = false;
-    for (const auto& a : DecoyHelper::affixes)
+
+ // Neighbor peptide search   
+    String neighbor_file = getStringOption_("Neighbor_Search:neighbor_in");
+ // Check if the neighbor file option is provided
+    if (! neighbor_file.empty())
     {
-      if ((decoy_string_lower.hasPrefix(a) && decoy_string_position_prefix) || (decoy_string_lower.hasSuffix(a) && !decoy_string_position_prefix))
+      String resolution = getStringOption_("Neighbor_Search:resolution");
+      double ion_tolerance = getDoubleOption_("Neighbor_Search:ion_tolerance");
+      double mass_tolerance = getDoubleOption_("Neighbor_Search:mass_tolerance");
+ // Create a ProteaseDigestion object for peptide digestion
+      ProteaseDigestion digestion;
+      String enzyme = getStringOption_("enzyme").trim();
+      if ((input_type == SeqType::protein) && ! enzyme.empty()) 
       {
-        is_common = true;
+          digestion.setEnzyme(enzyme); 
       }
-    }
-    // terminate, if decoy_string is not one of the allowed decoy strings (exit code 11)
-    if (!is_common)
-    {
-      if (getFlag_("force"))
+      
+ // Vector to hold all digested peptides from the input files
+      vector<AASequence> all_peptides;
+
+      for (Size i = 0; i < in.size(); ++i)
       {
-        OPENMS_LOG_WARN << "Force Flag is enabled, decoys with custom decoy string (not in DecoyHelper::affixes) will not be detected.\n";
+        //FASTAContainer<TFI_File> in_entries {in[i]};
+        f.readStart(in[i]);
+        while (f.readNext(entry))
+        {
+        
+          // Vector to hold the peptides generated from digestion
+          vector<AASequence> peptides;
+          // Digest the current entry sequence into peptides
+          digestion.digest(AASequence::fromString(entry.sequence), peptides);
+          // Add the generated peptides to the all_peptides vector
+          all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end());
+        }
       }
-      else
-      {
-        OPENMS_LOG_FATAL_ERROR << "Given decoy string is not allowed. Please use one of the strings in DecoyHelper::affixes as either prefix or suffix (case insensitive): \n";
-        return INCOMPATIBLE_INPUT_DATA;
+ // Load the neighbor peptides from the neighbor file
+      FASTAFile neighbor_fasta;
+      vector<FASTAFile::FASTAEntry> neighbor_entries;
+      neighbor_fasta.load(neighbor_file, neighbor_entries);
+      for (auto i = all_peptides.begin(); i != all_peptides.end(); ++i)
+      { 
+          {
+          // Find neighbor peptides for the current peptide
+            vector<FASTAFile::FASTAEntry> result = NeighborSeq::findNeighborPeptides(*i, neighbor_entries, mass_tolerance, ion_tolerance, resolution);
+            for (const auto& sub : result)
+            {
+            // Write the neighbor peptide to the output file
+                f.writeNext(sub);
+            }
+          }
       }
     }
-    MRMDecoy m;
-    m.setParameters(decoy_param);
-
-    Math::RandomShuffler shuffler(seed);
-    for (Size i = 0; i < in.size(); ++i)
+    else
     {
-      // check input files for decoys
-      FASTAContainer<TFI_File> in_entries{in[i]};
-      auto r = DecoyHelper::countDecoys(in_entries);
-      // if decoys found, throw exception
-      if (static_cast<double>(r.all_prefix_occur + r.all_suffix_occur) >= 0.4 * static_cast<double>(r.all_proteins_count))
+      // Configure Enzymatic digestion
+      // TODO: allow user-specified regex
+      ProteaseDigestion digestion;
+      String enzyme = getStringOption_("enzyme").trim();
+      if ((input_type == SeqType::protein) && ! enzyme.empty()) { digestion.setEnzyme(enzyme); }
+      // check if decoy_string is common decoy string (e.g. decoy, rev, ...)
+      String decoy_string_lower = decoy_string;
+      decoy_string_lower.toLower();
+      bool is_common = false;
+      for (const auto& a : DecoyHelper::affixes)
       {
-        // if decoys found, program terminates with exit code 11
-        OPENMS_LOG_FATAL_ERROR << "Invalid input in " + in[i] + ": Input file already contains decoys." << '\n';
-        return INCOMPATIBLE_INPUT_DATA;
+        if ((decoy_string_lower.hasPrefix(a) && decoy_string_position_prefix) || (decoy_string_lower.hasSuffix(a) && ! decoy_string_position_prefix))
+        {
+          is_common = true;
+        }
       }
-
-      f.readStart(in[i]);
-      //-------------------------------------------------------------
-      // calculations
-      //-------------------------------------------------------------
-      while (f.readNext(entry))
+      // terminate, if decoy_string is not one of the allowed decoy strings (exit code 11)
+      if (! is_common)
       {
-        if (identifiers.find(entry.identifier) != identifiers.end())
+        if (getFlag_("force"))
         {
-          OPENMS_LOG_WARN << "DecoyDatabase: Warning, identifier '" << entry.identifier << "' occurs more than once!" << endl;
+          OPENMS_LOG_WARN << "Force Flag is enabled, decoys with custom decoy string (not in DecoyHelper::affixes) will not be detected.\n";
         }
-        identifiers.insert(entry.identifier);
-
-        if (append)
+        else
         {
-          f.writeNext(entry);
+          OPENMS_LOG_FATAL_ERROR << "Given decoy string is not allowed. Please use one of the strings in DecoyHelper::affixes as either prefix or "
+                                    "suffix (case insensitive): \n";
+          return INCOMPATIBLE_INPUT_DATA;
         }
+      }
+      MRMDecoy m;
+      m.setParameters(decoy_param);
 
-        // identifier
-        entry.identifier = getIdentifier_(entry.identifier, decoy_string, decoy_string_position_prefix);
+      Math::RandomShuffler shuffler(seed);
+      for (Size i = 0; i < in.size(); ++i)
+      {
+        // check input files for decoys
+        FASTAContainer<TFI_File> in_entries {in[i]};
+        auto r = DecoyHelper::countDecoys(in_entries);
+        // if decoys found, throw exception
+        if (static_cast<double>(r.all_prefix_occur + r.all_suffix_occur) >= 0.4 * static_cast<double>(r.all_proteins_count))
+        {
+          // if decoys found, program terminates with exit code 11
+          OPENMS_LOG_FATAL_ERROR << "Invalid input in " + in[i] + ": Input file already contains decoys." << '\n';
+          return INCOMPATIBLE_INPUT_DATA;
+        }
 
-        // sequence
-        if (input_type == SeqType::RNA)
+        f.readStart(in[i]);
+        //-------------------------------------------------------------
+        // calculations
+        //-------------------------------------------------------------
+        while (f.readNext(entry))
         {
-          string quick_seq = entry.sequence;
-          bool five_p = (entry.sequence.front() == 'p');
-          bool three_p = (entry.sequence.back() == 'p');
-          if (five_p) //we don't want to reverse terminal phosphates
+          if (identifiers.find(entry.identifier) != identifiers.end())
           {
-            quick_seq.erase(0, 1);
-          }
-          if (three_p)
-          {
-            quick_seq.pop_back();
+            OPENMS_LOG_WARN << "DecoyDatabase: Warning, identifier '" << entry.identifier << "' occurs more than once!" << endl;
           }
+          identifiers.insert(entry.identifier);
+
+          if (append) { f.writeNext(entry); }
 
-          vector<String> tokenized;
-          std::smatch m;
-          std::string pattern = R"([^\[]|(\[[^\[\]]*\]))";
-          std::regex re(pattern);
+          // identifier
+          entry.identifier = getIdentifier_(entry.identifier, decoy_string, decoy_string_position_prefix);
 
-          while (std::regex_search(quick_seq, m, re))
+          // sequence
+          if (input_type == SeqType::RNA)
           {
+            string quick_seq = entry.sequence;
+            bool five_p = (entry.sequence.front() == 'p');
+            bool three_p = (entry.sequence.back() == 'p');
+            if (five_p) // we don't want to reverse terminal phosphates
+            {
+              quick_seq.erase(0, 1);
+            }
+            if (three_p) { quick_seq.pop_back(); }
+
+            vector<String> tokenized;
+            std::smatch m;
+            std::string pattern = R"([^\[]|(\[[^\[\]]*\]))";
+            std::regex re(pattern);
+
+            while (std::regex_search(quick_seq, m, re))
+            {
               tokenized.emplace_back(m.str(0));
               quick_seq = m.suffix();
-          }
+            }
 
-          if (shuffle)
-          {
-            shuffler.portable_random_shuffle(tokenized.begin(), tokenized.end());
-          }
-          else  // reverse
-          {
-            reverse(tokenized.begin(), tokenized.end()); //reverse the tokens
-          }
-          if (five_p)  //add back 5'
-          {
-            tokenized.insert(tokenized.begin(), String("p"));
-          }
-          if (three_p) //add back 3'
-          {
-            tokenized.emplace_back("p");
+            if (shuffle) { shuffler.portable_random_shuffle(tokenized.begin(), tokenized.end()); }
+            else // reverse
+            {
+              reverse(tokenized.begin(), tokenized.end()); // reverse the tokens
+            }
+            if (five_p) // add back 5'
+            {
+              tokenized.insert(tokenized.begin(), String("p"));
+            }
+            if (three_p) // add back 3'
+            {
+              tokenized.emplace_back("p");
+            }
+            entry.sequence = ListUtils::concatenate(tokenized, "");
           }
-          entry.sequence = ListUtils::concatenate(tokenized, "");
-        }
-        else // protein input
-        {
-          // if (terminal_aminos != "none")
-          if (enzyme != "no cleavage" && (keepN || keepC))
+          else // protein input
           {
-            std::vector<AASequence> peptides;
-            digestion.digest(AASequence::fromString(entry.sequence), peptides);
-            String new_sequence = "";
-            for (auto const& peptide : peptides)
+            // if (terminal_aminos != "none")
+            if (enzyme != "no cleavage" && (keepN || keepC))
             {
-              //TODO why are the functions from TargetedExperiment and MRMDecoy not anywhere more general?
-              // No soul would look there.
+              std::vector<AASequence> peptides;
+              digestion.digest(AASequence::fromString(entry.sequence), peptides);
+              String new_sequence = "";
+              for (auto const& peptide : peptides)
+              {
+                // TODO why are the functions from TargetedExperiment and MRMDecoy not anywhere more general?
+                //  No soul would look there.
+                if (shuffle)
+                {
+                  OpenMS::TargetedExperiment::Peptide p;
+                  p.sequence = peptide.toString();
+                  OpenMS::TargetedExperiment::Peptide decoy_p = m.shufflePeptide(p, identity_threshold, seed, max_attempts);
+                  new_sequence += decoy_p.sequence;
+                }
+                else
+                {
+                  OpenMS::TargetedExperiment::Peptide p;
+                  p.sequence = peptide.toString();
+                  OpenMS::TargetedExperiment::Peptide decoy_p = MRMDecoy::reversePeptide(p, keepN, keepC, keep_const_pattern);
+                  new_sequence += decoy_p.sequence;
+                }
+              }
+              entry.sequence = new_sequence;
+            }
+            else
+            {
+              // sequence
               if (shuffle)
               {
-                OpenMS::TargetedExperiment::Peptide p;
-                p.sequence = peptide.toString();
-                OpenMS::TargetedExperiment::Peptide decoy_p = m.shufflePeptide(p, identity_threshold, seed, max_attempts);
-                new_sequence += decoy_p.sequence;
+                shuffler.seed(seed); // identical proteins are shuffled the same way -> re-seed
+                shuffler.portable_random_shuffle(entry.sequence.begin(), entry.sequence.end());
               }
-              else
+              else // reverse
               {
-                OpenMS::TargetedExperiment::Peptide p;
-                p.sequence = peptide.toString();
-                OpenMS::TargetedExperiment::Peptide decoy_p = MRMDecoy::reversePeptide(p, keepN, keepC, keep_const_pattern);
-                new_sequence += decoy_p.sequence;
+                entry.sequence.reverse();
               }
             }
-            entry.sequence = new_sequence;
-          }
-          else
-          {
-            // sequence
-            if (shuffle)
-            {
-              shuffler.seed(seed); // identical proteins are shuffled the same way -> re-seed
-              shuffler.portable_random_shuffle(entry.sequence.begin(), entry.sequence.end());
-            }
-            else // reverse
-            {
-              entry.sequence.reverse();
-            }
           }
-        }
-
-        //-------------------------------------------------------------
-        // writing output
-        //-------------------------------------------------------------
-        f.writeNext(entry);
-      } // next protein
-    } // input files
 
+          //-------------------------------------------------------------
+          // writing output
+          //-------------------------------------------------------------
+          f.writeNext(entry);
+        } // next protein
+      }   // input files
+    }
     return EXECUTION_OK;
   }
-
 };
 
 

From f4ffd32e7f10270d661673a6b90d0d7104b84501 Mon Sep 17 00:00:00 2001
From: wangp97 <wangp97@zedat.fu-berlin.de>
Date: Tue, 23 Jul 2024 02:12:30 +0200
Subject: [PATCH 3/6] Decoy Database Zeile 239-248 wird noch in ein eigene
 Funktion geschrieben

---
 .../include/OpenMS/ANALYSIS/ID/NeighborSeq.h  |  53 +++----
 src/openms/source/ANALYSIS/ID/NeighborSeq.cpp | 146 ++++++++----------
 src/topp/DecoyDatabase.cpp                    | 102 +++++++-----
 3 files changed, 145 insertions(+), 156 deletions(-)

diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
index 9db2d319893..9eaf1ae3a73 100644
--- a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
+++ b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
@@ -2,27 +2,17 @@
 // SPDX-License-Identifier: BSD-3-Clause
 //
 // --------------------------------------------------------------------------
-// $Maintainer: Sven Nahnsen $
-// $Authors: Sven Nahnsen, Andreas Bertsch, Chris Bielow, Philipp Wang $
+// $Maintainer: Chris Bielow, Philipp Wang $
+// $Authors: Philipp Wang $
 // --------------------------------------------------------------------------
 
-// #define NEIGHBORSEQ_H
 #pragma once
-#include <OpenMS/ANALYSIS/OPENSWATH/MRMDecoy.h>
-#include <OpenMS/APPLICATIONS/TOPPBase.h>
-#include <OpenMS/CHEMISTRY/DigestionEnzyme.h>
-#include <OpenMS/CHEMISTRY/ProteaseDB.h>
-#include <OpenMS/CHEMISTRY/ProteaseDigestion.h>
-#include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
-#include <OpenMS/DATASTRUCTURES/FASTAContainer.h>
+
 #include <OpenMS/FORMAT/FASTAFile.h>
+#include <OpenMS/ANALYSIS/ID/NeighborSeq.h>
+#include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
 #include <OpenMS/KERNEL/MSSpectrum.h>
-#include <OpenMS/MATH/MathFunctions.h>
-#include <OpenMS/METADATA/ProteinIdentification.h>
 #include <cmath>
-#include <regex>
-#include <set>
-
 using namespace OpenMS;
 
 /**
@@ -37,12 +27,6 @@ class OPENMS_DLLAPI NeighborSeq
 {
 
 public:
-  /**
-   * @brief Calculates the monoisotopic mass of a peptide.
-   * @param peptide The peptide sequence.
-   * @return The monoisotopic mass of the peptide.
-   */
-  static double calculateMass(const AASequence& peptide);
 
   /**
    * @brief Generates a theoretical spectrum for a given peptide sequence.
@@ -55,24 +39,27 @@ class OPENMS_DLLAPI NeighborSeq
    * @brief Compares two spectra to determine if they share a sufficient number of ions.
    * @param spec1 The first spectrum.
    * @param spec2 The second spectrum.
-   * @param ion_tolerance The tolerance for the proportion of shared ions.
-   * @param resolution The resolution setting for the comparison ("high" or "low").
+   * @param min_shared_ion_fraction The tolerance for the proportion of shared ions.
+   * @param mz_bin_size The mz_bin_size setting for the comparison.
    * @return True if the spectra share a sufficient number of ions, false otherwise.
    */
-  static bool compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& ion_tolerance, const std::string& resolution);
+  static bool compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& min_shared_ion_fraction, const double& mz_bin_size);
+
+
+  static std::map<double, std::vector<int>> NeighborSeq::createMassPositonMap(const std::vector<AASequence>& candidates);
 
   /**
    * @brief Finds neighbor peptides that are similar to a given peptide.
-   * @param peptides The peptide sequence to find neighbors for.
-   * @param neighbor_file The list of neighbor sequences to search in.
+   * @param peptide The peptide sequence to find neighbors for.
+   * @param neighbor_candidate The list of neighbor sequences to search in.
    * @param mass_tolerance The mass tolerance for neighbor peptides.
-   * @param ion_tolerance The ion tolerance for neighbor peptides.
-   * @param resolution The resolution setting for the comparison ("high" or "low").
+   * @param min_shared_ion_fraction The ion tolerance for neighbor peptides.
+   * @param mz_bin_size The mz_bin_size setting for the comparison is default 0.05 (the original study suggests �high� (0.05 Da) and �low� (1.0005079 Da) mz_bin_size).
    * @return A vector of FASTA entries that are considered neighbor peptides.
    */
-  static std::vector<FASTAFile::FASTAEntry> findNeighborPeptides(const AASequence& peptides,
-                                                                 const std::vector<FASTAFile::FASTAEntry>& neighbor_file,
-                                                                 const double& mass_tolerance,
-                                                                 const double& ion_tolerance,
-                                                                 const String& resolution);
+  static std::vector<int> findNeighborPeptides(const AASequence& peptides,
+                                                                 const std::vector<AASequence>& neighbor_candidate,
+                                                                 const std::vector<int>& candidate_position,
+                                                                 const double& min_shared_ion_fraction,
+                                                                 const double& mz_bin_size);
 };
\ No newline at end of file
diff --git a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
index ff6a2ab8e3b..0c16e04c442 100644
--- a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
+++ b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
@@ -7,31 +7,17 @@
 // --------------------------------------------------------------------------
 
 #include <OpenMS/FORMAT/FASTAFile.h>
-#include <OpenMS/METADATA/ProteinIdentification.h>
-#include <OpenMS/CHEMISTRY/ProteaseDB.h>
-#include <OpenMS/CHEMISTRY/ProteaseDigestion.h>
-#include <OpenMS/ANALYSIS/OPENSWATH/MRMDecoy.h>
-#include <OpenMS/CHEMISTRY/DigestionEnzyme.h>
-#include <OpenMS/APPLICATIONS/TOPPBase.h>
-#include <OpenMS/MATH/MathFunctions.h>
-#include <OpenMS/DATASTRUCTURES/FASTAContainer.h>
-#include <regex>
 #include <OpenMS/ANALYSIS/ID/NeighborSeq.h>
 #include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
 #include <OpenMS/KERNEL/MSSpectrum.h>
 #include <cmath>
-#include <set>
+
 
 
 using namespace OpenMS;
 using namespace std;
 
 
-// Function to calculate the mass of a peptide
-double NeighborSeq::calculateMass(const AASequence& peptide)
-{
-return peptide.getMonoWeight();
-}
 
 
 // Function to generate the theoretical spectrum for a given peptide sequence
@@ -51,87 +37,85 @@ return spectrum;
 }
 
 // Function to compare two spectra and determine if they are similar
-bool NeighborSeq::compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& ion_tolerance, const string& resolution)
+bool NeighborSeq::compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& min_shared_ion_fraction, const double& mz_bin_size)
 {
-// Determine the bin size based on resolution
-double bin_size;
-if (resolution == "high") { bin_size = 1.0005079; }
-else { bin_size = 0.05; }
-
-// Maps to hold the binned spectra
-map<int, int> bins1, bins2;
+  std::map<int, int> bins1, bins2;
 
-// Lambda function to bin a spectrum
-auto bin_spectrum = [&](const MSSpectrum& spec, map<int, int>& bins) {
+  // Lambda function to bin a spectrum into a map
+  auto bin_spectrum = [&](const MSSpectrum& spec, std::map<int, int>& bins) {
     for (const auto& peak : spec)
-    {
-    int bin = static_cast<int>(peak.getMZ() / bin_size);
-    bins[bin]++;
-    }
-};
+      bins[static_cast<int>(peak.getMZ() / mz_bin_size)]++;
+  };
 
-// Bin both spectra
-bin_spectrum(spec1, bins1);
-bin_spectrum(spec2, bins2);
+  // Bin both spectra
+  bin_spectrum(spec1, bins1);
+  bin_spectrum(spec2, bins2);
 
-// Calculate the size of the spectra and the number of shared bins
-int B1 = spec1.size();
-int B2 = spec2.size();
-int B12 = 0;
+  // Extract bin keys as vectors
+  std::vector<int> vec_bins1, vec_bins2;
+  for (const auto& bin : bins1)
+    vec_bins1.push_back(bin.first);
+  for (const auto& bin : bins2)
+    vec_bins2.push_back(bin.first);
 
-for (const auto& bin : bins1)
-{
-    if (bins2.find(bin.first) != bins2.end()) { B12 += min(bin.second, bins2[bin.first]); }
+  // Calculate the intersection of the bin vectors
+  std::vector<int> intersection;
+  std::set_intersection(vec_bins1.begin(), vec_bins1.end(), vec_bins2.begin(), vec_bins2.end(), std::back_inserter(intersection));
+
+  // Calculate the number of shared bins considering the bin frequencies
+  int B12 = 0;
+  for (int bin : intersection)
+    B12 += std::min(bins1[bin], bins2[bin]);
+
+  // Calculate the fraction of shared bins
+  double fraction_shared = static_cast<double>(2 * B12) / (spec1.size() + spec2.size());
+
+
+  return fraction_shared > min_shared_ion_fraction;
 }
 
-// Calculate the fraction of shared bins
-double fraction_shared = static_cast<double>(2 * B12) / (B1 + B2);
-// cout << fraction_shared << endl;
-return fraction_shared > ion_tolerance;
+map<double, vector<int>> NeighborSeq::createMassPositonMap(const vector<AASequence>& candidates)
+{
+  map<double, vector<int>> mass_position_map;
+
+  //Iterate through the vector of AASequence objects 
+  for (size_t i = 0; i < candidates.size(); ++i)
+  {
+    // Calculate the mono-isotopic mass of the sequence
+    double mass = candidates[i].getMonoWeight();
+    // Insert the mass and the position into the map
+    mass_position_map[mass].push_back(i);
+  }
+  return mass_position_map;
 }
 
 // Method to find neighbor peptides in a given FASTA file
-vector<FASTAFile::FASTAEntry> NeighborSeq::findNeighborPeptides(const AASequence& peptides,
-                                                                const vector<FASTAFile::FASTAEntry>& neighbor_file,
-                                                                const double& mass_tolerance,
-                                                                const double& ion_tolerance,
-                                                                const String& resolution)
+vector<int> NeighborSeq::findNeighborPeptides(const AASequence& peptides,
+                                                                const vector<AASequence>& neighbor_candidate,
+                                                                const vector<int>& candidate_position,
+                                                                const double& min_shared_ion_fraction,
+                                                                const double& mz_bin_size)
 
 {
-vector<FASTAFile::FASTAEntry> result_entries;
-
-// Calculate the mass and generate the spectrum for the input peptide
-size_t size = peptides.size();
-double mass = calculateMass(peptides);
-String peptide_sequence = peptides.toString();
-MSSpectrum spec = generateSpectrum(peptide_sequence);
-// Iterate through the neighbor file entries
-for (const auto& entry : neighbor_file)
-{
-    for (size_t offset = 0; offset + size <= entry.sequence.size(); ++offset)
-    {
-    String neighbor_sequence = entry.sequence.substr(offset, size);
-    // Check if the sequence contains an 'X'
-    if (neighbor_sequence.find('X') == String::npos)
+    vector<int> result_entries;
+    String rel_sequence = peptides.toString();
+    MSSpectrum spec = generateSpectrum(rel_sequence);
+    for (size_t i = 0; i < candidate_position.size(); i++)
     {
-        double neighbor_mass = calculateMass(AASequence::fromString(neighbor_sequence));
-        double mass_diff = std::abs(mass - neighbor_mass) / ((mass + neighbor_mass) / 2);
-        // Check if the mass difference is within the tolerance
-        if (mass_diff <= mass_tolerance)
-        {
-        // cout << mass_diff << endl;
-        MSSpectrum neighbor_spec = generateSpectrum(neighbor_sequence);
-        // Compare the spectra and add to results if they are similar
-        if (compareSpectra(spec, neighbor_spec, ion_tolerance, resolution))
-        {
-            // Construct FASTAFile::FASTAEntry and add to results
-            FASTAFile::FASTAEntry neighbor_entry(entry.identifier, entry.description, neighbor_sequence);
-            result_entries.push_back(neighbor_entry);
-        }
-        }
+          String neighbor_sequence = neighbor_candidate[candidate_position[i]].toString();
+          //cout << neighbor_sequence << endl;
+          // Check if the sequence contains an 'X'
+              if (neighbor_sequence.find('X') == String::npos)
+              {     
+                  MSSpectrum neighbor_spec = generateSpectrum(neighbor_sequence);
+
+                  // Compare the spectra and add to results if they are similar
+                      if (compareSpectra(spec, neighbor_spec, min_shared_ion_fraction, mz_bin_size))
+                      {      
+                          result_entries.push_back(candidate_position[i]);
+                      }
+               }
     }
-    }
-}
 return result_entries;
 }
 
diff --git a/src/topp/DecoyDatabase.cpp b/src/topp/DecoyDatabase.cpp
index 9cb3792f5b9..7be71111cdd 100644
--- a/src/topp/DecoyDatabase.cpp
+++ b/src/topp/DecoyDatabase.cpp
@@ -17,10 +17,7 @@
 #include <OpenMS/DATASTRUCTURES/FASTAContainer.h>
 #include <regex>
 #include <OpenMS/ANALYSIS/ID/NeighborSeq.h>
-#include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
-#include <OpenMS/KERNEL/MSSpectrum.h>
-#include <cmath>
-#include <set>
+
 
 
 
@@ -65,7 +62,8 @@ and terminates the program if decoys are found.
 
 The Neighbor Peptide functionality in the TOPPDecoyDatabase class is designed to find peptides from a given set of sequences (FASTA file) that are
 similar to a target peptide based on mass and spectral characteristics. This section will detail the methods and functionalities specifically related
-to the Neighbor Peptide search.
+to the Neighbor Peptide search. 
+NeighborSeq class has more details look at *openms\source\ANALYSIS\ID\NeighborSeq.cpp
 */
 
 // We do not want this class to show up in the docu:
@@ -85,7 +83,8 @@ class TOPPDecoyDatabase :
   {
     registerInputFileList_("in", "<file(s)>", ListUtils::create<String>(""), "Input FASTA file(s), each containing a database. It is recommended to include a contaminant database as well.");
     setValidFormats_("in", ListUtils::create<String>("fasta"));
-    registerOutputFile_("out", "<file>", "", "Output FASTA file where the decoy database or neighbor peptide will be written to.");
+    registerOutputFile_("out", "<file>", "", "Output FASTA file where the decoy database will be written to.");
+    registerOutputFile_("out_neighbor", "<file>", "", "Output FASTA file where the neighbor peptide will be written to.");
     setValidFormats_("out", ListUtils::create<String>("fasta"));
     registerStringOption_("decoy_string", "<string>", "DECOY_", "String that is combined with the accession of the protein identifier to indicate a decoy protein.", false);
     registerStringOption_("decoy_string_position", "<choice>", "prefix", "Should the 'decoy_string' be prepended (prefix) or appended (suffix) to the protein accession?", false);
@@ -111,12 +110,12 @@ class TOPPDecoyDatabase :
 
     // New options for neighbor peptide search
     registerTOPPSubsection_("Neighbor_Search", "Parameters for neighbor peptide search");
-    registerInputFile_("Neighbor_Search:neighbor_in", "<file>","", "Input FASTA file for neighbor peptide search", false);
+    registerInputFile_("Neighbor_Search:neighbor_in", "<file>","", "neighbor peptides are searched for in the Fasta file entered for candiate ", false);
     setValidFormats_("Neighbor_Search:neighbor_in", {"fasta"});
-    registerStringOption_("Neighbor_Search:resolution", "<choice>", "low", "Sets the resolution of the neighbor peptide search. Options are 'low'(0.05 Da) and 'high'(1.0005079 Da)",false);
-    setValidStrings_("Neighbor_Search:resolution", ListUtils::create<String>("low,high"));
-    registerDoubleOption_("Neighbor_Search:mass_tolerance", "<double>", 0.00004, "Tolerance of the mass of the neighbor peptide m1-m2 > tm", false, true);
-    registerDoubleOption_("Neighbor_Search:ion_tolerance", "<double>", 0.25, "Tolerance of the proportion of b and y ions shared by the neighbor peptide 2*B12/B1+B2 > ti", false, true);
+    registerIntOption_("Neighbor_Search:missed_cleavages", "<int>", 0,"Number of missed cleavages",false);
+    registerDoubleOption_("Neighbor_Search:mz_bin_size", "<num>", 0.05,"Sets the mz_bin_size of the neighbor peptide search.(the original study suggests high (0.05 Da) and low (1.0005079 Da) mz_bin_size)", false);
+    registerDoubleOption_("Neighbor_Search:mass_tolerance", "<double>", 0.00001, "Tolerance of the mass of the neighbor peptide m1-m2 > tm", false, true);
+    registerDoubleOption_("Neighbor_Search:min_shared_ion_fraction", "<double>", 0.25, "Tolerance of the proportion of b and y ions shared by the neighbor peptide 2*B12/B1+B2 > ti", false, true);
   }
 
   Param getSubsectionDefaults_(const String& /* name */) const override
@@ -187,33 +186,34 @@ class TOPPDecoyDatabase :
     f.writeStart(out);
     FASTAFile::FASTAEntry entry;
 
-
- // Neighbor peptide search   
+    // Create a ProteaseDigestion object for peptide digestion
+    ProteaseDigestion digestion;
+    String enzyme = getStringOption_("enzyme").trim();
+    if ((input_type == SeqType::protein) && ! enzyme.empty()) { digestion.setEnzyme(enzyme); }
+    
+    
+ // create neighbor peptides for the relevant peptides given in the FASTA from '-in'
     String neighbor_file = getStringOption_("Neighbor_Search:neighbor_in");
  // Check if the neighbor file option is provided
     if (! neighbor_file.empty())
     {
-      String resolution = getStringOption_("Neighbor_Search:resolution");
-      double ion_tolerance = getDoubleOption_("Neighbor_Search:ion_tolerance");
+      String out_neighbor = getStringOption_("out_neighbor");
+      FASTAFile neig;
+      neig.writeStart(out_neighbor);
+      double mz_bin_size = getDoubleOption_("Neighbor_Search:mz_bin_size");
+      double min_shared_ion_fraction = getDoubleOption_("Neighbor_Search:min_shared_ion_fraction");
       double mass_tolerance = getDoubleOption_("Neighbor_Search:mass_tolerance");
- // Create a ProteaseDigestion object for peptide digestion
-      ProteaseDigestion digestion;
-      String enzyme = getStringOption_("enzyme").trim();
-      if ((input_type == SeqType::protein) && ! enzyme.empty()) 
-      {
-          digestion.setEnzyme(enzyme); 
-      }
-      
+      int missed_cleavages = getIntOption_("Neighbor_Search:missed_cleavages");
+      digestion.setMissedCleavages(missed_cleavages);
  // Vector to hold all digested peptides from the input files
       vector<AASequence> all_peptides;
 
       for (Size i = 0; i < in.size(); ++i)
       {
         //FASTAContainer<TFI_File> in_entries {in[i]};
-        f.readStart(in[i]);
-        while (f.readNext(entry))
+        neig.readStart(in[i]);
+        while (neig.readNext(entry))
         {
-        
           // Vector to hold the peptides generated from digestion
           vector<AASequence> peptides;
           // Digest the current entry sequence into peptides
@@ -222,30 +222,48 @@ class TOPPDecoyDatabase :
           all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end());
         }
       }
- // Load the neighbor peptides from the neighbor file
+ // Load the neighbor peptides candidates from the neighbor file
       FASTAFile neighbor_fasta;
       vector<FASTAFile::FASTAEntry> neighbor_entries;
       neighbor_fasta.load(neighbor_file, neighbor_entries);
+      vector<AASequence> digested_candidate_peptides;
+      for (auto& entry : neighbor_entries)
+      {
+        digestion.digest(AASequence::fromString(entry.sequence), digested_candidate_peptides);
+      }
+
+      map<double, vector<int>> mass_position_map = NeighborSeq::createMassPositonMap(digested_candidate_peptides);
+      
       for (auto i = all_peptides.begin(); i != all_peptides.end(); ++i)
       { 
-          {
-          // Find neighbor peptides for the current peptide
-            vector<FASTAFile::FASTAEntry> result = NeighborSeq::findNeighborPeptides(*i, neighbor_entries, mass_tolerance, ion_tolerance, resolution);
-            for (const auto& sub : result)
-            {
-            // Write the neighbor peptide to the output file
-                f.writeNext(sub);
-            }
-          }
+        double lower_bound_key = i->getMonoWeight() - mass_tolerance;
+        double upper_bound_key = i->getMonoWeight() + mass_tolerance;
+        auto lower = mass_position_map.lower_bound(lower_bound_key); 
+        auto upper = mass_position_map.upper_bound(upper_bound_key);
+        vector<int> candidate_position;
+        for (auto it = lower; it != upper; ++it)
+        {
+         candidate_position.insert(candidate_position.end(), it->second.begin(), it->second.end());
+        }
+
+        // Find neighbor peptides for the current peptide
+        vector<int> result = NeighborSeq::findNeighborPeptides(*i, digested_candidate_peptides, candidate_position, min_shared_ion_fraction, mz_bin_size);
+        for (size_t j = 0; j < result.size(); ++j)
+        {
+          // Get the corresponding neighbor entry
+          const auto& neighbor_entry = neighbor_entries[result[j]];
+          // Create a new FASTAEntry with the neighbor peptide sequence
+          FASTAFile::FASTAEntry new_entry(entry.identifier, entry.description, digested_candidate_peptides[result[j]].toString());
+          // Write the neighbor peptide to the output file
+          neig.writeNext(new_entry);
+  
+        }    
       }
+      in.push_back(out_neighbor);
     }
-    else
-    {
+    
       // Configure Enzymatic digestion
       // TODO: allow user-specified regex
-      ProteaseDigestion digestion;
-      String enzyme = getStringOption_("enzyme").trim();
-      if ((input_type == SeqType::protein) && ! enzyme.empty()) { digestion.setEnzyme(enzyme); }
       // check if decoy_string is common decoy string (e.g. decoy, rev, ...)
       String decoy_string_lower = decoy_string;
       decoy_string_lower.toLower();
@@ -393,7 +411,7 @@ class TOPPDecoyDatabase :
           f.writeNext(entry);
         } // next protein
       }   // input files
-    }
+    
     return EXECUTION_OK;
   }
 };

From 15d985a60ff65198d4e6adb65e0117d90a468400 Mon Sep 17 00:00:00 2001
From: wangp97 <wangp97@zedat.fu-berlin.de>
Date: Wed, 24 Jul 2024 10:11:34 +0200
Subject: [PATCH 4/6] =?UTF-8?q?Wie=20kann=20ich=20am=20besten=20in=20.test?=
 =?UTF-8?q?=20ein=20map=20pr=C3=BCfen=3F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../include/OpenMS/ANALYSIS/ID/NeighborSeq.h  | 108 ++++---
 src/openms/source/ANALYSIS/ID/NeighborSeq.cpp |  65 +++-
 .../openms/source/NeighborSeq_test.cpp        | 292 ++++++++++--------
 src/topp/DecoyDatabase.cpp                    |  46 +--
 4 files changed, 301 insertions(+), 210 deletions(-)

diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
index 9eaf1ae3a73..7b6a4fb3845 100644
--- a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
+++ b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
@@ -8,58 +8,72 @@
 
 #pragma once
 
-#include <OpenMS/FORMAT/FASTAFile.h>
-#include <OpenMS/ANALYSIS/ID/NeighborSeq.h>
-#include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
 #include <OpenMS/KERNEL/MSSpectrum.h>
-#include <cmath>
-using namespace OpenMS;
 
-/**
- * @brief The Neighbor Peptide functionality in the TOPPDecoyDatabase class is designed to find peptides from a given set of sequences (FASTA file)
- * that are similar to a target peptide based on mass and spectral characteristics. This section will detail the methods and functionalities
- * specifically related to the Neighbor Peptide search.
- *
- * The paper on subset neighbor search is www.ncbi.nlm.nih.gov/pmc/articles/PMC8489664/
- * PMID: 34236864 PMCID: PMC8489664 DOI: 10.1021/acs.jproteome.1c00483
- */
-class OPENMS_DLLAPI NeighborSeq
-{
 
-public:
 
-  /**
-   * @brief Generates a theoretical spectrum for a given peptide sequence.
-   * @param peptide_sequence The peptide sequence for which to generate the spectrum.
-   * @return The generated theoretical spectrum.
-   */
-  static MSSpectrum generateSpectrum(const String& peptide_sequence);
+namespace OpenMS
+{
+    /**
+     * @brief The Neighbor Peptide functionality in the TOPPDecoyDatabase class is designed to find peptides from a given set of sequences (FASTA file)
+     * that are similar to a target peptide based on mass and spectral characteristics. This section will detail the methods and functionalities
+     * specifically related to the Neighbor Peptide search.
+     *
+     * The paper on subset neighbor search is www.ncbi.nlm.nih.gov/pmc/articles/PMC8489664/
+     * PMID: 34236864 PMCID: PMC8489664 DOI: 10.1021/acs.jproteome.1c00483
+     */
+    class OPENMS_DLLAPI NeighborSeq
+    {
+
+    public:
+      /**
+       * @brief Generates a theoretical spectrum for a given peptide sequence.
+       * @param peptide_sequence The peptide sequence for which to generate the spectrum.
+       * @return The generated theoretical spectrum.
+       */
+      static MSSpectrum generateSpectrum(const String& peptide_sequence);
 
-  /**
-   * @brief Compares two spectra to determine if they share a sufficient number of ions.
-   * @param spec1 The first spectrum.
-   * @param spec2 The second spectrum.
-   * @param min_shared_ion_fraction The tolerance for the proportion of shared ions.
-   * @param mz_bin_size The mz_bin_size setting for the comparison.
-   * @return True if the spectra share a sufficient number of ions, false otherwise.
-   */
-  static bool compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& min_shared_ion_fraction, const double& mz_bin_size);
+      /**
+       * @brief Compares two spectra to determine if they share a sufficient number of ions.
+       * @param spec1 The first spectrum.
+       * @param spec2 The second spectrum.
+       * @param min_shared_ion_fraction The tolerance for the proportion of shared ions.
+       * @param mz_bin_size The mz_bin_size setting for the comparison.
+       * @return True if the spectra share a sufficient number of ions, false otherwise.
+       */
+      static bool compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& min_shared_ion_fraction, const double& mz_bin_size);
 
+      /**
+       * @brief Finds candidate positions based on a given mono-isotopic weight and mass tolerance.
+       * @param mass_position_map A map where the key is the mass and the value is a vector of positions.
+       * @param mono_weight The mono-isotopic weight to find candidates for.
+       * @param mass_tolerance The allowed tolerance for matching the mass.
+       * @return A vector of candidate positions within the specified mass tolerance range.
+       */
+      static std::vector<int> findCandidatePositions(const std::map<double, std::vector<int>>& mass_position_map, const double& mono_weight, const double& mass_tolerance);
 
-  static std::map<double, std::vector<int>> NeighborSeq::createMassPositonMap(const std::vector<AASequence>& candidates);
+      
+      /**
+       * @brief Creates a map of masses to positions from a vector of peptides. (not protein)
+       * @param candidates A vector of AASequence objects.
+       * @return A map where the key is the mass and the value is a vector of positions.
+       */
+      static std::map<double, std::vector<int>> NeighborSeq::createMassPositionMap(const std::vector<AASequence>& candidates);
 
-  /**
-   * @brief Finds neighbor peptides that are similar to a given peptide.
-   * @param peptide The peptide sequence to find neighbors for.
-   * @param neighbor_candidate The list of neighbor sequences to search in.
-   * @param mass_tolerance The mass tolerance for neighbor peptides.
-   * @param min_shared_ion_fraction The ion tolerance for neighbor peptides.
-   * @param mz_bin_size The mz_bin_size setting for the comparison is default 0.05 (the original study suggests �high� (0.05 Da) and �low� (1.0005079 Da) mz_bin_size).
-   * @return A vector of FASTA entries that are considered neighbor peptides.
-   */
-  static std::vector<int> findNeighborPeptides(const AASequence& peptides,
-                                                                 const std::vector<AASequence>& neighbor_candidate,
-                                                                 const std::vector<int>& candidate_position,
-                                                                 const double& min_shared_ion_fraction,
-                                                                 const double& mz_bin_size);
-};
\ No newline at end of file
+      /**
+       * @brief Finds neighbor peptides that are similar to a given peptide.
+       * @param peptide The peptide sequence (from a relevant protein) to find neighbors for.
+       * @param neighbor_candidate The list of neighbor sequences to search in.
+       * @param mass_tolerance The mass tolerance for neighbor peptides.
+       * @param min_shared_ion_fraction The ion tolerance for neighbor peptides.
+       * @param mz_bin_size The mz_bin_size setting for the comparison is default 0.05 (the original study suggests �high� (0.05 Da) and �low� (1.0005079
+       * Da) mz_bin_size).
+       * @return A vector of FASTA entries that are considered neighbor peptides.
+       */
+      static std::vector<int> findNeighborPeptides(const AASequence& peptides,
+                                                   const std::vector<AASequence>& neighbor_candidate,
+                                                   const std::vector<int>& candidate_position,
+                                                   const double& min_shared_ion_fraction,
+                                                   const double& mz_bin_size);
+    };
+}
diff --git a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
index 0c16e04c442..2df9207ac77 100644
--- a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
+++ b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
@@ -2,15 +2,15 @@
 // SPDX-License-Identifier: BSD-3-Clause
 //
 // --------------------------------------------------------------------------
-// $Maintainer: Sven Nahnsen $
-// $Authors: Sven Nahnsen, Andreas Bertsch, Chris Bielow, Philipp Wang $
+// $Maintainer: Chris Bielow, Philipp Wang $
+// $Authors: Philipp Wang $
 // --------------------------------------------------------------------------
 
 #include <OpenMS/FORMAT/FASTAFile.h>
 #include <OpenMS/ANALYSIS/ID/NeighborSeq.h>
 #include <OpenMS/CHEMISTRY/TheoreticalSpectrumGenerator.h>
 #include <OpenMS/KERNEL/MSSpectrum.h>
-#include <cmath>
+
 
 
 
@@ -70,19 +70,56 @@ bool NeighborSeq::compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec
   // Calculate the fraction of shared bins
   double fraction_shared = static_cast<double>(2 * B12) / (spec1.size() + spec2.size());
 
-
   return fraction_shared > min_shared_ion_fraction;
 }
 
-map<double, vector<int>> NeighborSeq::createMassPositonMap(const vector<AASequence>& candidates)
+//Finds candidate positions based on a given mono-isotopic weight and mass tolerance.
+vector<int> NeighborSeq::findCandidatePositions(const map<double, vector<int>>& mass_position_map,const double& mono_weight, const double& mass_tolerance)
 {
+  // Vector to store the candidate positions
+  vector<int> candidate_position;
+  // Calculate the lower and upper bounds for the mass tolerance range
+  double lower_bound_key = mono_weight;
+  double upper_bound_key = mono_weight;
+  if (mass_tolerance < 0) 
+  {
+     lower_bound_key += mass_tolerance;
+     upper_bound_key -= mass_tolerance;
+  }
+  else
+  {
+    lower_bound_key -= mass_tolerance;
+    upper_bound_key += mass_tolerance;
+  }
+ 
+  // Find the lower bound iterator in the map
+  auto lower = mass_position_map.lower_bound(lower_bound_key);
+
+  // Find the upper bound iterator in the map
+  auto upper = mass_position_map.upper_bound(upper_bound_key);
+
+  // Iterate through the map from the lower bound to the upper bound
+  for (auto it = lower; it != upper; ++it)
+  {
+    // Insert the positions from the current map entry into the candidate positions vector
+    candidate_position.insert(candidate_position.end(), it->second.begin(), it->second.end());
+  }
+  return candidate_position;
+}
+
+
+//Creates a map of masses to positions from a vector of peptides.(not protein)
+map<double, vector<int>> NeighborSeq::createMassPositionMap(const vector<AASequence>& candidates)
+{
+  // Map to store the mass and corresponding positions
   map<double, vector<int>> mass_position_map;
 
-  //Iterate through the vector of AASequence objects 
+  // Iterate through the vector of AASequence objects
   for (size_t i = 0; i < candidates.size(); ++i)
   {
     // Calculate the mono-isotopic mass of the sequence
     double mass = candidates[i].getMonoWeight();
+
     // Insert the mass and the position into the map
     mass_position_map[mass].push_back(i);
   }
@@ -90,20 +127,18 @@ map<double, vector<int>> NeighborSeq::createMassPositonMap(const vector<AASequen
 }
 
 // Method to find neighbor peptides in a given FASTA file
-vector<int> NeighborSeq::findNeighborPeptides(const AASequence& peptides,
-                                                                const vector<AASequence>& neighbor_candidate,
-                                                                const vector<int>& candidate_position,
+vector<int> NeighborSeq::findNeighborPeptides(const AASequence& peptide,
+                                                                const vector<AASequence>& neighbor_candidates,
+                                                                const vector<int>& candidates_position,
                                                                 const double& min_shared_ion_fraction,
                                                                 const double& mz_bin_size)
 
 {
     vector<int> result_entries;
-    String rel_sequence = peptides.toString();
-    MSSpectrum spec = generateSpectrum(rel_sequence);
-    for (size_t i = 0; i < candidate_position.size(); i++)
+    MSSpectrum spec = generateSpectrum(peptide.toString());
+    for (size_t i = 0; i < candidates_position.size(); i++)
     {
-          String neighbor_sequence = neighbor_candidate[candidate_position[i]].toString();
-          //cout << neighbor_sequence << endl;
+          String neighbor_sequence = neighbor_candidates[candidates_position[i]].toString();
           // Check if the sequence contains an 'X'
               if (neighbor_sequence.find('X') == String::npos)
               {     
@@ -112,7 +147,7 @@ vector<int> NeighborSeq::findNeighborPeptides(const AASequence& peptides,
                   // Compare the spectra and add to results if they are similar
                       if (compareSpectra(spec, neighbor_spec, min_shared_ion_fraction, mz_bin_size))
                       {      
-                          result_entries.push_back(candidate_position[i]);
+                          result_entries.push_back(candidates_position[i]);
                       }
                }
     }
diff --git a/src/tests/class_tests/openms/source/NeighborSeq_test.cpp b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
index a53a8ebe691..664dc71c13a 100644
--- a/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
+++ b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
@@ -5,176 +5,212 @@
 // $Maintainer: Chris Bielow $
 // $Authors: Andreas Bertsch $
 // --------------------------------------------------------------------------
-
+#include <OpenMS/FORMAT/FASTAFile.h>
 #include <OpenMS/CONCEPT/ClassTest.h>
 #include <OpenMS/test_config.h>
 #include <OpenMS/ANALYSIS/ID/NeighborSeq.h>
+#include <vector>
+
 
 using namespace OpenMS;
 using namespace std;
 
 START_TEST(NeighborSeq, "$Id$")
 
-//NS()=delete;
-
+// NS()=delete;
 
-START_SECTION(double calculateMass(const AASequence& peptide))
-{
-  double mass_1 = NeighborSeq::calculateMass("PEPTIDE");
-  double mass_2 = NeighborSeq::calculateMass("PPPPPPP");
-  double mass_3 = NeighborSeq::calculateMass("PEPEP");
-  // Compare the calculated masses with expected values
-  TEST_REAL_SIMILAR(mass_1, 799.815);
-  TEST_REAL_SIMILAR(mass_2, 697.814);
-  TEST_REAL_SIMILAR(mass_3, 567.579);
-}
-END_SECTION
 
 // Test section for the generateSpectrum function
+// The spectra were generated via TOPPView and contained b-and y-ion
 START_SECTION(MSSpectrum generateSpectrum(const String& peptide_sequence))
 {
   MSSpectrum spec_1 = NeighborSeq::generateSpectrum("PEPT");
-  MSSpectrum spec_2 = NeighborSeq::generateSpectrum("PPP");
-  MSSpectrum spec_3 = NeighborSeq::generateSpectrum("AR");
+  MSSpectrum spec_2 = NeighborSeq::generateSpectrum("AR");
+  MSSpectrum spec_3 = NeighborSeq::generateSpectrum("VGLPINQR");
+
+  // Test "PEPT"
+  TEST_REAL_SIMILAR(spec_1[0].getPosition()[0], 120.0655);
+  TEST_REAL_SIMILAR(spec_1[1].getPosition()[0], 217.1182);
+  TEST_REAL_SIMILAR(spec_1[2].getPosition()[0], 227.1026);
+  TEST_REAL_SIMILAR(spec_1[3].getPosition()[0], 324.1553);
+  TEST_REAL_SIMILAR(spec_1[4].getPosition()[0], 346.1608);
   
-  // Define the expected spectrum for "PEPT"
-  MSSpectrum spec_1_test;
- 
-  Peak1D peak1;
-  peak1.setMZ(120.06552075);
-  peak1.setIntensity(1);
-  spec_1_test.push_back(peak1);
-
-  Peak1D peak2;
-  peak2.setMZ(217.11828498);
-  peak2.setIntensity(1);
-  spec_1_test.push_back(peak2);
-
-  Peak1D peak3;
-  peak3.setMZ(227.10263491);
-  peak3.setIntensity(1);
-  spec_1_test.push_back(peak3);
-
-  Peak1D peak4;
-  peak4.setMZ(324.15539914);
-  peak4.setIntensity(1);
-  spec_1_test.push_back(peak4);
-
-  Peak1D peak5;
-  peak5.setMZ(346.16087920);
-  peak5.setIntensity(1);
-  spec_1_test.push_back(peak5);
-
-// Define the expected spectrum for "PPP"
-  MSSpectrum spec_2_test;
-  Peak1D peak1;
-  peak1.setMZ(116.07060575);
-  peak1.setIntensity(1);
-  spec_2_test.push_back(peak1);
-
-  Peak1D peak2;
-  peak2.setMZ(195.11280491);
-  peak2.setIntensity(1);
-  spec_2_test.push_back(peak2);
-
-  Peak1D peak3;
-  peak3.setMZ(213.12336998);
-  peak3.setIntensity(1);
-  spec_3_test.push_back(peak3);
-
-// Define the expected spectrum for "AR"
-  MSSpectrum spec_3_test;
-  Peak1D peak1;
-  peak1.setMZ(175.11895291);
-  peak1.setIntensity(1);
-  spec_3_test.push_back(peak1);
-
-// Compare the generated spectra with the expected spectra
-  TEST_EQUAL(spec_1, spec_1_test);
-  TEST_EQUAL(spec_1, spec_2_test);
-  TEST_EQUAL(spec_1, spec_3_test);
-  TEST_EQUAL(spec_2, spec_1_test);
-  TEST_EQUAL(spec_2, spec_2_test);
-  TEST_EQUAL(spec_2, spec_3_test);
-  TEST_EQUAL(spec_3, spec_1_test);
-  TEST_EQUAL(spec_3, spec_2_test);
-  TEST_EQUAL(spec_3, spec_3_test);
+
+  // Test "AR"
+  TEST_REAL_SIMILAR(spec_2[0].getPosition()[0], 175.1189);
+
+  // Test "VGLPINQR"
+  TEST_REAL_SIMILAR(spec_3[0].getPosition()[0], 157.0971);
+  TEST_REAL_SIMILAR(spec_3[1].getPosition()[0], 175.1189);
+  TEST_REAL_SIMILAR(spec_3[2].getPosition()[0], 270.1812);
+  TEST_REAL_SIMILAR(spec_3[3].getPosition()[0], 303.1775);
+  TEST_REAL_SIMILAR(spec_3[4].getPosition()[0], 367.2339);
+  TEST_REAL_SIMILAR(spec_3[5].getPosition()[0], 417.2204);
+  TEST_REAL_SIMILAR(spec_3[6].getPosition()[0], 480.3180);
+  TEST_REAL_SIMILAR(spec_3[7].getPosition()[0], 530.3045);
+  TEST_REAL_SIMILAR(spec_3[8].getPosition()[0], 594.3609);
+  TEST_REAL_SIMILAR(spec_3[9].getPosition()[0], 627.3578);
+  TEST_REAL_SIMILAR(spec_3[10].getPosition()[0], 722.4195);
+  TEST_REAL_SIMILAR(spec_3[11].getPosition()[0], 740.4413);
+  TEST_REAL_SIMILAR(spec_3[12].getPosition()[0], 797.4628);
+
 }
 END_SECTION
 
 // Test section for the compareSpectra function
-START_SECTION(bool compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& ion_tolerance, const string& resolution))
+START_SECTION(bool compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& min_shared_ion_fraction, const double& mz_bin_size))
 {
   MSSpectrum spec1;
   spec1.push_back(Peak1D(100.0, 1.0));
   spec1.push_back(Peak1D(200.0, 1.0));
   spec1.push_back(Peak1D(300.0, 1.0));
-  spec1.push_back(Peak1D(400.0, 1.0));
-
 
   MSSpectrum spec2;
   spec2.push_back(Peak1D(100.05, 1.0));
   spec2.push_back(Peak1D(200.05, 1.0));
   spec2.push_back(Peak1D(300.05, 1.0));
-  spec2.push_back(Peak1D(400.06, 1.0));
-
 
   MSSpectrum spec3;
-  spec3.push_back(Peak1D(101.05, 1.0));
-  spec3.push_back(Peak1D(201.05, 1.0));
-  spec3.push_back(Peak1D(301.05, 1.0));
-  spec3.push_back(Peak1D(301.06, 1.0));
+  spec3.push_back(Peak1D(101.00, 1.0));
+  spec3.push_back(Peak1D(201.00, 1.0));
+  spec3.push_back(Peak1D(301.00, 1.0));
 
+  MSSpectrum spec4;
+  spec4.push_back(Peak1D(100.05, 1.0));
+  spec4.push_back(Peak1D(201.00, 1.0));
+  spec4.push_back(Peak1D(300.05, 1.0));
+  spec4.push_back(Peak1D(301.00, 1.0));
 
-  double ion_tolerance = 0.25;
+
+  double min_shared_ion_fraction = 0.5; 
   
-  bool compare_1_2_low = NeighborSeq::compareSpectra(spec1, spec2, ion_tolerance, "low");
-  bool compare_1_3_low = NeighborSeq::compareSpectra(spec1, spec3, ion_tolerance, "low");
-  bool compare_2_3_low = NeighborSeq::compareSpectra(spec2, spec3, ion_tolerance, "low");
-
-  bool compare_1_2_high = NeighborSeq::compareSpectra(spec1, spec2, ion_tolerance, "high");
-  bool compare_1_3_high = NeighborSeq::compareSpectra(spec1, spec3, ion_tolerance, "high");
-  bool compare_2_3_high = NeighborSeq::compareSpectra(spec2, spec3, ion_tolerance, "high");
-
-// Verify the results of the comparisons
-  TEST_EQUAL(compare_1_2_low, true)
-  TEST_EQUAL(compare_1_3_low, false)
-  TEST_EQUAL(compare_1_3_low, false)
-  TEST_EQUAL(compare_1_2_high, true)
-  TEST_EQUAL(compare_1_3_high, false)
-  TEST_EQUAL(compare_2_3_high, true)
+
+  //bin interval is from [a,b[
+  bool compare_1_2_low = NeighborSeq::compareSpectra(spec1, spec2, min_shared_ion_fraction, 1.0);
+  TEST_TRUE(compare_1_2_low)
+  bool compare_1_3_low = NeighborSeq::compareSpectra(spec1, spec3, min_shared_ion_fraction, 1.0);
+  TEST_FALSE(compare_1_3_low)
+  bool compare_1_4_low = NeighborSeq::compareSpectra(spec1, spec4, min_shared_ion_fraction, 1.0);
+  TEST_TRUE(compare_1_4_low)
+  bool compare_2_3_low = NeighborSeq::compareSpectra(spec2, spec3, min_shared_ion_fraction, 1.0);
+  TEST_FALSE(compare_2_3_low)
+  bool compare_2_4_low = NeighborSeq::compareSpectra(spec2, spec4, min_shared_ion_fraction, 1.0);
+  TEST_TRUE(compare_2_4_low)
+  bool compare_3_4_low = NeighborSeq::compareSpectra(spec3, spec4, min_shared_ion_fraction, 1.0);
+  TEST_TRUE(compare_3_4_low)
+
+  bool compare_1_2_high = NeighborSeq::compareSpectra(spec1, spec2, min_shared_ion_fraction, 0.05);
+  TEST_FALSE(compare_1_2_high)  
+  bool compare_1_3_high = NeighborSeq::compareSpectra(spec1, spec3, min_shared_ion_fraction, 0.05);
+  TEST_FALSE(compare_1_3_high)
+  bool compare_1_4_high = NeighborSeq::compareSpectra(spec1, spec4, min_shared_ion_fraction, 0.05);
+  TEST_FALSE(compare_1_4_high)
+  bool compare_2_3_high = NeighborSeq::compareSpectra(spec2, spec3, min_shared_ion_fraction, 0.05);
+  TEST_FALSE(compare_2_3_high)
+  bool compare_2_4_high = NeighborSeq::compareSpectra(spec2, spec4, min_shared_ion_fraction, 0.05);
+  TEST_TRUE(compare_2_4_high) 
+  bool compare_3_4_high = NeighborSeq::compareSpectra(spec3, spec4, min_shared_ion_fraction, 0.05);
+  TEST_TRUE(compare_3_4_high)
 }
 END_SECTION
 
 
+// Test section for the findCandidatePositions function
+START_SECTION(vector<int> findCandidatePositions(const map<double, vector<int>>& mass_position_map,
+                                                 const double& mono_weight,
+                                                 const double& mass_tolerance))
+{
+  map<double, vector<int>> mass_position_map_1 = {{100.0, {1, 2}}, {101.5, {3}}, {102.0, {4, 5}}};
+  double mono_weight_1 = 101.0;
+  double mass_tolerance_1 = 1.0;
+
+  vector<int> expected_1 = {1, 2, 3, 4 , 5};
+  vector<int> result_1 = NeighborSeq::findCandidatePositions(mass_position_map_1, mono_weight_1, mass_tolerance_1);
+  TEST_EQUAL(expected_1,result_1)
+
+  map<double, vector<int>> mass_position_map_2 = {{100.0, {1, 2}}, {101.5, {3}}, {102.0, {4, 5}}};
+  double mono_weight_2 = 100.5;
+  double mass_tolerance_2 = 1.0;
+
+  vector<int> expected_2 = {1, 2, 3};
+  vector<int> result_2 = NeighborSeq::findCandidatePositions(mass_position_map_2, mono_weight_2, mass_tolerance_2);
+  TEST_EQUAL(expected_2, result_2)
+
+  map<double, vector<int>> mass_position_map_3 = {{100.0, {1, 2}}, {101.5, {3}}, {102.0, {4, 5}}};
+  double mono_weight_3 = 105.0;
+  double mass_tolerance_3 = 1.0;
+
+  vector<int> expected_3 = {};
+  vector<int> result_3 = NeighborSeq::findCandidatePositions(mass_position_map_3, mono_weight_3, mass_tolerance_3);
+  TEST_EQUAL(expected_3, result_3)
+
+  map<double, vector<int>> mass_position_map_4 = {{100.0, {1, 2}}, {101.5, {3}}, {102.0, {4, 5}}};
+  double mono_weight_4 = 100.5;
+  double mass_tolerance_4 = -1.0;
+
+  vector<int> expected_4 = {1, 2, 3};
+  vector<int> result_4 = NeighborSeq::findCandidatePositions(mass_position_map_4, mono_weight_4, mass_tolerance_4);
+  TEST_EQUAL(expected_4, result_4)
+}
+END_SECTION
+
+/*
+START_SECTION(map<double, vector<int>> NeighborSeq::createMassPositionMap(const vector<AASequence>& candidates))
+{
+  vector<AASequence> candidates_1;
+  AASequence seq1 = AASequence::fromString("PEPTIDE"); // 728.371 
+  AASequence seq2 = AASequence::fromString("PROTEIN"); // 799.349
+  AASequence seq3 = AASequence::fromString("SEQUENCE");// 837.270
+
+
+  candidates_1.push_back(seq1); // 1 position
+  candidates_1.push_back(seq2); // 2 position
+  candidates_1.push_back(seq3); // 3 position
+  map<double, vector<int>> expected_1 = {{728.371, {2}}, {799.349, {1}}, {837.270, {3}}};
+  map<double, vector<int>> result_1 = NeighborSeq::createMassPositionMap(candidates_1);
+  TEST_EQUAL(expected_1, result_1)
+
+  candidates_1.push_back(seq1); // 4 position
+  map<double, vector<int>> expected_2 = {{728.371, {2}}, {799.349, {1,4}}, {837.270, {3}}};
+  map<double, vector<int>> result_2 = NeighborSeq::createMassPositionMap(candidates_1);
+  TEST_EQUAL(expected_2, result_2)
+
+
+}
+END_SECTION
+*/
+
+
 // Test section for the findNeighborPeptides_ function
-START_SECTION(vector<FASTAFile::FASTAEntry> findNeighborPeptides(const AASequence& peptides,
-                             const vector<FASTAFile::FASTAEntry>& neighbor_file,
-                             const double& mass_tolerance,
-                             const double& ion_tolerance,
-                             const String& resolution))
+// The spectra were generated via TOPPView
+START_SECTION(vector<int> findNeighborPeptides(const AASequence& peptides,
+                                               const vector<AASequence>& neighbor_candidate,
+                                               const vector<int>& candidate_position,
+                                               const double& min_shared_ion_fraction,
+                                               const double& mz_bin_size))
 {
-  AASequence peptide = AASequence::fromString("VGLPINQR")
-
-  vector<FASTAFile::FASTAEntry> neighbor_file;
-  neighbor_file.push_back(FASTAFile::FASTAEntry("protein1", "RIPLANGR"));
-  neighbor_file.push_back(FASTAFile::FASTAEntry("protein2", "PPPPPPPP"));
-  neighbor_file.push_back(FASTAFile::FASTAEntry("protein3", "VGLPINQR"));
-  neighbor_file.push_back(FASTAFile::FASTAEntry("protein4", "RIPLANGR"));
-
-  double mass_tolerance = 0.00004;
-  double ion_tolerance = 0.25;
-  String resolution = "low";
-
-  vector<FASTAFile::FASTAEntry> neighbors = findNeighborPeptides(peptide, neighbor_file, mass_tolerance, ion_tolerance, resolution);
-  vector<FASTAFile::FASTAEntry> neighbors_test;
-  neighbor_test.push_back(FASTAFile::FASTAEntry("RIPLANGR"));
-  neighbor_test.push_back(FASTAFile::FASTAEntry("VGLPINQRgit clone git@github.com:_YOURUSERNAME_/OpenMS.git"));
-  
-  // Verify the found neighbors match the expected neighbors
-  TEST_EQUAL(neighbor, neighbor_test);
 
+  AASequence seq1 = AASequence::fromString("RIPLANGR");
+  AASequence seq2 = AASequence::fromString("PPPPP");
+  AASequence seq3 = AASequence::fromString("SEQUENCE");
+  AASequence seq4 = AASequence::fromString("VGLPINQR");
+  vector<AASequence> neighbor_candidate;
+  neighbor_candidate.push_back(seq1);
+  neighbor_candidate.push_back(seq2);
+  neighbor_candidate.push_back(seq3);
+  neighbor_candidate.push_back(seq4);
+  vector<int> candidate_position = {0, 1, 2, 3};
+
+  vector<int> expected = {0,3};
+  double min_shared_ion_fraction = 0.25;
+  double mz_bin_size = 0.05;
+
+  vector<int> result = NeighborSeq::findNeighborPeptides(seq1, neighbor_candidate, candidate_position, min_shared_ion_fraction, mz_bin_size);
+  
+   TEST_EQUAL(expected, result)
+  
 }
 END_SECTION
 
-END_TEST
+
+END_TEST
\ No newline at end of file
diff --git a/src/topp/DecoyDatabase.cpp b/src/topp/DecoyDatabase.cpp
index 7be71111cdd..01a83651ef5 100644
--- a/src/topp/DecoyDatabase.cpp
+++ b/src/topp/DecoyDatabase.cpp
@@ -3,7 +3,7 @@
 //
 // --------------------------------------------------------------------------
 // $Maintainer: Sven Nahnsen $
-// $Authors: Sven Nahnsen, Andreas Bertsch, Chris Bielow $
+// $Authors: Sven Nahnsen, Andreas Bertsch, Chris Bielow, Philipp Wang $
 // --------------------------------------------------------------------------
 
 #include <OpenMS/FORMAT/FASTAFile.h>
@@ -63,7 +63,7 @@ and terminates the program if decoys are found.
 The Neighbor Peptide functionality in the TOPPDecoyDatabase class is designed to find peptides from a given set of sequences (FASTA file) that are
 similar to a target peptide based on mass and spectral characteristics. This section will detail the methods and functionalities specifically related
 to the Neighbor Peptide search. 
-NeighborSeq class has more details look at *openms\source\ANALYSIS\ID\NeighborSeq.cpp
+NeighborSeq class has more details look at *OpenMS\src\openms\include\OpenMS\ANALYSIS\ID\NeighborSeq.h
 */
 
 // We do not want this class to show up in the docu:
@@ -84,7 +84,6 @@ class TOPPDecoyDatabase :
     registerInputFileList_("in", "<file(s)>", ListUtils::create<String>(""), "Input FASTA file(s), each containing a database. It is recommended to include a contaminant database as well.");
     setValidFormats_("in", ListUtils::create<String>("fasta"));
     registerOutputFile_("out", "<file>", "", "Output FASTA file where the decoy database will be written to.");
-    registerOutputFile_("out_neighbor", "<file>", "", "Output FASTA file where the neighbor peptide will be written to.");
     setValidFormats_("out", ListUtils::create<String>("fasta"));
     registerStringOption_("decoy_string", "<string>", "DECOY_", "String that is combined with the accession of the protein identifier to indicate a decoy protein.", false);
     registerStringOption_("decoy_string_position", "<choice>", "prefix", "Should the 'decoy_string' be prepended (prefix) or appended (suffix) to the protein accession?", false);
@@ -112,6 +111,7 @@ class TOPPDecoyDatabase :
     registerTOPPSubsection_("Neighbor_Search", "Parameters for neighbor peptide search");
     registerInputFile_("Neighbor_Search:neighbor_in", "<file>","", "neighbor peptides are searched for in the Fasta file entered for candiate ", false);
     setValidFormats_("Neighbor_Search:neighbor_in", {"fasta"});
+    registerOutputFile_("Neighbor_Search:out_neighbor", "<file>", "", "Output FASTA file where the neighbor peptide will be written to.");
     registerIntOption_("Neighbor_Search:missed_cleavages", "<int>", 0,"Number of missed cleavages",false);
     registerDoubleOption_("Neighbor_Search:mz_bin_size", "<num>", 0.05,"Sets the mz_bin_size of the neighbor peptide search.(the original study suggests high (0.05 Da) and low (1.0005079 Da) mz_bin_size)", false);
     registerDoubleOption_("Neighbor_Search:mass_tolerance", "<double>", 0.00001, "Tolerance of the mass of the neighbor peptide m1-m2 > tm", false, true);
@@ -197,14 +197,27 @@ class TOPPDecoyDatabase :
  // Check if the neighbor file option is provided
     if (! neighbor_file.empty())
     {
-      String out_neighbor = getStringOption_("out_neighbor");
+      if (input_type != SeqType::protein)
+      {
+        OPENMS_LOG_ERROR << "Parameter settings are invalid. When requesting neighbor peptides, the input type must be 'protein', not 'RNA'.\n";
+        return INCOMPATIBLE_INPUT_DATA;
+      }
+      // Create a ProteaseDigestion object for neighbor peptide digestion
+      ProteaseDigestion digestion_neighbor;
+      String enzyme = getStringOption_("enzyme").trim();
+      if ((input_type == SeqType::protein) && ! enzyme.empty()) { digestion_neighbor.setEnzyme(enzyme); }
+      //-------------------------------------------------------------
+      // parsing neighbor parameters
+      //-------------------------------------------------------------
+      String out_neighbor = getStringOption_("Neighbor_Search:out_neighbor");
       FASTAFile neig;
       neig.writeStart(out_neighbor);
+
       double mz_bin_size = getDoubleOption_("Neighbor_Search:mz_bin_size");
       double min_shared_ion_fraction = getDoubleOption_("Neighbor_Search:min_shared_ion_fraction");
       double mass_tolerance = getDoubleOption_("Neighbor_Search:mass_tolerance");
       int missed_cleavages = getIntOption_("Neighbor_Search:missed_cleavages");
-      digestion.setMissedCleavages(missed_cleavages);
+      digestion_neighbor.setMissedCleavages(missed_cleavages);
  // Vector to hold all digested peptides from the input files
       vector<AASequence> all_peptides;
 
@@ -217,7 +230,7 @@ class TOPPDecoyDatabase :
           // Vector to hold the peptides generated from digestion
           vector<AASequence> peptides;
           // Digest the current entry sequence into peptides
-          digestion.digest(AASequence::fromString(entry.sequence), peptides);
+          digestion_neighbor.digest(AASequence::fromString(entry.sequence), peptides);
           // Add the generated peptides to the all_peptides vector
           all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end());
         }
@@ -229,25 +242,18 @@ class TOPPDecoyDatabase :
       vector<AASequence> digested_candidate_peptides;
       for (auto& entry : neighbor_entries)
       {
-        digestion.digest(AASequence::fromString(entry.sequence), digested_candidate_peptides);
+        digestion_neighbor.digest(AASequence::fromString(entry.sequence), digested_candidate_peptides);       
       }
-
-      map<double, vector<int>> mass_position_map = NeighborSeq::createMassPositonMap(digested_candidate_peptides);
+     // Creates a map of masses to positions from a vector
+      map<double, vector<int>> mass_position_map = NeighborSeq::createMassPositionMap(digested_candidate_peptides);
       
       for (auto i = all_peptides.begin(); i != all_peptides.end(); ++i)
       { 
-        double lower_bound_key = i->getMonoWeight() - mass_tolerance;
-        double upper_bound_key = i->getMonoWeight() + mass_tolerance;
-        auto lower = mass_position_map.lower_bound(lower_bound_key); 
-        auto upper = mass_position_map.upper_bound(upper_bound_key);
-        vector<int> candidate_position;
-        for (auto it = lower; it != upper; ++it)
-        {
-         candidate_position.insert(candidate_position.end(), it->second.begin(), it->second.end());
-        }
-
+        // Finde position of neighbor peptides candidates
+        vector<int> candiadate_position = NeighborSeq::findCandidatePositions(mass_position_map, i->getMonoWeight(), mass_tolerance);
+       
         // Find neighbor peptides for the current peptide
-        vector<int> result = NeighborSeq::findNeighborPeptides(*i, digested_candidate_peptides, candidate_position, min_shared_ion_fraction, mz_bin_size);
+        vector<int> result = NeighborSeq::findNeighborPeptides(*i, digested_candidate_peptides, candiadate_position, min_shared_ion_fraction, mz_bin_size);
         for (size_t j = 0; j < result.size(); ++j)
         {
           // Get the corresponding neighbor entry

From ab386c943734f27b7ee7604512c7d4075daf19c7 Mon Sep 17 00:00:00 2001
From: wangp97 <wangp97@zedat.fu-berlin.de>
Date: Wed, 24 Jul 2024 23:10:18 +0200
Subject: [PATCH 5/6] besteht test nicht

---
 .../include/OpenMS/ANALYSIS/ID/NeighborSeq.h  | 11 ++-
 src/openms/source/ANALYSIS/ID/NeighborSeq.cpp | 68 +++++++++++++---
 .../openms/source/NeighborSeq_test.cpp        | 79 +++++++++++++++++--
 src/topp/DecoyDatabase.cpp                    |  8 +-
 4 files changed, 143 insertions(+), 23 deletions(-)

diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
index 7b6a4fb3845..3c851d0705e 100644
--- a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
+++ b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
@@ -75,5 +75,14 @@ namespace OpenMS
                                                    const std::vector<int>& candidate_position,
                                                    const double& min_shared_ion_fraction,
                                                    const double& mz_bin_size);
-    };
+
+       /**is only a test function for compareSpectra, it works the same way, only the common peaks are output.
+       * @brief Compares two spectra to determine if they share a sufficient number of ions.
+       * @param spec1 The first spectrum.
+       * @param spec2 The second spectrum.
+       * @param mz_bin_size The mz_bin_size setting for the comparison.
+       * @return True if the spectra share a sufficient number of ions, false otherwise.
+       */
+      static int compareSpectraTest(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size);
+    }; 
 }
diff --git a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
index 2df9207ac77..c366bffceb7 100644
--- a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
+++ b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
@@ -117,11 +117,18 @@ map<double, vector<int>> NeighborSeq::createMassPositionMap(const vector<AASeque
   // Iterate through the vector of AASequence objects
   for (size_t i = 0; i < candidates.size(); ++i)
   {
-    // Calculate the mono-isotopic mass of the sequence
-    double mass = candidates[i].getMonoWeight();
+    if (candidates[i].toString().find('X') == String::npos)
+    {
+      // Calculate the mono-isotopic mass of the sequence
+      double mass = candidates[i].getMonoWeight();
 
-    // Insert the mass and the position into the map
-    mass_position_map[mass].push_back(i);
+      // Insert the mass and the position into the map
+      mass_position_map[mass].push_back(i);
+    }
+    else
+    {
+          //throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot get weight of sequence with unknown AA 'X' with unknown mass.", toString());
+    }
   }
   return mass_position_map;
 }
@@ -142,15 +149,52 @@ vector<int> NeighborSeq::findNeighborPeptides(const AASequence& peptide,
           // Check if the sequence contains an 'X'
               if (neighbor_sequence.find('X') == String::npos)
               {     
-                  MSSpectrum neighbor_spec = generateSpectrum(neighbor_sequence);
-
-                  // Compare the spectra and add to results if they are similar
-                      if (compareSpectra(spec, neighbor_spec, min_shared_ion_fraction, mz_bin_size))
-                      {      
-                          result_entries.push_back(candidates_position[i]);
-                      }
-               }
+                  MSSpectrum neighbor_spec = generateSpectrum(neighbor_candidates[candidates_position[i]].toString());
+               // Compare the spectra and add to results if they are similar
+                  if (compareSpectra(spec, neighbor_spec, min_shared_ion_fraction, mz_bin_size))
+                  {      
+                      result_entries.push_back(candidates_position[i]);
+                  }
+              }
+              else
+              {
+                //throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot get peaks of sequence with unknown AA 'X'.", toString());
+              }
     }
 return result_entries;
 }
 
+
+
+// is only a test function for compareSpectra, it works the same way, only the common peaks are output. 
+int NeighborSeq::compareSpectraTest(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size)
+{
+  std::map<int, int> bins1, bins2;
+
+  // Lambda function to bin a spectrum into a map
+  auto bin_spectrum = [&](const MSSpectrum& spec, std::map<int, int>& bins) {
+    for (const auto& peak : spec)
+      bins[static_cast<int>(peak.getMZ() / mz_bin_size)]++;
+  };
+
+  // Bin both spectra
+  bin_spectrum(spec1, bins1);
+  bin_spectrum(spec2, bins2);
+
+  // Extract bin keys as vectors
+  std::vector<int> vec_bins1, vec_bins2;
+  for (const auto& bin : bins1)
+    vec_bins1.push_back(bin.first);
+  for (const auto& bin : bins2)
+    vec_bins2.push_back(bin.first);
+
+  // Calculate the intersection of the bin vectors
+  std::vector<int> intersection;
+  std::set_intersection(vec_bins1.begin(), vec_bins1.end(), vec_bins2.begin(), vec_bins2.end(), std::back_inserter(intersection));
+  // Calculate the number of shared bins considering the bin frequencies
+  int shared_peaks = 0;
+  for (int bin : intersection)
+    shared_peaks += min(bins1[bin], bins2[bin]);
+
+  return shared_peaks;
+}
\ No newline at end of file
diff --git a/src/tests/class_tests/openms/source/NeighborSeq_test.cpp b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
index 664dc71c13a..e1378347676 100644
--- a/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
+++ b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
@@ -126,7 +126,7 @@ START_SECTION(vector<int> findCandidatePositions(const map<double, vector<int>>&
 
   vector<int> expected_1 = {1, 2, 3, 4 , 5};
   vector<int> result_1 = NeighborSeq::findCandidatePositions(mass_position_map_1, mono_weight_1, mass_tolerance_1);
-  TEST_EQUAL(expected_1,result_1)
+  TEST_EQUAL(result_1,expected_1)
 
   map<double, vector<int>> mass_position_map_2 = {{100.0, {1, 2}}, {101.5, {3}}, {102.0, {4, 5}}};
   double mono_weight_2 = 100.5;
@@ -134,7 +134,7 @@ START_SECTION(vector<int> findCandidatePositions(const map<double, vector<int>>&
 
   vector<int> expected_2 = {1, 2, 3};
   vector<int> result_2 = NeighborSeq::findCandidatePositions(mass_position_map_2, mono_weight_2, mass_tolerance_2);
-  TEST_EQUAL(expected_2, result_2)
+  TEST_EQUAL(result_2,expected_2)
 
   map<double, vector<int>> mass_position_map_3 = {{100.0, {1, 2}}, {101.5, {3}}, {102.0, {4, 5}}};
   double mono_weight_3 = 105.0;
@@ -142,7 +142,7 @@ START_SECTION(vector<int> findCandidatePositions(const map<double, vector<int>>&
 
   vector<int> expected_3 = {};
   vector<int> result_3 = NeighborSeq::findCandidatePositions(mass_position_map_3, mono_weight_3, mass_tolerance_3);
-  TEST_EQUAL(expected_3, result_3)
+  TEST_EQUAL(result_3, expected_3)
 
   map<double, vector<int>> mass_position_map_4 = {{100.0, {1, 2}}, {101.5, {3}}, {102.0, {4, 5}}};
   double mono_weight_4 = 100.5;
@@ -150,7 +150,7 @@ START_SECTION(vector<int> findCandidatePositions(const map<double, vector<int>>&
 
   vector<int> expected_4 = {1, 2, 3};
   vector<int> result_4 = NeighborSeq::findCandidatePositions(mass_position_map_4, mono_weight_4, mass_tolerance_4);
-  TEST_EQUAL(expected_4, result_4)
+  TEST_EQUAL(result_4, expected_4)
 }
 END_SECTION
 
@@ -168,12 +168,12 @@ START_SECTION(map<double, vector<int>> NeighborSeq::createMassPositionMap(const
   candidates_1.push_back(seq3); // 3 position
   map<double, vector<int>> expected_1 = {{728.371, {2}}, {799.349, {1}}, {837.270, {3}}};
   map<double, vector<int>> result_1 = NeighborSeq::createMassPositionMap(candidates_1);
-  TEST_EQUAL(expected_1, result_1)
+  TEST_EQUAL(result_1,expected_1)
 
   candidates_1.push_back(seq1); // 4 position
   map<double, vector<int>> expected_2 = {{728.371, {2}}, {799.349, {1,4}}, {837.270, {3}}};
   map<double, vector<int>> result_2 = NeighborSeq::createMassPositionMap(candidates_1);
-  TEST_EQUAL(expected_2, result_2)
+  TEST_EQUAL(result_2,expected_2)
 
 
 }
@@ -194,14 +194,19 @@ START_SECTION(vector<int> findNeighborPeptides(const AASequence& peptides,
   AASequence seq2 = AASequence::fromString("PPPPP");
   AASequence seq3 = AASequence::fromString("SEQUENCE");
   AASequence seq4 = AASequence::fromString("VGLPINQR");
+  AASequence seq5 = AASequence::fromString("A");
+  AASequence seq6 = AASequence::fromString("X");
   vector<AASequence> neighbor_candidate;
   neighbor_candidate.push_back(seq1);
   neighbor_candidate.push_back(seq2);
   neighbor_candidate.push_back(seq3);
+  neighbor_candidate.push_back(seq1);
   neighbor_candidate.push_back(seq4);
-  vector<int> candidate_position = {0, 1, 2, 3};
+  neighbor_candidate.push_back(seq5);
+  neighbor_candidate.push_back(seq6);
+  vector<int> candidate_position = {0, 1, 2, 3, 4, 5};
 
-  vector<int> expected = {0,3};
+  vector<int> expected = {0,3, 4};
   double min_shared_ion_fraction = 0.25;
   double mz_bin_size = 0.05;
 
@@ -213,4 +218,62 @@ START_SECTION(vector<int> findNeighborPeptides(const AASequence& peptides,
 END_SECTION
 
 
+// Test section for the compareSpectra function
+START_SECTION(bool compareSpectraTest(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size))
+{
+  MSSpectrum spec1;
+  spec1.push_back(Peak1D(100.0, 1.0));
+  spec1.push_back(Peak1D(200.0, 1.0));
+  spec1.push_back(Peak1D(300.0, 1.0));
+
+  MSSpectrum spec2;
+  spec2.push_back(Peak1D(100.05, 1.0));
+  spec2.push_back(Peak1D(200.05, 1.0));
+  spec2.push_back(Peak1D(300.05, 1.0));
+
+  MSSpectrum spec3;
+  spec3.push_back(Peak1D(101.00, 1.0));
+  spec3.push_back(Peak1D(201.00, 1.0));
+  spec3.push_back(Peak1D(301.00, 1.0));
+
+  MSSpectrum spec4;
+  spec4.push_back(Peak1D(100.05, 1.0));
+  spec4.push_back(Peak1D(201.00, 1.0));
+  spec4.push_back(Peak1D(300.05, 1.0));
+  spec4.push_back(Peak1D(301.00, 1.0));
+
+
+  double min_shared_ion_fraction = 0.5;
+
+
+  // bin interval is from [a,b[
+  bool compare_1_2_low = NeighborSeq::compareSpectraTest(spec1, spec2, 1.0);
+  TEST_EQUAL(compare_1_2_low,3)
+  bool compare_1_3_low = NeighborSeq::compareSpectraTest(spec1, spec3, 1.0);
+  TEST_EQUAL(compare_1_3_low,0)
+  bool compare_1_4_low = NeighborSeq::compareSpectraTest(spec1, spec4, 1.0);
+  TEST_EQUAL(compare_1_4_low,2)
+  bool compare_2_3_low = NeighborSeq::compareSpectraTest(spec2, spec3, 1.0);
+  TEST_EQUAL(compare_2_3_low,0)
+  bool compare_2_4_low = NeighborSeq::compareSpectraTest(spec2, spec4, 1.0);
+  TEST_EQUAL(compare_2_4_low,3)
+  bool compare_3_4_low = NeighborSeq::compareSpectraTest(spec3, spec4, 1.0);
+  TEST_EQUAL(compare_3_4_low,2)
+
+  bool compare_1_2_high = NeighborSeq::compareSpectraTest(spec1, spec2, 0.05);
+  //TEST_EQUAL(compare_1_2_high,0)
+  bool compare_1_3_high = NeighborSeq::compareSpectraTest(spec1, spec3, 0.05);
+  TEST_EQUAL(compare_1_3_high,0)
+  bool compare_1_4_high = NeighborSeq::compareSpectraTest(spec1, spec4, 0.05);
+  //TEST_EQUAL(compare_1_4_high,0)
+  bool compare_2_3_high = NeighborSeq::compareSpectraTest(spec2, spec3, 0.05);
+  TEST_EQUAL(compare_2_3_high,0)
+  bool compare_2_4_high = NeighborSeq::compareSpectraTest(spec2, spec4, 0.05);
+  TEST_EQUAL(compare_2_4_high,2)
+  bool compare_3_4_high = NeighborSeq::compareSpectraTest(spec3, spec4, 0.05);
+  TEST_EQUAL(compare_3_4_high,2)
+  
+}
+END_SECTION
+
 END_TEST
\ No newline at end of file
diff --git a/src/topp/DecoyDatabase.cpp b/src/topp/DecoyDatabase.cpp
index 01a83651ef5..a26394dd256 100644
--- a/src/topp/DecoyDatabase.cpp
+++ b/src/topp/DecoyDatabase.cpp
@@ -111,10 +111,12 @@ class TOPPDecoyDatabase :
     registerTOPPSubsection_("Neighbor_Search", "Parameters for neighbor peptide search");
     registerInputFile_("Neighbor_Search:neighbor_in", "<file>","", "neighbor peptides are searched for in the Fasta file entered for candiate ", false);
     setValidFormats_("Neighbor_Search:neighbor_in", {"fasta"});
-    registerOutputFile_("Neighbor_Search:out_neighbor", "<file>", "", "Output FASTA file where the neighbor peptide will be written to.");
+    registerOutputFile_("Neighbor_Search:out_neighbor", "<file>", "", "Output FASTA file where the neighbor peptide will be written to.",false);
     registerIntOption_("Neighbor_Search:missed_cleavages", "<int>", 0,"Number of missed cleavages",false);
     registerDoubleOption_("Neighbor_Search:mz_bin_size", "<num>", 0.05,"Sets the mz_bin_size of the neighbor peptide search.(the original study suggests high (0.05 Da) and low (1.0005079 Da) mz_bin_size)", false);
     registerDoubleOption_("Neighbor_Search:mass_tolerance", "<double>", 0.00001, "Tolerance of the mass of the neighbor peptide m1-m2 > tm", false, true);
+    //setValidStrings_("Neighbor_Search:fragment_unit_ppm", {"true", "false"});
+    //registerStringOption_("Neighbor_Search:fragment_unit_ppm", "<choice>", "true","calculates with dalton or ppm", false, true);
     registerDoubleOption_("Neighbor_Search:min_shared_ion_fraction", "<double>", 0.25, "Tolerance of the proportion of b and y ions shared by the neighbor peptide 2*B12/B1+B2 > ti", false, true);
   }
 
@@ -240,9 +242,11 @@ class TOPPDecoyDatabase :
       vector<FASTAFile::FASTAEntry> neighbor_entries;
       neighbor_fasta.load(neighbor_file, neighbor_entries);
       vector<AASequence> digested_candidate_peptides;
+      vector<AASequence> temp_peptides;
       for (auto& entry : neighbor_entries)
       {
-        digestion_neighbor.digest(AASequence::fromString(entry.sequence), digested_candidate_peptides);       
+        digestion_neighbor.digest(AASequence::fromString(entry.sequence), temp_peptides);  
+        digested_candidate_peptides.insert(digested_candidate_peptides.end(), temp_peptides.begin(), temp_peptides.end());
       }
      // Creates a map of masses to positions from a vector
       map<double, vector<int>> mass_position_map = NeighborSeq::createMassPositionMap(digested_candidate_peptides);

From 43717b7c7ef7b26cc2ed186fefd86eb336cd4915 Mon Sep 17 00:00:00 2001
From: wangp97 <wangp97@zedat.fu-berlin.de>
Date: Mon, 29 Jul 2024 16:36:02 +0200
Subject: [PATCH 6/6] Abgabe

---
 .../include/OpenMS/ANALYSIS/ID/NeighborSeq.h  |  4 +-
 src/openms/source/ANALYSIS/ID/NeighborSeq.cpp | 54 ++++++-------------
 .../openms/source/NeighborSeq_test.cpp        | 48 ++++++++---------
 src/topp/DecoyDatabase.cpp                    | 44 +++++++++------
 4 files changed, 68 insertions(+), 82 deletions(-)

diff --git a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
index 3c851d0705e..e441291b025 100644
--- a/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
+++ b/src/openms/include/OpenMS/ANALYSIS/ID/NeighborSeq.h
@@ -76,13 +76,13 @@ namespace OpenMS
                                                    const double& min_shared_ion_fraction,
                                                    const double& mz_bin_size);
 
-       /**is only a test function for compareSpectra, it works the same way, only the common peaks are output.
+       /**
        * @brief Compares two spectra to determine if they share a sufficient number of ions.
        * @param spec1 The first spectrum.
        * @param spec2 The second spectrum.
        * @param mz_bin_size The mz_bin_size setting for the comparison.
        * @return True if the spectra share a sufficient number of ions, false otherwise.
        */
-      static int compareSpectraTest(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size);
+      static int compareShareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size);
     }; 
 }
diff --git a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
index c366bffceb7..e87e2e18c3e 100644
--- a/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
+++ b/src/openms/source/ANALYSIS/ID/NeighborSeq.cpp
@@ -39,33 +39,9 @@ return spectrum;
 // Function to compare two spectra and determine if they are similar
 bool NeighborSeq::compareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& min_shared_ion_fraction, const double& mz_bin_size)
 {
-  std::map<int, int> bins1, bins2;
-
-  // Lambda function to bin a spectrum into a map
-  auto bin_spectrum = [&](const MSSpectrum& spec, std::map<int, int>& bins) {
-    for (const auto& peak : spec)
-      bins[static_cast<int>(peak.getMZ() / mz_bin_size)]++;
-  };
-
-  // Bin both spectra
-  bin_spectrum(spec1, bins1);
-  bin_spectrum(spec2, bins2);
-
-  // Extract bin keys as vectors
-  std::vector<int> vec_bins1, vec_bins2;
-  for (const auto& bin : bins1)
-    vec_bins1.push_back(bin.first);
-  for (const auto& bin : bins2)
-    vec_bins2.push_back(bin.first);
-
-  // Calculate the intersection of the bin vectors
-  std::vector<int> intersection;
-  std::set_intersection(vec_bins1.begin(), vec_bins1.end(), vec_bins2.begin(), vec_bins2.end(), std::back_inserter(intersection));
 
   // Calculate the number of shared bins considering the bin frequencies
-  int B12 = 0;
-  for (int bin : intersection)
-    B12 += std::min(bins1[bin], bins2[bin]);
+  int B12 = compareShareSpectra(spec1,  spec2, mz_bin_size);
 
   // Calculate the fraction of shared bins
   double fraction_shared = static_cast<double>(2 * B12) / (spec1.size() + spec2.size());
@@ -125,29 +101,29 @@ map<double, vector<int>> NeighborSeq::createMassPositionMap(const vector<AASeque
       // Insert the mass and the position into the map
       mass_position_map[mass].push_back(i);
     }
-    else
-    {
+   // else
+   // {
           //throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot get weight of sequence with unknown AA 'X' with unknown mass.", toString());
-    }
+    //}
   }
   return mass_position_map;
 }
 
 // Method to find neighbor peptides in a given FASTA file
 vector<int> NeighborSeq::findNeighborPeptides(const AASequence& peptide,
-                                                                const vector<AASequence>& neighbor_candidates,
-                                                                const vector<int>& candidates_position,
-                                                                const double& min_shared_ion_fraction,
-                                                                const double& mz_bin_size)
+                                              const vector<AASequence>& neighbor_candidates,
+                                              const vector<int>& candidates_position,
+                                              const double& min_shared_ion_fraction,
+                                              const double& mz_bin_size)
 
 {
     vector<int> result_entries;
     MSSpectrum spec = generateSpectrum(peptide.toString());
     for (size_t i = 0; i < candidates_position.size(); i++)
     {
-          String neighbor_sequence = neighbor_candidates[candidates_position[i]].toString();
+          
           // Check if the sequence contains an 'X'
-              if (neighbor_sequence.find('X') == String::npos)
+          if (neighbor_candidates[candidates_position[i]].toString().find('X') == String::npos)
               {     
                   MSSpectrum neighbor_spec = generateSpectrum(neighbor_candidates[candidates_position[i]].toString());
                // Compare the spectra and add to results if they are similar
@@ -156,18 +132,18 @@ vector<int> NeighborSeq::findNeighborPeptides(const AASequence& peptide,
                       result_entries.push_back(candidates_position[i]);
                   }
               }
-              else
-              {
+             // else
+            //  {
                 //throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Cannot get peaks of sequence with unknown AA 'X'.", toString());
-              }
+            //  }
     }
 return result_entries;
 }
 
 
 
-// is only a test function for compareSpectra, it works the same way, only the common peaks are output. 
-int NeighborSeq::compareSpectraTest(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size)
+
+int NeighborSeq::compareShareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size)
 {
   std::map<int, int> bins1, bins2;
 
diff --git a/src/tests/class_tests/openms/source/NeighborSeq_test.cpp b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
index e1378347676..76d5c1d6c6e 100644
--- a/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
+++ b/src/tests/class_tests/openms/source/NeighborSeq_test.cpp
@@ -168,14 +168,13 @@ START_SECTION(map<double, vector<int>> NeighborSeq::createMassPositionMap(const
   candidates_1.push_back(seq3); // 3 position
   map<double, vector<int>> expected_1 = {{728.371, {2}}, {799.349, {1}}, {837.270, {3}}};
   map<double, vector<int>> result_1 = NeighborSeq::createMassPositionMap(candidates_1);
-  TEST_EQUAL(result_1,expected_1)
+ // TEST_EQUAL(result_1,expected_1)
 
   candidates_1.push_back(seq1); // 4 position
   map<double, vector<int>> expected_2 = {{728.371, {2}}, {799.349, {1,4}}, {837.270, {3}}};
   map<double, vector<int>> result_2 = NeighborSeq::createMassPositionMap(candidates_1);
-  TEST_EQUAL(result_2,expected_2)
-
-
+  //TEST_EQUAL(result_2,expected_2)
+  //TEST_TRY(result_2, expected_2);
 }
 END_SECTION
 */
@@ -213,13 +212,12 @@ START_SECTION(vector<int> findNeighborPeptides(const AASequence& peptides,
   vector<int> result = NeighborSeq::findNeighborPeptides(seq1, neighbor_candidate, candidate_position, min_shared_ion_fraction, mz_bin_size);
   
    TEST_EQUAL(expected, result)
-  
 }
 END_SECTION
 
 
 // Test section for the compareSpectra function
-START_SECTION(bool compareSpectraTest(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size))
+START_SECTION(bool compareShareSpectra(const MSSpectrum& spec1, const MSSpectrum& spec2, const double& mz_bin_size))
 {
   MSSpectrum spec1;
   spec1.push_back(Peak1D(100.0, 1.0));
@@ -227,9 +225,9 @@ START_SECTION(bool compareSpectraTest(const MSSpectrum& spec1, const MSSpectrum&
   spec1.push_back(Peak1D(300.0, 1.0));
 
   MSSpectrum spec2;
-  spec2.push_back(Peak1D(100.05, 1.0));
-  spec2.push_back(Peak1D(200.05, 1.0));
-  spec2.push_back(Peak1D(300.05, 1.0));
+  spec2.push_back(Peak1D(100.06, 1.0));
+  spec2.push_back(Peak1D(200.06, 1.0));
+  spec2.push_back(Peak1D(300.06, 1.0));
 
   MSSpectrum spec3;
   spec3.push_back(Peak1D(101.00, 1.0));
@@ -237,9 +235,9 @@ START_SECTION(bool compareSpectraTest(const MSSpectrum& spec1, const MSSpectrum&
   spec3.push_back(Peak1D(301.00, 1.0));
 
   MSSpectrum spec4;
-  spec4.push_back(Peak1D(100.05, 1.0));
+  spec4.push_back(Peak1D(100.06, 1.0));
   spec4.push_back(Peak1D(201.00, 1.0));
-  spec4.push_back(Peak1D(300.05, 1.0));
+  spec4.push_back(Peak1D(300.06, 1.0));
   spec4.push_back(Peak1D(301.00, 1.0));
 
 
@@ -247,30 +245,30 @@ START_SECTION(bool compareSpectraTest(const MSSpectrum& spec1, const MSSpectrum&
 
 
   // bin interval is from [a,b[
-  bool compare_1_2_low = NeighborSeq::compareSpectraTest(spec1, spec2, 1.0);
+  bool compare_1_2_low = NeighborSeq::compareShareSpectra(spec1, spec2, 1.0);
   TEST_EQUAL(compare_1_2_low,3)
-  bool compare_1_3_low = NeighborSeq::compareSpectraTest(spec1, spec3, 1.0);
+  bool compare_1_3_low = NeighborSeq::compareShareSpectra(spec1, spec3, 1.0);
   TEST_EQUAL(compare_1_3_low,0)
-  bool compare_1_4_low = NeighborSeq::compareSpectraTest(spec1, spec4, 1.0);
+  bool compare_1_4_low = NeighborSeq::compareShareSpectra(spec1, spec4, 1.0);
   TEST_EQUAL(compare_1_4_low,2)
-  bool compare_2_3_low = NeighborSeq::compareSpectraTest(spec2, spec3, 1.0);
+  bool compare_2_3_low = NeighborSeq::compareShareSpectra(spec2, spec3, 1.0);
   TEST_EQUAL(compare_2_3_low,0)
-  bool compare_2_4_low = NeighborSeq::compareSpectraTest(spec2, spec4, 1.0);
+  bool compare_2_4_low = NeighborSeq::compareShareSpectra(spec2, spec4, 1.0);
   TEST_EQUAL(compare_2_4_low,3)
-  bool compare_3_4_low = NeighborSeq::compareSpectraTest(spec3, spec4, 1.0);
+  bool compare_3_4_low = NeighborSeq::compareShareSpectra(spec3, spec4, 1.0);
   TEST_EQUAL(compare_3_4_low,2)
 
-  bool compare_1_2_high = NeighborSeq::compareSpectraTest(spec1, spec2, 0.05);
-  //TEST_EQUAL(compare_1_2_high,0)
-  bool compare_1_3_high = NeighborSeq::compareSpectraTest(spec1, spec3, 0.05);
+  bool compare_1_2_high = NeighborSeq::compareShareSpectra(spec1, spec2, 0.05);
+  TEST_EQUAL(compare_1_2_high,0)
+  bool compare_1_3_high = NeighborSeq::compareShareSpectra(spec1, spec3, 0.05);
   TEST_EQUAL(compare_1_3_high,0)
-  bool compare_1_4_high = NeighborSeq::compareSpectraTest(spec1, spec4, 0.05);
-  //TEST_EQUAL(compare_1_4_high,0)
-  bool compare_2_3_high = NeighborSeq::compareSpectraTest(spec2, spec3, 0.05);
+  bool compare_1_4_high = NeighborSeq::compareShareSpectra(spec1, spec4, 0.05);
+  TEST_EQUAL(compare_1_4_high,0)
+  bool compare_2_3_high = NeighborSeq::compareShareSpectra(spec2, spec3, 0.05);
   TEST_EQUAL(compare_2_3_high,0)
-  bool compare_2_4_high = NeighborSeq::compareSpectraTest(spec2, spec4, 0.05);
+  bool compare_2_4_high = NeighborSeq::compareShareSpectra(spec2, spec4, 0.05);
   TEST_EQUAL(compare_2_4_high,2)
-  bool compare_3_4_high = NeighborSeq::compareSpectraTest(spec3, spec4, 0.05);
+  bool compare_3_4_high = NeighborSeq::compareShareSpectra(spec3, spec4, 0.05);
   TEST_EQUAL(compare_3_4_high,2)
   
 }
diff --git a/src/topp/DecoyDatabase.cpp b/src/topp/DecoyDatabase.cpp
index a26394dd256..482fb217273 100644
--- a/src/topp/DecoyDatabase.cpp
+++ b/src/topp/DecoyDatabase.cpp
@@ -115,11 +115,13 @@ class TOPPDecoyDatabase :
     registerIntOption_("Neighbor_Search:missed_cleavages", "<int>", 0,"Number of missed cleavages",false);
     registerDoubleOption_("Neighbor_Search:mz_bin_size", "<num>", 0.05,"Sets the mz_bin_size of the neighbor peptide search.(the original study suggests high (0.05 Da) and low (1.0005079 Da) mz_bin_size)", false);
     registerDoubleOption_("Neighbor_Search:mass_tolerance", "<double>", 0.00001, "Tolerance of the mass of the neighbor peptide m1-m2 > tm", false, true);
+    registerIntOption_("Neighbor_Search:min_peptide_length", "<int>", 5, "the minimum neighbor peptide length ", false);
     //setValidStrings_("Neighbor_Search:fragment_unit_ppm", {"true", "false"});
     //registerStringOption_("Neighbor_Search:fragment_unit_ppm", "<choice>", "true","calculates with dalton or ppm", false, true);
     registerDoubleOption_("Neighbor_Search:min_shared_ion_fraction", "<double>", 0.25, "Tolerance of the proportion of b and y ions shared by the neighbor peptide 2*B12/B1+B2 > ti", false, true);
   }
 
+
   Param getSubsectionDefaults_(const String& /* name */) const override
   {
     Param p = MRMDecoy().getDefaults();
@@ -219,6 +221,7 @@ class TOPPDecoyDatabase :
       double min_shared_ion_fraction = getDoubleOption_("Neighbor_Search:min_shared_ion_fraction");
       double mass_tolerance = getDoubleOption_("Neighbor_Search:mass_tolerance");
       int missed_cleavages = getIntOption_("Neighbor_Search:missed_cleavages");
+      int min_peptide_length = getIntOption_("Neighbor_Search:min_peptide_length");
       digestion_neighbor.setMissedCleavages(missed_cleavages);
  // Vector to hold all digested peptides from the input files
       vector<AASequence> all_peptides;
@@ -248,26 +251,35 @@ class TOPPDecoyDatabase :
         digestion_neighbor.digest(AASequence::fromString(entry.sequence), temp_peptides);  
         digested_candidate_peptides.insert(digested_candidate_peptides.end(), temp_peptides.begin(), temp_peptides.end());
       }
+
+      // Remove peptides shorter than min_peptide_length amino acids
+      digested_candidate_peptides.erase(remove_if(digested_candidate_peptides.begin(), digested_candidate_peptides.end(),
+                                                         [min_peptide_length](const AASequence& peptide) { return peptide.size() < min_peptide_length; })
+                                                        ,digested_candidate_peptides.end());
+
      // Creates a map of masses to positions from a vector
       map<double, vector<int>> mass_position_map = NeighborSeq::createMassPositionMap(digested_candidate_peptides);
-      
+
       for (auto i = all_peptides.begin(); i != all_peptides.end(); ++i)
-      { 
-        // Finde position of neighbor peptides candidates
-        vector<int> candiadate_position = NeighborSeq::findCandidatePositions(mass_position_map, i->getMonoWeight(), mass_tolerance);
-       
-        // Find neighbor peptides for the current peptide
-        vector<int> result = NeighborSeq::findNeighborPeptides(*i, digested_candidate_peptides, candiadate_position, min_shared_ion_fraction, mz_bin_size);
-        for (size_t j = 0; j < result.size(); ++j)
+      {
+        if (i->toString().find('X') == String::npos) 
         {
-          // Get the corresponding neighbor entry
-          const auto& neighbor_entry = neighbor_entries[result[j]];
-          // Create a new FASTAEntry with the neighbor peptide sequence
-          FASTAFile::FASTAEntry new_entry(entry.identifier, entry.description, digested_candidate_peptides[result[j]].toString());
-          // Write the neighbor peptide to the output file
-          neig.writeNext(new_entry);
-  
-        }    
+          // Finde position of neighbor peptides candidates
+          vector<int> candiadate_position = NeighborSeq::findCandidatePositions(mass_position_map, i->getMonoWeight(), mass_tolerance);
+
+          // Find neighbor peptides for the current peptide
+          vector<int> result
+            = NeighborSeq::findNeighborPeptides(*i, digested_candidate_peptides, candiadate_position, min_shared_ion_fraction, mz_bin_size);
+          for (size_t j = 0; j < result.size(); ++j)
+          {
+            // Get the corresponding neighbor entry
+            const auto& neighbor_entry = neighbor_entries[result[j]];
+            // Create a new FASTAEntry with the neighbor peptide sequence
+            FASTAFile::FASTAEntry new_entry(entry.identifier, entry.description, digested_candidate_peptides[result[j]].toString());
+            // Write the neighbor peptide to the output file
+            neig.writeNext(new_entry);
+          } 
+        }   
       }
       in.push_back(out_neighbor);
     }