OpenMS  2.8.0
IsotopeWaveletTransform.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg$
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
46 
47 #include <cmath>
48 #include <vector>
49 #include <map>
50 #include <sstream>
51 #include <fstream>
52 #include <iomanip>
53 
54 // TODO: move this to cpp and use STL once it is available in clang on Mac
55 #include <boost/math/special_functions/bessel.hpp>
56 
57 // This code has quite a few strange things in it triggering warnings which
58 // clutters the rest of the diagnostics
59 #pragma clang diagnostic push
60 #pragma clang diagnostic ignored "-Wfloat-equal"
61 #pragma clang diagnostic ignored "-Wconversion"
62 #pragma clang diagnostic ignored "-Wshorten-64-to-32"
63 
64 namespace OpenMS
65 {
66 
70  template <typename PeakType>
72  {
73 public:
74 
75 
77  struct BoxElement
78  {
79  double mz;
80  UInt c;
81  double score;
82  double intens;
83  double ref_intens;
84  double RT;
88  };
89 
90  typedef std::multimap<UInt, BoxElement> Box;
91 
92 
97  {
99 
100 public:
101 
104  reference_(nullptr), trans_intens_(nullptr)
105  {
106  }
107 
109  TransSpectrum(const MSSpectrum* reference) :
110  reference_(reference)
111  {
112  trans_intens_ = new std::vector<float>(reference_->size(), 0.0);
113  }
114 
116  virtual ~TransSpectrum()
117  {
118  delete (trans_intens_);
119  }
120 
121  virtual void destroy()
122  {
123  delete (trans_intens_);
124  trans_intens_ = NULL;
125  delete (reference_);
126  reference_ = NULL;
127  }
128 
130  inline double getRT() const
131  {
132  return reference_->getRT();
133  }
134 
136  inline double getMZ(const UInt i) const
137  {
138  return (*reference_)[i].getMZ();
139  }
140 
142  inline double getRefIntensity(const UInt i) const
143  {
144  return (*reference_)[i].getIntensity();
145  }
146 
148  inline double getTransIntensity(const UInt i) const
149  {
150  return (*trans_intens_)[i];
151  }
152 
154  inline void setTransIntensity(const UInt i, const double intens)
155  {
156  (*trans_intens_)[i] = intens;
157  }
158 
160  inline Size size() const
161  {
162  return trans_intens_->size();
163  }
164 
166  inline const MSSpectrum* getRefSpectrum()
167  {
168  return reference_;
169  }
170 
172  inline const MSSpectrum* getRefSpectrum() const
173  {
174  return reference_;
175  }
176 
179  inline typename MSSpectrum::const_iterator MZBegin(const double mz) const
180  {
181  return reference_->MZBegin(mz);
182  }
183 
186  inline typename MSSpectrum::const_iterator MZEnd(const double mz) const
187  {
188  return reference_->MZEnd(mz);
189  }
190 
193  inline typename MSSpectrum::const_iterator end() const
194  {
195  return reference_->end();
196  }
197 
200  inline typename MSSpectrum::const_iterator begin() const
201  {
202  return reference_->begin();
203  }
204 
205 protected:
206 
208  std::vector<float>* trans_intens_;
209 
210  };
211 
212 
213 
219  IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size = 0, const bool hr_data = false, const String intenstype = "ref");
220 
222  virtual ~IsotopeWaveletTransform();
223 
224 
229  virtual void getTransform(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
230 
235  virtual void getTransformHighRes(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
236 
256  virtual void identifyCharge(const MSSpectrum& candidates, const MSSpectrum& ref, const UInt scan_index, const UInt c,
257  const double ampl_cutoff, const bool check_PPMs);
258 
259  virtual void initializeScan(const MSSpectrum& c_ref, const UInt c = 0);
260 
267  void updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
268  const UInt RT_votes_cutoff, const Int front_bound = -1, const Int end_bound = -1);
269 
270 
275  FeatureMap mapSeeds2Features(const PeakMap& map, const UInt RT_votes_cutoff);
276 
278  virtual std::multimap<double, Box> getClosedBoxes()
279  { return closed_boxes_; }
280 
281 
286  inline double getLinearInterpolation(const typename MSSpectrum::const_iterator& left_iter, const double mz_pos, const typename MSSpectrum::const_iterator& right_iter)
287  {
288  return left_iter->getIntensity() + (right_iter->getIntensity() - left_iter->getIntensity()) / (right_iter->getMZ() - left_iter->getMZ()) * (mz_pos - left_iter->getMZ());
289  }
290 
297  inline double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
298  {
299  return intens_a + (intens_b - intens_a) / (mz_b - mz_a) * (mz_pos - mz_a);
300  }
301 
302  inline double getSigma() const
303  {
304  return sigma_;
305  }
306 
307  inline void setSigma(const double sigma)
308  {
309  sigma_ = sigma;
310  }
311 
312  virtual void computeMinSpacing(const MSSpectrum& c_ref);
313 
314  inline double getMinSpacing() const
315  {
316  return min_spacing_;
317  }
318 
319  inline Size getMaxScanSize() const
320  {
321  return max_scan_size_;
322  }
323 
324 protected:
325 
326 
330 
331 
332  inline void sampleTheCMarrWavelet_(const MSSpectrum& scan, const Int wavelet_length, const Int mz_index, const UInt charge);
333 
334 
341  virtual double scoreThis_(const TransSpectrum& candidate, const UInt peak_cutoff,
342  const double seed_mz, const UInt c, const double ampl_cutoff);
343 
350  virtual double scoreThis_(const MSSpectrum& candidate, const UInt peak_cutoff,
351  const double seed_mz, const UInt c, const double ampl_cutoff);
352 
353 
361  virtual bool checkPositionForPlausibility_(const TransSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
362  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
363 
371  virtual bool checkPositionForPlausibility_(const MSSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
372  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
373 
374  virtual std::pair<double, double> checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c);
375 
376 
378  inline double getAvIntens_(const TransSpectrum& scan);
380  inline double getAvIntens_(const MSSpectrum& scan);
381 
383  inline double getSdIntens_(const TransSpectrum& scan, const double mean);
385  inline double getSdIntens_(const MSSpectrum& scan, const double mean);
386 
397  virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score,
398  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens);
399 
415  virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score,
416  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end);
417 
423  inline double getAvMZSpacing_(const MSSpectrum& scan);
424 
425 
430  void clusterSeeds_(const TransSpectrum& candidates, const MSSpectrum& ref,
431  const UInt scan_index, const UInt c, const bool check_PPMs);
432 
437  virtual void clusterSeeds_(const MSSpectrum& candidates, const MSSpectrum& ref,
438  const UInt scan_index, const UInt c, const bool check_PPMs);
439 
440 
445  void extendBox_(const PeakMap& map, const Box& box);
446 
449  inline double peptideMassRule_(const double c_mass) const
450  {
451  double correction_fac = c_mass / Constants::PEPTIDE_MASS_RULE_BOUND;
452  double old_frac_mass = c_mass - (Int)(c_mass);
453  double new_mass = ((Int)(c_mass)) * (1. + Constants::PEPTIDE_MASS_RULE_FACTOR) - (Int)(correction_fac);
454  double new_frac_mass = new_mass - (Int)(new_mass);
455 
456  if (new_frac_mass - old_frac_mass > 0.5)
457  {
458  new_mass -= 1.;
459  }
460 
461  if (new_frac_mass - old_frac_mass < -0.5)
462  {
463  new_mass += 1.;
464  }
465 
466  return new_mass;
467  }
468 
472  inline double getPPMs_(const double mass_a, const double mass_b) const
473  {
474  return fabs(mass_a - mass_b) / (0.5 * (mass_a + mass_b)) * 1e6;
475  }
476 
477  //internally used data structures for the sweep line algorithm
478  std::multimap<double, Box> open_boxes_, closed_boxes_, end_boxes_, front_boxes_; //double = average m/z position
479  std::vector<std::multimap<double, Box> >* tmp_boxes_; //for each charge we need a separate container
480 
482  std::vector<double> c_mzs_, c_spacings_, psi_, prod_, xs_;
483  std::vector<double> interpol_xs_, interpol_ys_;
484 
487  bool hr_data_;
490  std::vector<int> indices_;
491 
493  std::vector<float> scores_, zeros_;
494  };
495 
496  template <typename PeakType>
497  bool intensityComparator(const PeakType& a, const PeakType& b)
498  {
499  return a.getIntensity() > b.getIntensity();
500  }
501 
502  template <typename PeakType>
504  {
505  return a.getIntensity() < b.getIntensity();
506  }
507 
508  template <typename PeakType>
510  {
511  return a->getIntensity() > b->getIntensity();
512  }
513 
514  template <typename PeakType>
515  bool positionComparator(const PeakType& a, const PeakType& b)
516  {
517  return a.getMZ() < b.getMZ();
518  }
519 
520  template <typename PeakType>
522  {
523  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(1);
524  av_MZ_spacing_ = 1;
525  max_scan_size_ = 0;
526  max_mz_cutoff_ = 3;
527  max_num_peaks_per_pattern_ = 3;
528  hr_data_ = false;
529  intenstype_ = "ref";
530  }
531 
532  template <typename PeakType>
533  IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size, const bool hr_data, String intenstype)
534  {
535  max_charge_ = max_charge;
536  max_scan_size_ = max_scan_size;
537  hr_data_ = hr_data;
538  intenstype_ = intenstype;
539  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(max_charge);
540  if (max_scan_size <= 0) //only important for the CPU
541  {
542  IsotopeWavelet::init(max_mz, max_charge);
543  }
544 
545  av_MZ_spacing_ = 1;
546  max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(max_mz, max_charge);
547  max_num_peaks_per_pattern_ = IsotopeWavelet::getNumPeakCutOff(max_mz, max_charge);
548 
549  Int size_estimate((Int)ceil(max_scan_size_ / (max_mz - min_mz)));
550  Int to_reserve((Int)ceil(size_estimate * max_num_peaks_per_pattern_ * Constants::IW_NEUTRON_MASS));
551  psi_.reserve(to_reserve); //The wavelet
552  prod_.reserve(to_reserve);
553  xs_.reserve(to_reserve);
556  }
557 
558  template <typename PeakType>
560  {
561  delete (tmp_boxes_);
562  }
563 
564  template <typename PeakType>
566  {
567  Int spec_size((Int)c_ref.size());
568  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
569  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
570  UInt charge = c + 1;
571  double value, T_boundary_left, T_boundary_right, old, c_diff, current, old_pos, my_local_MZ, my_local_lambda, origin, c_mz;
572 
573  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
574  {
575  value = 0; T_boundary_left = 0; T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
576  old = 0; old_pos = (my_local_pos - from_max_to_left_ - 1 >= 0) ? c_ref[my_local_pos - from_max_to_left_ - 1].getMZ() : c_ref[0].getMZ() - min_spacing_;
577  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
578  c_diff = 0;
579  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
580 
581  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
582  {
583  if (current_conv_pos >= spec_size)
584  {
585  value += 0.5 * old * min_spacing_;
586  break;
587  }
588 
589  c_mz = c_ref[current_conv_pos].getMZ();
590  c_diff = c_mz + origin;
591 
592  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
593  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
594 
595  value += 0.5 * (current + old) * (c_mz - old_pos);
596 
597  old = current;
598  old_pos = c_mz;
599  }
600 
601 
602 
603  c_trans[my_local_pos].setIntensity(value);
604  }
605  }
606 
607  template <typename PeakType>
609  {
610  Int spec_size((Int)c_ref.size());
611  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
612  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
613  UInt charge = c + 1;
614  double value, T_boundary_left, T_boundary_right, c_diff, current, my_local_MZ, my_local_lambda, origin, c_mz;
615 
616  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
617  {
618  value = 0; T_boundary_left = 0; T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
619 
620 
621  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
622  c_diff = 0;
623  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
624 
625  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
626  {
627  if (current_conv_pos >= spec_size)
628  {
629  break;
630  }
631 
632  c_mz = c_ref[current_conv_pos].getMZ();
633  c_diff = c_mz + origin;
634 
635  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
636  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
637 
638  value += current;
639  }
640 
641  c_trans[my_local_pos].setIntensity(value);
642  }
643  }
644 
645  template <typename PeakType>
647  {
648  data_length_ = (UInt) c_ref.size();
649  computeMinSpacing(c_ref);
650  Int wavelet_length = 0, quarter_length = 0;
651 
652  if (hr_data_) //We have to check this separately, because the simply estimation for LowRes data is destroyed by large gaps
653  {
654  UInt c_mz_cutoff;
655  typename MSSpectrum::const_iterator start_iter, end_iter;
656  for (UInt i = 0; i < data_length_; ++i)
657  {
658  c_mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[i].getMZ(), c + 1);
659  start_iter = c_ref.MZEnd(c_ref[i].getMZ());
660  end_iter = c_ref.MZBegin(c_ref[i].getMZ() + c_mz_cutoff);
661  wavelet_length = std::max((SignedSize) wavelet_length, distance(start_iter, end_iter) + 1);
662  end_iter = c_ref.MZEnd(c_ref[i].getMZ() - Constants::IW_QUARTER_NEUTRON_MASS / double(c + 1.));
663  quarter_length = std::max((SignedSize) quarter_length, distance(end_iter, start_iter) + 1);
664  }
665  }
666  else
667  {
668  //CHANGED
669  max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[data_length_ - 1].getMZ(), max_charge_);
670  wavelet_length = (UInt) ceil(max_mz_cutoff_ / min_spacing_);
671  }
672  //... done
673 
674 
675  if (wavelet_length > (Int) c_ref.size())
676  {
677  std::cout << "Warning: the extremal length of the wavelet is larger (" << wavelet_length << ") than the number of data points (" << c_ref.size() << "). This might (!) severely affect the transform." << std::endl;
678  std::cout << "Minimal spacing: " << min_spacing_ << std::endl;
679  std::cout << "Warning/Error generated at scan with RT " << c_ref.getRT() << "." << std::endl;
680  }
681 
682  Int max_index = (UInt) (Constants::IW_QUARTER_NEUTRON_MASS / min_spacing_);
683  from_max_to_left_ = max_index;
684  from_max_to_right_ = wavelet_length - 1 - from_max_to_left_;
685  }
686 
687  template <typename PeakType>
689  {
690  min_spacing_ = INT_MAX;
691  for (UInt c_conv_pos = 1; c_conv_pos < c_ref.size(); ++c_conv_pos)
692  {
693  min_spacing_ = std::min(min_spacing_, c_ref[c_conv_pos].getMZ() - c_ref[c_conv_pos - 1].getMZ());
694  }
695  }
696 
697  template <typename PeakType>
699  const MSSpectrum& ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
700  {
701  Size scan_size(candidates.size());
703  typename MSSpectrum::const_iterator iter_start, iter_end, seed_iter, iter2;
704  double mz_cutoff, seed_mz, c_av_intens = 0, c_score = 0, c_sd_intens = 0, threshold = 0, help_mz, share, share_pos, bwd, fwd;
705  UInt MZ_start, MZ_end;
706 
707  MSSpectrum diffed(candidates);
708  diffed[0].setIntensity(0); diffed[scan_size - 1].setIntensity(0);
709 
710 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
711  std::stringstream stream;
712  stream << "diffed_" << ref.getRT() << "_" << c + 1 << ".trans\0";
713  std::ofstream ofile(stream.str().c_str());
714 #endif
715 
716  if (!hr_data_) //LowRes data
717  {
718  for (UInt i = 0; i < scan_size - 2; ++i)
719  {
720  share = candidates[i + 1].getIntensity(); share_pos = candidates[i + 1].getMZ();
721  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
722  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
723 
724  if (!(bwd >= 0 && fwd <= 0) || share > ref[i + 1].getIntensity())
725  {
726  diffed[i + 1].setIntensity(0);
727  }
728 
729 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
730  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
731 #endif
732  }
733  }
734  else //HighRes data
735  {
736  for (UInt i = 0; i < scan_size - 2; ++i)
737  {
738  share = candidates[i + 1].getIntensity(); share_pos = candidates[i + 1].getMZ();
739  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
740  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
741 
742  if (!(bwd >= 0 && fwd <= 0))
743  {
744  diffed[i + 1].setIntensity(0);
745  }
746 
747 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
748  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
749 #endif
750  }
751  }
752 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
753  ofile.close();
754 #endif
755 
756  ConstRefVector<MSSpectrum > c_sorted_candidate(diffed.begin(), diffed.end());
757 
758  //Sort the transform in descending order according to the intensities present in the transform
759  c_sorted_candidate.sortByIntensity();
760 
761 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
762  std::stringstream stream2;
763  stream2 << "sorted_cpu_" << candidates.getRT() << "_" << c + 1 << ".trans\0";
764  std::ofstream ofile2(stream2.str().c_str());
765  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
766  {
767  ofile2 << iter->getMZ() << "\t" << iter->getIntensity() << std::endl;
768  }
769  ofile2.close();
770 #endif
771 
772  std::vector<UInt> processed(scan_size, 0);
773 
774  if (ampl_cutoff < 0)
775  {
776  threshold = 0;
777  }
778  else
779  {
780  c_av_intens = getAvIntens_(candidates);
781  c_sd_intens = getSdIntens_(candidates, c_av_intens);
782  threshold = ampl_cutoff * c_sd_intens + c_av_intens;
783  }
784 
785  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
786  {
787  if (iter->getIntensity() <= 0)
788  {
789  break;
790  }
791 
792  seed_mz = iter->getMZ();
793  seed_iter = ref.MZBegin(seed_mz);
794 
795  if (seed_iter == ref.end() || processed[distance(ref.begin(), seed_iter)])
796  {
797  continue;
798  }
799 
800  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(seed_mz, c + 1);
801  //Mark the region as processed
802  //Do not move this further down, since we have to mark this as processed in any case,
803  //even when score <=0; otherwise we would look around the maximum's position unless
804  //any significant point is found
805  iter_start = ref.MZBegin(ref.begin(), seed_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.), seed_iter);
806  iter_end = ref.MZEnd(seed_iter, seed_mz + mz_cutoff / (c + 1.), ref.end());
807  if (iter_end == ref.end())
808  {
809  --iter_end;
810  }
811 
812  MZ_start = distance(ref.begin(), iter_start);
813  MZ_end = distance(ref.begin(), iter_end);
814 
815  memset(&(processed[MZ_start]), 1, sizeof(UInt) * (MZ_end - MZ_start + 1));
816 
817  c_score = scoreThis_(candidates, IsotopeWavelet::getNumPeakCutOff(seed_mz * (c + 1.)), seed_mz, c, threshold);
818 
819 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
820  if (trunc(seed_mz) == 874)
821  std::cout << seed_mz << "\t" << c_score << std::endl;
822 #endif
823 
824  if (c_score <= 0 && c_score != -1000)
825  {
826  continue;
827  }
828 
829  //Push the seed into its corresponding box (or create a new one, if necessary)
830  //Do ***NOT*** move this further down!
831  push2TmpBox_(seed_mz, scan_index, c, c_score, iter->getIntensity(), ref.getRT(), MZ_start, MZ_end);
832 
833  help_mz = seed_mz - Constants::IW_NEUTRON_MASS / (c + 1.);
834  iter2 = candidates.MZBegin(help_mz);
835  if (iter2 == candidates.end() || iter2 == candidates.begin())
836  {
837  continue;
838  }
839 
840  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
841  {
842  //In the other case, we are too close to the peak, leading to incorrect derivatives.
843  if (iter2 != candidates.end())
844  {
845  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
846  }
847  }
848 
849  help_mz = seed_mz + Constants::IW_NEUTRON_MASS / (c + 1.);
850  iter2 = candidates.MZBegin(help_mz);
851  if (iter2 == candidates.end() || iter2 == candidates.begin())
852  {
853  continue;
854  }
855 
856  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
857  {
858  //In the other case, we are too close to the peak, leading to incorrect derivatives.
859  if (iter2 != candidates.end())
860  {
861  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
862  }
863  }
864  }
865 
866  clusterSeeds_(candidates, ref, scan_index, c, check_PPMs);
867  }
868 
869  template <typename PeakType>
871  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
872  {
873  double c_score = 0, c_val;
874  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
875  Int signal_size((Int)candidate.size());
876  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
877  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
878 
879  //p_h_ind indicates if we are looking for a whole or a peak
880  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
881 
882  std::vector<double> positions(end);
883  for (Int i = 0; i < end; ++i)
884  {
885  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
886  }
887 
888  double l_score = 0, mid_val = 0;
889  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
890  for (Int v = 1; v <= end; ++v, ++p_h_ind)
891  {
892  do
893  {
894  if (start_index < signal_size - 1)
895  ++start_index;
896  else
897  break;
898  }
899  while (candidate[start_index].getMZ() < positions[v - 1]);
900 
901  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
902  {
903  continue;
904  }
905 
906  c_left_iter2 = candidate.begin() + start_index - 1;
907  c_right_iter2 = c_left_iter2 + 1;
908 
909  c_val = c_left_iter2->getIntensity() + (c_right_iter2->getIntensity() - c_left_iter2->getIntensity()) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
910 
911  if (v == (int)(ceil(end / 2.)))
912  {
913  l_score = c_score;
914  mid_val = c_val;
915  }
916 
917  if (p_h_ind % 2 == 1) //I.e. a whole
918  {
919  c_score -= c_val;
920 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
921  if (trunc(seed_mz) == 874)
922  std::cout << -c_val << std::endl;
923 #endif
924  }
925  else
926  {
927  c_score += c_val;
928 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
929  if (trunc(seed_mz) == 874)
930  std::cout << c_val << std::endl;
931 #endif
932  }
933 
934 
935  start_index = distance(candidate.begin(), c_left_iter2);
936  }
937 
938 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
939  std::ofstream ofile_score("scores.dat", ios::app);
940  std::ofstream ofile_check_score("check_scores.dat", ios::app);
941  ofile_score.close();
942  ofile_check_score.close();
943 #endif
944 
945 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
946  if (trunc(seed_mz) == 874)
947  std::cout << "final_score: " << seed_mz << "\t" << c_score << "\t l_score: " << l_score << "\t" << c_score - l_score - mid_val << "\t" << c_score - mid_val << "\t" << ampl_cutoff << std::endl;
948 #endif
949 
950  if (c_score - mid_val <= 0)
951  {
952  return 0;
953  }
954 
955  if (c_score - mid_val <= ampl_cutoff)
956  {
957  return -1000;
958  }
959 
960  if (l_score <= 0 || c_score - l_score - mid_val <= 0)
961  {
962  return 0;
963  }
964 
965  return c_score;
966  }
967 
968  template <typename PeakType>
970  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
971  {
972  double c_score = 0, c_val;
973  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
974  Int signal_size((Int)candidate.size());
975 
976  //p_h_ind indicates if we are looking for a whole or a peak
977  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
978 
979  std::vector<double> positions(end);
980  for (Int i = 0; i < end; ++i)
981  {
982  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
983  }
984 
985  double l_score = 0, mid_val = 0;
986  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
987  for (Int v = 1; v <= end; ++v, ++p_h_ind)
988  {
989  do
990  {
991  if (start_index < signal_size - 1)
992  ++start_index;
993  else
994  break;
995  }
996  while (candidate.getMZ(start_index) < positions[v - 1]);
997 
998  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
999  {
1000  continue;
1001  }
1002 
1003  c_left_iter2 = candidate.begin() + start_index - 1;
1004  c_right_iter2 = c_left_iter2 + 1;
1005 
1006  c_val = candidate.getTransIntensity(start_index - 1) + (candidate.getTransIntensity(start_index) - candidate.getTransIntensity(start_index - 1)) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
1007  if (v == (int)(ceil(end / 2.)))
1008  {
1009  l_score = c_score;
1010  mid_val = c_val;
1011  }
1012 
1013  if (p_h_ind % 2 == 1) //I.e. a whole
1014  {
1015  c_score -= c_val;
1016  }
1017  else
1018  {
1019  c_score += c_val;
1020  }
1021 
1022  start_index = distance(candidate.begin(), c_left_iter2);
1023  }
1024 
1025 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1026  std::ofstream ofile_score("scores.dat", ios::app);
1027  std::ofstream ofile_check_score("check_scores.dat", ios::app);
1028  ofile_score << c_check_point << "\t" << c_score << std::endl;
1029  ofile_score.close();
1030  ofile_check_score.close();
1031 #endif
1032 
1033  if (l_score <= 0 || c_score - l_score - mid_val <= 0 || c_score - mid_val <= ampl_cutoff)
1034  {
1035  return 0;
1036  }
1037 
1038  return c_score;
1039  }
1040 
1041  template <typename PeakType>
1043  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1044  {
1045  typename std::multimap<double, Box>::iterator iter;
1046  typename Box::iterator box_iter;
1047  std::vector<BoxElement> final_box;
1048  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1049  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1050 
1051  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1052  {
1053 
1054  Box& c_box = iter->second;
1055  av_score = 0; av_mz = 0; av_intens = 0; av_abs_intens = 0; count = 0;
1056  virtual_av_mz = 0; virtual_av_intens = 0; virtual_av_abs_intens = 0; virtual_count = 0;
1057 
1058  //Now, let's get the RT boundaries for the box
1059  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1060  {
1061  if (box_iter->second.score == 0) //virtual helping point
1062  {
1063  if (count != 0)
1064  continue; //it is in any way not pure virtual
1065 
1066  c_mz = box_iter->second.mz;
1067  virtual_av_intens += box_iter->second.intens;
1068  virtual_av_abs_intens += fabs(box_iter->second.intens);
1069  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1070  ++virtual_count;
1071  }
1072  else
1073  {
1074  c_mz = box_iter->second.mz;
1075  av_score += box_iter->second.score;
1076  av_intens += box_iter->second.intens;
1077  av_abs_intens += fabs(box_iter->second.intens);
1078  av_mz += c_mz * fabs(box_iter->second.intens);
1079  ++count;
1080  }
1081  }
1082 
1083  if (count == 0) //pure virtual helping box
1084  {
1085  av_intens = virtual_av_intens / virtual_count;
1086  av_score = 0;
1087  av_mz = virtual_av_mz / virtual_av_abs_intens;
1088  }
1089  else
1090  {
1091  av_intens /= count;
1092  av_score /= count;
1093  av_mz /= av_abs_intens;
1094  }
1095 
1096  BoxElement c_box_element;
1097  c_box_element.mz = av_mz;
1098  c_box_element.c = c;
1099  c_box_element.score = av_score;
1100  c_box_element.intens = av_intens;
1101 
1102  c_box_element.RT = c_box.begin()->second.RT;
1103  final_box.push_back(c_box_element);
1104  }
1105 
1106  Size num_o_feature = final_box.size();
1107  if (num_o_feature == 0)
1108  {
1109  tmp_boxes_->at(c).clear();
1110  return;
1111  }
1112 
1113  //Computing the derivatives
1114  std::vector<double> bwd_diffs(num_o_feature, 0);
1115 
1116  bwd_diffs[0] = 0;
1117  for (Size i = 1; i < num_o_feature; ++i)
1118  {
1119  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1120  }
1121 
1122 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1123  std::ofstream ofile_bwd("bwd_cpu.dat");
1124  for (Size i = 0; i < num_o_feature; ++i)
1125  {
1126  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1127  }
1128  ofile_bwd.close();
1129 #endif
1130 
1131 
1132  for (Size i = 0; i < num_o_feature - 1; ++i)
1133  {
1134  while (i < num_o_feature - 2)
1135  {
1136  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1137  break;
1138  ++i;
1139  }
1140 
1141  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1142  {
1143  checkPositionForPlausibility_(candidate, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1144  continue;
1145  }
1146  }
1147 
1148  tmp_boxes_->at(c).clear();
1149  }
1150 
1151  template <typename PeakType>
1153  {
1154  double av_intens = 0;
1155  for (UInt i = 0; i < scan.size(); ++i)
1156  {
1157  if (scan[i].getIntensity() >= 0)
1158  {
1159  av_intens += scan[i].getIntensity();
1160  }
1161  }
1162  return av_intens / (double)scan.size();
1163  }
1164 
1165  template <typename PeakType>
1167  {
1168  double res = 0, intens;
1169  for (UInt i = 0; i < scan.size(); ++i)
1170  {
1171  if (scan[i].getIntensity() >= 0)
1172  {
1173  intens = scan[i].getIntensity();
1174  res += (intens - mean) * (intens - mean);
1175  }
1176  }
1177  return sqrt(res / (double)(scan.size() - 1));
1178  }
1179 
1180  template <typename PeakType>
1181  double IsotopeWaveletTransform<PeakType>::getAvMZSpacing_(const MSSpectrum& scan) //, Int start_index, Int end_index)
1182  {
1183  std::vector<double> diffs(scan.size() - 1, 0);
1184  for (UInt i = 0; i < scan.size() - 1; ++i)
1185  {
1186  diffs[i] = scan[i + 1].getMZ() - scan[i].getMZ();
1187  }
1188 
1189  sort(diffs.begin(), diffs.end());
1190  double av_MZ_spacing = 0;
1191  for (UInt i = 0; i < diffs.size() / 2; ++i)
1192  {
1193  av_MZ_spacing += diffs[i];
1194  }
1195 
1196  return av_MZ_spacing / (diffs.size() / 2);
1197  }
1198 
1199  template <typename PeakType>
1201  {
1202  double av_intens = 0;
1203  for (UInt i = 0; i < scan.size(); ++i)
1204  {
1205  if (scan.getTransIntensity(i) >= 0)
1206  {
1207  av_intens += scan.getTransIntensity(i);
1208  }
1209  }
1210  return av_intens / (double)scan.size();
1211  }
1212 
1213  template <typename PeakType>
1215  {
1216  double res = 0, intens;
1217  for (UInt i = 0; i < scan.size(); ++i)
1218  {
1219  if (scan.getTransIntensity(i) >= 0)
1220  {
1221  intens = scan.getTransIntensity(i);
1222  res += (intens - mean) * (intens - mean);
1223  }
1224  }
1225  return sqrt(res / (double)(scan.size() - 1));
1226  }
1227 
1228  template <typename PeakType>
1229  void IsotopeWaveletTransform<PeakType>::push2Box_(const double mz, const UInt scan, UInt c,
1230  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, double ref_intens)
1231  {
1232  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1233 
1234  typename std::multimap<double, Box>::iterator upper_iter(open_boxes_.upper_bound(mz));
1235  typename std::multimap<double, Box>::iterator lower_iter(open_boxes_.lower_bound(mz));
1236 
1237  if (lower_iter != open_boxes_.end())
1238  {
1239  //Ugly, but necessary due to the implementation of STL lower_bound
1240  if (mz != lower_iter->first && lower_iter != open_boxes_.begin())
1241  {
1242  --lower_iter;
1243  }
1244  }
1245 
1246  typename std::multimap<double, Box>::iterator insert_iter;
1247  bool create_new_box = true;
1248  if (lower_iter == open_boxes_.end()) //I.e. there is no open Box for that mz position
1249  {
1250  //There is another special case to be considered here:
1251  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1252  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1253  if (!open_boxes_.empty())
1254  {
1255  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1256  {
1257  create_new_box = false;
1258  insert_iter = lower_iter;
1259  }
1260  }
1261  else
1262  {
1263  create_new_box = true;
1264  }
1265  }
1266  else
1267  {
1268  if (upper_iter == open_boxes_.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1269  {
1270  insert_iter = lower_iter;
1271  create_new_box = false;
1272  }
1273  else
1274  {
1275  create_new_box = true;
1276  }
1277  }
1278 
1279 
1280  if (upper_iter != open_boxes_.end() && lower_iter != open_boxes_.end())
1281  {
1282  //Here is the question if you should figure out the smallest charge .... and then
1283 
1284  //Figure out which entry is closer to m/z
1285  double dist_lower = fabs(lower_iter->first - mz);
1286  double dist_upper = fabs(upper_iter->first - mz);
1287  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1288  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1289 
1290  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1291  {
1292  create_new_box = true;
1293  }
1294  else
1295  {
1296  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1297  create_new_box = false;
1298  }
1299  }
1300 
1301  BoxElement element;
1302  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = ref_intens;
1303  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1304 
1305 
1306  if (create_new_box == false)
1307  {
1308  std::pair<UInt, BoxElement> help2(scan, element);
1309  insert_iter->second.insert(help2);
1310 
1311  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1312  Box replacement(insert_iter->second);
1313 
1314  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1315  //Also note that we already inserted the new entry, leading to size-1.
1316  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1317  c_mz /= ((double) insert_iter->second.size());
1318 
1319  //Now let's remove the old and insert the new one
1320  open_boxes_.erase(insert_iter);
1321  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1322  open_boxes_.insert(help3);
1323  }
1324  else
1325  {
1326  std::pair<UInt, BoxElement> help2(scan, element);
1327  std::multimap<UInt, BoxElement> help3;
1328  help3.insert(help2);
1329  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1330  open_boxes_.insert(help4);
1331  }
1332  }
1333 
1334  template <typename PeakType>
1335  void IsotopeWaveletTransform<PeakType>::push2TmpBox_(const double mz, const UInt scan, UInt c,
1336  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
1337  {
1338  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1339 
1340  std::multimap<double, Box>& tmp_box(tmp_boxes_->at(c));
1341  typename std::multimap<double, Box>::iterator upper_iter(tmp_box.upper_bound(mz));
1342  typename std::multimap<double, Box>::iterator lower_iter(tmp_box.lower_bound(mz));
1343 
1344  if (lower_iter != tmp_box.end())
1345  {
1346  //Ugly, but necessary due to the implementation of STL lower_bound
1347  if (mz != lower_iter->first && lower_iter != tmp_box.begin())
1348  {
1349  --lower_iter;
1350  }
1351  }
1352 
1353  typename std::multimap<double, Box>::iterator insert_iter;
1354  bool create_new_box = true;
1355  if (lower_iter == tmp_box.end()) //I.e. there is no tmp Box for that mz position
1356  {
1357  //There is another special case to be considered here:
1358  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1359  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1360  if (!tmp_box.empty())
1361  {
1362  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1363  {
1364  create_new_box = false;
1365  insert_iter = lower_iter;
1366  }
1367  }
1368  else
1369  {
1370  create_new_box = true;
1371  }
1372  }
1373  else
1374  {
1375  if (upper_iter == tmp_box.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1376  {
1377  insert_iter = lower_iter;
1378  create_new_box = false;
1379  }
1380  else
1381  {
1382  create_new_box = true;
1383  }
1384  }
1385 
1386 
1387  if (upper_iter != tmp_box.end() && lower_iter != tmp_box.end())
1388  {
1389  //Figure out which entry is closer to m/z
1390  double dist_lower = fabs(lower_iter->first - mz);
1391  double dist_upper = fabs(upper_iter->first - mz);
1392  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1393  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1394 
1395  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1396  {
1397  create_new_box = true;
1398  }
1399  else
1400  {
1401  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1402  create_new_box = false;
1403  }
1404  }
1405 
1406  BoxElement element;
1407  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = -1000;
1408  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1409 
1410  if (create_new_box == false)
1411  {
1412  std::pair<UInt, BoxElement> help2(scan, element);
1413  insert_iter->second.insert(help2);
1414 
1415  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1416  Box replacement(insert_iter->second);
1417 
1418  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1419  //Also note that we already inserted the new entry, leading to size-1.
1420  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1421  c_mz /= ((double) insert_iter->second.size());
1422 
1423  //Now let's remove the old and insert the new one
1424  tmp_box.erase(insert_iter);
1425  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1426  tmp_box.insert(help3);
1427  }
1428  else
1429  {
1430  std::pair<UInt, BoxElement> help2(scan, element);
1431  std::multimap<UInt, BoxElement> help3;
1432  help3.insert(help2);
1433 
1434  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1435  tmp_box.insert(help4);
1436  }
1437  }
1438 
1439  template <typename PeakType>
1440  void IsotopeWaveletTransform<PeakType>::updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
1441  const UInt RT_votes_cutoff, const Int front_bound, const Int end_bound)
1442  {
1443  typename std::multimap<double, Box>::iterator iter, iter2;
1444 
1445  if ((Int)scan_index == end_bound && end_bound != (Int)map.size() - 1)
1446  {
1447  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); ++iter)
1448  {
1449 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1450  std::cout << "LOW THREAD insert in end_box " << iter->first << std::endl;
1451  typename Box::iterator dings;
1452  for (dings = iter->second.begin(); dings != iter->second.end(); ++dings)
1453  std::cout << map[dings->first].getRT() << "\t" << dings->second.c + 1 << std::endl;
1454 #endif
1455  end_boxes_.insert(*iter);
1456  }
1457  open_boxes_.clear();
1458  return;
1459  }
1460 
1461  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); )
1462  {
1463 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1464  if (front_bound > 0)
1465  {
1466  std::cout << "HIGH THREAD open box. " << iter->first << "\t current scan index " << scan_index << "\t" << ((iter->second.begin()))->first << "\t of last scan " << map.size() - 1 << "\t" << front_bound << std::endl;
1467  }
1468 #endif
1469 
1470  //For each Box we need to figure out, if and when the last RT value has been inserted
1471  UInt lastScan = (--(iter->second.end()))->first;
1472  if (scan_index - lastScan > RT_interleave + 1 || scan_index == map.size() - 1) //I.e. close the box!
1473  {
1474  if (iter->second.begin()->first - front_bound <= RT_interleave + 1 && front_bound > 0)
1475  {
1476  iter2 = iter;
1477  ++iter2;
1478 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1479  std::cout << "HIGH THREAD insert in front_box " << iter->first << std::endl;
1480 #endif
1481  front_boxes_.insert(*iter);
1482  open_boxes_.erase(iter);
1483  iter = iter2;
1484  continue;
1485  }
1486 
1487  iter2 = iter;
1488  ++iter2;
1489  //Please do **NOT** simplify the upcoming lines.
1490  //The 'obvious' overhead is necessary since the object represented by iter might be erased
1491  //by push2Box which might be called by extendBox_.
1492  if (iter->second.size() >= RT_votes_cutoff)
1493  {
1494  //extendBox_ (map, iter->second);
1495  iter = iter2;
1496  closed_boxes_.insert(*(--iter));
1497  }
1498  open_boxes_.erase(iter);
1499  iter = iter2;
1500  }
1501  else
1502  {
1503  ++iter;
1504  }
1505  }
1506  }
1507 
1508  template <typename PeakType>
1510  {
1511 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1512  std::cout << "**** CHECKING FOR BOX EXTENSIONS ****" << std::endl;
1513 #endif
1514 
1515  //Determining the elution profile
1516  typename Box::const_iterator iter;
1517  std::vector<double> elution_profile(box.size());
1518  UInt index = 0;
1519  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1520  {
1521  for (Size i = iter->second.MZ_begin; i != iter->second.MZ_end; ++i)
1522  {
1523  elution_profile[index] += map[iter->second.RT_index][i].getIntensity();
1524  }
1525  elution_profile[index] /= iter->second.MZ_end - iter->second.MZ_begin + 1.;
1526  }
1527 
1528  double max = 0;
1529  Int max_index = INT_MIN;
1530  for (Size i = 0; i < elution_profile.size(); ++i)
1531  {
1532  if (elution_profile[i] > max)
1533  {
1534  max_index = i;
1535  max = elution_profile[i];
1536  }
1537  }
1538 
1539  Int max_extension = (Int)(elution_profile.size()) - 2 * max_index;
1540 
1541  double av_elution = 0;
1542  for (Size i = 0; i < elution_profile.size(); ++i)
1543  {
1544  av_elution += elution_profile[i];
1545  }
1546  av_elution /= (double)elution_profile.size();
1547 
1548  double sd_elution = 0;
1549  for (Size i = 0; i < elution_profile.size(); ++i)
1550  {
1551  sd_elution += (av_elution - elution_profile[i]) * (av_elution - elution_profile[i]);
1552  }
1553  sd_elution /= (double)(elution_profile.size() - 1);
1554  sd_elution = sqrt(sd_elution);
1555 
1556  //Determine average m/z monoisotopic pos
1557  double av_mz = 0;
1558  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1559  {
1560  av_mz += iter->second.mz;
1561 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1562  std::cout << iter->second.RT << "\t" << iter->second.mz << "\t" << iter->second.c + 1 << std::endl;
1563 #endif
1564  }
1565  av_mz /= (double)box.size();
1566 
1567 
1568  //Boundary check
1569  if ((Int)(box.begin()->second.RT_index) - 1 < 0)
1570  {
1571  return;
1572  }
1573 
1574  UInt pre_index = box.begin()->second.RT_index - 1;
1575  typename MSSpectrum::const_iterator c_iter = map[pre_index].MZBegin(av_mz);
1576  double pre_elution = 0;
1577 
1578  double mz_start = map[pre_index + 1][box.begin()->second.MZ_begin].getMZ();
1579  double mz_end = map[pre_index + 1][box.begin()->second.MZ_end].getMZ();
1580 
1581  typename MSSpectrum::const_iterator mz_start_iter = map[pre_index].MZBegin(mz_start), mz_end_iter = map[pre_index].MZBegin(mz_end);
1582  for (typename MSSpectrum::const_iterator mz_iter = mz_start_iter; mz_iter != mz_end_iter; ++mz_iter)
1583  {
1584  pre_elution += mz_iter->getIntensity();
1585  }
1586 
1587 
1588  //Do we need to extend at all?
1589  if (pre_elution <= av_elution - 2 * sd_elution)
1590  {
1591  return;
1592  }
1593 
1594  Int c_index = max_extension;
1595  Int first_index = box.begin()->second.RT_index;
1596  for (Int i = 1; i < max_extension; ++i)
1597  {
1598  c_index = first_index - i;
1599  if (c_index < 0)
1600  {
1601  break;
1602  }
1603 
1604  //CHECK Majority vote for charge???????????????
1605 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1606  std::cout << box.begin()->second.RT << "\t" << av_mz << "\t" << box.begin()->second.c + 1 << "\t" << " extending the box " << std::endl;
1607 #endif
1608 
1609  push2Box_(av_mz, c_index, box.begin()->second.c, box.begin()->second.score, c_iter->getIntensity(),
1610  map[c_index].getRT(), box.begin()->second.MZ_begin, box.begin()->second.MZ_end);
1611  }
1612  }
1613 
1614  template <typename PeakType>
1616  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1617  {
1618  typename std::multimap<double, Box>::iterator iter;
1619  typename Box::iterator box_iter;
1620  std::vector<BoxElement> final_box;
1621  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1622  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1623 
1624  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1625  {
1626  Box& c_box = iter->second;
1627  av_score = 0; av_mz = 0; av_intens = 0; av_abs_intens = 0; count = 0;
1628  virtual_av_mz = 0; virtual_av_intens = 0; virtual_av_abs_intens = 0; virtual_count = 0;
1629 
1630  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1631  {
1632  if (box_iter->second.score == 0) //virtual helping point
1633  {
1634  if (count != 0)
1635  continue; //it is in any way not pure virtual
1636 
1637  c_mz = box_iter->second.mz;
1638  virtual_av_intens += box_iter->second.intens;
1639  virtual_av_abs_intens += fabs(box_iter->second.intens);
1640  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1641  ++virtual_count;
1642  }
1643  else
1644  {
1645  c_mz = box_iter->second.mz;
1646  av_score += box_iter->second.score;
1647  av_intens += box_iter->second.intens;
1648  av_abs_intens += fabs(box_iter->second.intens);
1649  av_mz += c_mz * fabs(box_iter->second.intens);
1650  ++count;
1651  }
1652  }
1653 
1654  if (count == 0) //pure virtual helping box
1655  {
1656  av_intens = virtual_av_intens / virtual_count;
1657  av_score = 0;
1658  av_mz = virtual_av_mz / virtual_av_abs_intens;
1659  }
1660  else
1661  {
1662  av_intens /= count;
1663  av_score /= count;
1664  av_mz /= av_abs_intens;
1665  }
1666 
1667  BoxElement c_box_element;
1668  c_box_element.mz = av_mz;
1669  c_box_element.c = c;
1670  c_box_element.score = av_score;
1671  c_box_element.intens = av_intens;
1672 
1673  c_box_element.RT = c_box.begin()->second.RT;
1674 
1675  final_box.push_back(c_box_element);
1676  }
1677 
1678  UInt num_o_feature = final_box.size();
1679  if (num_o_feature == 0)
1680  {
1681  tmp_boxes_->at(c).clear();
1682  return;
1683  }
1684 
1685  //Computing the derivatives
1686  std::vector<double> bwd_diffs(num_o_feature, 0);
1687 
1688  bwd_diffs[0] = 0;
1689  for (UInt i = 1; i < num_o_feature; ++i)
1690  {
1691  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1692  }
1693 
1694 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1695  std::ofstream ofile_bwd("bwd_gpu.dat");
1696  for (UInt i = 0; i < num_o_feature; ++i)
1697  {
1698  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1699  }
1700  ofile_bwd.close();
1701 #endif
1702 
1703  for (UInt i = 0; i < num_o_feature - 1; ++i)
1704  {
1705  while (i < num_o_feature - 2)
1706  {
1707  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1708  break;
1709  ++i;
1710  }
1711 
1712  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1713  {
1714  checkPositionForPlausibility_(candidates, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1715  continue;
1716  }
1717  }
1718 
1719  tmp_boxes_->at(c).clear();
1720  }
1721 
1722  template <typename PeakType>
1724  {
1725  FeatureMap feature_map;
1726  typename std::multimap<double, Box>::iterator iter;
1727  typename Box::iterator box_iter;
1728  UInt best_charge_index; double best_charge_score, c_mz, c_RT; UInt c_charge;
1729  double av_intens = 0, av_ref_intens = 0, av_score = 0, av_mz = 0, av_RT = 0, mz_cutoff, sum_of_ref_intenses_g;
1730  bool restart = false;
1731 
1732  for (iter = closed_boxes_.begin(); iter != closed_boxes_.end(); ++iter)
1733  {
1734  sum_of_ref_intenses_g = 0;
1735  Box& c_box = iter->second;
1736  std::vector<double> charge_votes(max_charge_, 0), charge_binary_votes(max_charge_, 0);
1737  restart = false;
1738 
1739  //Let's first determine the charge
1740  //Therefore, we can use two types of votes: qualitative ones (charge_binary_votes) or quantitative ones (charge_votes)
1741  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1742  {
1743 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1744  if (trunc(box_iter->second.mz) == 874)
1745  std::cout << box_iter->second.c << "\t" << box_iter->second.intens << "\t" << box_iter->second.score << std::endl;
1746 #endif
1747 
1748  if (box_iter->second.score == -1000)
1749  {
1750  restart = true;
1751  break;
1752  }
1753 
1754  charge_votes[box_iter->second.c] += box_iter->second.intens; //score; Do not use score, can get problematic for charge state 2 vs 4
1755  ++charge_binary_votes[box_iter->second.c];
1756  }
1757 
1758  if (restart)
1759  {
1760  continue;
1761  }
1762 
1763  //... determining the best fitting charge
1764  best_charge_index = 0; best_charge_score = 0;
1765  for (UInt i = 0; i < max_charge_; ++i)
1766  {
1767  if (charge_votes[i] > best_charge_score)
1768  {
1769  best_charge_index = i;
1770  best_charge_score = charge_votes[i];
1771  }
1772  }
1773 
1774  //Pattern found in too few RT scan
1775  if (charge_binary_votes[best_charge_index] < RT_votes_cutoff && RT_votes_cutoff <= map.size())
1776  {
1777  continue;
1778  }
1779 
1780  c_charge = best_charge_index + 1; //that's the finally predicted charge state for the pattern
1781 
1782  av_intens = 0; av_ref_intens = 0; av_score = 0; av_mz = 0; av_RT = 0;
1783  //Now, let's get the RT boundaries for the box
1784  std::vector<DPosition<2> > point_set;
1785  double sum_of_ref_intenses_l;
1786  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1787  {
1788  sum_of_ref_intenses_l = 0;
1789  c_mz = box_iter->second.mz;
1790  c_RT = box_iter->second.RT;
1791 
1792  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_mz, c_charge);
1793 
1794  point_set.push_back(DPosition<2>(c_RT, c_mz - Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge));
1795  //-1 since we are already at the first peak and +0.75, since this includes the last peak of the wavelet as a whole
1796  point_set.push_back(DPosition<2>(c_RT, c_mz + mz_cutoff / (double)c_charge));
1797 
1798 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1799  std::cout << "Intenstype: " << intenstype_ << std::endl;
1800 #endif
1801  if (intenstype_ == "ref")
1802  {
1803  //Find monoisotopic max
1804  const MSSpectrum& c_spec(map[box_iter->second.RT_index]);
1805  //'Correct' possible shift
1806  for (unsigned int i = 0; i < mz_cutoff; ++i)
1807  {
1808  typename MSSpectrum::const_iterator h_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge + Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge), hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1809 
1810  hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1811 
1812  while (h_iter != c_spec.begin())
1813  {
1814 
1815 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1816  if (trunc(c_mz) == 874)
1817  {
1818  std::cout << "cmz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << h_iter->getMZ() << "\t" << h_iter->getIntensity() << std::endl;
1819  }
1820 #endif
1821 
1822  --h_iter;
1823  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1824  {
1825  hc_iter = h_iter;
1826  }
1827 
1828  if (c_mz + i * Constants::IW_NEUTRON_MASS / c_charge - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge)
1829  {
1830  break;
1831  }
1832  }
1833 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1834  if (trunc(c_mz) == 874)
1835  {
1836  std::cout << "c_mz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << i * Constants::IW_NEUTRON_MASS / c_charge << "\t";
1837  }
1838 #endif
1839  sum_of_ref_intenses_l += hc_iter->getIntensity();
1840 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1841  if (trunc(c_mz) == 874)
1842  {
1843  std::cout << sum_of_ref_intenses_l << "********" << std::endl;
1844  }
1845 #endif
1846  }
1847  }
1848 
1849  if (best_charge_index == box_iter->second.c)
1850  {
1851  av_score += box_iter->second.score;
1852  av_intens += box_iter->second.intens;
1853  av_ref_intens += box_iter->second.ref_intens;
1854  sum_of_ref_intenses_g += sum_of_ref_intenses_l;
1855  av_mz += c_mz * box_iter->second.intens;
1856  }
1857  av_RT += c_RT;
1858  }
1859 
1860  av_mz /= av_intens;
1861  av_ref_intens /= (double)charge_binary_votes[best_charge_index];
1862  av_score /= (double)charge_binary_votes[best_charge_index];
1863  av_RT /= (double)c_box.size();
1864 
1865 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1866  if (trunc(av_mz) == 874)
1867  std::cout << av_mz << "\t" << best_charge_index << "\t" << best_charge_score << std::endl;
1868 #endif
1869 
1870  Feature c_feature;
1871  ConvexHull2D c_conv_hull;
1872  c_conv_hull.addPoints(point_set);
1873  c_feature.setCharge(c_charge);
1874  c_feature.setConvexHulls(std::vector<ConvexHull2D>(1, c_conv_hull));
1875 
1876  //This makes the intensity value independent of the m/z (the lambda) value (Skellam distribution)
1877  if (intenstype_ == "corrected")
1878  {
1879  double lambda = IsotopeWavelet::getLambdaL(av_mz * c_charge);
1880  av_intens /= exp(-2 * lambda) * boost::math::cyl_bessel_i(0.0, 2.0 * lambda);
1881  }
1882  if (intenstype_ == "ref")
1883  {
1884 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1885  if (trunc(c_mz) == 874)
1886  {
1887  std::cout << sum_of_ref_intenses_g << "####" << std::endl;
1888  }
1889 #endif
1890 
1891  av_intens = sum_of_ref_intenses_g;
1892  }
1893 
1894  c_feature.setMZ(av_mz);
1895  c_feature.setIntensity(av_intens);
1896  c_feature.setRT(av_RT);
1897  c_feature.setOverallQuality(av_score);
1898  feature_map.push_back(c_feature);
1899  }
1900 
1901  return feature_map;
1902  }
1903 
1904  template <typename PeakType>
1906  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
1907  {
1908  typename MSSpectrum::const_iterator iter, ref_iter;
1909  UInt peak_cutoff;
1910  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
1911 
1912  iter = candidate.MZBegin(seed_mz);
1913  //we can ignore those cases
1914  if (iter == candidate.begin() || iter == candidate.end())
1915  {
1916  return false;
1917  }
1918 
1919  std::pair<double, double> reals;
1920  ref_iter = ref.MZBegin(seed_mz);
1921  //Correct the position
1922  double real_mz, real_intens;
1923  if (check_PPMs)
1924  {
1925  reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
1926  real_mz = reals.first; real_intens = reals.second;
1927  //if (real_mz <= 0 || real_intens <= 0)
1928  //{
1929  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1930  while (h_iter != ref.begin())
1931  {
1932  --h_iter;
1933  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1934  {
1935  hc_iter = h_iter;
1936  }
1937  else
1938  {
1939  break;
1940  }
1941 
1942  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1943  {
1944  return false;
1945  }
1946  }
1947  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
1948  real_mz = reals.first; real_intens = reals.second;
1949  if (real_mz <= 0 || real_intens <= 0)
1950  {
1951  return false;
1952  }
1953  real_mz = h_iter->getMZ();
1954  real_intens = h_iter->getIntensity();
1955  //}
1956  }
1957  else
1958  {
1959  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
1960  real_mz = reals.first; real_intens = reals.second;
1961 
1962  if (real_mz <= 0 || real_intens <= 0)
1963  {
1964  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1965  while (h_iter != ref.begin())
1966  {
1967  --h_iter;
1968  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1969  {
1970  hc_iter = h_iter;
1971  }
1972  else
1973  {
1974  break;
1975  }
1976 
1977  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1978  {
1979  return false;
1980  }
1981  }
1982  real_mz = h_iter->getMZ(); real_intens = h_iter->getIntensity();
1983  if (real_mz <= 0 || real_intens <= 0)
1984  {
1985  return false;
1986  }
1987  real_mz = h_iter->getMZ();
1988  real_intens = h_iter->getIntensity();
1989  }
1990  }
1991 
1992  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
1993 
1994  if (c_score <= 0)
1995  {
1996  return false;
1997  }
1998 
1999  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
2000  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
2001  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2002  if (real_r_MZ_iter == ref.end())
2003  {
2004  --real_r_MZ_iter;
2005  }
2006 
2007 
2008  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2009  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2010 
2011  if (prev_score == -1000)
2012  {
2013  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2014  }
2015  else
2016  {
2017  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2018  }
2019  return true;
2020  }
2021 
2022  template <typename PeakType>
2024  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
2025  {
2026  typename MSSpectrum::const_iterator iter, ref_iter;
2027  UInt peak_cutoff;
2028  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
2029 
2030  iter = candidate.MZBegin(seed_mz);
2031  //we can ignore those cases
2032  if (iter == candidate.begin() || iter == candidate.end())
2033  {
2034  return false;
2035  }
2036 
2037  std::pair<double, double> reals;
2038  ref_iter = ref.MZBegin(seed_mz);
2039  //Correct the position
2040  double real_mz, real_intens;
2041  if (check_PPMs)
2042  {
2043  reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
2044  real_mz = reals.first; real_intens = reals.second;
2045  //if (real_mz <= 0 || real_intens <= 0)
2046  //{
2047  auto h_iter = ref_iter, hc_iter = ref_iter;
2048  while (h_iter != ref.begin())
2049  {
2050  --h_iter;
2051  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2052  {
2053  hc_iter = h_iter;
2054  }
2055  else
2056  {
2057  break;
2058  }
2059 
2060  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2061  {
2062  return false;
2063  }
2064  }
2065  ++h_iter;
2066  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
2067  real_mz = reals.first; real_intens = reals.second;
2068 
2069 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2070  std::cout << "Plausibility check old_mz: " << iter->getMZ() << "\t" << real_mz << std::endl;
2071 #endif
2072 
2073  if (real_mz <= 0 || real_intens <= 0)
2074  {
2075  return false;
2076  }
2077  real_mz = h_iter->getMZ();
2078  real_intens = h_iter->getIntensity();
2079  //}
2080  }
2081  else
2082  {
2083  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
2084  real_mz = reals.first; real_intens = reals.second;
2085 
2086  if (real_mz <= 0 || real_intens <= 0)
2087  {
2088  auto h_iter = ref_iter, hc_iter = ref_iter;
2089  while (h_iter != ref.begin())
2090  {
2091  --h_iter;
2092  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2093  {
2094  hc_iter = h_iter;
2095  }
2096  else
2097  {
2098  break;
2099  }
2100 
2101  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2102  {
2103  return false;
2104  }
2105  }
2106  real_mz = h_iter->getMZ(); real_intens = h_iter->getIntensity();
2107  if (real_mz <= 0 || real_intens <= 0)
2108  {
2109  return false;
2110  }
2111  real_mz = h_iter->getMZ();
2112  real_intens = h_iter->getIntensity();
2113  }
2114  }
2115 
2116  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
2117 
2118  if (c_score <= 0)
2119  {
2120  return false;
2121  }
2122 
2123  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
2124  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
2125  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2126  if (real_r_MZ_iter == ref.end())
2127  {
2128  --real_r_MZ_iter;
2129  }
2130 
2131 
2132  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2133  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2134 
2135  if (prev_score == -1000)
2136  {
2137  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2138  }
2139  else
2140  {
2141  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2142  }
2143 
2144  return true;
2145  }
2146 
2147  template <typename PeakType>
2148  std::pair<double, double> IsotopeWaveletTransform<PeakType>::checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c)
2149  {
2150  double mass = c_mz * (c + 1) - Constants::IW_PROTON_MASS * (c);
2151  double ppms = getPPMs_(peptideMassRule_(mass), mass);
2153  {
2154 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2155  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "REJECT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2156 #endif
2157  return std::pair<double, double>(-1, -1);
2158  }
2159 
2160 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2161  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "ACCEPT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2162 #endif
2163  return std::pair<double, double>(c_mz, ref.MZBegin(c_mz)->getIntensity());
2164  }
2165 
2166 } //namespace
2167 
2168 #pragma clang diagnostic pop
2169 
2170 
void setCharge(const ChargeType &ch)
Set charge state.
Mutable iterator for the ConstRefVector.
Definition: ConstRefVector.h:222
This vector holds pointer to the elements of another container.
Definition: ConstRefVector.h:70
Iterator begin()
See std::vector documentation.
Definition: ConstRefVector.h:397
void sortByIntensity(bool reverse=false)
Sorting.
Definition: ConstRefVector.h:719
Iterator end()
See std::vector documentation.
Definition: ConstRefVector.h:403
A 2-dimensional hull representation in [counter]clockwise direction - depending on axis labelling.
Definition: ConvexHull2D.h:73
void addPoints(const PointArrayType &points)
A container for features.
Definition: FeatureMap.h:106
An LC-MS feature.
Definition: Feature.h:72
void setConvexHulls(const std::vector< ConvexHull2D > &hulls)
Set the convex hulls of single mass traces.
void setOverallQuality(QualityType q)
Set the overall quality.
Internally (only by GPUs) used data structure . It allows efficient data exchange between CPU and GPU...
Definition: IsotopeWaveletTransform.h:97
double getRT() const
Definition: IsotopeWaveletTransform.h:130
const MSSpectrum * getRefSpectrum() const
Definition: IsotopeWaveletTransform.h:172
TransSpectrum(const MSSpectrum *reference)
Definition: IsotopeWaveletTransform.h:109
double getRefIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:142
TransSpectrum()
Definition: IsotopeWaveletTransform.h:103
MSSpectrum::const_iterator end() const
Definition: IsotopeWaveletTransform.h:193
double getMZ(const UInt i) const
Definition: IsotopeWaveletTransform.h:136
virtual ~TransSpectrum()
Definition: IsotopeWaveletTransform.h:116
std::vector< float > * trans_intens_
The intensities of the transform.
Definition: IsotopeWaveletTransform.h:208
void setTransIntensity(const UInt i, const double intens)
Definition: IsotopeWaveletTransform.h:154
const MSSpectrum * getRefSpectrum()
Definition: IsotopeWaveletTransform.h:166
MSSpectrum::const_iterator MZEnd(const double mz) const
Definition: IsotopeWaveletTransform.h:186
Size size() const
Definition: IsotopeWaveletTransform.h:160
virtual void destroy()
Definition: IsotopeWaveletTransform.h:121
double getTransIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:148
const MSSpectrum * reference_
The reference spectrum.
Definition: IsotopeWaveletTransform.h:207
MSSpectrum::const_iterator MZBegin(const double mz) const
Definition: IsotopeWaveletTransform.h:179
MSSpectrum::const_iterator begin() const
Definition: IsotopeWaveletTransform.h:200
A class implementing the isotope wavelet transform. If you just want to find features using the isoto...
Definition: IsotopeWaveletTransform.h:72
void clusterSeeds_(const TransSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const bool check_PPMs)
Clusters the seeds stored by push2TmpBox_.
Definition: IsotopeWaveletTransform.h:1615
virtual ~IsotopeWaveletTransform()
Destructor.
Definition: IsotopeWaveletTransform.h:559
Size max_scan_size_
Definition: IsotopeWaveletTransform.h:485
std::vector< double > prod_
Definition: IsotopeWaveletTransform.h:482
std::vector< float > zeros_
Definition: IsotopeWaveletTransform.h:493
UInt data_length_
Definition: IsotopeWaveletTransform.h:486
virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens)
Inserts a potential isotopic pattern into an open box or - if no such box exists - creates a new one.
Definition: IsotopeWaveletTransform.h:1229
std::vector< double > xs_
Definition: IsotopeWaveletTransform.h:482
double getPPMs_(const double mass_a, const double mass_b) const
Returns the parts-per-million deviation of the masses.
Definition: IsotopeWaveletTransform.h:472
std::multimap< double, Box > front_boxes_
Definition: IsotopeWaveletTransform.h:478
std::vector< double > c_spacings_
Definition: IsotopeWaveletTransform.h:482
UInt max_charge_
Definition: IsotopeWaveletTransform.h:486
Int from_max_to_right_
Definition: IsotopeWaveletTransform.h:489
virtual std::pair< double, double > checkPPMTheoModel_(const MSSpectrum &ref, const double c_mz, const UInt c)
Definition: IsotopeWaveletTransform.h:2148
Size getMaxScanSize() const
Definition: IsotopeWaveletTransform.h:319
std::multimap< double, Box > end_boxes_
Definition: IsotopeWaveletTransform.h:478
virtual double scoreThis_(const TransSpectrum &candidate, const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
Given a candidate for an isotopic pattern, this function computes the corresponding score.
Definition: IsotopeWaveletTransform.h:969
double score
The associated score.
Definition: IsotopeWaveletTransform.h:81
IsotopeWaveletTransform()
Default Constructor.
Definition: IsotopeWaveletTransform.h:521
std::vector< double > c_mzs_
Definition: IsotopeWaveletTransform.h:482
double mz
The monoisotopic position.
Definition: IsotopeWaveletTransform.h:79
double sigma_
Definition: IsotopeWaveletTransform.h:481
void updateBoxStates(const PeakMap &map, const Size scan_index, const UInt RT_interleave, const UInt RT_votes_cutoff, const Int front_bound=-1, const Int end_bound=-1)
A function keeping track of currently open and closed sweep line boxes. This function is used by the ...
Definition: IsotopeWaveletTransform.h:1440
std::vector< float > scores_
Definition: IsotopeWaveletTransform.h:493
std::multimap< double, Box > closed_boxes_
Definition: IsotopeWaveletTransform.h:478
std::vector< std::multimap< double, Box > > * tmp_boxes_
Definition: IsotopeWaveletTransform.h:479
std::vector< int > indices_
Definition: IsotopeWaveletTransform.h:490
UInt MZ_begin
Index.
Definition: IsotopeWaveletTransform.h:86
double ref_intens
Definition: IsotopeWaveletTransform.h:83
std::vector< double > interpol_xs_
Definition: IsotopeWaveletTransform.h:483
double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:297
virtual std::multimap< double, Box > getClosedBoxes()
Returns the closed boxes.
Definition: IsotopeWaveletTransform.h:278
UInt c
Note, this is not the charge (it is charge-1!!!)
Definition: IsotopeWaveletTransform.h:80
double getSdIntens_(const TransSpectrum &scan, const double mean)
Computes the standard deviation (neglecting negative values) of the (transformed) intensities of scan...
Definition: IsotopeWaveletTransform.h:1214
double av_MZ_spacing_
Definition: IsotopeWaveletTransform.h:481
virtual void getTransform(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:565
double peptideMassRule_(const double c_mass) const
Returns the monoisotopic mass (with corresponding decimal values) we would expect at c_mass.
Definition: IsotopeWaveletTransform.h:449
Int from_max_to_left_
Definition: IsotopeWaveletTransform.h:489
String intenstype_
Definition: IsotopeWaveletTransform.h:488
std::vector< double > interpol_ys_
Definition: IsotopeWaveletTransform.h:483
double max_mz_cutoff_
Definition: IsotopeWaveletTransform.h:492
virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
Essentially the same function as.
Definition: IsotopeWaveletTransform.h:1335
double getAvIntens_(const TransSpectrum &scan)
Computes the average (transformed) intensity (neglecting negative values) of scan.
Definition: IsotopeWaveletTransform.h:1200
double RT
The elution time (not the scan index)
Definition: IsotopeWaveletTransform.h:84
void extendBox_(const PeakMap &map, const Box &box)
A currently still necessary function that extends the box box in order to capture also signals whose ...
Definition: IsotopeWaveletTransform.h:1509
FeatureMap mapSeeds2Features(const PeakMap &map, const UInt RT_votes_cutoff)
Filters the candidates further more and maps the internally used data structures to the OpenMS framew...
Definition: IsotopeWaveletTransform.h:1723
virtual void identifyCharge(const MSSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
Given an isotope wavelet transformed spectrum candidates, this function assigns to every significant ...
Definition: IsotopeWaveletTransform.h:698
double getSigma() const
Definition: IsotopeWaveletTransform.h:302
UInt RT_index
The elution time (map) index.
Definition: IsotopeWaveletTransform.h:85
double intens
The transformed intensity at the monoisotopic mass.
Definition: IsotopeWaveletTransform.h:82
void sampleTheCMarrWavelet_(const MSSpectrum &scan, const Int wavelet_length, const Int mz_index, const UInt charge)
std::multimap< double, Box > open_boxes_
Definition: IsotopeWaveletTransform.h:478
double getLinearInterpolation(const typename MSSpectrum::const_iterator &left_iter, const double mz_pos, const typename MSSpectrum::const_iterator &right_iter)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:286
virtual void initializeScan(const MSSpectrum &c_ref, const UInt c=0)
Definition: IsotopeWaveletTransform.h:646
double getAvMZSpacing_(const MSSpectrum &scan)
Computes the average MZ spacing of scan.
Definition: IsotopeWaveletTransform.h:1181
virtual void getTransformHighRes(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:608
UInt MZ_end
Index.
Definition: IsotopeWaveletTransform.h:87
void setSigma(const double sigma)
Definition: IsotopeWaveletTransform.h:307
double min_spacing_
Definition: IsotopeWaveletTransform.h:492
virtual bool checkPositionForPlausibility_(const TransSpectrum &candidate, const MSSpectrum &ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
A ugly but necessary function to handle "off-by-1-Dalton predictions" due to idiosyncrasies of the da...
Definition: IsotopeWaveletTransform.h:2023
UInt max_num_peaks_per_pattern_
Definition: IsotopeWaveletTransform.h:486
double getMinSpacing() const
Definition: IsotopeWaveletTransform.h:314
bool hr_data_
Definition: IsotopeWaveletTransform.h:487
virtual void computeMinSpacing(const MSSpectrum &c_ref)
Definition: IsotopeWaveletTransform.h:688
std::multimap< UInt, BoxElement > Box
Key: RT index, value: BoxElement.
Definition: IsotopeWaveletTransform.h:90
std::vector< double > psi_
Definition: IsotopeWaveletTransform.h:482
Internally used data structure.
Definition: IsotopeWaveletTransform.h:78
static UInt getNumPeakCutOff(const double mass, const UInt z)
static IsotopeWavelet * init(const double max_m, const UInt max_charge)
static double getLambdaL(const double m)
Returns the mass-parameter lambda (linear fit).
static double getValueByLambda(const double lambda, const double tz1)
Returns the value of the isotope wavelet at position t via a fast table lookup. Usually,...
static UInt getMzPeakCutOffAtMonoPos(const double mass, const UInt z)
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:73
Iterator begin()
Definition: MSExperiment.h:150
Size size() const
Definition: MSExperiment.h:120
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
double getRT() const
Iterator MZBegin(CoordinateType mz)
Binary search for peak range begin.
Iterator MZEnd(CoordinateType mz)
Binary search for peak range end (returns the past-the-end iterator)
A 2-dimensional raw data point or peak.
Definition: Peak2D.h:55
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:196
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
IntensityType getIntensity() const
Definition: Peak2D.h:166
void setIntensity(IntensityType intensity)
Sets data point intensity (height)
Definition: Peak2D.h:172
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:120
const double IW_PROTON_MASS
Definition: IsotopeWaveletConstants.h:68
const double PEPTIDE_MASS_RULE_FACTOR
Definition: IsotopeWaveletConstants.h:49
const double IW_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:54
const unsigned int DEFAULT_NUM_OF_INTERPOLATION_POINTS
Definition: IsotopeWaveletConstants.h:43
const double c
Definition: Constants.h:209
const double PEPTIDE_MASS_RULE_THEO_PPM_BOUND
Definition: IsotopeWaveletConstants.h:51
const double PEPTIDE_MASS_RULE_BOUND
Definition: IsotopeWaveletConstants.h:50
const double IW_HALF_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:55
const double IW_QUARTER_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:56
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
bool intensityComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:497
bool intensityPointerComparator(PeakType *a, PeakType *b)
Definition: IsotopeWaveletTransform.h:509
bool positionComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:515
bool intensityAscendingComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:503