OpenMS  2.4.0
IsotopeWaveletTransform.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg$
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
46 #include <cmath>
47 #include <cmath>
48 #include <boost/math/special_functions/bessel.hpp>
49 #include <vector>
50 #include <map>
51 #include <sstream>
52 #include <fstream>
53 #include <iomanip>
54 
55 // This code has quite a few strange things in it triggering warnings which
56 // clutters the rest of the diagnostics
57 #pragma clang diagnostic push
58 #pragma clang diagnostic ignored "-Wfloat-equal"
59 #pragma clang diagnostic ignored "-Wconversion"
60 #pragma clang diagnostic ignored "-Wshorten-64-to-32"
61 
62 namespace OpenMS
63 {
64 
68  template <typename PeakType>
70  {
71 public:
72 
73 
75  struct BoxElement
76  {
77  double mz;
78  UInt c;
79  double score;
80  double intens;
81  double ref_intens;
82  double RT;
86  };
87 
88  typedef std::multimap<UInt, BoxElement> Box;
89 
90 
95  {
97 
98 public:
99 
102  reference_(nullptr), trans_intens_(nullptr)
103  {
104  }
105 
107  TransSpectrum(const MSSpectrum* reference) :
108  reference_(reference)
109  {
110  trans_intens_ = new std::vector<float>(reference_->size(), 0.0);
111  }
112 
114  virtual ~TransSpectrum()
115  {
116  delete (trans_intens_);
117  }
118 
119  virtual void destroy()
120  {
121  delete (trans_intens_);
122  trans_intens_ = NULL;
123  delete (reference_);
124  reference_ = NULL;
125  }
126 
128  inline double getRT() const
129  {
130  return reference_->getRT();
131  }
132 
134  inline double getMZ(const UInt i) const
135  {
136  return (*reference_)[i].getMZ();
137  }
138 
140  inline double getRefIntensity(const UInt i) const
141  {
142  return (*reference_)[i].getIntensity();
143  }
144 
146  inline double getTransIntensity(const UInt i) const
147  {
148  return (*trans_intens_)[i];
149  }
150 
152  inline void setTransIntensity(const UInt i, const double intens)
153  {
154  (*trans_intens_)[i] = intens;
155  }
156 
158  inline Size size() const
159  {
160  return trans_intens_->size();
161  }
162 
164  inline const MSSpectrum* getRefSpectrum()
165  {
166  return reference_;
167  }
168 
170  inline const MSSpectrum* getRefSpectrum() const
171  {
172  return reference_;
173  }
174 
177  inline typename MSSpectrum::const_iterator MZBegin(const double mz) const
178  {
179  return reference_->MZBegin(mz);
180  }
181 
184  inline typename MSSpectrum::const_iterator MZEnd(const double mz) const
185  {
186  return reference_->MZEnd(mz);
187  }
188 
191  inline typename MSSpectrum::const_iterator end() const
192  {
193  return reference_->end();
194  }
195 
198  inline typename MSSpectrum::const_iterator begin() const
199  {
200  return reference_->begin();
201  }
202 
203 protected:
204 
206  std::vector<float>* trans_intens_;
207 
208  };
209 
210 
211 
217  IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size = 0, const bool hr_data = false, const String intenstype = "ref");
218 
220  virtual ~IsotopeWaveletTransform();
221 
222 
227  virtual void getTransform(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
228 
233  virtual void getTransformHighRes(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
234 
254  virtual void identifyCharge(const MSSpectrum& candidates, const MSSpectrum& ref, const UInt scan_index, const UInt c,
255  const double ampl_cutoff, const bool check_PPMs);
256 
257  virtual void initializeScan(const MSSpectrum& c_ref, const UInt c = 0);
258 
259 #ifdef OPENMS_HAS_CUDA
260 
262  virtual int initializeScanCuda(const MSSpectrum& scan, const UInt c = 0);
263 
265  virtual void finalizeScanCuda();
266 
270  virtual void getTransformCuda(TransSpectrum& c_trans, const UInt c);
271 
287  virtual void identifyChargeCuda(const TransSpectrum& candidates, const UInt scan_index, const UInt c,
288  const double ampl_cutoff, const bool check_PPMs);
289 
292  virtual int sortCuda(MSSpectrum& sorted);
293 #endif
294 
295 
302  void updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
303  const UInt RT_votes_cutoff, const Int front_bound = -1, const Int end_bound = -1);
304 
305 
306  void mergeFeatures(IsotopeWaveletTransform<PeakType>* later_iwt, const UInt RT_interleave, const UInt RT_votes_cutoff);
307 
308 
313  FeatureMap mapSeeds2Features(const PeakMap& map, const UInt RT_votes_cutoff);
314 
316  virtual std::multimap<double, Box> getClosedBoxes()
317  { return closed_boxes_; }
318 
319 
324  inline double getLinearInterpolation(const typename MSSpectrum::const_iterator& left_iter, const double mz_pos, const typename MSSpectrum::const_iterator& right_iter)
325  {
326  return left_iter->getIntensity() + (right_iter->getIntensity() - left_iter->getIntensity()) / (right_iter->getMZ() - left_iter->getMZ()) * (mz_pos - left_iter->getMZ());
327  }
328 
335  inline double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
336  {
337  return intens_a + (intens_b - intens_a) / (mz_b - mz_a) * (mz_pos - mz_a);
338  }
339 
340  inline double getSigma() const
341  {
342  return sigma_;
343  }
344 
345  inline void setSigma(const double sigma)
346  {
347  sigma_ = sigma;
348  }
349 
350  virtual void computeMinSpacing(const MSSpectrum& c_ref);
351 
352  inline double getMinSpacing() const
353  {
354  return min_spacing_;
355  }
356 
357  inline Size getMaxScanSize() const
358  {
359  return max_scan_size_;
360  }
361 
362 protected:
363 
364 
368 
369 
370  inline void sampleTheCMarrWavelet_(const MSSpectrum& scan, const Int wavelet_length, const Int mz_index, const UInt charge);
371 
372 
379  virtual double scoreThis_(const TransSpectrum& candidate, const UInt peak_cutoff,
380  const double seed_mz, const UInt c, const double ampl_cutoff);
381 
388  virtual double scoreThis_(const MSSpectrum& candidate, const UInt peak_cutoff,
389  const double seed_mz, const UInt c, const double ampl_cutoff);
390 
391 
399  virtual bool checkPositionForPlausibility_(const TransSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
400  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
401 
409  virtual bool checkPositionForPlausibility_(const MSSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
410  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
411 
412  virtual std::pair<double, double> checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c);
413 
414 
416  inline double getAvIntens_(const TransSpectrum& scan);
418  inline double getAvIntens_(const MSSpectrum& scan);
419 
421  inline double getSdIntens_(const TransSpectrum& scan, const double mean);
423  inline double getSdIntens_(const MSSpectrum& scan, const double mean);
424 
435  virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score,
436  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens);
437 
453  virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score,
454  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end);
455 
461  inline double getAvMZSpacing_(const MSSpectrum& scan);
462 
463 
468  void clusterSeeds_(const TransSpectrum& candidates, const MSSpectrum& ref,
469  const UInt scan_index, const UInt c, const bool check_PPMs);
470 
475  virtual void clusterSeeds_(const MSSpectrum& candidates, const MSSpectrum& ref,
476  const UInt scan_index, const UInt c, const bool check_PPMs);
477 
478 
483  void extendBox_(const PeakMap& map, const Box& box);
484 
487  inline double peptideMassRule_(const double c_mass) const
488  {
489  double correction_fac = c_mass / Constants::PEPTIDE_MASS_RULE_BOUND;
490  double old_frac_mass = c_mass - (Int)(c_mass);
491  double new_mass = ((Int)(c_mass)) * (1. + Constants::PEPTIDE_MASS_RULE_FACTOR) - (Int)(correction_fac);
492  double new_frac_mass = new_mass - (Int)(new_mass);
493 
494  if (new_frac_mass - old_frac_mass > 0.5)
495  {
496  new_mass -= 1.;
497  }
498 
499  if (new_frac_mass - old_frac_mass < -0.5)
500  {
501  new_mass += 1.;
502  }
503 
504  return new_mass;
505  }
506 
510  inline double getPPMs_(const double mass_a, const double mass_b) const
511  {
512  return fabs(mass_a - mass_b) / (0.5 * (mass_a + mass_b)) * 1e6;
513  }
514 
515  //internally used data structures for the sweep line algorithm
516  std::multimap<double, Box> open_boxes_, closed_boxes_, end_boxes_, front_boxes_; //double = average m/z position
517  std::vector<std::multimap<double, Box> >* tmp_boxes_; //for each charge we need a separate container
518 
520  std::vector<double> c_mzs_, c_spacings_, psi_, prod_, xs_;
521  std::vector<double> interpol_xs_, interpol_ys_;
522 
525  bool hr_data_;
528  std::vector<int> indices_;
529 
531  std::vector<float> scores_, zeros_;
532  };
533 
534  template <typename PeakType>
535  bool intensityComparator(const PeakType& a, const PeakType& b)
536  {
537  return a.getIntensity() > b.getIntensity();
538  }
539 
540  template <typename PeakType>
542  {
543  return a.getIntensity() < b.getIntensity();
544  }
545 
546  template <typename PeakType>
548  {
549  return a->getIntensity() > b->getIntensity();
550  }
551 
552  template <typename PeakType>
553  bool positionComparator(const PeakType& a, const PeakType& b)
554  {
555  return a.getMZ() < b.getMZ();
556  }
557 
558  template <typename PeakType>
560  {
561  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(1);
562  av_MZ_spacing_ = 1;
563  max_scan_size_ = 0;
564  max_mz_cutoff_ = 3;
565  max_num_peaks_per_pattern_ = 3;
566  hr_data_ = false;
567  intenstype_ = "ref";
568  }
569 
570  template <typename PeakType>
571  IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size, const bool hr_data, String intenstype)
572  {
573  max_charge_ = max_charge;
574  max_scan_size_ = max_scan_size;
575  hr_data_ = hr_data;
576  intenstype_ = intenstype;
577  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(max_charge);
578  if (max_scan_size <= 0) //only important for the CPU
579  {
580  IsotopeWavelet::init(max_mz, max_charge);
581  }
582 
583  av_MZ_spacing_ = 1;
584  max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(max_mz, max_charge);
585  max_num_peaks_per_pattern_ = IsotopeWavelet::getNumPeakCutOff(max_mz, max_charge);
586 
587  Int size_estimate((Int)ceil(max_scan_size_ / (max_mz - min_mz)));
588  Int to_reserve((Int)ceil(size_estimate * max_num_peaks_per_pattern_ * Constants::IW_NEUTRON_MASS));
589  psi_.reserve(to_reserve); //The wavelet
590  prod_.reserve(to_reserve);
591  xs_.reserve(to_reserve);
594  }
595 
596  template <typename PeakType>
598  {
599  delete (tmp_boxes_);
600  }
601 
602  template <typename PeakType>
604  {
605  Int spec_size((Int)c_ref.size());
606  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
607  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
608  UInt charge = c + 1;
609  double value, T_boundary_left, T_boundary_right, old, c_diff, current, old_pos, my_local_MZ, my_local_lambda, origin, c_mz;
610 
611  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
612  {
613  value = 0; T_boundary_left = 0, T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
614  old = 0; old_pos = (my_local_pos - from_max_to_left_ - 1 >= 0) ? c_ref[my_local_pos - from_max_to_left_ - 1].getMZ() : c_ref[0].getMZ() - min_spacing_;
615  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
616  c_diff = 0;
617  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
618 
619  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
620  {
621  if (current_conv_pos >= spec_size)
622  {
623  value += 0.5 * old * min_spacing_;
624  break;
625  }
626 
627  c_mz = c_ref[current_conv_pos].getMZ();
628  c_diff = c_mz + origin;
629 
630  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
631  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
632 
633  value += 0.5 * (current + old) * (c_mz - old_pos);
634 
635  old = current;
636  old_pos = c_mz;
637  }
638 
639 
640 
641  c_trans[my_local_pos].setIntensity(value);
642  }
643  }
644 
645  template <typename PeakType>
647  {
648  Int spec_size((Int)c_ref.size());
649  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
650  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
651  UInt charge = c + 1;
652  double value, T_boundary_left, T_boundary_right, c_diff, current, my_local_MZ, my_local_lambda, origin, c_mz;
653 
654  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
655  {
656  value = 0; T_boundary_left = 0, T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
657 
658 
659  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
660  c_diff = 0;
661  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
662 
663  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
664  {
665  if (current_conv_pos >= spec_size)
666  {
667  break;
668  }
669 
670  c_mz = c_ref[current_conv_pos].getMZ();
671  c_diff = c_mz + origin;
672 
673  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
674  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
675 
676  value += current;
677  }
678 
679  c_trans[my_local_pos].setIntensity(value);
680  }
681  }
682 
683  template <typename PeakType>
685  {
686  data_length_ = (UInt) c_ref.size();
687  computeMinSpacing(c_ref);
688  Int wavelet_length = 0, quarter_length = 0;
689 
690  if (hr_data_) //We have to check this separately, because the simply estimation for LowRes data is destroyed by large gaps
691  {
692  UInt c_mz_cutoff;
693  typename MSSpectrum::const_iterator start_iter, end_iter;
694  for (UInt i = 0; i < data_length_; ++i)
695  {
696  c_mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[i].getMZ(), c + 1);
697  start_iter = c_ref.MZEnd(c_ref[i].getMZ());
698  end_iter = c_ref.MZBegin(c_ref[i].getMZ() + c_mz_cutoff);
699  wavelet_length = std::max((SignedSize) wavelet_length, distance(start_iter, end_iter) + 1);
700  end_iter = c_ref.MZEnd(c_ref[i].getMZ() - Constants::IW_QUARTER_NEUTRON_MASS / double(c + 1.));
701  quarter_length = std::max((SignedSize) quarter_length, distance(end_iter, start_iter) + 1);
702  }
703  }
704  else
705  {
706  //CHANGED
707  max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[data_length_ - 1].getMZ(), max_charge_);
708  wavelet_length = (UInt) ceil(max_mz_cutoff_ / min_spacing_);
709  }
710  //... done
711 
712 
713  if (wavelet_length > (Int) c_ref.size())
714  {
715  std::cout << "Warning: the extremal length of the wavelet is larger (" << wavelet_length << ") than the number of data points (" << c_ref.size() << "). This might (!) severely affect the transform." << std::endl;
716  std::cout << "Minimal spacing: " << min_spacing_ << std::endl;
717  std::cout << "Warning/Error generated at scan with RT " << c_ref.getRT() << "." << std::endl;
718  }
719 
720  Int max_index = (UInt) (Constants::IW_QUARTER_NEUTRON_MASS / min_spacing_);
721  from_max_to_left_ = max_index;
722  from_max_to_right_ = wavelet_length - 1 - from_max_to_left_;
723  }
724 
725  template <typename PeakType>
727  {
728  min_spacing_ = INT_MAX;
729  for (UInt c_conv_pos = 1; c_conv_pos < c_ref.size(); ++c_conv_pos)
730  {
731  min_spacing_ = std::min(min_spacing_, c_ref[c_conv_pos].getMZ() - c_ref[c_conv_pos - 1].getMZ());
732  }
733  }
734 
735  template <typename PeakType>
737  const MSSpectrum& ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
738  {
739  Size scan_size(candidates.size());
741  typename MSSpectrum::const_iterator iter_start, iter_end, iter_p, seed_iter, iter2;
742  double mz_cutoff, seed_mz, c_av_intens = 0, c_score = 0, c_sd_intens = 0, threshold = 0, help_mz, share, share_pos, bwd, fwd;
743  UInt MZ_start, MZ_end;
744 
745  MSSpectrum diffed(candidates);
746  diffed[0].setIntensity(0); diffed[scan_size - 1].setIntensity(0);
747 
748 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
749  std::stringstream stream;
750  stream << "diffed_" << ref.getRT() << "_" << c + 1 << ".trans\0";
751  std::ofstream ofile(stream.str().c_str());
752 #endif
753 
754  if (!hr_data_) //LowRes data
755  {
756  for (UInt i = 0; i < scan_size - 2; ++i)
757  {
758  share = candidates[i + 1].getIntensity(), share_pos = candidates[i + 1].getMZ();
759  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
760  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
761 
762  if (!(bwd >= 0 && fwd <= 0) || share > ref[i + 1].getIntensity())
763  {
764  diffed[i + 1].setIntensity(0);
765  }
766 
767 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
768  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
769 #endif
770  }
771  }
772  else //HighRes data
773  {
774  for (UInt i = 0; i < scan_size - 2; ++i)
775  {
776  share = candidates[i + 1].getIntensity(), share_pos = candidates[i + 1].getMZ();
777  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
778  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
779 
780  if (!(bwd >= 0 && fwd <= 0))
781  {
782  diffed[i + 1].setIntensity(0);
783  }
784 
785 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
786  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
787 #endif
788  }
789  }
790 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
791  ofile.close();
792 #endif
793 
794  ConstRefVector<MSSpectrum > c_sorted_candidate(diffed.begin(), diffed.end());
795 
796  //Sort the transform in descending order according to the intensities present in the transform
797  c_sorted_candidate.sortByIntensity();
798 
799 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
800  std::stringstream stream2;
801  stream2 << "sorted_cpu_" << candidates.getRT() << "_" << c + 1 << ".trans\0";
802  std::ofstream ofile2(stream2.str().c_str());
803  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
804  {
805  ofile2 << iter->getMZ() << "\t" << iter->getIntensity() << std::endl;
806  }
807  ofile2.close();
808 #endif
809 
810  std::vector<UInt> processed(scan_size, 0);
811 
812  if (ampl_cutoff < 0)
813  {
814  threshold = 0;
815  }
816  else
817  {
818  c_av_intens = getAvIntens_(candidates);
819  c_sd_intens = getSdIntens_(candidates, c_av_intens);
820  threshold = ampl_cutoff * c_sd_intens + c_av_intens;
821  }
822 
823  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
824  {
825  if (iter->getIntensity() <= 0)
826  {
827  break;
828  }
829 
830  seed_mz = iter->getMZ();
831  seed_iter = ref.MZBegin(seed_mz);
832 
833  if (seed_iter == ref.end() || processed[distance(ref.begin(), seed_iter)])
834  {
835  continue;
836  }
837 
838  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(seed_mz, c + 1);
839  //Mark the region as processed
840  //Do not move this further down, since we have to mark this as processed in any case,
841  //even when score <=0; otherwise we would look around the maximum's position unless
842  //any significant point is found
843  iter_start = ref.MZBegin(ref.begin(), seed_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.), seed_iter);
844  iter_end = ref.MZEnd(seed_iter, seed_mz + mz_cutoff / (c + 1.), ref.end());
845  if (iter_end == ref.end())
846  {
847  --iter_end;
848  }
849 
850  MZ_start = distance(ref.begin(), iter_start);
851  MZ_end = distance(ref.begin(), iter_end);
852 
853  memset(&(processed[MZ_start]), 1, sizeof(UInt) * (MZ_end - MZ_start + 1));
854 
855  c_score = scoreThis_(candidates, IsotopeWavelet::getNumPeakCutOff(seed_mz * (c + 1.)), seed_mz, c, threshold);
856 
857 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
858  if (trunc(seed_mz) == 874)
859  std::cout << seed_mz << "\t" << c_score << std::endl;
860 #endif
861 
862  if (c_score <= 0 && c_score != -1000)
863  {
864  continue;
865  }
866 
867  //Push the seed into its corresponding box (or create a new one, if necessary)
868  //Do ***NOT*** move this further down!
869  push2TmpBox_(seed_mz, scan_index, c, c_score, iter->getIntensity(), ref.getRT(), MZ_start, MZ_end);
870 
871  help_mz = seed_mz - Constants::IW_NEUTRON_MASS / (c + 1.);
872  iter2 = candidates.MZBegin(help_mz);
873  if (iter2 == candidates.end() || iter2 == candidates.begin())
874  {
875  continue;
876  }
877 
878  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
879  {
880  //In the other case, we are too close to the peak, leading to incorrect derivatives.
881  if (iter2 != candidates.end())
882  {
883  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
884  }
885  }
886 
887  help_mz = seed_mz + Constants::IW_NEUTRON_MASS / (c + 1.);
888  iter2 = candidates.MZBegin(help_mz);
889  if (iter2 == candidates.end() || iter2 == candidates.begin())
890  {
891  continue;
892  }
893 
894  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
895  {
896  //In the other case, we are too close to the peak, leading to incorrect derivatives.
897  if (iter2 != candidates.end())
898  {
899  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
900  }
901  }
902  }
903 
904  clusterSeeds_(candidates, ref, scan_index, c, check_PPMs);
905  }
906 
907  template <typename PeakType>
909  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
910  {
911  double c_score = 0, c_val;
912  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
913  Int signal_size((Int)candidate.size());
914  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
915  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
916 
917  //p_h_ind indicates if we are looking for a whole or a peak
918  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
919 
920  std::vector<double> positions(end);
921  for (Int i = 0; i < end; ++i)
922  {
923  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
924  }
925 
926  double l_score = 0, mid_val = 0;
927  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
928  for (Int v = 1; v <= end; ++v, ++p_h_ind)
929  {
930  do
931  {
932  if (start_index < signal_size - 1)
933  ++start_index;
934  else
935  break;
936  }
937  while (candidate[start_index].getMZ() < positions[v - 1]);
938 
939  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
940  {
941  continue;
942  }
943 
944  c_left_iter2 = candidate.begin() + start_index - 1;
945  c_right_iter2 = c_left_iter2 + 1;
946 
947  c_val = c_left_iter2->getIntensity() + (c_right_iter2->getIntensity() - c_left_iter2->getIntensity()) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
948 
949  if (v == (int)(ceil(end / 2.)))
950  {
951  l_score = c_score;
952  mid_val = c_val;
953  }
954 
955  if (p_h_ind % 2 == 1) //I.e. a whole
956  {
957  c_score -= c_val;
958 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
959  if (trunc(seed_mz) == 874)
960  std::cout << -c_val << std::endl;
961 #endif
962  }
963  else
964  {
965  c_score += c_val;
966 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
967  if (trunc(seed_mz) == 874)
968  std::cout << c_val << std::endl;
969 #endif
970  }
971 
972 
973  start_index = distance(candidate.begin(), c_left_iter2);
974  }
975 
976 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
977  std::ofstream ofile_score("scores.dat", ios::app);
978  std::ofstream ofile_check_score("check_scores.dat", ios::app);
979  ofile_score.close();
980  ofile_check_score.close();
981 #endif
982 
983 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
984  if (trunc(seed_mz) == 874)
985  std::cout << "final_score: " << seed_mz << "\t" << c_score << "\t l_score: " << l_score << "\t" << c_score - l_score - mid_val << "\t" << c_score - mid_val << "\t" << ampl_cutoff << std::endl;
986 #endif
987 
988  if (c_score - mid_val <= 0)
989  {
990  return 0;
991  }
992 
993  if (c_score - mid_val <= ampl_cutoff)
994  {
995  return -1000;
996  }
997 
998  if (l_score <= 0 || c_score - l_score - mid_val <= 0)
999  {
1000  return 0;
1001  }
1002 
1003  return c_score;
1004  }
1005 
1006  template <typename PeakType>
1008  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
1009  {
1010  double c_score = 0, c_val;
1011  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
1012  Int signal_size((Int)candidate.size());
1013 
1014  //p_h_ind indicates if we are looking for a whole or a peak
1015  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
1016 
1017  std::vector<double> positions(end);
1018  for (Int i = 0; i < end; ++i)
1019  {
1020  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
1021  }
1022 
1023  double l_score = 0, mid_val = 0;
1024  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
1025  for (Int v = 1; v <= end; ++v, ++p_h_ind)
1026  {
1027  do
1028  {
1029  if (start_index < signal_size - 1)
1030  ++start_index;
1031  else
1032  break;
1033  }
1034  while (candidate.getMZ(start_index) < positions[v - 1]);
1035 
1036  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
1037  {
1038  continue;
1039  }
1040 
1041  c_left_iter2 = candidate.begin() + start_index - 1;
1042  c_right_iter2 = c_left_iter2 + 1;
1043 
1044  c_val = candidate.getTransIntensity(start_index - 1) + (candidate.getTransIntensity(start_index) - candidate.getTransIntensity(start_index - 1)) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
1045  if (v == (int)(ceil(end / 2.)))
1046  {
1047  l_score = c_score;
1048  mid_val = c_val;
1049  }
1050 
1051  if (p_h_ind % 2 == 1) //I.e. a whole
1052  {
1053  c_score -= c_val;
1054  }
1055  else
1056  {
1057  c_score += c_val;
1058  }
1059 
1060  start_index = distance(candidate.begin(), c_left_iter2);
1061  }
1062 
1063 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1064  std::ofstream ofile_score("scores.dat", ios::app);
1065  std::ofstream ofile_check_score("check_scores.dat", ios::app);
1066  ofile_score << c_check_point << "\t" << c_score << std::endl;
1067  ofile_score.close();
1068  ofile_check_score.close();
1069 #endif
1070 
1071  if (l_score <= 0 || c_score - l_score - mid_val <= 0 || c_score - mid_val <= ampl_cutoff)
1072  {
1073  return 0;
1074  }
1075 
1076  return c_score;
1077  }
1078 
1079  template <typename PeakType>
1081  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1082  {
1083  typename std::multimap<double, Box>::iterator iter;
1084  typename Box::iterator box_iter;
1085  std::vector<BoxElement> final_box;
1086  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1087  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1088 
1089  typename std::pair<double, double> c_extend;
1090  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1091  {
1092 
1093  Box& c_box = iter->second;
1094  av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1095  virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1096 
1097  //Now, let's get the RT boundaries for the box
1098  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1099  {
1100  if (box_iter->second.score == 0) //virtual helping point
1101  {
1102  if (count != 0)
1103  continue; //it is in any way not pure virtual
1104 
1105  c_mz = box_iter->second.mz;
1106  virtual_av_intens += box_iter->second.intens;
1107  virtual_av_abs_intens += fabs(box_iter->second.intens);
1108  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1109  ++virtual_count;
1110  }
1111  else
1112  {
1113  c_mz = box_iter->second.mz;
1114  av_score += box_iter->second.score;
1115  av_intens += box_iter->second.intens;
1116  av_abs_intens += fabs(box_iter->second.intens);
1117  av_mz += c_mz * fabs(box_iter->second.intens);
1118  ++count;
1119  }
1120  }
1121 
1122  if (count == 0) //pure virtual helping box
1123  {
1124  av_intens = virtual_av_intens / virtual_count;
1125  av_score = 0;
1126  av_mz = virtual_av_mz / virtual_av_abs_intens;
1127  }
1128  else
1129  {
1130  av_intens /= count;
1131  av_score /= count;
1132  av_mz /= av_abs_intens;
1133  }
1134 
1135  BoxElement c_box_element;
1136  c_box_element.mz = av_mz;
1137  c_box_element.c = c;
1138  c_box_element.score = av_score;
1139  c_box_element.intens = av_intens;
1140 
1141  c_box_element.RT = c_box.begin()->second.RT;
1142  final_box.push_back(c_box_element);
1143  }
1144 
1145  Size num_o_feature = final_box.size();
1146  if (num_o_feature == 0)
1147  {
1148  tmp_boxes_->at(c).clear();
1149  return;
1150  }
1151 
1152  //Computing the derivatives
1153  std::vector<double> bwd_diffs(num_o_feature, 0);
1154 
1155  bwd_diffs[0] = 0;
1156  for (Size i = 1; i < num_o_feature; ++i)
1157  {
1158  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1159  }
1160 
1161 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1162  std::ofstream ofile_bwd("bwd_cpu.dat");
1163  for (Size i = 0; i < num_o_feature; ++i)
1164  {
1165  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1166  }
1167  ofile_bwd.close();
1168 #endif
1169 
1170 
1171  for (Size i = 0; i < num_o_feature - 1; ++i)
1172  {
1173  while (i < num_o_feature - 2)
1174  {
1175  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1176  break;
1177  ++i;
1178  }
1179 
1180  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1181  {
1182  checkPositionForPlausibility_(candidate, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1183  continue;
1184  }
1185  }
1186 
1187  tmp_boxes_->at(c).clear();
1188  }
1189 
1190  template <typename PeakType>
1192  {
1193  double av_intens = 0;
1194  for (UInt i = 0; i < scan.size(); ++i)
1195  {
1196  if (scan[i].getIntensity() >= 0)
1197  {
1198  av_intens += scan[i].getIntensity();
1199  }
1200  }
1201  return av_intens / (double)scan.size();
1202  }
1203 
1204  template <typename PeakType>
1206  {
1207  double res = 0, intens;
1208  for (UInt i = 0; i < scan.size(); ++i)
1209  {
1210  if (scan[i].getIntensity() >= 0)
1211  {
1212  intens = scan[i].getIntensity();
1213  res += (intens - mean) * (intens - mean);
1214  }
1215  }
1216  return sqrt(res / (double)(scan.size() - 1));
1217  }
1218 
1219  template <typename PeakType>
1220  double IsotopeWaveletTransform<PeakType>::getAvMZSpacing_(const MSSpectrum& scan) //, Int start_index, Int end_index)
1221  {
1222  std::vector<double> diffs(scan.size() - 1, 0);
1223  for (UInt i = 0; i < scan.size() - 1; ++i)
1224  {
1225  diffs[i] = scan[i + 1].getMZ() - scan[i].getMZ();
1226  }
1227 
1228  sort(diffs.begin(), diffs.end());
1229  double av_MZ_spacing = 0;
1230  for (UInt i = 0; i < diffs.size() / 2; ++i)
1231  {
1232  av_MZ_spacing += diffs[i];
1233  }
1234 
1235  return av_MZ_spacing / (diffs.size() / 2);
1236  }
1237 
1238  template <typename PeakType>
1240  {
1241  double av_intens = 0;
1242  for (UInt i = 0; i < scan.size(); ++i)
1243  {
1244  if (scan.getTransIntensity(i) >= 0)
1245  {
1246  av_intens += scan.getTransIntensity(i);
1247  }
1248  }
1249  return av_intens / (double)scan.size();
1250  }
1251 
1252  template <typename PeakType>
1254  {
1255  double res = 0, intens;
1256  for (UInt i = 0; i < scan.size(); ++i)
1257  {
1258  if (scan.getTransIntensity(i) >= 0)
1259  {
1260  intens = scan.getTransIntensity(i);
1261  res += (intens - mean) * (intens - mean);
1262  }
1263  }
1264  return sqrt(res / (double)(scan.size() - 1));
1265  }
1266 
1267  template <typename PeakType>
1268  void IsotopeWaveletTransform<PeakType>::push2Box_(const double mz, const UInt scan, UInt c,
1269  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, double ref_intens)
1270  {
1271  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1272 
1273  typename std::multimap<double, Box>::iterator upper_iter(open_boxes_.upper_bound(mz));
1274  typename std::multimap<double, Box>::iterator lower_iter(open_boxes_.lower_bound(mz));
1275 
1276  if (lower_iter != open_boxes_.end())
1277  {
1278  //Ugly, but necessary due to the implementation of STL lower_bound
1279  if (mz != lower_iter->first && lower_iter != open_boxes_.begin())
1280  {
1281  --lower_iter;
1282  }
1283  }
1284 
1285  typename std::multimap<double, Box>::iterator insert_iter;
1286  bool create_new_box = true;
1287  if (lower_iter == open_boxes_.end()) //I.e. there is no open Box for that mz position
1288  {
1289  //There is another special case to be considered here:
1290  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1291  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1292  if (!open_boxes_.empty())
1293  {
1294  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1295  {
1296  create_new_box = false;
1297  insert_iter = lower_iter;
1298  }
1299  }
1300  else
1301  {
1302  create_new_box = true;
1303  }
1304  }
1305  else
1306  {
1307  if (upper_iter == open_boxes_.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1308  {
1309  insert_iter = lower_iter;
1310  create_new_box = false;
1311  }
1312  else
1313  {
1314  create_new_box = true;
1315  }
1316  }
1317 
1318 
1319  if (upper_iter != open_boxes_.end() && lower_iter != open_boxes_.end())
1320  {
1321  //Here is the question if you should figure out the smallest charge .... and then
1322 
1323  //Figure out which entry is closer to m/z
1324  double dist_lower = fabs(lower_iter->first - mz);
1325  double dist_upper = fabs(upper_iter->first - mz);
1326  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1327  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1328 
1329  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1330  {
1331  create_new_box = true;
1332  }
1333  else
1334  {
1335  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1336  create_new_box = false;
1337  }
1338  }
1339 
1340  BoxElement element;
1341  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = ref_intens;
1342  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1343 
1344 
1345  if (create_new_box == false)
1346  {
1347  std::pair<UInt, BoxElement> help2(scan, element);
1348  insert_iter->second.insert(help2);
1349 
1350  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1351  Box replacement(insert_iter->second);
1352 
1353  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1354  //Also note that we already inserted the new entry, leading to size-1.
1355  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1356  c_mz /= ((double) insert_iter->second.size());
1357 
1358  //Now let's remove the old and insert the new one
1359  open_boxes_.erase(insert_iter);
1360  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1361  open_boxes_.insert(help3);
1362  }
1363  else
1364  {
1365  std::pair<UInt, BoxElement> help2(scan, element);
1366  std::multimap<UInt, BoxElement> help3;
1367  help3.insert(help2);
1368  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1369  open_boxes_.insert(help4);
1370  }
1371  }
1372 
1373  template <typename PeakType>
1374  void IsotopeWaveletTransform<PeakType>::push2TmpBox_(const double mz, const UInt scan, UInt c,
1375  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
1376  {
1377  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1378 
1379  std::multimap<double, Box>& tmp_box(tmp_boxes_->at(c));
1380  typename std::multimap<double, Box>::iterator upper_iter(tmp_box.upper_bound(mz));
1381  typename std::multimap<double, Box>::iterator lower_iter(tmp_box.lower_bound(mz));
1382 
1383  if (lower_iter != tmp_box.end())
1384  {
1385  //Ugly, but necessary due to the implementation of STL lower_bound
1386  if (mz != lower_iter->first && lower_iter != tmp_box.begin())
1387  {
1388  --lower_iter;
1389  }
1390  }
1391 
1392  typename std::multimap<double, Box>::iterator insert_iter;
1393  bool create_new_box = true;
1394  if (lower_iter == tmp_box.end()) //I.e. there is no tmp Box for that mz position
1395  {
1396  //There is another special case to be considered here:
1397  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1398  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1399  if (!tmp_box.empty())
1400  {
1401  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1402  {
1403  create_new_box = false;
1404  insert_iter = lower_iter;
1405  }
1406  }
1407  else
1408  {
1409  create_new_box = true;
1410  }
1411  }
1412  else
1413  {
1414  if (upper_iter == tmp_box.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1415  {
1416  insert_iter = lower_iter;
1417  create_new_box = false;
1418  }
1419  else
1420  {
1421  create_new_box = true;
1422  }
1423  }
1424 
1425 
1426  if (upper_iter != tmp_box.end() && lower_iter != tmp_box.end())
1427  {
1428  //Figure out which entry is closer to m/z
1429  double dist_lower = fabs(lower_iter->first - mz);
1430  double dist_upper = fabs(upper_iter->first - mz);
1431  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1432  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1433 
1434  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1435  {
1436  create_new_box = true;
1437  }
1438  else
1439  {
1440  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1441  create_new_box = false;
1442  }
1443  }
1444 
1445  BoxElement element;
1446  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = -1000;
1447  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1448 
1449  if (create_new_box == false)
1450  {
1451  std::pair<UInt, BoxElement> help2(scan, element);
1452  insert_iter->second.insert(help2);
1453 
1454  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1455  Box replacement(insert_iter->second);
1456 
1457  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1458  //Also note that we already inserted the new entry, leading to size-1.
1459  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1460  c_mz /= ((double) insert_iter->second.size());
1461 
1462  //Now let's remove the old and insert the new one
1463  tmp_box.erase(insert_iter);
1464  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1465  tmp_box.insert(help3);
1466  }
1467  else
1468  {
1469  std::pair<UInt, BoxElement> help2(scan, element);
1470  std::multimap<UInt, BoxElement> help3;
1471  help3.insert(help2);
1472 
1473  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1474  tmp_box.insert(help4);
1475  }
1476  }
1477 
1478  template <typename PeakType>
1479  void IsotopeWaveletTransform<PeakType>::updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
1480  const UInt RT_votes_cutoff, const Int front_bound, const Int end_bound)
1481  {
1482  typename std::multimap<double, Box>::iterator iter, iter2;
1483 
1484  if ((Int)scan_index == end_bound && end_bound != (Int)map.size() - 1)
1485  {
1486  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); ++iter)
1487  {
1488 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1489  std::cout << "LOW THREAD insert in end_box " << iter->first << std::endl;
1490  typename Box::iterator dings;
1491  for (dings = iter->second.begin(); dings != iter->second.end(); ++dings)
1492  std::cout << map[dings->first].getRT() << "\t" << dings->second.c + 1 << std::endl;
1493 #endif
1494  end_boxes_.insert(*iter);
1495  }
1496  open_boxes_.clear();
1497  return;
1498  }
1499 
1500  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); )
1501  {
1502 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1503  if (front_bound > 0)
1504  {
1505  std::cout << "HIGH THREAD open box. " << iter->first << "\t current scan index " << scan_index << "\t" << ((iter->second.begin()))->first << "\t of last scan " << map.size() - 1 << "\t" << front_bound << std::endl;
1506  }
1507 #endif
1508 
1509  //For each Box we need to figure out, if and when the last RT value has been inserted
1510  UInt lastScan = (--(iter->second.end()))->first;
1511  if (scan_index - lastScan > RT_interleave + 1 || scan_index == map.size() - 1) //I.e. close the box!
1512  {
1513  if (iter->second.begin()->first - front_bound <= RT_interleave + 1 && front_bound > 0)
1514  {
1515  iter2 = iter;
1516  ++iter2;
1517 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1518  std::cout << "HIGH THREAD insert in front_box " << iter->first << std::endl;
1519 #endif
1520  front_boxes_.insert(*iter);
1521  open_boxes_.erase(iter);
1522  iter = iter2;
1523  continue;
1524  }
1525 
1526  iter2 = iter;
1527  ++iter2;
1528  //Please do **NOT** simplify the upcoming lines.
1529  //The 'obvious' overhead is necessary since the object represented by iter might be erased
1530  //by push2Box which might be called by extendBox_.
1531  if (iter->second.size() >= RT_votes_cutoff)
1532  {
1533  //extendBox_ (map, iter->second);
1534  iter = iter2;
1535  closed_boxes_.insert(*(--iter));
1536  }
1537  open_boxes_.erase(iter);
1538  iter = iter2;
1539  }
1540  else
1541  {
1542  ++iter;
1543  }
1544  }
1545  }
1546 
1547  template <typename PeakType>
1549  {
1550 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1551  std::cout << "**** CHECKING FOR BOX EXTENSIONS ****" << std::endl;
1552 #endif
1553 
1554  //Determining the elution profile
1555  typename Box::const_iterator iter;
1556  std::vector<double> elution_profile(box.size());
1557  UInt index = 0;
1558  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1559  {
1560  for (Size i = iter->second.MZ_begin; i != iter->second.MZ_end; ++i)
1561  {
1562  elution_profile[index] += map[iter->second.RT_index][i].getIntensity();
1563  }
1564  elution_profile[index] /= iter->second.MZ_end - iter->second.MZ_begin + 1.;
1565  }
1566 
1567  double max = 0;
1568  Int max_index = INT_MIN;
1569  for (Size i = 0; i < elution_profile.size(); ++i)
1570  {
1571  if (elution_profile[i] > max)
1572  {
1573  max_index = i;
1574  max = elution_profile[i];
1575  }
1576  }
1577 
1578  Int max_extension = (Int)(elution_profile.size()) - 2 * max_index;
1579 
1580  double av_elution = 0;
1581  for (Size i = 0; i < elution_profile.size(); ++i)
1582  {
1583  av_elution += elution_profile[i];
1584  }
1585  av_elution /= (double)elution_profile.size();
1586 
1587  double sd_elution = 0;
1588  for (Size i = 0; i < elution_profile.size(); ++i)
1589  {
1590  sd_elution += (av_elution - elution_profile[i]) * (av_elution - elution_profile[i]);
1591  }
1592  sd_elution /= (double)(elution_profile.size() - 1);
1593  sd_elution = sqrt(sd_elution);
1594 
1595  //Determine average m/z monoisotopic pos
1596  double av_mz = 0;
1597  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1598  {
1599  av_mz += iter->second.mz;
1600 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1601  std::cout << iter->second.RT << "\t" << iter->second.mz << "\t" << iter->second.c + 1 << std::endl;
1602 #endif
1603  }
1604  av_mz /= (double)box.size();
1605 
1606 
1607  //Boundary check
1608  if ((Int)(box.begin()->second.RT_index) - 1 < 0)
1609  {
1610  return;
1611  }
1612 
1613  UInt pre_index = box.begin()->second.RT_index - 1;
1614  typename MSSpectrum::const_iterator c_iter = map[pre_index].MZBegin(av_mz);
1615  double pre_elution = 0;
1616 
1617  double mz_start = map[pre_index + 1][box.begin()->second.MZ_begin].getMZ();
1618  double mz_end = map[pre_index + 1][box.begin()->second.MZ_end].getMZ();
1619 
1620  typename MSSpectrum::const_iterator mz_start_iter = map[pre_index].MZBegin(mz_start), mz_end_iter = map[pre_index].MZBegin(mz_end);
1621  for (typename MSSpectrum::const_iterator mz_iter = mz_start_iter; mz_iter != mz_end_iter; ++mz_iter)
1622  {
1623  pre_elution += mz_iter->getIntensity();
1624  }
1625 
1626 
1627  //Do we need to extend at all?
1628  if (pre_elution <= av_elution - 2 * sd_elution)
1629  {
1630  return;
1631  }
1632 
1633  Int c_index = max_extension;
1634  Int first_index = box.begin()->second.RT_index;
1635  for (Int i = 1; i < max_extension; ++i)
1636  {
1637  c_index = first_index - i;
1638  if (c_index < 0)
1639  {
1640  break;
1641  }
1642 
1643  //CHECK Majority vote for charge???????????????
1644 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1645  std::cout << box.begin()->second.RT << "\t" << av_mz << "\t" << box.begin()->second.c + 1 << "\t" << " extending the box " << std::endl;
1646 #endif
1647 
1648  push2Box_(av_mz, c_index, box.begin()->second.c, box.begin()->second.score, c_iter->getIntensity(),
1649  map[c_index].getRT(), box.begin()->second.MZ_begin, box.begin()->second.MZ_end);
1650  }
1651  }
1652 
1653  template <typename PeakType>
1655  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1656  {
1657  typename std::multimap<double, Box>::iterator iter;
1658  typename Box::iterator box_iter;
1659  std::vector<BoxElement> final_box;
1660  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1661  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1662 
1663  typename std::pair<double, double> c_extend;
1664  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1665  {
1666  Box& c_box = iter->second;
1667  av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1668  virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1669 
1670  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1671  {
1672  if (box_iter->second.score == 0) //virtual helping point
1673  {
1674  if (count != 0)
1675  continue; //it is in any way not pure virtual
1676 
1677  c_mz = box_iter->second.mz;
1678  virtual_av_intens += box_iter->second.intens;
1679  virtual_av_abs_intens += fabs(box_iter->second.intens);
1680  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1681  ++virtual_count;
1682  }
1683  else
1684  {
1685  c_mz = box_iter->second.mz;
1686  av_score += box_iter->second.score;
1687  av_intens += box_iter->second.intens;
1688  av_abs_intens += fabs(box_iter->second.intens);
1689  av_mz += c_mz * fabs(box_iter->second.intens);
1690  ++count;
1691  }
1692  }
1693 
1694  if (count == 0) //pure virtual helping box
1695  {
1696  av_intens = virtual_av_intens / virtual_count;
1697  av_score = 0;
1698  av_mz = virtual_av_mz / virtual_av_abs_intens;
1699  }
1700  else
1701  {
1702  av_intens /= count;
1703  av_score /= count;
1704  av_mz /= av_abs_intens;
1705  }
1706 
1707  BoxElement c_box_element;
1708  c_box_element.mz = av_mz;
1709  c_box_element.c = c;
1710  c_box_element.score = av_score;
1711  c_box_element.intens = av_intens;
1712 
1713  c_box_element.RT = c_box.begin()->second.RT;
1714 
1715  final_box.push_back(c_box_element);
1716  }
1717 
1718  UInt num_o_feature = final_box.size();
1719  if (num_o_feature == 0)
1720  {
1721  tmp_boxes_->at(c).clear();
1722  return;
1723  }
1724 
1725  //Computing the derivatives
1726  std::vector<double> bwd_diffs(num_o_feature, 0);
1727 
1728  bwd_diffs[0] = 0;
1729  for (UInt i = 1; i < num_o_feature; ++i)
1730  {
1731  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1732  }
1733 
1734 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1735  std::ofstream ofile_bwd("bwd_gpu.dat");
1736  for (UInt i = 0; i < num_o_feature; ++i)
1737  {
1738  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1739  }
1740  ofile_bwd.close();
1741 #endif
1742 
1743  for (UInt i = 0; i < num_o_feature - 1; ++i)
1744  {
1745  while (i < num_o_feature - 2)
1746  {
1747  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1748  break;
1749  ++i;
1750  }
1751 
1752  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1753  {
1754  checkPositionForPlausibility_(candidates, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1755  continue;
1756  }
1757  }
1758 
1759  tmp_boxes_->at(c).clear();
1760  }
1761 
1762  template <typename PeakType>
1764  {
1765  FeatureMap feature_map;
1766  typename std::multimap<double, Box>::iterator iter;
1767  typename Box::iterator box_iter;
1768  UInt best_charge_index; double best_charge_score, c_mz, c_RT; UInt c_charge;
1769  double av_intens = 0, av_ref_intens = 0, av_score = 0, av_mz = 0, av_RT = 0, mz_cutoff, sum_of_ref_intenses_g;
1770  bool restart = false;
1771 
1772  typename std::pair<double, double> c_extend;
1773  for (iter = closed_boxes_.begin(); iter != closed_boxes_.end(); ++iter)
1774  {
1775  sum_of_ref_intenses_g = 0;
1776  Box& c_box = iter->second;
1777  std::vector<double> charge_votes(max_charge_, 0), charge_binary_votes(max_charge_, 0);
1778  restart = false;
1779 
1780  //Let's first determine the charge
1781  //Therefor, we can use two types of votes: qualitative ones (charge_binary_votes) or quantitative ones (charge_votes)
1782  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1783  {
1784 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1785  if (trunc(box_iter->second.mz) == 874)
1786  std::cout << box_iter->second.c << "\t" << box_iter->second.intens << "\t" << box_iter->second.score << std::endl;
1787 #endif
1788 
1789  if (box_iter->second.score == -1000)
1790  {
1791  restart = true;
1792  break;
1793  }
1794 
1795  charge_votes[box_iter->second.c] += box_iter->second.intens; //score; Do not use score, can get problematic for charge state 2 vs 4
1796  ++charge_binary_votes[box_iter->second.c];
1797  }
1798 
1799  if (restart)
1800  {
1801  continue;
1802  }
1803 
1804  //... determining the best fitting charge
1805  best_charge_index = 0; best_charge_score = 0;
1806  for (UInt i = 0; i < max_charge_; ++i)
1807  {
1808  if (charge_votes[i] > best_charge_score)
1809  {
1810  best_charge_index = i;
1811  best_charge_score = charge_votes[i];
1812  }
1813  }
1814 
1815  //Pattern found in too few RT scan
1816  if (charge_binary_votes[best_charge_index] < RT_votes_cutoff && RT_votes_cutoff <= map.size())
1817  {
1818  continue;
1819  }
1820 
1821  c_charge = best_charge_index + 1; //that's the finally predicted charge state for the pattern
1822 
1823  av_intens = 0, av_ref_intens = 0, av_score = 0, av_mz = 0, av_RT = 0;
1824  //Now, let's get the RT boundaries for the box
1825  std::vector<DPosition<2> > point_set;
1826  double sum_of_ref_intenses_l;
1827  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1828  {
1829  sum_of_ref_intenses_l = 0;
1830  c_mz = box_iter->second.mz;
1831  c_RT = box_iter->second.RT;
1832 
1833  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_mz, c_charge);
1834 
1835  point_set.push_back(DPosition<2>(c_RT, c_mz - Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge));
1836  //-1 since we are already at the first peak and +0.75, since this includes the last peak of the wavelet as a whole
1837  point_set.push_back(DPosition<2>(c_RT, c_mz + mz_cutoff / (double)c_charge));
1838 
1839 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1840  std::cout << "Intenstype: " << intenstype_ << std::endl;
1841 #endif
1842  if (intenstype_ == "ref")
1843  {
1844  //Find monoisotopic max
1845  const MSSpectrum& c_spec(map[box_iter->second.RT_index]);
1846  //'Correct' possible shift
1847  for (unsigned int i = 0; i < mz_cutoff; ++i)
1848  {
1849  typename MSSpectrum::const_iterator h_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge + Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge), hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1850 
1851  hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1852 
1853  while (h_iter != c_spec.begin())
1854  {
1855 
1856 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1857  if (trunc(c_mz) == 874)
1858  {
1859  std::cout << "cmz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << h_iter->getMZ() << "\t" << h_iter->getIntensity() << std::endl;
1860  }
1861 #endif
1862 
1863  --h_iter;
1864  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1865  {
1866  hc_iter = h_iter;
1867  }
1868 
1869  if (c_mz + i * Constants::IW_NEUTRON_MASS / c_charge - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge)
1870  {
1871  break;
1872  }
1873  }
1874 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1875  if (trunc(c_mz) == 874)
1876  {
1877  std::cout << "c_mz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << i * Constants::IW_NEUTRON_MASS / c_charge << "\t";
1878  }
1879 #endif
1880  sum_of_ref_intenses_l += hc_iter->getIntensity();
1881 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1882  if (trunc(c_mz) == 874)
1883  {
1884  std::cout << sum_of_ref_intenses_l << "********" << std::endl;
1885  }
1886 #endif
1887  }
1888  }
1889 
1890  if (best_charge_index == box_iter->second.c)
1891  {
1892  av_score += box_iter->second.score;
1893  av_intens += box_iter->second.intens;
1894  av_ref_intens += box_iter->second.ref_intens;
1895  sum_of_ref_intenses_g += sum_of_ref_intenses_l;
1896  av_mz += c_mz * box_iter->second.intens;
1897  }
1898  av_RT += c_RT;
1899  }
1900 
1901  av_mz /= av_intens;
1902  av_ref_intens /= (double)charge_binary_votes[best_charge_index];
1903  av_score /= (double)charge_binary_votes[best_charge_index];
1904  av_RT /= (double)c_box.size();
1905 
1906 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1907  if (trunc(av_mz) == 874)
1908  std::cout << av_mz << "\t" << best_charge_index << "\t" << best_charge_score << std::endl;
1909 #endif
1910 
1911  Feature c_feature;
1912  ConvexHull2D c_conv_hull;
1913  c_conv_hull.addPoints(point_set);
1914  c_feature.setCharge(c_charge);
1915  c_feature.setConvexHulls(std::vector<ConvexHull2D>(1, c_conv_hull));
1916 
1917  //This makes the intensity value independent of the m/z (the lambda) value (Skellam distribution)
1918  if (intenstype_ == "corrected")
1919  {
1920  double lambda = IsotopeWavelet::getLambdaL(av_mz * c_charge);
1921  av_intens /= exp(-2 * lambda) * boost::math::cyl_bessel_i(0, 2 * lambda);
1922  }
1923  if (intenstype_ == "ref")
1924  {
1925 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1926  if (trunc(c_mz) == 874)
1927  {
1928  std::cout << sum_of_ref_intenses_g << "####" << std::endl;
1929  }
1930 #endif
1931 
1932  av_intens = sum_of_ref_intenses_g;
1933  }
1934 
1935  c_feature.setMZ(av_mz);
1936  c_feature.setIntensity(av_intens);
1937  c_feature.setRT(av_RT);
1938  c_feature.setOverallQuality(av_score);
1939  feature_map.push_back(c_feature);
1940  }
1941 
1942  return feature_map;
1943  }
1944 
1945  template <typename PeakType>
1947  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
1948  {
1949  typename MSSpectrum::const_iterator iter, ref_iter;
1950  UInt peak_cutoff;
1951  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
1952 
1953  iter = candidate.MZBegin(seed_mz);
1954  //we can ignore those cases
1955  if (iter == candidate.begin() || iter == candidate.end())
1956  {
1957  return false;
1958  }
1959 
1960  std::pair<double, double> reals;
1961  ref_iter = ref.MZBegin(seed_mz);
1962  //Correct the position
1963  double real_mz, real_intens;
1964  if (check_PPMs)
1965  {
1966  reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
1967  real_mz = reals.first, real_intens = reals.second;
1968  //if (real_mz <= 0 || real_intens <= 0)
1969  //{
1970  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1971  while (h_iter != ref.begin())
1972  {
1973  --h_iter;
1974  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1975  {
1976  hc_iter = h_iter;
1977  }
1978  else
1979  {
1980  break;
1981  }
1982 
1983  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1984  {
1985  return false;
1986  }
1987  }
1988  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
1989  real_mz = reals.first, real_intens = reals.second;
1990  if (real_mz <= 0 || real_intens <= 0)
1991  {
1992  return false;
1993  }
1994  real_mz = h_iter->getMZ();
1995  real_intens = h_iter->getIntensity();
1996  //}
1997  }
1998  else
1999  {
2000  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
2001  real_mz = reals.first, real_intens = reals.second;
2002 
2003  if (real_mz <= 0 || real_intens <= 0)
2004  {
2005  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
2006  while (h_iter != ref.begin())
2007  {
2008  --h_iter;
2009  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2010  {
2011  hc_iter = h_iter;
2012  }
2013  else
2014  {
2015  break;
2016  }
2017 
2018  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2019  {
2020  return false;
2021  }
2022  }
2023  real_mz = h_iter->getMZ(), real_intens = h_iter->getIntensity();
2024  if (real_mz <= 0 || real_intens <= 0)
2025  {
2026  return false;
2027  }
2028  real_mz = h_iter->getMZ();
2029  real_intens = h_iter->getIntensity();
2030  }
2031  }
2032 
2033  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
2034 
2035  if (c_score <= 0)
2036  {
2037  return false;
2038  }
2039 
2040  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
2041  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
2042  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2043  if (real_r_MZ_iter == ref.end())
2044  {
2045  --real_r_MZ_iter;
2046  }
2047 
2048 
2049  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2050  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2051 
2052  if (prev_score == -1000)
2053  {
2054  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2055  }
2056  else
2057  {
2058  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2059  }
2060  return true;
2061  }
2062 
2063  template <typename PeakType>
2065  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
2066  {
2067  typename MSSpectrum::const_iterator iter, ref_iter;
2068  UInt peak_cutoff;
2069  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
2070 
2071  iter = candidate.MZBegin(seed_mz);
2072  //we can ignore those cases
2073  if (iter == candidate.begin() || iter == candidate.end())
2074  {
2075  return false;
2076  }
2077 
2078  std::pair<double, double> reals;
2079  ref_iter = ref.MZBegin(seed_mz);
2080  //Correct the position
2081  double real_mz, real_intens;
2082  if (check_PPMs)
2083  {
2084  reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
2085  real_mz = reals.first, real_intens = reals.second;
2086  //if (real_mz <= 0 || real_intens <= 0)
2087  //{
2088  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
2089  while (h_iter != ref.begin())
2090  {
2091  --h_iter;
2092  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2093  {
2094  hc_iter = h_iter;
2095  }
2096  else
2097  {
2098  break;
2099  }
2100 
2101  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2102  {
2103  return false;
2104  }
2105  }
2106  ++h_iter;
2107  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
2108  real_mz = reals.first, real_intens = reals.second;
2109 
2110 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2111  std::cout << "Plausibility check old_mz: " << iter->getMZ() << "\t" << real_mz << std::endl;
2112 #endif
2113 
2114  if (real_mz <= 0 || real_intens <= 0)
2115  {
2116  return false;
2117  }
2118  real_mz = h_iter->getMZ();
2119  real_intens = h_iter->getIntensity();
2120  //}
2121  }
2122  else
2123  {
2124  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
2125  real_mz = reals.first, real_intens = reals.second;
2126 
2127  if (real_mz <= 0 || real_intens <= 0)
2128  {
2129  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
2130  while (h_iter != ref.begin())
2131  {
2132  --h_iter;
2133  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2134  {
2135  hc_iter = h_iter;
2136  }
2137  else
2138  {
2139  break;
2140  }
2141 
2142  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2143  {
2144  return false;
2145  }
2146  }
2147  real_mz = h_iter->getMZ(), real_intens = h_iter->getIntensity();
2148  if (real_mz <= 0 || real_intens <= 0)
2149  {
2150  return false;
2151  }
2152  real_mz = h_iter->getMZ();
2153  real_intens = h_iter->getIntensity();
2154  }
2155  }
2156 
2157  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
2158 
2159  if (c_score <= 0)
2160  {
2161  return false;
2162  }
2163 
2164  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
2165  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
2166  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2167  if (real_r_MZ_iter == ref.end())
2168  {
2169  --real_r_MZ_iter;
2170  }
2171 
2172 
2173  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2174  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2175 
2176  if (prev_score == -1000)
2177  {
2178  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2179  }
2180  else
2181  {
2182  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2183  }
2184 
2185  return true;
2186  }
2187 
2188  template <typename PeakType>
2189  std::pair<double, double> IsotopeWaveletTransform<PeakType>::checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c)
2190  {
2191  double mass = c_mz * (c + 1) - Constants::IW_PROTON_MASS * (c);
2192  double ppms = getPPMs_(peptideMassRule_(mass), mass);
2194  {
2195 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2196  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "REJECT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2197 #endif
2198  return std::pair<double, double>(-1, -1);
2199  }
2200 
2201 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2202  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "ACCEPT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2203 #endif
2204  return std::pair<double, double>(c_mz, ref.MZBegin(c_mz)->getIntensity());
2205  }
2206 
2207 } //namespace
2208 
2209 #pragma clang diagnostic pop
2210 
2211 
UInt MZ_end
Index.
Definition: IsotopeWaveletTransform.h:85
void setTransIntensity(const UInt i, const double intens)
Definition: IsotopeWaveletTransform.h:152
void mergeFeatures(IsotopeWaveletTransform< PeakType > *later_iwt, const UInt RT_interleave, const UInt RT_votes_cutoff)
double getMZ(const UInt i) const
Definition: IsotopeWaveletTransform.h:134
virtual std::pair< double, double > checkPPMTheoModel_(const MSSpectrum &ref, const double c_mz, const UInt c)
Definition: IsotopeWaveletTransform.h:2189
double score
The associated score.
Definition: IsotopeWaveletTransform.h:79
UInt max_num_peaks_per_pattern_
Definition: IsotopeWaveletTransform.h:524
virtual void destroy()
Definition: IsotopeWaveletTransform.h:119
A more convenient string class.
Definition: String.h:57
std::vector< double > xs_
Definition: IsotopeWaveletTransform.h:520
MSSpectrum::const_iterator begin() const
Definition: IsotopeWaveletTransform.h:198
bool intensityPointerComparator(PeakType *a, PeakType *b)
Definition: IsotopeWaveletTransform.h:547
A 2-dimensional raw data point or peak.
Definition: Peak2D.h:54
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
bool hr_data_
Definition: IsotopeWaveletTransform.h:525
static double getLambdaL(const double m)
Returns the mass-parameter lambda (linear fit).
virtual double scoreThis_(const TransSpectrum &candidate, const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
Given a candidate for an isotopic pattern, this function computes the corresponding score...
Definition: IsotopeWaveletTransform.h:1007
MSSpectrum::const_iterator MZEnd(const double mz) const
Definition: IsotopeWaveletTransform.h:184
const double IW_PROTON_MASS
Definition: IsotopeWaveletConstants.h:68
double getSigma() const
Definition: IsotopeWaveletTransform.h:340
virtual void getTransformHighRes(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:646
A container for features.
Definition: FeatureMap.h:93
void sortByIntensity(bool reverse=false)
Sorting.
Definition: ConstRefVector.h:751
std::vector< double > prod_
Definition: IsotopeWaveletTransform.h:520
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
double min_spacing_
Definition: IsotopeWaveletTransform.h:530
double getTransIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:146
Iterator begin()
Definition: MSExperiment.h:157
Iterator MZEnd(CoordinateType mz)
Binary search for peak range end (returns the past-the-end iterator)
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
std::vector< std::multimap< double, Box > > * tmp_boxes_
Definition: IsotopeWaveletTransform.h:517
double peptideMassRule_(const double c_mass) const
Returns the monoisotopic mass (with corresponding decimal values) we would expect at c_mass...
Definition: IsotopeWaveletTransform.h:487
std::multimap< double, Box > front_boxes_
Definition: IsotopeWaveletTransform.h:516
const double c
static double getValueByLambda(const double lambda, const double tz1)
Returns the value of the isotope wavelet at position t via a fast table lookup. Usually, you do not need to call this function. Please use.
Size size() const
Definition: MSExperiment.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
double getRT() const
Definition: IsotopeWaveletTransform.h:128
IntensityType getIntensity() const
Definition: Peak2D.h:166
const MSSpectrum * reference_
The reference spectrum.
Definition: IsotopeWaveletTransform.h:205
A 2-dimensional hull representation in [counter]clockwise direction - depending on axis labelling...
Definition: ConvexHull2D.h:72
Internally (only by GPUs) used data structure . It allows efficient data exchange between CPU and GPU...
Definition: IsotopeWaveletTransform.h:94
Iterator MZBegin(CoordinateType mz)
Binary search for peak range begin.
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:134
std::vector< double > interpol_xs_
Definition: IsotopeWaveletTransform.h:521
bool intensityComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:535
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:172
virtual void getTransform(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:603
static UInt getNumPeakCutOff(const double mass, const UInt z)
std::vector< double > psi_
Definition: IsotopeWaveletTransform.h:520
double sigma_
Definition: IsotopeWaveletTransform.h:519
std::vector< double > c_spacings_
Definition: IsotopeWaveletTransform.h:520
void setSigma(const double sigma)
Definition: IsotopeWaveletTransform.h:345
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
Int from_max_to_right_
Definition: IsotopeWaveletTransform.h:527
std::vector< double > c_mzs_
Definition: IsotopeWaveletTransform.h:520
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
double mz
The monoisotopic position.
Definition: IsotopeWaveletTransform.h:77
double RT
The elution time (not the scan index)
Definition: IsotopeWaveletTransform.h:82
UInt MZ_begin
Index.
Definition: IsotopeWaveletTransform.h:84
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:196
double getSdIntens_(const TransSpectrum &scan, const double mean)
Computes the standard deviation (neglecting negative values) of the (transformed) intensities of scan...
Definition: IsotopeWaveletTransform.h:1253
virtual void initializeScan(const MSSpectrum &c_ref, const UInt c=0)
Definition: IsotopeWaveletTransform.h:684
virtual std::multimap< double, Box > getClosedBoxes()
Returns the closed boxes.
Definition: IsotopeWaveletTransform.h:316
MSSpectrum::const_iterator end() const
Definition: IsotopeWaveletTransform.h:191
double getLinearInterpolation(const typename MSSpectrum::const_iterator &left_iter, const double mz_pos, const typename MSSpectrum::const_iterator &right_iter)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:324
double av_MZ_spacing_
Definition: IsotopeWaveletTransform.h:519
A class implementing the isotope wavelet transform. If you just want to find features using the isoto...
Definition: IsotopeWaveletTransform.h:69
double getPPMs_(const double mass_a, const double mass_b) const
Returns the parts-per-million deviation of the masses.
Definition: IsotopeWaveletTransform.h:510
double ref_intens
Definition: IsotopeWaveletTransform.h:81
double getAvMZSpacing_(const MSSpectrum &scan)
Computes the average MZ spacing of scan.
Definition: IsotopeWaveletTransform.h:1220
void updateBoxStates(const PeakMap &map, const Size scan_index, const UInt RT_interleave, const UInt RT_votes_cutoff, const Int front_bound=-1, const Int end_bound=-1)
A function keeping track of currently open and closed sweep line boxes. This function is used by the ...
Definition: IsotopeWaveletTransform.h:1479
An LC-MS feature.
Definition: Feature.h:70
std::vector< double > interpol_ys_
Definition: IsotopeWaveletTransform.h:521
Mutable iterator for the ConstRefVector.
Definition: ConstRefVector.h:241
Size getMaxScanSize() const
Definition: IsotopeWaveletTransform.h:357
String intenstype_
Definition: IsotopeWaveletTransform.h:526
IsotopeWaveletTransform()
Default Constructor.
Definition: IsotopeWaveletTransform.h:559
virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
Essentially the same function as.
Definition: IsotopeWaveletTransform.h:1374
bool positionComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:553
const double IW_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:54
Int from_max_to_left_
Definition: IsotopeWaveletTransform.h:527
void sampleTheCMarrWavelet_(const MSSpectrum &scan, const Int wavelet_length, const Int mz_index, const UInt charge)
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
std::multimap< double, Box > end_boxes_
Definition: IsotopeWaveletTransform.h:516
double getRefIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:140
void setConvexHulls(const std::vector< ConvexHull2D > &hulls)
Set the convex hulls of single mass traces.
std::multimap< double, Box > open_boxes_
Definition: IsotopeWaveletTransform.h:516
FeatureMap mapSeeds2Features(const PeakMap &map, const UInt RT_votes_cutoff)
Filters the candidates further more and maps the internally used data structures to the OpenMS framew...
Definition: IsotopeWaveletTransform.h:1763
const double PEPTIDE_MASS_RULE_BOUND
Definition: IsotopeWaveletConstants.h:50
std::multimap< double, Box > closed_boxes_
Definition: IsotopeWaveletTransform.h:516
virtual void computeMinSpacing(const MSSpectrum &c_ref)
Definition: IsotopeWaveletTransform.h:726
void setOverallQuality(QualityType q)
Set the overall quality.
void extendBox_(const PeakMap &map, const Box &box)
A currently still necessary function that extends the box box in order to capture also signals whose ...
Definition: IsotopeWaveletTransform.h:1548
double max_mz_cutoff_
Definition: IsotopeWaveletTransform.h:530
std::vector< float > scores_
Definition: IsotopeWaveletTransform.h:531
TransSpectrum(const MSSpectrum *reference)
Definition: IsotopeWaveletTransform.h:107
static UInt getMzPeakCutOffAtMonoPos(const double mass, const UInt z)
UInt data_length_
Definition: IsotopeWaveletTransform.h:524
double getAvIntens_(const TransSpectrum &scan)
Computes the average (transformed) intensity (neglecting negative values) of scan.
Definition: IsotopeWaveletTransform.h:1239
virtual bool checkPositionForPlausibility_(const TransSpectrum &candidate, const MSSpectrum &ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
A ugly but necessary function to handle "off-by-1-Dalton predictions" due to idiosyncrasies of the da...
Definition: IsotopeWaveletTransform.h:2064
const MSSpectrum * getRefSpectrum() const
Definition: IsotopeWaveletTransform.h:170
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
bool intensityAscendingComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:541
double getMinSpacing() const
Definition: IsotopeWaveletTransform.h:352
TransSpectrum()
Definition: IsotopeWaveletTransform.h:101
Size size() const
Definition: IsotopeWaveletTransform.h:158
virtual ~IsotopeWaveletTransform()
Destructor.
Definition: IsotopeWaveletTransform.h:597
void clear(bool clear_meta_data)
Clears all data and meta data.
UInt c
Note, this is not the charge (it is charge-1!!!)
Definition: IsotopeWaveletTransform.h:78
std::vector< int > indices_
Definition: IsotopeWaveletTransform.h:528
void setCharge(const ChargeType &ch)
Set charge state.
virtual void identifyCharge(const MSSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
Given an isotope wavelet transformed spectrum candidates, this function assigns to every significant ...
Definition: IsotopeWaveletTransform.h:736
const MSSpectrum * getRefSpectrum()
Definition: IsotopeWaveletTransform.h:164
void addPoints(const PointArrayType &points)
std::vector< float > * trans_intens_
The intensities of the transform.
Definition: IsotopeWaveletTransform.h:206
virtual ~TransSpectrum()
Definition: IsotopeWaveletTransform.h:114
UInt max_charge_
Definition: IsotopeWaveletTransform.h:524
std::multimap< UInt, BoxElement > Box
Key: RT index, value: BoxElement.
Definition: IsotopeWaveletTransform.h:88
double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:335
virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens)
Inserts a potential isotopic pattern into an open box or - if no such box exists - creates a new one...
Definition: IsotopeWaveletTransform.h:1268
This vector holds pointer to the elements of another container.
Definition: ConstRefVector.h:70
const double IW_HALF_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:55
UInt RT_index
The elution time (map) index.
Definition: IsotopeWaveletTransform.h:83
const double PEPTIDE_MASS_RULE_FACTOR
Definition: IsotopeWaveletConstants.h:49
static IsotopeWavelet * init(const double max_m, const UInt max_charge)
Size max_scan_size_
Definition: IsotopeWaveletTransform.h:523
double getRT() const
int Int
Signed integer type.
Definition: Types.h:102
const double IW_QUARTER_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:56
const unsigned int DEFAULT_NUM_OF_INTERPOLATION_POINTS
Definition: IsotopeWaveletConstants.h:43
double intens
The transformed intensity at the monoisotopic mass.
Definition: IsotopeWaveletTransform.h:80
std::vector< float > zeros_
Definition: IsotopeWaveletTransform.h:531
void clusterSeeds_(const TransSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const bool check_PPMs)
Clusters the seeds stored by push2TmpBox_.
Definition: IsotopeWaveletTransform.h:1654
Internally used data structure.
Definition: IsotopeWaveletTransform.h:75
MSSpectrum::const_iterator MZBegin(const double mz) const
Definition: IsotopeWaveletTransform.h:177
const double PEPTIDE_MASS_RULE_THEO_PPM_BOUND
Definition: IsotopeWaveletConstants.h:51