Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
IsotopeWaveletTransform.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg$
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_ISOTOPEWAVELETTRANSFORM_H
36 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_ISOTOPEWAVELETTRANSFORM_H
37 
47 #include <cmath>
48 #include <math.h>
49 #include <boost/math/special_functions/bessel.hpp>
50 #include <vector>
51 #include <map>
52 #include <sstream>
53 #include <fstream>
54 #include <iomanip>
55 
56 // This code has quite a few strange things in it triggering warnings which
57 // clutters the rest of the diagnostics
58 #pragma clang diagnostic push
59 #pragma clang diagnostic ignored "-Wfloat-equal"
60 #pragma clang diagnostic ignored "-Wconversion"
61 #pragma clang diagnostic ignored "-Wshorten-64-to-32"
62 
63 namespace OpenMS
64 {
65 
69  template <typename PeakType>
71  {
72 public:
73 
74 
76  struct BoxElement
77  {
78  double mz; //<The monoisotopic position
79  UInt c; //<Note, this is not the charge (it is charge-1!!!)
80  double score; //<The associated score
81  double intens; //<The transformed intensity at the monoisotopic mass
82  double ref_intens;
83  double RT; //<The elution time (not the scan index)
84  UInt RT_index; //<The elution time (map) index
85  UInt MZ_begin; //<Index
86  UInt MZ_end; //<Index
87  };
88 
89  typedef std::multimap<UInt, BoxElement> Box;
90 
91 
96  {
98 
99 public:
100 
103  reference_(NULL), trans_intens_(NULL)
104  {
105  }
106 
108  TransSpectrum(const MSSpectrum* reference) :
109  reference_(reference)
110  {
111  trans_intens_ = new std::vector<float>(reference_->size(), 0.0);
112  }
113 
115  virtual ~TransSpectrum()
116  {
117  delete (trans_intens_);
118  }
119 
120  virtual void destroy()
121  {
122  delete (trans_intens_);
123  trans_intens_ = NULL;
124  delete (reference_);
125  reference_ = NULL;
126  }
127 
129  inline double getRT() const
130  {
131  return reference_->getRT();
132  }
133 
135  inline double getMZ(const UInt i) const
136  {
137  return (*reference_)[i].getMZ();
138  }
139 
141  inline double getRefIntensity(const UInt i) const
142  {
143  return (*reference_)[i].getIntensity();
144  }
145 
147  inline double getTransIntensity(const UInt i) const
148  {
149  return (*trans_intens_)[i];
150  }
151 
153  inline void setTransIntensity(const UInt i, const double intens)
154  {
155  (*trans_intens_)[i] = intens;
156  }
157 
159  inline Size size() const
160  {
161  return trans_intens_->size();
162  }
163 
165  inline const MSSpectrum* getRefSpectrum()
166  {
167  return reference_;
168  }
169 
171  inline const MSSpectrum* getRefSpectrum() const
172  {
173  return reference_;
174  }
175 
178  inline typename MSSpectrum::const_iterator MZBegin(const double mz) const
179  {
180  return reference_->MZBegin(mz);
181  }
182 
185  inline typename MSSpectrum::const_iterator MZEnd(const double mz) const
186  {
187  return reference_->MZEnd(mz);
188  }
189 
192  inline typename MSSpectrum::const_iterator end() const
193  {
194  return reference_->end();
195  }
196 
199  inline typename MSSpectrum::const_iterator begin() const
200  {
201  return reference_->begin();
202  }
203 
204 protected:
205 
206  const MSSpectrum* reference_; //<The reference spectrum
207  std::vector<float>* trans_intens_; //<The intensities of the transform
208 
209  };
210 
211 
212 
218  IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size = 0, const bool hr_data = false, const String intenstype = "ref");
219 
221  virtual ~IsotopeWaveletTransform();
222 
223 
228  virtual void getTransform(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
229 
234  virtual void getTransformHighRes(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
235 
255  virtual void identifyCharge(const MSSpectrum& candidates, const MSSpectrum& ref, const UInt scan_index, const UInt c,
256  const double ampl_cutoff, const bool check_PPMs);
257 
258  virtual void initializeScan(const MSSpectrum& c_ref, const UInt c = 0);
259 
260 #ifdef OPENMS_HAS_CUDA
261 
263  virtual int initializeScanCuda(const MSSpectrum& scan, const UInt c = 0);
264 
266  virtual void finalizeScanCuda();
267 
271  virtual void getTransformCuda(TransSpectrum& c_trans, const UInt c);
272 
288  virtual void identifyChargeCuda(const TransSpectrum& candidates, const UInt scan_index, const UInt c,
289  const double ampl_cutoff, const bool check_PPMs);
290 
293  virtual int sortCuda(MSSpectrum& sorted);
294 #endif
295 
296 
303  void updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
304  const UInt RT_votes_cutoff, const Int front_bound = -1, const Int end_bound = -1);
305 
306 
307  void mergeFeatures(IsotopeWaveletTransform<PeakType>* later_iwt, const UInt RT_interleave, const UInt RT_votes_cutoff);
308 
309 
314  FeatureMap mapSeeds2Features(const PeakMap& map, const UInt RT_votes_cutoff);
315 
317  virtual std::multimap<double, Box> getClosedBoxes()
318  { return closed_boxes_; }
319 
320 
325  inline double getLinearInterpolation(const typename MSSpectrum::const_iterator& left_iter, const double mz_pos, const typename MSSpectrum::const_iterator& right_iter)
326  {
327  return left_iter->getIntensity() + (right_iter->getIntensity() - left_iter->getIntensity()) / (right_iter->getMZ() - left_iter->getMZ()) * (mz_pos - left_iter->getMZ());
328  }
329 
336  inline double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
337  {
338  return intens_a + (intens_b - intens_a) / (mz_b - mz_a) * (mz_pos - mz_a);
339  }
340 
341  inline double getSigma() const
342  {
343  return sigma_;
344  }
345 
346  inline void setSigma(const double sigma)
347  {
348  sigma_ = sigma;
349  }
350 
351  virtual void computeMinSpacing(const MSSpectrum& c_ref);
352 
353  inline double getMinSpacing() const
354  {
355  return min_spacing_;
356  }
357 
358  inline Size getMaxScanSize() const
359  {
360  return max_scan_size_;
361  }
362 
363 protected:
364 
365 
369 
370 
371  inline void sampleTheCMarrWavelet_(const MSSpectrum& scan, const Int wavelet_length, const Int mz_index, const UInt charge);
372 
373 
380  virtual double scoreThis_(const TransSpectrum& candidate, const UInt peak_cutoff,
381  const double seed_mz, const UInt c, const double ampl_cutoff);
382 
389  virtual double scoreThis_(const MSSpectrum& candidate, const UInt peak_cutoff,
390  const double seed_mz, const UInt c, const double ampl_cutoff);
391 
392 
400  virtual bool checkPositionForPlausibility_(const TransSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
401  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
402 
410  virtual bool checkPositionForPlausibility_(const MSSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
411  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
412 
413  virtual std::pair<double, double> checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c);
414 
415 
417  inline double getAvIntens_(const TransSpectrum& scan);
419  inline double getAvIntens_(const MSSpectrum& scan);
420 
422  inline double getSdIntens_(const TransSpectrum& scan, const double mean);
424  inline double getSdIntens_(const MSSpectrum& scan, const double mean);
425 
436  virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score,
437  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens);
438 
454  virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score,
455  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end);
456 
462  inline double getAvMZSpacing_(const MSSpectrum& scan);
463 
464 
469  void clusterSeeds_(const TransSpectrum& candidates, const MSSpectrum& ref,
470  const UInt scan_index, const UInt c, const bool check_PPMs);
471 
476  virtual void clusterSeeds_(const MSSpectrum& candidates, const MSSpectrum& ref,
477  const UInt scan_index, const UInt c, const bool check_PPMs);
478 
479 
484  void extendBox_(const PeakMap& map, const Box box);
485 
488  inline double peptideMassRule_(const double c_mass) const
489  {
490  double correction_fac = c_mass / Constants::PEPTIDE_MASS_RULE_BOUND;
491  double old_frac_mass = c_mass - (Int)(c_mass);
492  double new_mass = ((Int)(c_mass)) * (1. + Constants::PEPTIDE_MASS_RULE_FACTOR) - (Int)(correction_fac);
493  double new_frac_mass = new_mass - (Int)(new_mass);
494 
495  if (new_frac_mass - old_frac_mass > 0.5)
496  {
497  new_mass -= 1.;
498  }
499 
500  if (new_frac_mass - old_frac_mass < -0.5)
501  {
502  new_mass += 1.;
503  }
504 
505  return new_mass;
506  }
507 
511  inline double getPPMs_(const double mass_a, const double mass_b) const
512  {
513  return fabs(mass_a - mass_b) / (0.5 * (mass_a + mass_b)) * 1e6;
514  }
515 
516  //internally used data structures for the sweep line algorithm
517  std::multimap<double, Box> open_boxes_, closed_boxes_, end_boxes_, front_boxes_; //double = average m/z position
518  std::vector<std::multimap<double, Box> >* tmp_boxes_; //for each charge we need a separate container
519 
521  std::vector<double> c_mzs_, c_spacings_, psi_, prod_, xs_;
522  std::vector<double> interpol_xs_, interpol_ys_;
523 
526  bool hr_data_;
529  std::vector<int> indices_;
530 
532  std::vector<float> scores_, zeros_;
533  };
534 
535  template <typename PeakType>
536  bool intensityComparator(const PeakType& a, const PeakType& b)
537  {
538  return a.getIntensity() > b.getIntensity();
539  }
540 
541  template <typename PeakType>
543  {
544  return a.getIntensity() < b.getIntensity();
545  }
546 
547  template <typename PeakType>
549  {
550  return a->getIntensity() > b->getIntensity();
551  }
552 
553  template <typename PeakType>
554  bool positionComparator(const PeakType& a, const PeakType& b)
555  {
556  return a.getMZ() < b.getMZ();
557  }
558 
559  template <typename PeakType>
561  {
562  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(1);
563  av_MZ_spacing_ = 1;
564  max_scan_size_ = 0;
565  max_mz_cutoff_ = 3;
567  hr_data_ = false;
568  intenstype_ = "ref";
569  }
570 
571  template <typename PeakType>
572  IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size, const bool hr_data, String intenstype)
573  {
574  max_charge_ = max_charge;
575  max_scan_size_ = max_scan_size;
576  hr_data_ = hr_data;
577  intenstype_ = intenstype;
578  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(max_charge);
579  if (max_scan_size <= 0) //only important for the CPU
580  {
581  IsotopeWavelet::init(max_mz, max_charge);
582  }
583 
584  av_MZ_spacing_ = 1;
587 
588  Int size_estimate((Int)ceil(max_scan_size_ / (max_mz - min_mz)));
589  Int to_reserve((Int)ceil(size_estimate * max_num_peaks_per_pattern_ * Constants::IW_NEUTRON_MASS));
590  psi_.reserve(to_reserve); //The wavelet
591  prod_.reserve(to_reserve);
592  xs_.reserve(to_reserve);
595  }
596 
597  template <typename PeakType>
599  {
600  delete (tmp_boxes_);
601  }
602 
603  template <typename PeakType>
605  {
606  Int spec_size((Int)c_ref.size());
607  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
608  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
609  UInt charge = c + 1;
610  double value, T_boundary_left, T_boundary_right, old, c_diff, current, old_pos, my_local_MZ, my_local_lambda, origin, c_mz;
611 
612  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
613  {
614  value = 0; T_boundary_left = 0, T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
615  old = 0; old_pos = (my_local_pos - from_max_to_left_ - 1 >= 0) ? c_ref[my_local_pos - from_max_to_left_ - 1].getMZ() : c_ref[0].getMZ() - min_spacing_;
616  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
617  c_diff = 0;
618  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
619 
620  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
621  {
622  if (current_conv_pos >= spec_size)
623  {
624  value += 0.5 * old * min_spacing_;
625  break;
626  }
627 
628  c_mz = c_ref[current_conv_pos].getMZ();
629  c_diff = c_mz + origin;
630 
631  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
632  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
633 
634  value += 0.5 * (current + old) * (c_mz - old_pos);
635 
636  old = current;
637  old_pos = c_mz;
638  }
639 
640 
641 
642  c_trans[my_local_pos].setIntensity(value);
643  }
644  }
645 
646  template <typename PeakType>
648  {
649  Int spec_size((Int)c_ref.size());
650  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
651  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
652  UInt charge = c + 1;
653  double value, T_boundary_left, T_boundary_right, c_diff, current, my_local_MZ, my_local_lambda, origin, c_mz;
654 
655  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
656  {
657  value = 0; T_boundary_left = 0, T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
658 
659 
660  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
661  c_diff = 0;
662  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
663 
664  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
665  {
666  if (current_conv_pos >= spec_size)
667  {
668  break;
669  }
670 
671  c_mz = c_ref[current_conv_pos].getMZ();
672  c_diff = c_mz + origin;
673 
674  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
675  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
676 
677  value += current;
678  }
679 
680  c_trans[my_local_pos].setIntensity(value);
681  }
682  }
683 
684  template <typename PeakType>
686  {
687  data_length_ = (UInt) c_ref.size();
688  computeMinSpacing(c_ref);
689  Int wavelet_length = 0, quarter_length = 0;
690 
691  if (hr_data_) //We have to check this separately, because the simply estimation for LowRes data is destroyed by large gaps
692  {
693  UInt c_mz_cutoff;
694  typename MSSpectrum::const_iterator start_iter, end_iter;
695  for (UInt i = 0; i < data_length_; ++i)
696  {
697  c_mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[i].getMZ(), c + 1);
698  start_iter = c_ref.MZEnd(c_ref[i].getMZ());
699  end_iter = c_ref.MZBegin(c_ref[i].getMZ() + c_mz_cutoff);
700  wavelet_length = std::max((SignedSize) wavelet_length, distance(start_iter, end_iter) + 1);
701  end_iter = c_ref.MZEnd(c_ref[i].getMZ() - Constants::IW_QUARTER_NEUTRON_MASS / double(c + 1.));
702  quarter_length = std::max((SignedSize) quarter_length, distance(end_iter, start_iter) + 1);
703  }
704  }
705  else
706  {
707  //CHANGED
709  wavelet_length = (UInt) ceil(max_mz_cutoff_ / min_spacing_);
710  }
711  //... done
712 
713 
714  if (wavelet_length > (Int) c_ref.size())
715  {
716  std::cout << "Warning: the extremal length of the wavelet is larger (" << wavelet_length << ") than the number of data points (" << c_ref.size() << "). This might (!) severely affect the transform." << std::endl;
717  std::cout << "Minimal spacing: " << min_spacing_ << std::endl;
718  std::cout << "Warning/Error generated at scan with RT " << c_ref.getRT() << "." << std::endl;
719  }
720 
722  from_max_to_left_ = max_index;
723  from_max_to_right_ = wavelet_length - 1 - from_max_to_left_;
724  }
725 
726  template <typename PeakType>
728  {
729  min_spacing_ = INT_MAX;
730  for (UInt c_conv_pos = 1; c_conv_pos < c_ref.size(); ++c_conv_pos)
731  {
732  min_spacing_ = std::min(min_spacing_, c_ref[c_conv_pos].getMZ() - c_ref[c_conv_pos - 1].getMZ());
733  }
734  }
735 
736  template <typename PeakType>
738  const MSSpectrum& ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
739  {
740  Size scan_size(candidates.size());
742  typename MSSpectrum::const_iterator iter_start, iter_end, iter_p, seed_iter, iter2;
743  double mz_cutoff, seed_mz, c_av_intens = 0, c_score = 0, c_sd_intens = 0, threshold = 0, help_mz, share, share_pos, bwd, fwd;
744  UInt MZ_start, MZ_end;
745 
746  MSSpectrum diffed(candidates);
747  diffed[0].setIntensity(0); diffed[scan_size - 1].setIntensity(0);
748 
749 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
750  std::stringstream stream;
751  stream << "diffed_" << ref.getRT() << "_" << c + 1 << ".trans\0";
752  std::ofstream ofile(stream.str().c_str());
753 #endif
754 
755  if (!hr_data_) //LowRes data
756  {
757  for (UInt i = 0; i < scan_size - 2; ++i)
758  {
759  share = candidates[i + 1].getIntensity(), share_pos = candidates[i + 1].getMZ();
760  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
761  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
762 
763  if (!(bwd >= 0 && fwd <= 0) || share > ref[i + 1].getIntensity())
764  {
765  diffed[i + 1].setIntensity(0);
766  }
767 
768 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
769  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
770 #endif
771  }
772  }
773  else //HighRes data
774  {
775  for (UInt i = 0; i < scan_size - 2; ++i)
776  {
777  share = candidates[i + 1].getIntensity(), share_pos = candidates[i + 1].getMZ();
778  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
779  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
780 
781  if (!(bwd >= 0 && fwd <= 0))
782  {
783  diffed[i + 1].setIntensity(0);
784  }
785 
786 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
787  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
788 #endif
789  }
790  }
791 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
792  ofile.close();
793 #endif
794 
795  ConstRefVector<MSSpectrum > c_sorted_candidate(diffed.begin(), diffed.end());
796 
797  //Sort the transform in descending order according to the intensities present in the transform
798  c_sorted_candidate.sortByIntensity();
799 
800 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
801  std::stringstream stream2;
802  stream2 << "sorted_cpu_" << candidates.getRT() << "_" << c + 1 << ".trans\0";
803  std::ofstream ofile2(stream2.str().c_str());
804  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
805  {
806  ofile2 << iter->getMZ() << "\t" << iter->getIntensity() << std::endl;
807  }
808  ofile2.close();
809 #endif
810 
811  std::vector<UInt> processed(scan_size, 0);
812 
813  if (ampl_cutoff < 0)
814  {
815  threshold = 0;
816  }
817  else
818  {
819  c_av_intens = getAvIntens_(candidates);
820  c_sd_intens = getSdIntens_(candidates, c_av_intens);
821  threshold = ampl_cutoff * c_sd_intens + c_av_intens;
822  }
823 
824  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
825  {
826  if (iter->getIntensity() <= 0)
827  {
828  break;
829  }
830 
831  seed_mz = iter->getMZ();
832  seed_iter = ref.MZBegin(seed_mz);
833 
834  if (seed_iter == ref.end() || processed[distance(ref.begin(), seed_iter)])
835  {
836  continue;
837  }
838 
839  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(seed_mz, c + 1);
840  //Mark the region as processed
841  //Do not move this further down, since we have to mark this as processed in any case,
842  //even when score <=0; otherwise we would look around the maximum's position unless
843  //any significant point is found
844  iter_start = ref.MZBegin(ref.begin(), seed_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.), seed_iter);
845  iter_end = ref.MZEnd(seed_iter, seed_mz + mz_cutoff / (c + 1.), ref.end());
846  if (iter_end == ref.end())
847  {
848  --iter_end;
849  }
850 
851  MZ_start = distance(ref.begin(), iter_start);
852  MZ_end = distance(ref.begin(), iter_end);
853 
854  memset(&(processed[MZ_start]), 1, sizeof(UInt) * (MZ_end - MZ_start + 1));
855 
856  c_score = scoreThis_(candidates, IsotopeWavelet::getNumPeakCutOff(seed_mz * (c + 1.)), seed_mz, c, threshold);
857 
858 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
859  if (trunc(seed_mz) == 874)
860  std::cout << seed_mz << "\t" << c_score << std::endl;
861 #endif
862 
863  if (c_score <= 0 && c_score != -1000)
864  {
865  continue;
866  }
867 
868  //Push the seed into its corresponding box (or create a new one, if necessary)
869  //Do ***NOT*** move this further down!
870  push2TmpBox_(seed_mz, scan_index, c, c_score, iter->getIntensity(), ref.getRT(), MZ_start, MZ_end);
871 
872  help_mz = seed_mz - Constants::IW_NEUTRON_MASS / (c + 1.);
873  iter2 = candidates.MZBegin(help_mz);
874  if (iter2 == candidates.end() || iter2 == candidates.begin())
875  {
876  continue;
877  }
878 
879  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
880  {
881  //In the other case, we are too close to the peak, leading to incorrect derivatives.
882  if (iter2 != candidates.end())
883  {
884  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
885  }
886  }
887 
888  help_mz = seed_mz + Constants::IW_NEUTRON_MASS / (c + 1.);
889  iter2 = candidates.MZBegin(help_mz);
890  if (iter2 == candidates.end() || iter2 == candidates.begin())
891  {
892  continue;
893  }
894 
895  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
896  {
897  //In the other case, we are too close to the peak, leading to incorrect derivatives.
898  if (iter2 != candidates.end())
899  {
900  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
901  }
902  }
903  }
904 
905  clusterSeeds_(candidates, ref, scan_index, c, check_PPMs);
906  }
907 
908  template <typename PeakType>
910  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
911  {
912  double c_score = 0, c_val;
913  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
914  Int signal_size((Int)candidate.size());
915  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
916  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
917 
918  //p_h_ind indicates if we are looking for a whole or a peak
919  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
920 
921  std::vector<double> positions(end);
922  for (Int i = 0; i < end; ++i)
923  {
924  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
925  }
926 
927  double l_score = 0, mid_val = 0;
928  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
929  for (Int v = 1; v <= end; ++v, ++p_h_ind)
930  {
931  do
932  {
933  if (start_index < signal_size - 1)
934  ++start_index;
935  else
936  break;
937  }
938  while (candidate[start_index].getMZ() < positions[v - 1]);
939 
940  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
941  {
942  continue;
943  }
944 
945  c_left_iter2 = candidate.begin() + start_index - 1;
946  c_right_iter2 = c_left_iter2 + 1;
947 
948  c_val = c_left_iter2->getIntensity() + (c_right_iter2->getIntensity() - c_left_iter2->getIntensity()) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
949 
950  if (v == (int)(ceil(end / 2.)))
951  {
952  l_score = c_score;
953  mid_val = c_val;
954  }
955 
956  if (p_h_ind % 2 == 1) //I.e. a whole
957  {
958  c_score -= c_val;
959 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
960  if (trunc(seed_mz) == 874)
961  std::cout << -c_val << std::endl;
962 #endif
963  }
964  else
965  {
966  c_score += c_val;
967 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
968  if (trunc(seed_mz) == 874)
969  std::cout << c_val << std::endl;
970 #endif
971  }
972 
973 
974  start_index = distance(candidate.begin(), c_left_iter2);
975  }
976 
977 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
978  std::ofstream ofile_score("scores.dat", ios::app);
979  std::ofstream ofile_check_score("check_scores.dat", ios::app);
980  ofile_score.close();
981  ofile_check_score.close();
982 #endif
983 
984 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
985  if (trunc(seed_mz) == 874)
986  std::cout << "final_score: " << seed_mz << "\t" << c_score << "\t l_score: " << l_score << "\t" << c_score - l_score - mid_val << "\t" << c_score - mid_val << "\t" << ampl_cutoff << std::endl;
987 #endif
988 
989  if (c_score - mid_val <= 0)
990  {
991  return 0;
992  }
993 
994  if (c_score - mid_val <= ampl_cutoff)
995  {
996  return -1000;
997  }
998 
999  if (l_score <= 0 || c_score - l_score - mid_val <= 0)
1000  {
1001  return 0;
1002  }
1003 
1004  return c_score;
1005  }
1006 
1007  template <typename PeakType>
1009  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
1010  {
1011  double c_score = 0, c_val;
1012  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
1013  Int signal_size((Int)candidate.size());
1014 
1015  //p_h_ind indicates if we are looking for a whole or a peak
1016  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
1017 
1018  std::vector<double> positions(end);
1019  for (Int i = 0; i < end; ++i)
1020  {
1021  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
1022  }
1023 
1024  double l_score = 0, mid_val = 0;
1025  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
1026  for (Int v = 1; v <= end; ++v, ++p_h_ind)
1027  {
1028  do
1029  {
1030  if (start_index < signal_size - 1)
1031  ++start_index;
1032  else
1033  break;
1034  }
1035  while (candidate.getMZ(start_index) < positions[v - 1]);
1036 
1037  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
1038  {
1039  continue;
1040  }
1041 
1042  c_left_iter2 = candidate.begin() + start_index - 1;
1043  c_right_iter2 = c_left_iter2 + 1;
1044 
1045  c_val = candidate.getTransIntensity(start_index - 1) + (candidate.getTransIntensity(start_index) - candidate.getTransIntensity(start_index - 1)) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
1046  if (v == (int)(ceil(end / 2.)))
1047  {
1048  l_score = c_score;
1049  mid_val = c_val;
1050  }
1051 
1052  if (p_h_ind % 2 == 1) //I.e. a whole
1053  {
1054  c_score -= c_val;
1055  }
1056  else
1057  {
1058  c_score += c_val;
1059  }
1060 
1061  start_index = distance(candidate.begin(), c_left_iter2);
1062  }
1063 
1064 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1065  std::ofstream ofile_score("scores.dat", ios::app);
1066  std::ofstream ofile_check_score("check_scores.dat", ios::app);
1067  ofile_score << c_check_point << "\t" << c_score << std::endl;
1068  ofile_score.close();
1069  ofile_check_score.close();
1070 #endif
1071 
1072  if (l_score <= 0 || c_score - l_score - mid_val <= 0 || c_score - mid_val <= ampl_cutoff)
1073  {
1074  return 0;
1075  }
1076 
1077  return c_score;
1078  }
1079 
1080  template <typename PeakType>
1082  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1083  {
1084  typename std::multimap<double, Box>::iterator iter;
1085  typename Box::iterator box_iter;
1086  std::vector<BoxElement> final_box;
1087  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1088  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1089 
1090  typename std::pair<double, double> c_extend;
1091  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1092  {
1093 
1094  Box& c_box = iter->second;
1095  av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1096  virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1097 
1098  //Now, let's get the RT boundaries for the box
1099  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1100  {
1101  if (box_iter->second.score == 0) //virtual helping point
1102  {
1103  if (count != 0)
1104  continue; //it is in any way not pure virtual
1105 
1106  c_mz = box_iter->second.mz;
1107  virtual_av_intens += box_iter->second.intens;
1108  virtual_av_abs_intens += fabs(box_iter->second.intens);
1109  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1110  ++virtual_count;
1111  }
1112  else
1113  {
1114  c_mz = box_iter->second.mz;
1115  av_score += box_iter->second.score;
1116  av_intens += box_iter->second.intens;
1117  av_abs_intens += fabs(box_iter->second.intens);
1118  av_mz += c_mz * fabs(box_iter->second.intens);
1119  ++count;
1120  }
1121  }
1122 
1123  if (count == 0) //pure virtual helping box
1124  {
1125  av_intens = virtual_av_intens / virtual_count;
1126  av_score = 0;
1127  av_mz = virtual_av_mz / virtual_av_abs_intens;
1128  }
1129  else
1130  {
1131  av_intens /= count;
1132  av_score /= count;
1133  av_mz /= av_abs_intens;
1134  }
1135 
1136  BoxElement c_box_element;
1137  c_box_element.mz = av_mz;
1138  c_box_element.c = c;
1139  c_box_element.score = av_score;
1140  c_box_element.intens = av_intens;
1141 
1142  c_box_element.RT = c_box.begin()->second.RT;
1143  final_box.push_back(c_box_element);
1144  }
1145 
1146  Size num_o_feature = final_box.size();
1147  if (num_o_feature == 0)
1148  {
1149  tmp_boxes_->at(c).clear();
1150  return;
1151  }
1152 
1153  //Computing the derivatives
1154  std::vector<double> bwd_diffs(num_o_feature, 0);
1155 
1156  bwd_diffs[0] = 0;
1157  for (Size i = 1; i < num_o_feature; ++i)
1158  {
1159  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1160  }
1161 
1162 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1163  std::ofstream ofile_bwd("bwd_cpu.dat");
1164  for (Size i = 0; i < num_o_feature; ++i)
1165  {
1166  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1167  }
1168  ofile_bwd.close();
1169 #endif
1170 
1171 
1172  for (Size i = 0; i < num_o_feature - 1; ++i)
1173  {
1174  while (i < num_o_feature - 2)
1175  {
1176  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1177  break;
1178  ++i;
1179  }
1180 
1181  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1182  {
1183  checkPositionForPlausibility_(candidate, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1184  continue;
1185  }
1186  }
1187 
1188  tmp_boxes_->at(c).clear();
1189  }
1190 
1191  template <typename PeakType>
1193  {
1194  double av_intens = 0;
1195  for (UInt i = 0; i < scan.size(); ++i)
1196  {
1197  if (scan[i].getIntensity() >= 0)
1198  {
1199  av_intens += scan[i].getIntensity();
1200  }
1201  }
1202  return av_intens / (double)scan.size();
1203  }
1204 
1205  template <typename PeakType>
1207  {
1208  double res = 0, intens;
1209  for (UInt i = 0; i < scan.size(); ++i)
1210  {
1211  if (scan[i].getIntensity() >= 0)
1212  {
1213  intens = scan[i].getIntensity();
1214  res += (intens - mean) * (intens - mean);
1215  }
1216  }
1217  return sqrt(res / (double)(scan.size() - 1));
1218  }
1219 
1220  template <typename PeakType>
1221  double IsotopeWaveletTransform<PeakType>::getAvMZSpacing_(const MSSpectrum& scan) //, Int start_index, Int end_index)
1222  {
1223  std::vector<double> diffs(scan.size() - 1, 0);
1224  for (UInt i = 0; i < scan.size() - 1; ++i)
1225  {
1226  diffs[i] = scan[i + 1].getMZ() - scan[i].getMZ();
1227  }
1228 
1229  sort(diffs.begin(), diffs.end());
1230  double av_MZ_spacing = 0;
1231  for (UInt i = 0; i < diffs.size() / 2; ++i)
1232  {
1233  av_MZ_spacing += diffs[i];
1234  }
1235 
1236  return av_MZ_spacing / (diffs.size() / 2);
1237  }
1238 
1239  template <typename PeakType>
1241  {
1242  double av_intens = 0;
1243  for (UInt i = 0; i < scan.size(); ++i)
1244  {
1245  if (scan.getTransIntensity(i) >= 0)
1246  {
1247  av_intens += scan.getTransIntensity(i);
1248  }
1249  }
1250  return av_intens / (double)scan.size();
1251  }
1252 
1253  template <typename PeakType>
1255  {
1256  double res = 0, intens;
1257  for (UInt i = 0; i < scan.size(); ++i)
1258  {
1259  if (scan.getTransIntensity(i) >= 0)
1260  {
1261  intens = scan.getTransIntensity(i);
1262  res += (intens - mean) * (intens - mean);
1263  }
1264  }
1265  return sqrt(res / (double)(scan.size() - 1));
1266  }
1267 
1268  template <typename PeakType>
1270  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, double ref_intens)
1271  {
1272  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1273 
1274  typename std::multimap<double, Box>::iterator upper_iter(open_boxes_.upper_bound(mz));
1275  typename std::multimap<double, Box>::iterator lower_iter(open_boxes_.lower_bound(mz));
1276 
1277  if (lower_iter != open_boxes_.end())
1278  {
1279  //Ugly, but necessary due to the implementation of STL lower_bound
1280  if (mz != lower_iter->first && lower_iter != open_boxes_.begin())
1281  {
1282  --lower_iter;
1283  }
1284  }
1285 
1286  typename std::multimap<double, Box>::iterator insert_iter;
1287  bool create_new_box = true;
1288  if (lower_iter == open_boxes_.end()) //I.e. there is no open Box for that mz position
1289  {
1290  //There is another special case to be considered here:
1291  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1292  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1293  if (!open_boxes_.empty())
1294  {
1295  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1296  {
1297  create_new_box = false;
1298  insert_iter = lower_iter;
1299  }
1300  }
1301  else
1302  {
1303  create_new_box = true;
1304  }
1305  }
1306  else
1307  {
1308  if (upper_iter == open_boxes_.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1309  {
1310  insert_iter = lower_iter;
1311  create_new_box = false;
1312  }
1313  else
1314  {
1315  create_new_box = true;
1316  }
1317  }
1318 
1319 
1320  if (upper_iter != open_boxes_.end() && lower_iter != open_boxes_.end())
1321  {
1322  //Here is the question if you should figure out the smallest charge .... and then
1323 
1324  //Figure out which entry is closer to m/z
1325  double dist_lower = fabs(lower_iter->first - mz);
1326  double dist_upper = fabs(upper_iter->first - mz);
1327  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1328  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1329 
1330  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1331  {
1332  create_new_box = true;
1333  }
1334  else
1335  {
1336  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1337  create_new_box = false;
1338  }
1339  }
1340 
1341  BoxElement element;
1342  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = ref_intens;
1343  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1344 
1345 
1346  if (create_new_box == false)
1347  {
1348  std::pair<UInt, BoxElement> help2(scan, element);
1349  insert_iter->second.insert(help2);
1350 
1351  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1352  Box replacement(insert_iter->second);
1353 
1354  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1355  //Also note that we already inserted the new entry, leading to size-1.
1356  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1357  c_mz /= ((double) insert_iter->second.size());
1358 
1359  //Now let's remove the old and insert the new one
1360  open_boxes_.erase(insert_iter);
1361  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1362  open_boxes_.insert(help3);
1363  }
1364  else
1365  {
1366  std::pair<UInt, BoxElement> help2(scan, element);
1367  std::multimap<UInt, BoxElement> help3;
1368  help3.insert(help2);
1369  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1370  open_boxes_.insert(help4);
1371  }
1372  }
1373 
1374  template <typename PeakType>
1376  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
1377  {
1378  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1379 
1380  std::multimap<double, Box>& tmp_box(tmp_boxes_->at(c));
1381  typename std::multimap<double, Box>::iterator upper_iter(tmp_box.upper_bound(mz));
1382  typename std::multimap<double, Box>::iterator lower_iter(tmp_box.lower_bound(mz));
1383 
1384  if (lower_iter != tmp_box.end())
1385  {
1386  //Ugly, but necessary due to the implementation of STL lower_bound
1387  if (mz != lower_iter->first && lower_iter != tmp_box.begin())
1388  {
1389  --lower_iter;
1390  }
1391  }
1392 
1393  typename std::multimap<double, Box>::iterator insert_iter;
1394  bool create_new_box = true;
1395  if (lower_iter == tmp_box.end()) //I.e. there is no tmp Box for that mz position
1396  {
1397  //There is another special case to be considered here:
1398  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1399  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1400  if (!tmp_box.empty())
1401  {
1402  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1403  {
1404  create_new_box = false;
1405  insert_iter = lower_iter;
1406  }
1407  }
1408  else
1409  {
1410  create_new_box = true;
1411  }
1412  }
1413  else
1414  {
1415  if (upper_iter == tmp_box.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1416  {
1417  insert_iter = lower_iter;
1418  create_new_box = false;
1419  }
1420  else
1421  {
1422  create_new_box = true;
1423  }
1424  }
1425 
1426 
1427  if (upper_iter != tmp_box.end() && lower_iter != tmp_box.end())
1428  {
1429  //Figure out which entry is closer to m/z
1430  double dist_lower = fabs(lower_iter->first - mz);
1431  double dist_upper = fabs(upper_iter->first - mz);
1432  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1433  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1434 
1435  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1436  {
1437  create_new_box = true;
1438  }
1439  else
1440  {
1441  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1442  create_new_box = false;
1443  }
1444  }
1445 
1446  BoxElement element;
1447  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = -1000;
1448  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1449 
1450  if (create_new_box == false)
1451  {
1452  std::pair<UInt, BoxElement> help2(scan, element);
1453  insert_iter->second.insert(help2);
1454 
1455  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1456  Box replacement(insert_iter->second);
1457 
1458  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1459  //Also note that we already inserted the new entry, leading to size-1.
1460  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1461  c_mz /= ((double) insert_iter->second.size());
1462 
1463  //Now let's remove the old and insert the new one
1464  tmp_box.erase(insert_iter);
1465  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1466  tmp_box.insert(help3);
1467  }
1468  else
1469  {
1470  std::pair<UInt, BoxElement> help2(scan, element);
1471  std::multimap<UInt, BoxElement> help3;
1472  help3.insert(help2);
1473 
1474  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1475  tmp_box.insert(help4);
1476  }
1477  }
1478 
1479  template <typename PeakType>
1480  void IsotopeWaveletTransform<PeakType>::updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
1481  const UInt RT_votes_cutoff, const Int front_bound, const Int end_bound)
1482  {
1483  typename std::multimap<double, Box>::iterator iter, iter2;
1484 
1485  if ((Int)scan_index == end_bound && end_bound != (Int)map.size() - 1)
1486  {
1487  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); ++iter)
1488  {
1489 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1490  std::cout << "LOW THREAD insert in end_box " << iter->first << std::endl;
1491  typename Box::iterator dings;
1492  for (dings = iter->second.begin(); dings != iter->second.end(); ++dings)
1493  std::cout << map[dings->first].getRT() << "\t" << dings->second.c + 1 << std::endl;
1494 #endif
1495  end_boxes_.insert(*iter);
1496  }
1497  open_boxes_.clear();
1498  return;
1499  }
1500 
1501  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); )
1502  {
1503 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1504  if (front_bound > 0)
1505  {
1506  std::cout << "HIGH THREAD open box. " << iter->first << "\t current scan index " << scan_index << "\t" << ((iter->second.begin()))->first << "\t of last scan " << map.size() - 1 << "\t" << front_bound << std::endl;
1507  }
1508 #endif
1509 
1510  //For each Box we need to figure out, if and when the last RT value has been inserted
1511  UInt lastScan = (--(iter->second.end()))->first;
1512  if (scan_index - lastScan > RT_interleave + 1 || scan_index == map.size() - 1) //I.e. close the box!
1513  {
1514  if (iter->second.begin()->first - front_bound <= RT_interleave + 1 && front_bound > 0)
1515  {
1516  iter2 = iter;
1517  ++iter2;
1518 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1519  std::cout << "HIGH THREAD insert in front_box " << iter->first << std::endl;
1520 #endif
1521  front_boxes_.insert(*iter);
1522  open_boxes_.erase(iter);
1523  iter = iter2;
1524  continue;
1525  }
1526 
1527  iter2 = iter;
1528  ++iter2;
1529  //Please do **NOT** simplify the upcoming lines.
1530  //The 'obvious' overhead is necessary since the object represented by iter might be erased
1531  //by push2Box which might be called by extendBox_.
1532  if (iter->second.size() >= RT_votes_cutoff)
1533  {
1534  //extendBox_ (map, iter->second);
1535  iter = iter2;
1536  closed_boxes_.insert(*(--iter));
1537  }
1538  open_boxes_.erase(iter);
1539  iter = iter2;
1540  }
1541  else
1542  {
1543  ++iter;
1544  }
1545  }
1546  }
1547 
1548  template <typename PeakType>
1550  {
1551 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1552  std::cout << "**** CHECKING FOR BOX EXTENSIONS ****" << std::endl;
1553 #endif
1554 
1555  //Determining the elution profile
1556  typename Box::const_iterator iter;
1557  std::vector<double> elution_profile(box.size());
1558  UInt index = 0;
1559  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1560  {
1561  for (Size i = iter->second.MZ_begin; i != iter->second.MZ_end; ++i)
1562  {
1563  elution_profile[index] += map[iter->second.RT_index][i].getIntensity();
1564  }
1565  elution_profile[index] /= iter->second.MZ_end - iter->second.MZ_begin + 1.;
1566  }
1567 
1568  double max = 0;
1569  Int max_index = INT_MIN;
1570  for (Size i = 0; i < elution_profile.size(); ++i)
1571  {
1572  if (elution_profile[i] > max)
1573  {
1574  max_index = i;
1575  max = elution_profile[i];
1576  }
1577  }
1578 
1579  Int max_extension = (Int)(elution_profile.size()) - 2 * max_index;
1580 
1581  double av_elution = 0;
1582  for (Size i = 0; i < elution_profile.size(); ++i)
1583  {
1584  av_elution += elution_profile[i];
1585  }
1586  av_elution /= (double)elution_profile.size();
1587 
1588  double sd_elution = 0;
1589  for (Size i = 0; i < elution_profile.size(); ++i)
1590  {
1591  sd_elution += (av_elution - elution_profile[i]) * (av_elution - elution_profile[i]);
1592  }
1593  sd_elution /= (double)(elution_profile.size() - 1);
1594  sd_elution = sqrt(sd_elution);
1595 
1596  //Determine average m/z monoisotopic pos
1597  double av_mz = 0;
1598  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1599  {
1600  av_mz += iter->second.mz;
1601 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1602  std::cout << iter->second.RT << "\t" << iter->second.mz << "\t" << iter->second.c + 1 << std::endl;
1603 #endif
1604  }
1605  av_mz /= (double)box.size();
1606 
1607 
1608  //Boundary check
1609  if ((Int)(box.begin()->second.RT_index) - 1 < 0)
1610  {
1611  return;
1612  }
1613 
1614  UInt pre_index = box.begin()->second.RT_index - 1;
1615  typename MSSpectrum::const_iterator c_iter = map[pre_index].MZBegin(av_mz);
1616  double pre_elution = 0;
1617 
1618  double mz_start = map[pre_index + 1][box.begin()->second.MZ_begin].getMZ();
1619  double mz_end = map[pre_index + 1][box.begin()->second.MZ_end].getMZ();
1620 
1621  typename MSSpectrum::const_iterator mz_start_iter = map[pre_index].MZBegin(mz_start), mz_end_iter = map[pre_index].MZBegin(mz_end);
1622  for (typename MSSpectrum::const_iterator mz_iter = mz_start_iter; mz_iter != mz_end_iter; ++mz_iter)
1623  {
1624  pre_elution += mz_iter->getIntensity();
1625  }
1626 
1627 
1628  //Do we need to extend at all?
1629  if (pre_elution <= av_elution - 2 * sd_elution)
1630  {
1631  return;
1632  }
1633 
1634  Int c_index = max_extension;
1635  Int first_index = box.begin()->second.RT_index;
1636  for (Int i = 1; i < max_extension; ++i)
1637  {
1638  c_index = first_index - i;
1639  if (c_index < 0)
1640  {
1641  break;
1642  }
1643 
1644  //CHECK Majority vote for charge???????????????
1645 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1646  std::cout << box.begin()->second.RT << "\t" << av_mz << "\t" << box.begin()->second.c + 1 << "\t" << " extending the box " << std::endl;
1647 #endif
1648 
1649  push2Box_(av_mz, c_index, box.begin()->second.c, box.begin()->second.score, c_iter->getIntensity(),
1650  map[c_index].getRT(), box.begin()->second.MZ_begin, box.begin()->second.MZ_end);
1651  }
1652  }
1653 
1654  template <typename PeakType>
1656  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1657  {
1658  typename std::multimap<double, Box>::iterator iter;
1659  typename Box::iterator box_iter;
1660  std::vector<BoxElement> final_box;
1661  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1662  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1663 
1664  typename std::pair<double, double> c_extend;
1665  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1666  {
1667  Box& c_box = iter->second;
1668  av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1669  virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1670 
1671  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1672  {
1673  if (box_iter->second.score == 0) //virtual helping point
1674  {
1675  if (count != 0)
1676  continue; //it is in any way not pure virtual
1677 
1678  c_mz = box_iter->second.mz;
1679  virtual_av_intens += box_iter->second.intens;
1680  virtual_av_abs_intens += fabs(box_iter->second.intens);
1681  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1682  ++virtual_count;
1683  }
1684  else
1685  {
1686  c_mz = box_iter->second.mz;
1687  av_score += box_iter->second.score;
1688  av_intens += box_iter->second.intens;
1689  av_abs_intens += fabs(box_iter->second.intens);
1690  av_mz += c_mz * fabs(box_iter->second.intens);
1691  ++count;
1692  }
1693  }
1694 
1695  if (count == 0) //pure virtual helping box
1696  {
1697  av_intens = virtual_av_intens / virtual_count;
1698  av_score = 0;
1699  av_mz = virtual_av_mz / virtual_av_abs_intens;
1700  }
1701  else
1702  {
1703  av_intens /= count;
1704  av_score /= count;
1705  av_mz /= av_abs_intens;
1706  }
1707 
1708  BoxElement c_box_element;
1709  c_box_element.mz = av_mz;
1710  c_box_element.c = c;
1711  c_box_element.score = av_score;
1712  c_box_element.intens = av_intens;
1713 
1714  c_box_element.RT = c_box.begin()->second.RT;
1715 
1716  final_box.push_back(c_box_element);
1717  }
1718 
1719  UInt num_o_feature = final_box.size();
1720  if (num_o_feature == 0)
1721  {
1722  tmp_boxes_->at(c).clear();
1723  return;
1724  }
1725 
1726  //Computing the derivatives
1727  std::vector<double> bwd_diffs(num_o_feature, 0);
1728 
1729  bwd_diffs[0] = 0;
1730  for (UInt i = 1; i < num_o_feature; ++i)
1731  {
1732  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1733  }
1734 
1735 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1736  std::ofstream ofile_bwd("bwd_gpu.dat");
1737  for (UInt i = 0; i < num_o_feature; ++i)
1738  {
1739  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1740  }
1741  ofile_bwd.close();
1742 #endif
1743 
1744  for (UInt i = 0; i < num_o_feature - 1; ++i)
1745  {
1746  while (i < num_o_feature - 2)
1747  {
1748  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1749  break;
1750  ++i;
1751  }
1752 
1753  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1754  {
1755  checkPositionForPlausibility_(candidates, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1756  continue;
1757  }
1758  }
1759 
1760  tmp_boxes_->at(c).clear();
1761  }
1762 
1763  template <typename PeakType>
1765  {
1766  FeatureMap feature_map;
1767  typename std::multimap<double, Box>::iterator iter;
1768  typename Box::iterator box_iter;
1769  UInt best_charge_index; double best_charge_score, c_mz, c_RT; UInt c_charge;
1770  double av_intens = 0, av_ref_intens = 0, av_score = 0, av_mz = 0, av_RT = 0, mz_cutoff, sum_of_ref_intenses_g;
1771  bool restart = false;
1772 
1773  typename std::pair<double, double> c_extend;
1774  for (iter = closed_boxes_.begin(); iter != closed_boxes_.end(); ++iter)
1775  {
1776  sum_of_ref_intenses_g = 0;
1777  Box& c_box = iter->second;
1778  std::vector<double> charge_votes(max_charge_, 0), charge_binary_votes(max_charge_, 0);
1779  restart = false;
1780 
1781  //Let's first determine the charge
1782  //Therefor, we can use two types of votes: qualitative ones (charge_binary_votes) or quantitative ones (charge_votes)
1783  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1784  {
1785 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1786  if (trunc(box_iter->second.mz) == 874)
1787  std::cout << box_iter->second.c << "\t" << box_iter->second.intens << "\t" << box_iter->second.score << std::endl;
1788 #endif
1789 
1790  if (box_iter->second.score == -1000)
1791  {
1792  restart = true;
1793  break;
1794  }
1795 
1796  charge_votes[box_iter->second.c] += box_iter->second.intens; //score; Do not use score, can get problematic for charge state 2 vs 4
1797  ++charge_binary_votes[box_iter->second.c];
1798  }
1799 
1800  if (restart)
1801  {
1802  continue;
1803  }
1804 
1805  //... determining the best fitting charge
1806  best_charge_index = 0; best_charge_score = 0;
1807  for (UInt i = 0; i < max_charge_; ++i)
1808  {
1809  if (charge_votes[i] > best_charge_score)
1810  {
1811  best_charge_index = i;
1812  best_charge_score = charge_votes[i];
1813  }
1814  }
1815 
1816  //Pattern found in too few RT scan
1817  if (charge_binary_votes[best_charge_index] < RT_votes_cutoff && RT_votes_cutoff <= map.size())
1818  {
1819  continue;
1820  }
1821 
1822  c_charge = best_charge_index + 1; //that's the finally predicted charge state for the pattern
1823 
1824  av_intens = 0, av_ref_intens = 0, av_score = 0, av_mz = 0, av_RT = 0;
1825  //Now, let's get the RT boundaries for the box
1826  std::vector<DPosition<2> > point_set;
1827  double sum_of_ref_intenses_l;
1828  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1829  {
1830  sum_of_ref_intenses_l = 0;
1831  c_mz = box_iter->second.mz;
1832  c_RT = box_iter->second.RT;
1833 
1834  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_mz, c_charge);
1835 
1836  point_set.push_back(DPosition<2>(c_RT, c_mz - Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge));
1837  //-1 since we are already at the first peak and +0.75, since this includes the last peak of the wavelet as a whole
1838  point_set.push_back(DPosition<2>(c_RT, c_mz + mz_cutoff / (double)c_charge));
1839 
1840 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1841  std::cout << "Intenstype: " << intenstype_ << std::endl;
1842 #endif
1843  if (intenstype_ == "ref")
1844  {
1845  //Find monoisotopic max
1846  const MSSpectrum& c_spec(map[box_iter->second.RT_index]);
1847  //'Correct' possible shift
1848  for (unsigned int i = 0; i < mz_cutoff; ++i)
1849  {
1850  typename MSSpectrum::const_iterator h_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge + Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge), hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1851 
1852  hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1853 
1854  while (h_iter != c_spec.begin())
1855  {
1856 
1857 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1858  if (trunc(c_mz) == 874)
1859  {
1860  std::cout << "cmz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << h_iter->getMZ() << "\t" << h_iter->getIntensity() << std::endl;
1861  }
1862 #endif
1863 
1864  --h_iter;
1865  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1866  {
1867  hc_iter = h_iter;
1868  }
1869 
1870  if (c_mz + i * Constants::IW_NEUTRON_MASS / c_charge - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge)
1871  {
1872  break;
1873  }
1874  }
1875 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1876  if (trunc(c_mz) == 874)
1877  {
1878  std::cout << "c_mz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << i * Constants::IW_NEUTRON_MASS / c_charge << "\t";
1879  }
1880 #endif
1881  sum_of_ref_intenses_l += hc_iter->getIntensity();
1882 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1883  if (trunc(c_mz) == 874)
1884  {
1885  std::cout << sum_of_ref_intenses_l << "********" << std::endl;
1886  }
1887 #endif
1888  }
1889  }
1890 
1891  if (best_charge_index == box_iter->second.c)
1892  {
1893  av_score += box_iter->second.score;
1894  av_intens += box_iter->second.intens;
1895  av_ref_intens += box_iter->second.ref_intens;
1896  sum_of_ref_intenses_g += sum_of_ref_intenses_l;
1897  av_mz += c_mz * box_iter->second.intens;
1898  }
1899  av_RT += c_RT;
1900  }
1901 
1902  av_mz /= av_intens;
1903  av_ref_intens /= (double)charge_binary_votes[best_charge_index];
1904  av_score /= (double)charge_binary_votes[best_charge_index];
1905  av_RT /= (double)c_box.size();
1906 
1907 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1908  if (trunc(av_mz) == 874)
1909  std::cout << av_mz << "\t" << best_charge_index << "\t" << best_charge_score << std::endl;
1910 #endif
1911 
1912  Feature c_feature;
1913  ConvexHull2D c_conv_hull;
1914  c_conv_hull.addPoints(point_set);
1915  c_feature.setCharge(c_charge);
1916  c_feature.setConvexHulls(std::vector<ConvexHull2D>(1, c_conv_hull));
1917 
1918  //This makes the intensity value independent of the m/z (the lambda) value (Skellam distribution)
1919  if (intenstype_ == "corrected")
1920  {
1921  double lambda = IsotopeWavelet::getLambdaL(av_mz * c_charge);
1922  av_intens /= exp(-2 * lambda) * boost::math::cyl_bessel_i(0, 2 * lambda);
1923  }
1924  if (intenstype_ == "ref")
1925  {
1926 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1927  if (trunc(c_mz) == 874)
1928  {
1929  std::cout << sum_of_ref_intenses_g << "####" << std::endl;
1930  }
1931 #endif
1932 
1933  av_intens = sum_of_ref_intenses_g;
1934  }
1935 
1936  c_feature.setMZ(av_mz);
1937  c_feature.setIntensity(av_intens);
1938  c_feature.setRT(av_RT);
1939  c_feature.setOverallQuality(av_score);
1940  feature_map.push_back(c_feature);
1941  }
1942 
1943  return feature_map;
1944  }
1945 
1946  template <typename PeakType>
1948  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
1949  {
1950  typename MSSpectrum::const_iterator iter, ref_iter;
1951  UInt peak_cutoff;
1952  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
1953 
1954  iter = candidate.MZBegin(seed_mz);
1955  //we can ignore those cases
1956  if (iter == candidate.begin() || iter == candidate.end())
1957  {
1958  return false;
1959  }
1960 
1961  std::pair<double, double> reals;
1962  ref_iter = ref.MZBegin(seed_mz);
1963  //Correct the position
1964  double real_mz, real_intens;
1965  if (check_PPMs)
1966  {
1967  reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
1968  real_mz = reals.first, real_intens = reals.second;
1969  //if (real_mz <= 0 || real_intens <= 0)
1970  //{
1971  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1972  while (h_iter != ref.begin())
1973  {
1974  --h_iter;
1975  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1976  {
1977  hc_iter = h_iter;
1978  }
1979  else
1980  {
1981  break;
1982  }
1983 
1984  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1985  {
1986  return false;
1987  }
1988  }
1989  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
1990  real_mz = reals.first, real_intens = reals.second;
1991  if (real_mz <= 0 || real_intens <= 0)
1992  {
1993  return false;
1994  }
1995  real_mz = h_iter->getMZ();
1996  real_intens = h_iter->getIntensity();
1997  //}
1998  }
1999  else
2000  {
2001  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
2002  real_mz = reals.first, real_intens = reals.second;
2003 
2004  if (real_mz <= 0 || real_intens <= 0)
2005  {
2006  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
2007  while (h_iter != ref.begin())
2008  {
2009  --h_iter;
2010  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2011  {
2012  hc_iter = h_iter;
2013  }
2014  else
2015  {
2016  break;
2017  }
2018 
2019  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2020  {
2021  return false;
2022  }
2023  }
2024  real_mz = h_iter->getMZ(), real_intens = h_iter->getIntensity();
2025  if (real_mz <= 0 || real_intens <= 0)
2026  {
2027  return false;
2028  }
2029  real_mz = h_iter->getMZ();
2030  real_intens = h_iter->getIntensity();
2031  }
2032  }
2033 
2034  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
2035 
2036  if (c_score <= 0)
2037  {
2038  return false;
2039  }
2040 
2041  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
2042  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
2043  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2044  if (real_r_MZ_iter == ref.end())
2045  {
2046  --real_r_MZ_iter;
2047  }
2048 
2049 
2050  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2051  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2052 
2053  if (prev_score == -1000)
2054  {
2055  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2056  }
2057  else
2058  {
2059  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2060  }
2061  return true;
2062  }
2063 
2064  template <typename PeakType>
2066  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
2067  {
2068  typename MSSpectrum::const_iterator iter, ref_iter;
2069  UInt peak_cutoff;
2070  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
2071 
2072  iter = candidate.MZBegin(seed_mz);
2073  //we can ignore those cases
2074  if (iter == candidate.begin() || iter == candidate.end())
2075  {
2076  return false;
2077  }
2078 
2079  std::pair<double, double> reals;
2080  ref_iter = ref.MZBegin(seed_mz);
2081  //Correct the position
2082  double real_mz, real_intens;
2083  if (check_PPMs)
2084  {
2085  reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
2086  real_mz = reals.first, real_intens = reals.second;
2087  //if (real_mz <= 0 || real_intens <= 0)
2088  //{
2089  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
2090  while (h_iter != ref.begin())
2091  {
2092  --h_iter;
2093  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2094  {
2095  hc_iter = h_iter;
2096  }
2097  else
2098  {
2099  break;
2100  }
2101 
2102  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2103  {
2104  return false;
2105  }
2106  }
2107  ++h_iter;
2108  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
2109  real_mz = reals.first, real_intens = reals.second;
2110 
2111 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2112  std::cout << "Plausibility check old_mz: " << iter->getMZ() << "\t" << real_mz << std::endl;
2113 #endif
2114 
2115  if (real_mz <= 0 || real_intens <= 0)
2116  {
2117  return false;
2118  }
2119  real_mz = h_iter->getMZ();
2120  real_intens = h_iter->getIntensity();
2121  //}
2122  }
2123  else
2124  {
2125  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
2126  real_mz = reals.first, real_intens = reals.second;
2127 
2128  if (real_mz <= 0 || real_intens <= 0)
2129  {
2130  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
2131  while (h_iter != ref.begin())
2132  {
2133  --h_iter;
2134  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2135  {
2136  hc_iter = h_iter;
2137  }
2138  else
2139  {
2140  break;
2141  }
2142 
2143  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2144  {
2145  return false;
2146  }
2147  }
2148  real_mz = h_iter->getMZ(), real_intens = h_iter->getIntensity();
2149  if (real_mz <= 0 || real_intens <= 0)
2150  {
2151  return false;
2152  }
2153  real_mz = h_iter->getMZ();
2154  real_intens = h_iter->getIntensity();
2155  }
2156  }
2157 
2158  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
2159 
2160  if (c_score <= 0)
2161  {
2162  return false;
2163  }
2164 
2165  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
2166  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
2167  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2168  if (real_r_MZ_iter == ref.end())
2169  {
2170  --real_r_MZ_iter;
2171  }
2172 
2173 
2174  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2175  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2176 
2177  if (prev_score == -1000)
2178  {
2179  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2180  }
2181  else
2182  {
2183  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2184  }
2185 
2186  return true;
2187  }
2188 
2189  template <typename PeakType>
2190  std::pair<double, double> IsotopeWaveletTransform<PeakType>::checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c)
2191  {
2192  double mass = c_mz * (c + 1) - Constants::IW_PROTON_MASS * (c);
2193  double ppms = getPPMs_(peptideMassRule_(mass), mass);
2195  {
2196 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2197  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "REJECT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2198 #endif
2199  return std::pair<double, double>(-1, -1);
2200  }
2201 
2202 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2203  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "ACCEPT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2204 #endif
2205  return std::pair<double, double>(c_mz, ref.MZBegin(c_mz)->getIntensity());
2206  }
2207 
2208 } //namespace
2209 
2210 #pragma clang diagnostic pop
2211 
2212 #endif
UInt MZ_end
Definition: IsotopeWaveletTransform.h:86
void extendBox_(const PeakMap &map, const Box box)
A currently still necessary function that extends the box box in order to capture also signals whose ...
Definition: IsotopeWaveletTransform.h:1549
void setTransIntensity(const UInt i, const double intens)
Definition: IsotopeWaveletTransform.h:153
void mergeFeatures(IsotopeWaveletTransform< PeakType > *later_iwt, const UInt RT_interleave, const UInt RT_votes_cutoff)
double getMZ(const UInt i) const
Definition: IsotopeWaveletTransform.h:135
virtual std::pair< double, double > checkPPMTheoModel_(const MSSpectrum &ref, const double c_mz, const UInt c)
Definition: IsotopeWaveletTransform.h:2190
double score
Definition: IsotopeWaveletTransform.h:80
UInt max_num_peaks_per_pattern_
Definition: IsotopeWaveletTransform.h:525
virtual void destroy()
Definition: IsotopeWaveletTransform.h:120
A more convenient string class.
Definition: String.h:57
std::vector< double > xs_
Definition: IsotopeWaveletTransform.h:521
MSSpectrum::const_iterator begin() const
Definition: IsotopeWaveletTransform.h:199
bool intensityPointerComparator(PeakType *a, PeakType *b)
Definition: IsotopeWaveletTransform.h:548
A 2-dimensional raw data point or peak.
Definition: Peak2D.h:55
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:203
bool hr_data_
Definition: IsotopeWaveletTransform.h:526
static double getLambdaL(const double m)
Returns the mass-parameter lambda (linear fit).
virtual double scoreThis_(const TransSpectrum &candidate, const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
Given a candidate for an isotopic pattern, this function computes the corresponding score...
Definition: IsotopeWaveletTransform.h:1008
MSSpectrum::const_iterator MZEnd(const double mz) const
Definition: IsotopeWaveletTransform.h:185
const double IW_PROTON_MASS
Definition: IsotopeWaveletConstants.h:69
double getSigma() const
Definition: IsotopeWaveletTransform.h:341
virtual void getTransformHighRes(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:647
A container for features.
Definition: FeatureMap.h:94
void sortByIntensity(bool reverse=false)
Sorting.
Definition: ConstRefVector.h:752
std::vector< double > prod_
Definition: IsotopeWaveletTransform.h:521
unsigned int UInt
Unsigned integer type.
Definition: Types.h:95
double min_spacing_
Definition: IsotopeWaveletTransform.h:531
double getTransIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:147
Iterator begin()
Definition: MSExperiment.h:162
Iterator MZEnd(CoordinateType mz)
Binary search for peak range end (returns the past-the-end iterator)
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:135
std::vector< std::multimap< double, Box > > * tmp_boxes_
Definition: IsotopeWaveletTransform.h:518
double peptideMassRule_(const double c_mass) const
Returns the monoisotopic mass (with corresponding decimal values) we would expect at c_mass...
Definition: IsotopeWaveletTransform.h:488
std::multimap< double, Box > front_boxes_
Definition: IsotopeWaveletTransform.h:517
static double getValueByLambda(const double lambda, const double tz1)
Returns the value of the isotope wavelet at position t via a fast table lookup. Usually, you do not need to call this function. Please use.
Size size() const
Definition: MSExperiment.h:132
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
double getRT() const
Definition: IsotopeWaveletTransform.h:129
IntensityType getIntensity() const
Definition: Peak2D.h:167
const MSSpectrum * reference_
Definition: IsotopeWaveletTransform.h:206
A 2-dimensional hull representation in [counter]clockwise direction - depending on axis labelling...
Definition: ConvexHull2D.h:73
Internally (only by GPUs) used data structure . It allows efficient data exchange between CPU and GPU...
Definition: IsotopeWaveletTransform.h:95
Iterator MZBegin(CoordinateType mz)
Binary search for peak range begin.
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:135
std::vector< double > interpol_xs_
Definition: IsotopeWaveletTransform.h:522
bool intensityComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:536
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:173
virtual void getTransform(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:604
static UInt getNumPeakCutOff(const double mass, const UInt z)
std::vector< double > psi_
Definition: IsotopeWaveletTransform.h:521
double sigma_
Definition: IsotopeWaveletTransform.h:520
std::vector< double > c_spacings_
Definition: IsotopeWaveletTransform.h:521
void setSigma(const double sigma)
Definition: IsotopeWaveletTransform.h:346
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
Int from_max_to_right_
Definition: IsotopeWaveletTransform.h:528
std::vector< double > c_mzs_
Definition: IsotopeWaveletTransform.h:521
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:215
double mz
Definition: IsotopeWaveletTransform.h:78
double RT
Definition: IsotopeWaveletTransform.h:83
UInt MZ_begin
Definition: IsotopeWaveletTransform.h:85
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:197
double getSdIntens_(const TransSpectrum &scan, const double mean)
Computes the standard deviation (neglecting negative values) of the (transformed) intensities of scan...
Definition: IsotopeWaveletTransform.h:1254
virtual void initializeScan(const MSSpectrum &c_ref, const UInt c=0)
Definition: IsotopeWaveletTransform.h:685
virtual std::multimap< double, Box > getClosedBoxes()
Returns the closed boxes.
Definition: IsotopeWaveletTransform.h:317
MSSpectrum::const_iterator end() const
Definition: IsotopeWaveletTransform.h:192
double getLinearInterpolation(const typename MSSpectrum::const_iterator &left_iter, const double mz_pos, const typename MSSpectrum::const_iterator &right_iter)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:325
double av_MZ_spacing_
Definition: IsotopeWaveletTransform.h:520
A class implementing the isotope wavelet transform. If you just want to find features using the isoto...
Definition: IsotopeWaveletTransform.h:70
double getPPMs_(const double mass_a, const double mass_b) const
Returns the parts-per-million deviation of the masses.
Definition: IsotopeWaveletTransform.h:511
double ref_intens
Definition: IsotopeWaveletTransform.h:82
double getAvMZSpacing_(const MSSpectrum &scan)
Computes the average MZ spacing of scan.
Definition: IsotopeWaveletTransform.h:1221
void updateBoxStates(const PeakMap &map, const Size scan_index, const UInt RT_interleave, const UInt RT_votes_cutoff, const Int front_bound=-1, const Int end_bound=-1)
A function keeping track of currently open and closed sweep line boxes. This function is used by the ...
Definition: IsotopeWaveletTransform.h:1480
An LC-MS feature.
Definition: Feature.h:70
std::vector< double > interpol_ys_
Definition: IsotopeWaveletTransform.h:522
Mutable iterator for the ConstRefVector.
Definition: ConstRefVector.h:242
Size getMaxScanSize() const
Definition: IsotopeWaveletTransform.h:358
String intenstype_
Definition: IsotopeWaveletTransform.h:527
IsotopeWaveletTransform()
Default Constructor.
Definition: IsotopeWaveletTransform.h:560
virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
Essentially the same function as.
Definition: IsotopeWaveletTransform.h:1375
bool positionComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:554
const double IW_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:55
Int from_max_to_left_
Definition: IsotopeWaveletTransform.h:528
void sampleTheCMarrWavelet_(const MSSpectrum &scan, const Int wavelet_length, const Int mz_index, const UInt charge)
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
std::multimap< double, Box > end_boxes_
Definition: IsotopeWaveletTransform.h:517
double getRefIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:141
void setConvexHulls(const std::vector< ConvexHull2D > &hulls)
Set the convex hulls of single mass traces.
std::multimap< double, Box > open_boxes_
Definition: IsotopeWaveletTransform.h:517
FeatureMap mapSeeds2Features(const PeakMap &map, const UInt RT_votes_cutoff)
Filters the candidates further more and maps the internally used data structures to the OpenMS framew...
Definition: IsotopeWaveletTransform.h:1764
const double PEPTIDE_MASS_RULE_BOUND
Definition: IsotopeWaveletConstants.h:51
std::multimap< double, Box > closed_boxes_
Definition: IsotopeWaveletTransform.h:517
virtual void computeMinSpacing(const MSSpectrum &c_ref)
Definition: IsotopeWaveletTransform.h:727
void setOverallQuality(QualityType q)
Set the overall quality.
double max_mz_cutoff_
Definition: IsotopeWaveletTransform.h:531
std::vector< float > scores_
Definition: IsotopeWaveletTransform.h:532
TransSpectrum(const MSSpectrum *reference)
Definition: IsotopeWaveletTransform.h:108
static UInt getMzPeakCutOffAtMonoPos(const double mass, const UInt z)
UInt data_length_
Definition: IsotopeWaveletTransform.h:525
double getAvIntens_(const TransSpectrum &scan)
Computes the average (transformed) intensity (neglecting negative values) of scan.
Definition: IsotopeWaveletTransform.h:1240
virtual bool checkPositionForPlausibility_(const TransSpectrum &candidate, const MSSpectrum &ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
A ugly but necessary function to handle "off-by-1-Dalton predictions" due to idiosyncrasies of the da...
Definition: IsotopeWaveletTransform.h:2065
const MSSpectrum * getRefSpectrum() const
Definition: IsotopeWaveletTransform.h:171
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
bool intensityAscendingComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:542
double getMinSpacing() const
Definition: IsotopeWaveletTransform.h:353
TransSpectrum()
Definition: IsotopeWaveletTransform.h:102
Size size() const
Definition: IsotopeWaveletTransform.h:159
virtual ~IsotopeWaveletTransform()
Destructor.
Definition: IsotopeWaveletTransform.h:598
UInt c
Definition: IsotopeWaveletTransform.h:79
std::vector< int > indices_
Definition: IsotopeWaveletTransform.h:529
void setCharge(const ChargeType &ch)
Set charge state.
virtual void identifyCharge(const MSSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
Given an isotope wavelet transformed spectrum candidates, this function assigns to every significant ...
Definition: IsotopeWaveletTransform.h:737
const MSSpectrum * getRefSpectrum()
Definition: IsotopeWaveletTransform.h:165
void addPoints(const PointArrayType &points)
std::vector< float > * trans_intens_
Definition: IsotopeWaveletTransform.h:207
virtual ~TransSpectrum()
Definition: IsotopeWaveletTransform.h:115
UInt max_charge_
Definition: IsotopeWaveletTransform.h:525
std::multimap< UInt, BoxElement > Box
Key: RT index, value: BoxElement.
Definition: IsotopeWaveletTransform.h:89
double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:336
virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens)
Inserts a potential isotopic pattern into an open box or - if no such box exists - creates a new one...
Definition: IsotopeWaveletTransform.h:1269
This vector holds pointer to the elements of another container.
Definition: ConstRefVector.h:71
const double IW_HALF_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:56
UInt RT_index
Definition: IsotopeWaveletTransform.h:84
const double PEPTIDE_MASS_RULE_FACTOR
Definition: IsotopeWaveletConstants.h:50
static IsotopeWavelet * init(const double max_m, const UInt max_charge)
Size max_scan_size_
Definition: IsotopeWaveletTransform.h:524
double getRT() const
int Int
Signed integer type.
Definition: Types.h:103
const double IW_QUARTER_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:57
const unsigned int DEFAULT_NUM_OF_INTERPOLATION_POINTS
Definition: IsotopeWaveletConstants.h:44
double intens
Definition: IsotopeWaveletTransform.h:81
std::vector< float > zeros_
Definition: IsotopeWaveletTransform.h:532
void clusterSeeds_(const TransSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const bool check_PPMs)
Clusters the seeds stored by push2TmpBox_.
Definition: IsotopeWaveletTransform.h:1655
Internally used data structure.
Definition: IsotopeWaveletTransform.h:76
MSSpectrum::const_iterator MZBegin(const double mz) const
Definition: IsotopeWaveletTransform.h:178
const double PEPTIDE_MASS_RULE_THEO_PPM_BOUND
Definition: IsotopeWaveletConstants.h:52

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:01 using doxygen 1.8.13