Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
SpectraMerger.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
33 // --------------------------------------------------------------------------
34 //
35 #ifndef OPENMS_FILTERING_TRANSFORMERS_SPECTRAMERGER_H
36 #define OPENMS_FILTERING_TRANSFORMERS_SPECTRAMERGER_H
37 
51 #include <vector>
52 
53 namespace OpenMS
54 {
55 
64  class OPENMS_DLLAPI SpectraMerger :
65  public DefaultParamHandler, public ProgressLogger
66  {
67 
68 protected:
69 
70  /* Determine distance between two spectra
71 
72  Distance is determined as
73 
74  (d_rt/rt_max_ + d_mz/mz_max_) / 2
75 
76  */
78  public DefaultParamHandler
79  {
80 public:
82  DefaultParamHandler("SpectraDistance")
83  {
84  defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
85  defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
86  defaultsToParam_();
87  }
88 
90  {
91  rt_max_ = (double) param_.getValue("rt_tolerance");
92  mz_max_ = (double) param_.getValue("mz_tolerance");
93 
94  return;
95  }
96 
97  double getSimilarity(const double d_rt, const double d_mz) const
98  {
99  // 1 - distance
100  return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
101  }
102 
103  // measure of SIMILARITY (not distance, i.e. 1-distance)!!
104  double operator()(const BaseFeature& first, const BaseFeature& second) const
105  {
106  // get RT distance:
107  double d_rt = fabs(first.getRT() - second.getRT());
108  double d_mz = fabs(first.getMZ() - second.getMZ());
109 
110  if (d_rt > rt_max_ || d_mz > mz_max_) {return 0; }
111 
112  // calculate similarity (0-1):
113  double sim = getSimilarity(d_rt, d_mz);
114 
115  return sim;
116  }
117 
118 protected:
119  double rt_max_;
120  double mz_max_;
121 
122  }; // end of SpectraDistance
123 
124 public:
125 
128 
131 
132  // @name Constructors and Destructors
133  // @{
135  SpectraMerger();
136 
138  SpectraMerger(const SpectraMerger& source);
139 
141  virtual ~SpectraMerger();
142  // @}
143 
144  // @name Operators
145  // @{
147  SpectraMerger& operator=(const SpectraMerger& source);
148  // @}
149 
150  // @name Merging functions
151  // @{
153  template <typename MapType>
155  {
156  IntList ms_levels = param_.getValue("block_method:ms_levels");
157  Int rt_block_size(param_.getValue("block_method:rt_block_size"));
158  double rt_max_length = (param_.getValue("block_method:rt_max_length"));
159 
160  if (rt_max_length == 0) // no rt restriction set?
161  {
162  rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
163  }
164 
165  for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
166  {
167  MergeBlocks spectra_to_merge;
168  Size idx_block(0);
169  SignedSize block_size_count(rt_block_size + 1);
170  Size idx_spectrum(0);
171  for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
172  {
173  if (Int(it1->getMSLevel()) == *it_mslevel)
174  {
175  // block full if it contains a maximum number of scans or if maximum rt length spanned
176  if (++block_size_count >= rt_block_size ||
177  exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
178  {
179  block_size_count = 0;
180  idx_block = idx_spectrum;
181  }
182  else
183  {
184  spectra_to_merge[idx_block].push_back(idx_spectrum);
185  }
186  }
187 
188  ++idx_spectrum;
189  }
190  // check if last block had sacrifice spectra
191  if (block_size_count == 0) //block just got initialized
192  {
193  spectra_to_merge[idx_block] = std::vector<Size>();
194  }
195 
196  // merge spectra, remove all old MS spectra and add new consensus spectra
197  mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
198  }
199 
200  exp.sortSpectra();
201 
202  return;
203  }
204 
206  template <typename MapType>
208  {
209 
210  // convert spectra's precursors to clusterizable data
211  Size data_size;
212  std::vector<BinaryTreeNode> tree;
213  Map<Size, Size> index_mapping;
214  // local scope to save memory - we do not need the clustering stuff later
215  {
216  std::vector<BaseFeature> data;
217 
218  for (Size i = 0; i < exp.size(); ++i)
219  {
220  if (exp[i].getMSLevel() != 2) continue;
221 
222  // remember which index in distance data ==> experiment index
223  index_mapping[data.size()] = i;
224 
225  // make cluster element
226  BaseFeature bf;
227  bf.setRT(exp[i].getRT());
228  std::vector<Precursor> pcs = exp[i].getPrecursors();
229  if (pcs.empty()) throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
230  if (pcs.size() > 1) LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
231  bf.setMZ(pcs[0].getMZ());
232  data.push_back(bf);
233  }
234  data_size = data.size();
235 
236  SpectraDistance_ llc;
237  llc.setParameters(param_.copy("precursor_method:", true));
238  SingleLinkage sl;
239  DistanceMatrix<float> dist; // will be filled
241 
242  //ch.setThreshold(0.99);
243  // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
244  ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
245  }
246 
247  // extract the clusters
248  ClusterAnalyzer ca;
249  std::vector<std::vector<Size> > clusters;
250  // count number of real tree nodes (not the -1 ones):
251  Size node_count = 0;
252  for (Size ii = 0; ii < tree.size(); ++ii)
253  {
254  if (tree[ii].distance >= 1) tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
255  if (tree[ii].distance != -1) ++node_count;
256  }
257  ca.cut(data_size - node_count, tree, clusters);
258 
259  //std::cerr << "Treesize: " << (tree.size()+1) << " #clusters: " << clusters.size() << std::endl;
260  //std::cerr << "tree:\n" << ca.newickTree(tree, true) << "\n";
261 
262  // convert to blocks
263  MergeBlocks spectra_to_merge;
264 
265  for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
266  {
267  if (clusters[i_outer].size() <= 1) continue;
268  // init block with first cluster element
269  Size cl_index0 = clusters[i_outer][0];
270  spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
271  // add all other elements
272  for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
273  {
274  Size cl_index = clusters[i_outer][i_inner];
275  spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[cl_index]);
276  }
277  }
278 
279  // do it
280  mergeSpectra_(exp, spectra_to_merge, 2);
281 
282  exp.sortSpectra();
283 
284  return;
285  }
286 
293  template <typename MapType>
294  void average(MapType& exp, String average_type)
295  {
296  // MS level to be averaged
297  int ms_level = param_.getValue("average_gaussian:ms_level");
298  if (average_type == "tophat")
299  {
300  ms_level = param_.getValue("average_tophat:ms_level");
301  }
302 
303  // spectrum type (profile, centroid or automatic)
304  String spectrum_type = param_.getValue("average_gaussian:spectrum_type");
305  if (average_type == "tophat")
306  {
307  spectrum_type = param_.getValue("average_tophat:spectrum_type");
308  }
309 
310  // parameters for Gaussian averaging
311  double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
312  double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
313  double cutoff(param_.getValue("average_gaussian:cutoff"));
314 
315  // parameters for Top-Hat averaging
316  bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
317  double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
318  double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
319  int range_scans = range;
320  if ((range_scans % 2) == 0)
321  {
322  ++range_scans;
323  }
324  range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
325 
326  AverageBlocks spectra_to_average_over;
327 
328  // loop over RT
329  int n(0); // spectrum index
330  for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
331  {
332  if (Int(it_rt->getMSLevel()) == ms_level)
333  {
334  int m; // spectrum index
335  int steps;
336  bool terminate_now;
337  typename MapType::const_iterator it_rt_2;
338 
339  // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
340  steps = 0;
341  m = n + 1;
342  it_rt_2 = it_rt + 1;
343  terminate_now = false;
344  while (it_rt_2 != exp.end() && !terminate_now)
345  {
346  if (Int(it_rt_2->getMSLevel()) == ms_level)
347  {
348  double weight = 1;
349  if (average_type == "gaussian")
350  {
351  weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
352  }
353  std::pair<Size, double> p(m, weight);
354  spectra_to_average_over[n].push_back(p);
355  ++steps;
356  }
357  if (average_type == "gaussian")
358  {
359  // Gaussian
360  terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
361  }
362  else if (unit)
363  {
364  // Top-Hat with RT unit = scans
365  terminate_now = (steps > range_scans);
366  }
367  else
368  {
369  // Top-Hat with RT unit = seconds
370  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
371  }
372  ++m;
373  ++it_rt_2;
374  }
375 
376  // go backward
377  steps = 0;
378  m = n;
379  it_rt_2 = it_rt;
380  terminate_now = false;
381  while (it_rt_2 != exp.begin() && !terminate_now)
382  {
383  if (Int(it_rt_2->getMSLevel()) == ms_level)
384  {
385  double weight = 1;
386  if (average_type == "gaussian")
387  {
388  weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
389  }
390  std::pair<Size, double> p(m, weight);
391  spectra_to_average_over[n].push_back(p);
392  ++steps;
393  }
394  if (average_type == "gaussian")
395  {
396  // Gaussian
397  terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
398  }
399  else if (unit)
400  {
401  // Top-Hat with RT unit = scans
402  terminate_now = (steps > range_scans);
403  }
404  else
405  {
406  // Top-Hat with RT unit = seconds
407  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
408  }
409  --m;
410  --it_rt_2;
411  }
412 
413  }
414  ++n;
415  }
416 
417  // normalize weights
418  for (AverageBlocks::Iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
419  {
420  double sum(0.0);
421  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
422  {
423  sum += it2->second;
424  }
425 
426  for (std::vector<std::pair<Size, double> >::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
427  {
428  (*it2).second /= sum;
429  }
430  }
431 
432  // determine type of spectral data (profile or centroided)
434  if (spectrum_type == "automatic")
435  {
436  Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
437  type = exp[idx].getType();
438  if (type == SpectrumSettings::UNKNOWN)
439  {
440  type = PeakTypeEstimator().estimateType(exp[idx].begin(), exp[idx].end());
441  }
442  }
443  else if (spectrum_type == "profile")
444  {
446  }
447  else if (spectrum_type == "centroid")
448  {
450  }
451 
452  // generate new spectra
453  if (type == SpectrumSettings::PEAKS)
454  {
455  averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
456  }
457  else
458  {
459  averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
460  }
461 
462  exp.sortSpectra();
463 
464  return;
465  }
466 
467  // @}
468 
469 protected:
470 
481  template <typename MapType>
482  void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
483  {
484  double mz_binning_width(param_.getValue("mz_binning_width"));
485  String mz_binning_unit(param_.getValue("mz_binning_width_unit"));
486 
487  // merge spectra
488  MapType merged_spectra;
489 
490  Map<Size, Size> cluster_sizes;
491  std::set<Size> merged_indices;
492 
493  // set up alignment
494  SpectrumAlignment sas;
495  Param p;
496  p.setValue("tolerance", mz_binning_width);
497  if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm")) throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
498  // TODO : SpectrumAlignment does not implement is_relative_tolerance
499  p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
500  sas.setParameters(p);
501  std::vector<std::pair<Size, Size> > alignment;
502 
503  Size count_peaks_aligned(0);
504  Size count_peaks_overall(0);
505 
506  // each BLOCK
507  for (Map<Size, std::vector<Size> >::ConstIterator it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
508  {
509 
510  ++cluster_sizes[it->second.size() + 1]; // for stats
511 
512  typename MapType::SpectrumType consensus_spec = exp[it->first];
513  consensus_spec.setMSLevel(ms_level);
514 
515  //consensus_spec.unify(exp[it->first]); // append meta info
516  merged_indices.insert(it->first);
517 
518  //typename MapType::SpectrumType all_peaks = exp[it->first];
519  double rt_average = consensus_spec.getRT();
520  double precursor_mz_average = 0.0;
521  Size precursor_count(0);
522  if (!consensus_spec.getPrecursors().empty())
523  {
524  precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
525  ++precursor_count;
526  }
527 
528  count_peaks_overall += consensus_spec.size();
529 
530  // block elements
531  for (std::vector<Size>::const_iterator sit = it->second.begin(); sit != it->second.end(); ++sit)
532  {
533  consensus_spec.unify(exp[*sit]); // append meta info
534  merged_indices.insert(*sit);
535 
536  rt_average += exp[*sit].getRT();
537  if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
538  {
539  precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
540  ++precursor_count;
541  }
542 
543  // merge data points
544  sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
545  //std::cerr << "alignment of " << it->first << " with " << *sit << " yielded " << alignment.size() << " common peaks!\n";
546  count_peaks_aligned += alignment.size();
547  count_peaks_overall += exp[*sit].size();
548 
549  Size align_index(0);
550  Size spec_b_index(0);
551 
552  // sanity check for number of peaks
553  Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
554  for (typename MapType::SpectrumType::ConstIterator pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
555  {
556  // either add aligned peak height to existing peak
557  if (alignment.size() > 0 && alignment[align_index].second == spec_b_index)
558  {
559  consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
560  pit->getIntensity());
561  ++align_index; // this aligned peak was explained, wait for next aligned peak ...
562  if (align_index == alignment.size()) alignment.clear(); // end reached -> avoid going into this block again
563  }
564  else // ... or add unaligned peak
565  {
566  consensus_spec.push_back(*pit);
567  }
568  ++spec_b_index;
569  }
570  consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
571  if (spec_a + spec_b - align_size != consensus_spec.size()) std::cerr << "\n\n ERRROR \n\n";
572  }
573  rt_average /= it->second.size() + 1;
574  consensus_spec.setRT(rt_average);
575 
576  if (ms_level >= 2)
577  {
578  if (precursor_count) precursor_mz_average /= precursor_count;
579  std::vector<Precursor> pcs = consensus_spec.getPrecursors();
580  //if (pcs.size()>1) LOG_WARN << "Removing excessive precursors - leaving only one per MS2 spectrum.\n";
581  pcs.resize(1);
582  pcs[0].setMZ(precursor_mz_average);
583  consensus_spec.setPrecursors(pcs);
584  }
585 
586  if (consensus_spec.empty()) continue;
587  else merged_spectra.addSpectrum(consensus_spec);
588  }
589 
590  LOG_INFO << "Cluster sizes:\n";
591  for (Map<Size, Size>::const_iterator it = cluster_sizes.begin(); it != cluster_sizes.end(); ++it)
592  {
593  LOG_INFO << " size " << it->first << ": " << it->second << "x\n";
594  }
595 
596  char buffer[200];
597  sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
598  (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
599  LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
600 
601  // remove all spectra that were within a cluster
602  typename MapType::SpectrumType empty_spec;
603  MapType exp_tmp;
604  for (Size i = 0; i < exp.size(); ++i)
605  {
606  if (merged_indices.count(i) == 0) // save unclustered ones
607  {
608  exp_tmp.addSpectrum(exp[i]);
609  exp[i] = empty_spec;
610  }
611  }
612 
613  //typedef std::vector<typename MapType::SpectrumType> Base;
614  //exp.Base::operator=(exp_tmp);
615  exp.clear(false);
616  exp.getSpectra().insert(exp.end(), exp_tmp.begin(), exp_tmp.end());
617 
618  // exp.erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<typename MapType::SpectrumType>(ListUtils::create<int>(String(ms_level)), false)), exp.end());
619 
620  // ... and add consensus spectra
621  exp.getSpectra().insert(exp.end(), merged_spectra.begin(), merged_spectra.end());
622 
623  }
624 
645  template <typename MapType>
646  void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
647  {
648  MapType exp_tmp; // temporary experiment for averaged spectra
649 
650  double mz_binning_width(param_.getValue("mz_binning_width"));
651  String mz_binning_unit(param_.getValue("mz_binning_width_unit"));
652 
653  unsigned progress = 0;
654  std::stringstream progress_message;
655  progress_message << "averaging profile spectra of MS level " << ms_level;
656  startProgress(0, spectra_to_average_over.size(), progress_message.str());
657 
658  // loop over blocks
659  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
660  {
661  setProgress(++progress);
662 
663  // loop over spectra in blocks
664  std::vector<double> mz_positions_all; // m/z positions from all spectra
665  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
666  {
667  // loop over m/z positions
668  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[it2->first].begin(); it_mz < exp[it2->first].end(); ++it_mz)
669  {
670  mz_positions_all.push_back(it_mz->getMZ());
671  }
672  }
673 
674  sort(mz_positions_all.begin(), mz_positions_all.end());
675 
676  std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
677  std::vector<double> intensities;
678  double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
679  double delta_mz(mz_binning_width); // for m/z unit Da
680  for (std::vector<double>::iterator it_mz = mz_positions_all.begin(); it_mz < mz_positions_all.end(); ++it_mz)
681  {
682  if (mz_binning_unit == "ppm")
683  {
684  delta_mz = mz_binning_width * (*it_mz) / 1000000;
685  }
686 
687  if (((*it_mz) - last_mz) > delta_mz)
688  {
689  mz_positions.push_back(*it_mz);
690  intensities.push_back(0.0);
691  last_mz = *it_mz;
692  }
693  }
694 
695  // loop over spectra in blocks
696  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
697  {
698  SplineSpectrum spline(exp[it2->first]);
700 
701  // loop over m/z positions
702  for (Size i = 0; i < mz_positions.size(); ++i)
703  {
704  if ((spline.getMzMin() < mz_positions[i]) && (mz_positions[i] < spline.getMzMax()))
705  {
706  intensities[i] += nav.eval(mz_positions[i]) * (it2->second); // spline-interpolated intensity * weight
707  }
708  }
709  }
710 
711  // update spectrum
712  typename MapType::SpectrumType average_spec = exp[it->first];
713  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
714  //average_spec.setMSLevel(ms_level);
715 
716  // refill spectrum
717  for (Size i = 0; i < mz_positions.size(); ++i)
718  {
719  typename MapType::PeakType peak;
720  peak.setMZ(mz_positions[i]);
721  peak.setIntensity(intensities[i]);
722  average_spec.push_back(peak);
723  }
724 
725  // store spectrum temporarily
726  exp_tmp.addSpectrum(average_spec);
727  }
728 
729  endProgress();
730 
731  // loop over blocks
732  int n(0);
733  //typename MapType::SpectrumType empty_spec;
734  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
735  {
736  exp[it->first] = exp_tmp[n];
737  //exp_tmp[n] = empty_spec;
738  ++n;
739  }
740 
741  }
742 
758  template <typename MapType>
759  void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
760  {
761  MapType exp_tmp; // temporary experiment for averaged spectra
762 
763  double mz_binning_width(param_.getValue("mz_binning_width"));
764  String mz_binning_unit(param_.getValue("mz_binning_width_unit"));
765 
766  unsigned progress = 0;
767  ProgressLogger logger;
768  std::stringstream progress_message;
769  progress_message << "averaging centroid spectra of MS level " << ms_level;
770  logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
771 
772  // loop over blocks
773  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
774  {
775  logger.setProgress(++progress);
776 
777  // collect peaks from all spectra
778  // loop over spectra in blocks
779  std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
780  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
781  {
782  // loop over m/z positions
783  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[it2->first].begin(); it_mz < exp[it2->first].end(); ++it_mz)
784  {
785  std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * it2->second)); // m/z, intensity * weight
786  mz_intensity_all.push_back(mz_intensity);
787  }
788  }
789 
790  sort(mz_intensity_all.begin(), mz_intensity_all.end(), SpectraMerger::compareByFirst);
791 
792  // generate new spectrum
793  std::vector<double> mz_new;
794  std::vector<double> intensity_new;
795  double last_mz = std::numeric_limits<double>::min();
796  double delta_mz = mz_binning_width;
797  double sum_mz(0);
798  double sum_intensity(0);
799  Size count(0);
800  for (std::vector<std::pair<double, double> >::const_iterator it_mz = mz_intensity_all.begin(); it_mz != mz_intensity_all.end(); ++it_mz)
801  {
802  if (mz_binning_unit == "ppm")
803  {
804  delta_mz = mz_binning_width * (it_mz->first) / 1000000;
805  }
806 
807  if (((it_mz->first - last_mz) > delta_mz) && (count > 0))
808  {
809  mz_new.push_back(sum_mz / count);
810  intensity_new.push_back(sum_intensity); // intensities already weighted
811 
812  sum_mz = 0;
813  sum_intensity = 0;
814 
815  last_mz = it_mz->first;
816  count = 0;
817  }
818 
819  sum_mz += it_mz->first;
820  sum_intensity += it_mz->second;
821  ++count;
822  }
823  if (count > 0)
824  {
825  mz_new.push_back(sum_mz / count);
826  intensity_new.push_back(sum_intensity); // intensities already weighted
827  }
828 
829  // update spectrum
830  typename MapType::SpectrumType average_spec = exp[it->first];
831  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
832  //average_spec.setMSLevel(ms_level);
833 
834  // refill spectrum
835  for (Size i = 0; i < mz_new.size(); ++i)
836  {
837  typename MapType::PeakType peak;
838  peak.setMZ(mz_new[i]);
839  peak.setIntensity(intensity_new[i]);
840  average_spec.push_back(peak);
841  }
842 
843  // store spectrum temporarily
844  exp_tmp.addSpectrum(average_spec);
845 
846  }
847 
848  logger.endProgress();
849 
850  // loop over blocks
851  int n(0);
852  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
853  {
854  exp[it->first] = exp_tmp[n];
855  ++n;
856  }
857 
858  }
859 
863  bool static compareByFirst(std::pair<double, double> i, std::pair<double, double> j)
864  {
865  return i.first < j.first;
866  }
867 
868  };
869 
870 }
871 #endif //OPENMS_FILTERING_TRANSFORMERS_SPECTRAMERGER_H
Map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:130
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
A more convenient string class.
Definition: String.h:57
Base::iterator Iterator
Definition: Map.h:81
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:122
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:203
#define LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:455
void sortByPosition()
Lexicographically sorts the peaks by their position.
Peak data (also called centroided data or stick data)
Definition: SpectrumSettings.h:74
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Definition: MSExperiment.h:831
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode&#39;s)
Definition: ClusterAnalyzer.h:52
void endProgress() const
Ends the progress display.
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:71
static bool compareByFirst(std::pair< double, double > i, std::pair< double, double > j)
comparator for sorting peaks (m/z, intensity)
Definition: SpectraMerger.h:863
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:482
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:68
double eval(double mz)
returns spline interpolated intensity at m/z (fast access since we can start search from lastPackage)...
unsigned int UInt
Unsigned integer type.
Definition: Types.h:95
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:154
Iterator begin()
Definition: MSExperiment.h:162
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:59
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:64
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:135
iterator class for access of spline packages
Definition: SplineSpectrum.h:102
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Definition: MSExperiment.h:649
Base::const_iterator const_iterator
Definition: MSExperiment.h:130
Raw data (also called profile data)
Definition: SpectrumSettings.h:75
SplineSpectrum::Navigator getNavigator()
returns an iterator for access of spline packages
Size size() const
Definition: MSExperiment.h:132
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:120
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:111
void setParameters(const Param &param)
Sets the parameters.
Unknown spectrum type.
Definition: SpectrumSettings.h:73
SpectraDistance_()
Definition: SpectraMerger.h:81
SpectrumSettings::SpectrumType estimateType(const PeakConstIterator &begin, const PeakConstIterator &end) const
Estimates the peak type of the peaks in the iterator range based on the variance of inter-peak distan...
Definition: PeakTypeEstimator.h:59
A basic LC-MS feature.
Definition: BaseFeature.h:56
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
Iterator end()
Definition: MSExperiment.h:172
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
double getMzMax() const
returns the maximum m/z of the spectrum
Data structure for spline interpolation of MS1 spectra.
Definition: SplineSpectrum.h:57
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:215
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:197
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:87
double rt_max_
Definition: SpectraMerger.h:119
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:104
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:97
void setProgress(SignedSize value) const
Sets the current progress.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:55
void average(MapType &exp, String average_type)
average over neighbouring spectra
Definition: SpectraMerger.h:294
void setMSLevel(UInt ms_level)
Sets the MS level.
Definition: SpectraMerger.h:77
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via &#39;tolerance&#39; parameter) ali...
Definition: SpectrumAlignment.h:66
Base::const_iterator ConstIterator
Definition: Map.h:82
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Estimates if the data of a spectrum is raw data or peak data.
Definition: PeakTypeEstimator.h:50
Management and storage of parameters / INI files.
Definition: Param.h:75
Map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:127
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:759
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:209
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:58
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
Illegal self operation exception.
Definition: Exception.h:379
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:118
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
double mz_max_
Definition: SpectraMerger.h:120
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSExperiment.h:929
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
double getMzMin() const
returns the minimum m/z of the spectrum
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:646
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:207
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Definition: MSExperiment.h:837
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:64
double getRT() const
int Int
Signed integer type.
Definition: Types.h:103
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:51
void updateMembers_()
This method is used to update extra member variables at the end of the setParameters() method...
Definition: SpectraMerger.h:89
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:108
Not all required information provided.
Definition: Exception.h:196

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:04 using doxygen 1.8.13