OpenMS  2.4.0
SpectraMerger.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
33 // --------------------------------------------------------------------------
34 //
35 #pragma once
36 
50 #include <vector>
51 
52 namespace OpenMS
53 {
54 
63  class OPENMS_DLLAPI SpectraMerger :
64  public DefaultParamHandler, public ProgressLogger
65  {
66 
67 protected:
68 
69  /* Determine distance between two spectra
70 
71  Distance is determined as
72 
73  (d_rt/rt_max_ + d_mz/mz_max_) / 2
74 
75  */
77  public DefaultParamHandler
78  {
79 public:
81  DefaultParamHandler("SpectraDistance")
82  {
83  defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
84  defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
85  defaultsToParam_();
86  }
87 
88  void updateMembers_() override
89  {
90  rt_max_ = (double) param_.getValue("rt_tolerance");
91  mz_max_ = (double) param_.getValue("mz_tolerance");
92 
93  return;
94  }
95 
96  double getSimilarity(const double d_rt, const double d_mz) const
97  {
98  // 1 - distance
99  return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
100  }
101 
102  // measure of SIMILARITY (not distance, i.e. 1-distance)!!
103  double operator()(const BaseFeature& first, const BaseFeature& second) const
104  {
105  // get RT distance:
106  double d_rt = fabs(first.getRT() - second.getRT());
107  double d_mz = fabs(first.getMZ() - second.getMZ());
108 
109  if (d_rt > rt_max_ || d_mz > mz_max_)
110  {
111  return 0;
112  }
113 
114  // calculate similarity (0-1):
115  double sim = getSimilarity(d_rt, d_mz);
116 
117  return sim;
118  }
119 
120 protected:
121  double rt_max_;
122  double mz_max_;
123 
124  }; // end of SpectraDistance
125 
126 public:
127 
130 
133 
134  // @name Constructors and Destructors
135  // @{
137  SpectraMerger();
138 
140  SpectraMerger(const SpectraMerger& source);
141 
143  ~SpectraMerger() override;
144  // @}
145 
146  // @name Operators
147  // @{
149  SpectraMerger& operator=(const SpectraMerger& source);
150  // @}
151 
152  // @name Merging functions
153  // @{
155  template <typename MapType>
157  {
158  IntList ms_levels = param_.getValue("block_method:ms_levels");
159  Int rt_block_size(param_.getValue("block_method:rt_block_size"));
160  double rt_max_length = (param_.getValue("block_method:rt_max_length"));
161 
162  if (rt_max_length == 0) // no rt restriction set?
163  {
164  rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
165  }
166 
167  for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
168  {
169  MergeBlocks spectra_to_merge;
170  Size idx_block(0);
171  SignedSize block_size_count(rt_block_size + 1);
172  Size idx_spectrum(0);
173  for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
174  {
175  if (Int(it1->getMSLevel()) == *it_mslevel)
176  {
177  // block full if it contains a maximum number of scans or if maximum rt length spanned
178  if (++block_size_count >= rt_block_size ||
179  exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
180  {
181  block_size_count = 0;
182  idx_block = idx_spectrum;
183  }
184  else
185  {
186  spectra_to_merge[idx_block].push_back(idx_spectrum);
187  }
188  }
189 
190  ++idx_spectrum;
191  }
192  // check if last block had sacrifice spectra
193  if (block_size_count == 0) //block just got initialized
194  {
195  spectra_to_merge[idx_block] = std::vector<Size>();
196  }
197 
198  // merge spectra, remove all old MS spectra and add new consensus spectra
199  mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
200  }
201 
202  exp.sortSpectra();
203 
204  return;
205  }
206 
208  template <typename MapType>
210  {
211 
212  // convert spectra's precursors to clusterizable data
213  Size data_size;
214  std::vector<BinaryTreeNode> tree;
215  Map<Size, Size> index_mapping;
216  // local scope to save memory - we do not need the clustering stuff later
217  {
218  std::vector<BaseFeature> data;
219 
220  for (Size i = 0; i < exp.size(); ++i)
221  {
222  if (exp[i].getMSLevel() != 2)
223  {
224  continue;
225  }
226 
227  // remember which index in distance data ==> experiment index
228  index_mapping[data.size()] = i;
229 
230  // make cluster element
231  BaseFeature bf;
232  bf.setRT(exp[i].getRT());
233  std::vector<Precursor> pcs = exp[i].getPrecursors();
234  if (pcs.empty())
235  {
236  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
237  }
238  if (pcs.size() > 1)
239  {
240  LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
241  }
242  bf.setMZ(pcs[0].getMZ());
243  data.push_back(bf);
244  }
245  data_size = data.size();
246 
247  SpectraDistance_ llc;
248  llc.setParameters(param_.copy("precursor_method:", true));
249  SingleLinkage sl;
250  DistanceMatrix<float> dist; // will be filled
252 
253  //ch.setThreshold(0.99);
254  // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
255  ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
256  }
257 
258  // extract the clusters
259  ClusterAnalyzer ca;
260  std::vector<std::vector<Size> > clusters;
261  // count number of real tree nodes (not the -1 ones):
262  Size node_count = 0;
263  for (Size ii = 0; ii < tree.size(); ++ii)
264  {
265  if (tree[ii].distance >= 1)
266  {
267  tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
268  }
269  if (tree[ii].distance != -1)
270  {
271  ++node_count;
272  }
273  }
274  ca.cut(data_size - node_count, tree, clusters);
275 
276  //std::cerr << "Treesize: " << (tree.size()+1) << " #clusters: " << clusters.size() << std::endl;
277  //std::cerr << "tree:\n" << ca.newickTree(tree, true) << "\n";
278 
279  // convert to blocks
280  MergeBlocks spectra_to_merge;
281 
282  for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
283  {
284  if (clusters[i_outer].size() <= 1)
285  {
286  continue;
287  }
288  // init block with first cluster element
289  Size cl_index0 = clusters[i_outer][0];
290  spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
291  // add all other elements
292  for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
293  {
294  Size cl_index = clusters[i_outer][i_inner];
295  spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[cl_index]);
296  }
297  }
298 
299  // do it
300  mergeSpectra_(exp, spectra_to_merge, 2);
301 
302  exp.sortSpectra();
303 
304  return;
305  }
306 
313  template <typename MapType>
314  void average(MapType& exp, String average_type)
315  {
316  // MS level to be averaged
317  int ms_level = param_.getValue("average_gaussian:ms_level");
318  if (average_type == "tophat")
319  {
320  ms_level = param_.getValue("average_tophat:ms_level");
321  }
322 
323  // spectrum type (profile, centroid or automatic)
324  String spectrum_type = param_.getValue("average_gaussian:spectrum_type");
325  if (average_type == "tophat")
326  {
327  spectrum_type = param_.getValue("average_tophat:spectrum_type");
328  }
329 
330  // parameters for Gaussian averaging
331  double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
332  double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
333  double cutoff(param_.getValue("average_gaussian:cutoff"));
334 
335  // parameters for Top-Hat averaging
336  bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
337  double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
338  double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
339  int range_scans = range;
340  if ((range_scans % 2) == 0)
341  {
342  ++range_scans;
343  }
344  range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
345 
346  AverageBlocks spectra_to_average_over;
347 
348  // loop over RT
349  int n(0); // spectrum index
350  for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
351  {
352  if (Int(it_rt->getMSLevel()) == ms_level)
353  {
354  int m; // spectrum index
355  int steps;
356  bool terminate_now;
357  typename MapType::const_iterator it_rt_2;
358 
359  // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
360  steps = 0;
361  m = n + 1;
362  it_rt_2 = it_rt + 1;
363  terminate_now = false;
364  while (it_rt_2 != exp.end() && !terminate_now)
365  {
366  if (Int(it_rt_2->getMSLevel()) == ms_level)
367  {
368  double weight = 1;
369  if (average_type == "gaussian")
370  {
371  weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
372  }
373  std::pair<Size, double> p(m, weight);
374  spectra_to_average_over[n].push_back(p);
375  ++steps;
376  }
377  if (average_type == "gaussian")
378  {
379  // Gaussian
380  terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
381  }
382  else if (unit)
383  {
384  // Top-Hat with RT unit = scans
385  terminate_now = (steps > range_scans);
386  }
387  else
388  {
389  // Top-Hat with RT unit = seconds
390  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
391  }
392  ++m;
393  ++it_rt_2;
394  }
395 
396  // go backward
397  steps = 0;
398  m = n;
399  it_rt_2 = it_rt;
400  terminate_now = false;
401  while (it_rt_2 != exp.begin() && !terminate_now)
402  {
403  if (Int(it_rt_2->getMSLevel()) == ms_level)
404  {
405  double weight = 1;
406  if (average_type == "gaussian")
407  {
408  weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
409  }
410  std::pair<Size, double> p(m, weight);
411  spectra_to_average_over[n].push_back(p);
412  ++steps;
413  }
414  if (average_type == "gaussian")
415  {
416  // Gaussian
417  terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
418  }
419  else if (unit)
420  {
421  // Top-Hat with RT unit = scans
422  terminate_now = (steps > range_scans);
423  }
424  else
425  {
426  // Top-Hat with RT unit = seconds
427  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
428  }
429  --m;
430  --it_rt_2;
431  }
432 
433  }
434  ++n;
435  }
436 
437  // normalize weights
438  for (AverageBlocks::Iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
439  {
440  double sum(0.0);
441  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
442  {
443  sum += it2->second;
444  }
445 
446  for (std::vector<std::pair<Size, double> >::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
447  {
448  (*it2).second /= sum;
449  }
450  }
451 
452  // determine type of spectral data (profile or centroided)
454  if (spectrum_type == "automatic")
455  {
456  Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
457  type = exp[idx].getType();
458  if (type == SpectrumSettings::UNKNOWN)
459  {
460  type = PeakTypeEstimator().estimateType(exp[idx].begin(), exp[idx].end());
461  }
462  }
463  else if (spectrum_type == "profile")
464  {
466  }
467  else if (spectrum_type == "centroid")
468  {
470  }
471 
472  // generate new spectra
473  if (type == SpectrumSettings::CENTROID)
474  {
475  averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
476  }
477  else
478  {
479  averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
480  }
481 
482  exp.sortSpectra();
483 
484  return;
485  }
486 
487  // @}
488 
489 protected:
490 
501  template <typename MapType>
502  void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
503  {
504  double mz_binning_width(param_.getValue("mz_binning_width"));
505  String mz_binning_unit(param_.getValue("mz_binning_width_unit"));
506 
507  // merge spectra
508  MapType merged_spectra;
509 
510  Map<Size, Size> cluster_sizes;
511  std::set<Size> merged_indices;
512 
513  // set up alignment
514  SpectrumAlignment sas;
515  Param p;
516  p.setValue("tolerance", mz_binning_width);
517  if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm"))
518  {
519  throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
520  }
521 
522  p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
523  sas.setParameters(p);
524  std::vector<std::pair<Size, Size> > alignment;
525 
526  Size count_peaks_aligned(0);
527  Size count_peaks_overall(0);
528 
529  // each BLOCK
530  for (auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
531  {
532  ++cluster_sizes[it->second.size() + 1]; // for stats
533 
534  typename MapType::SpectrumType consensus_spec = exp[it->first];
535  consensus_spec.setMSLevel(ms_level);
536 
537  //consensus_spec.unify(exp[it->first]); // append meta info
538  merged_indices.insert(it->first);
539 
540  //typename MapType::SpectrumType all_peaks = exp[it->first];
541  double rt_average = consensus_spec.getRT();
542  double precursor_mz_average = 0.0;
543  Size precursor_count(0);
544  if (!consensus_spec.getPrecursors().empty())
545  {
546  precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
547  ++precursor_count;
548  }
549 
550  count_peaks_overall += consensus_spec.size();
551 
552  // block elements
553  for (auto sit = it->second.begin(); sit != it->second.end(); ++sit)
554  {
555  consensus_spec.unify(exp[*sit]); // append meta info
556  merged_indices.insert(*sit);
557 
558  rt_average += exp[*sit].getRT();
559  if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
560  {
561  precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
562  ++precursor_count;
563  }
564 
565  // merge data points
566  sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
567  //std::cerr << "alignment of " << it->first << " with " << *sit << " yielded " << alignment.size() << " common peaks!\n";
568  count_peaks_aligned += alignment.size();
569  count_peaks_overall += exp[*sit].size();
570 
571  Size align_index(0);
572  Size spec_b_index(0);
573 
574  // sanity check for number of peaks
575  Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
576  for (auto pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
577  {
578  if (alignment.size() == 0 || alignment[align_index].second != spec_b_index)
579  // ... add unaligned peak
580  {
581  consensus_spec.push_back(*pit);
582  }
583  // or add aligned peak height to ALL corresponding existing peaks
584  else
585  {
586  Size counter(0);
587  Size copy_of_align_index(align_index);
588 
589  while (alignment.size() > 0 &&
590  copy_of_align_index < alignment.size() &&
591  alignment[copy_of_align_index].second == spec_b_index)
592  {
593  ++copy_of_align_index;
594  ++counter;
595  } // Count the number of peaks in a which correspond to a single b peak.
596 
597  while (alignment.size() > 0 &&
598  align_index < alignment.size() &&
599  alignment[align_index].second == spec_b_index)
600  {
601  consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
602  (pit->getIntensity() / (double)counter)); // add the intensity divided by the number of peaks
603  ++align_index; // this aligned peak was explained, wait for next aligned peak ...
604  if (align_index == alignment.size())
605  {
606  alignment.clear(); // end reached -> avoid going into this block again
607  }
608  }
609  align_size = align_size + 1 - counter; //Decrease align_size by number of
610  }
611  ++spec_b_index;
612  }
613  consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
614  if (spec_a + spec_b - align_size != consensus_spec.size())
615  {
616  LOG_WARN << "wrong number of features after merge. Expected: " << spec_a + spec_b - align_size << " got: " << consensus_spec.size() << "\n";
617  }
618  }
619  rt_average /= it->second.size() + 1;
620  consensus_spec.setRT(rt_average);
621 
622  if (ms_level >= 2)
623  {
624  if (precursor_count)
625  {
626  precursor_mz_average /= precursor_count;
627  }
628  std::vector<Precursor> pcs = consensus_spec.getPrecursors();
629  //if (pcs.size()>1) LOG_WARN << "Removing excessive precursors - leaving only one per MS2 spectrum.\n";
630  pcs.resize(1);
631  pcs[0].setMZ(precursor_mz_average);
632  consensus_spec.setPrecursors(pcs);
633  }
634 
635  if (consensus_spec.empty())
636  {
637  continue;
638  }
639  else
640  {
641  merged_spectra.addSpectrum(consensus_spec);
642  }
643  }
644 
645  LOG_INFO << "Cluster sizes:\n";
646  for (Map<Size, Size>::const_iterator it = cluster_sizes.begin(); it != cluster_sizes.end(); ++it)
647  {
648  LOG_INFO << " size " << it->first << ": " << it->second << "x\n";
649  }
650 
651  char buffer[200];
652  sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
653  (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
654  LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
655 
656  // remove all spectra that were within a cluster
657  typename MapType::SpectrumType empty_spec;
658  MapType exp_tmp;
659  for (Size i = 0; i < exp.size(); ++i)
660  {
661  if (merged_indices.count(i) == 0) // save unclustered ones
662  {
663  exp_tmp.addSpectrum(exp[i]);
664  exp[i] = empty_spec;
665  }
666  }
667 
668  //typedef std::vector<typename MapType::SpectrumType> Base;
669  //exp.Base::operator=(exp_tmp);
670  exp.clear(false);
671  exp.getSpectra().insert(exp.end(), exp_tmp.begin(), exp_tmp.end());
672 
673  // exp.erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<typename MapType::SpectrumType>(ListUtils::create<int>(String(ms_level)), false)), exp.end());
674 
675  // ... and add consensus spectra
676  exp.getSpectra().insert(exp.end(), merged_spectra.begin(), merged_spectra.end());
677 
678  }
679 
700  template <typename MapType>
701  void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
702  {
703  MapType exp_tmp; // temporary experiment for averaged spectra
704 
705  double mz_binning_width(param_.getValue("mz_binning_width"));
706  String mz_binning_unit(param_.getValue("mz_binning_width_unit"));
707 
708  unsigned progress = 0;
709  std::stringstream progress_message;
710  progress_message << "averaging profile spectra of MS level " << ms_level;
711  startProgress(0, spectra_to_average_over.size(), progress_message.str());
712 
713  // loop over blocks
714  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
715  {
716  setProgress(++progress);
717 
718  // loop over spectra in blocks
719  std::vector<double> mz_positions_all; // m/z positions from all spectra
720  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
721  {
722  // loop over m/z positions
723  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[it2->first].begin(); it_mz < exp[it2->first].end(); ++it_mz)
724  {
725  mz_positions_all.push_back(it_mz->getMZ());
726  }
727  }
728 
729  sort(mz_positions_all.begin(), mz_positions_all.end());
730 
731  std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
732  std::vector<double> intensities;
733  double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
734  double delta_mz(mz_binning_width); // for m/z unit Da
735  for (std::vector<double>::iterator it_mz = mz_positions_all.begin(); it_mz < mz_positions_all.end(); ++it_mz)
736  {
737  if (mz_binning_unit == "ppm")
738  {
739  delta_mz = mz_binning_width * (*it_mz) / 1000000;
740  }
741 
742  if (((*it_mz) - last_mz) > delta_mz)
743  {
744  mz_positions.push_back(*it_mz);
745  intensities.push_back(0.0);
746  last_mz = *it_mz;
747  }
748  }
749 
750  // loop over spectra in blocks
751  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
752  {
753  SplineSpectrum spline(exp[it2->first]);
755 
756  // loop over m/z positions
757  for (Size i = 0; i < mz_positions.size(); ++i)
758  {
759  if ((spline.getMzMin() < mz_positions[i]) && (mz_positions[i] < spline.getMzMax()))
760  {
761  intensities[i] += nav.eval(mz_positions[i]) * (it2->second); // spline-interpolated intensity * weight
762  }
763  }
764  }
765 
766  // update spectrum
767  typename MapType::SpectrumType average_spec = exp[it->first];
768  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
769  //average_spec.setMSLevel(ms_level);
770 
771  // refill spectrum
772  for (Size i = 0; i < mz_positions.size(); ++i)
773  {
774  typename MapType::PeakType peak;
775  peak.setMZ(mz_positions[i]);
776  peak.setIntensity(intensities[i]);
777  average_spec.push_back(peak);
778  }
779 
780  // store spectrum temporarily
781  exp_tmp.addSpectrum(average_spec);
782  }
783 
784  endProgress();
785 
786  // loop over blocks
787  int n(0);
788  //typename MapType::SpectrumType empty_spec;
789  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
790  {
791  exp[it->first] = exp_tmp[n];
792  //exp_tmp[n] = empty_spec;
793  ++n;
794  }
795 
796  }
797 
813  template <typename MapType>
814  void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
815  {
816  MapType exp_tmp; // temporary experiment for averaged spectra
817 
818  double mz_binning_width(param_.getValue("mz_binning_width"));
819  String mz_binning_unit(param_.getValue("mz_binning_width_unit"));
820 
821  unsigned progress = 0;
822  ProgressLogger logger;
823  std::stringstream progress_message;
824  progress_message << "averaging centroid spectra of MS level " << ms_level;
825  logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
826 
827  // loop over blocks
828  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
829  {
830  logger.setProgress(++progress);
831 
832  // collect peaks from all spectra
833  // loop over spectra in blocks
834  std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
835  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
836  {
837  // loop over m/z positions
838  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[it2->first].begin(); it_mz < exp[it2->first].end(); ++it_mz)
839  {
840  std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * it2->second)); // m/z, intensity * weight
841  mz_intensity_all.push_back(mz_intensity);
842  }
843  }
844 
845  sort(mz_intensity_all.begin(), mz_intensity_all.end(), SpectraMerger::compareByFirst);
846 
847  // generate new spectrum
848  std::vector<double> mz_new;
849  std::vector<double> intensity_new;
850  double last_mz = std::numeric_limits<double>::min();
851  double delta_mz = mz_binning_width;
852  double sum_mz(0);
853  double sum_intensity(0);
854  Size count(0);
855  for (std::vector<std::pair<double, double> >::const_iterator it_mz = mz_intensity_all.begin(); it_mz != mz_intensity_all.end(); ++it_mz)
856  {
857  if (mz_binning_unit == "ppm")
858  {
859  delta_mz = mz_binning_width * (it_mz->first) / 1000000;
860  }
861 
862  if (((it_mz->first - last_mz) > delta_mz) && (count > 0))
863  {
864  mz_new.push_back(sum_mz / count);
865  intensity_new.push_back(sum_intensity); // intensities already weighted
866 
867  sum_mz = 0;
868  sum_intensity = 0;
869 
870  last_mz = it_mz->first;
871  count = 0;
872  }
873 
874  sum_mz += it_mz->first;
875  sum_intensity += it_mz->second;
876  ++count;
877  }
878  if (count > 0)
879  {
880  mz_new.push_back(sum_mz / count);
881  intensity_new.push_back(sum_intensity); // intensities already weighted
882  }
883 
884  // update spectrum
885  typename MapType::SpectrumType average_spec = exp[it->first];
886  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
887  //average_spec.setMSLevel(ms_level);
888 
889  // refill spectrum
890  for (Size i = 0; i < mz_new.size(); ++i)
891  {
892  typename MapType::PeakType peak;
893  peak.setMZ(mz_new[i]);
894  peak.setIntensity(intensity_new[i]);
895  average_spec.push_back(peak);
896  }
897 
898  // store spectrum temporarily
899  exp_tmp.addSpectrum(average_spec);
900 
901  }
902 
903  logger.endProgress();
904 
905  // loop over blocks
906  int n(0);
907  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
908  {
909  exp[it->first] = exp_tmp[n];
910  ++n;
911  }
912 
913  }
914 
918  bool static compareByFirst(std::pair<double, double> i, std::pair<double, double> j)
919  {
920  return i.first < j.first;
921  }
922 
923  };
924 
925 }
Map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:132
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
A more convenient string class.
Definition: String.h:57
Base::iterator Iterator
Definition: Map.h:80
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:121
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
#define LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:454
void sortByPosition()
Lexicographically sorts the peaks by their position.
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode&#39;s)
Definition: ClusterAnalyzer.h:51
void endProgress() const
Ends the progress display.
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:70
static bool compareByFirst(std::pair< double, double > i, std::pair< double, double > j)
comparator for sorting peaks (m/z, intensity)
Definition: SpectraMerger.h:918
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:502
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:67
double eval(double mz)
returns spline interpolated intensity at m/z (fast access since we can start search from lastPackage)...
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
profile data
Definition: SpectrumSettings.h:74
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:156
Iterator begin()
Definition: MSExperiment.h:157
static SpectrumSettings::SpectrumType estimateType(const PeakConstIterator &begin, const PeakConstIterator &end)
Estimates the peak type of the peaks in the iterator range based on intensity characteristics of up t...
Definition: PeakTypeEstimator.h:69
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:58
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:63
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
iterator class for access of spline packages
Definition: SplineSpectrum.h:101
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Base::const_iterator const_iterator
Definition: MSExperiment.h:125
SplineSpectrum::Navigator getNavigator()
returns an iterator for access of spline packages
Size size() const
Definition: MSExperiment.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:119
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:110
void setParameters(const Param &param)
Sets the parameters.
Unknown spectrum type.
Definition: SpectrumSettings.h:72
SpectraDistance_()
Definition: SpectraMerger.h:80
A basic LC-MS feature.
Definition: BaseFeature.h:55
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:450
Iterator end()
Definition: MSExperiment.h:167
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method...
Definition: SpectraMerger.h:88
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
double getMzMax() const
returns the maximum m/z of the spectrum
Data structure for spline interpolation of MS1 spectra.
Definition: SplineSpectrum.h:56
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:196
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:86
double rt_max_
Definition: SpectraMerger.h:121
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:103
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:96
void setProgress(SignedSize value) const
Sets the current progress.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void average(MapType &exp, String average_type)
average over neighbouring spectra
Definition: SpectraMerger.h:314
void setMSLevel(UInt ms_level)
Sets the MS level.
Definition: SpectraMerger.h:76
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via &#39;tolerance&#39; parameter) ali...
Definition: SpectrumAlignment.h:65
Base::const_iterator ConstIterator
Definition: Map.h:81
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Estimates if the data of a spectrum is raw data or peak data.
Definition: PeakTypeEstimator.h:49
Management and storage of parameters / INI files.
Definition: Param.h:74
Map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:129
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:814
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:208
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:57
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
Illegal self operation exception.
Definition: Exception.h:378
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:113
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
double mz_max_
Definition: SpectraMerger.h:122
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
void clear(bool clear_meta_data)
Clears all data and meta data.
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
double getMzMin() const
returns the minimum m/z of the spectrum
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:701
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:209
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:63
double getRT() const
centroid data or stick data
Definition: SpectrumSettings.h:73
int Int
Signed integer type.
Definition: Types.h:102
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:112
Not all required information provided.
Definition: Exception.h:195