OpenMS
SpectraMerger.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow $
6 // $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
7 // --------------------------------------------------------------------------
8 //
9 #pragma once
10 
20 
21 #include <vector>
22 
23 namespace OpenMS
24 {
25 
34  class OPENMS_DLLAPI SpectraMerger :
35  public DefaultParamHandler, public ProgressLogger
36  {
37 
38 protected:
39 
40  /* Determine distance between two spectra
41 
42  Distance is determined as
43 
44  (d_rt/rt_max_ + d_mz/mz_max_) / 2
45 
46  */
48  public DefaultParamHandler
49  {
50 public:
52  DefaultParamHandler("SpectraDistance")
53  {
54  defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
55  defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
56  defaultsToParam_(); // calls updateMembers_
57  }
58 
59  void updateMembers_() override
60  {
61  rt_max_ = (double) param_.getValue("rt_tolerance");
62  mz_max_ = (double) param_.getValue("mz_tolerance");
63  }
64 
65  double getSimilarity(const double d_rt, const double d_mz) const
66  {
67  // 1 - distance
68  return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
69  }
70 
71  // measure of SIMILARITY (not distance, i.e. 1-distance)!!
72  double operator()(const BaseFeature& first, const BaseFeature& second) const
73  {
74  // get RT distance:
75  double d_rt = fabs(first.getRT() - second.getRT());
76  double d_mz = fabs(first.getMZ() - second.getMZ());
77 
78  if (d_rt > rt_max_ || d_mz > mz_max_)
79  {
80  return 0;
81  }
82 
83  // calculate similarity (0-1):
84  double sim = getSimilarity(d_rt, d_mz);
85 
86  return sim;
87  }
88 
89 protected:
90  double rt_max_;
91  double mz_max_;
92 
93  }; // end of SpectraDistance
94 
95 public:
96 
98  typedef std::map<Size, std::vector<Size> > MergeBlocks;
99 
101  typedef std::map<Size, std::vector<std::pair<Size, double> > > AverageBlocks;
102 
103  // @name Constructors and Destructors
104  // @{
107 
109  SpectraMerger(const SpectraMerger& source);
110 
112  SpectraMerger(SpectraMerger&& source) = default;
113 
115  ~SpectraMerger() override;
116  // @}
117 
118  // @name Operators
119  // @{
122 
124  SpectraMerger& operator=(SpectraMerger&& source) = default;
125  // @}
126 
127  // @name Merging functions
128  // @{
129 
132  template <typename MapType>
134  {
135  IntList ms_levels = param_.getValue("block_method:ms_levels");
136  // just checking negative values
137  if ((Int)param_.getValue("block_method:rt_block_size") < 1)
138  {
139  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "The parameter 'block_method:rt_block_size' must be greater than 0.");
140  }
141  // now actually using an UNSIGNED int, so we can increase it by 1 even if the value is INT_MAX without overflow
142  UInt rt_block_size(param_.getValue("block_method:rt_block_size"));
143  double rt_max_length = (param_.getValue("block_method:rt_max_length"));
144 
145  if (rt_max_length == 0) // no rt restriction set?
146  {
147  rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
148  }
149 
150  for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
151  {
152  MergeBlocks spectra_to_merge;
153  Size idx_block(0);
154  UInt block_size_count(rt_block_size + 1);
155  Size idx_spectrum(0);
156  for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
157  {
158  if (Int(it1->getMSLevel()) == *it_mslevel)
159  {
160  // block full if it contains a maximum number of scans or if maximum rt length spanned
161  if (++block_size_count >= rt_block_size ||
162  exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
163  {
164  block_size_count = 0;
165  idx_block = idx_spectrum;
166  }
167  else
168  {
169  spectra_to_merge[idx_block].push_back(idx_spectrum);
170  }
171  }
172 
173  ++idx_spectrum;
174  }
175  // check if last block had sacrifice spectra
176  if (block_size_count == 0) //block just got initialized
177  {
178  spectra_to_merge[idx_block] = std::vector<Size>();
179  }
180 
181  // merge spectra, remove all old MS spectra and add new consensus spectra
182  mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
183  }
184 
185  exp.sortSpectra();
186  }
187 
189  template <typename MapType>
191  {
192 
193  // convert spectra's precursors to clusterizable data
194  Size data_size;
195  std::vector<BinaryTreeNode> tree;
196  std::map<Size, Size> index_mapping;
197  // local scope to save memory - we do not need the clustering stuff later
198  {
199  std::vector<BaseFeature> data;
200 
201  for (Size i = 0; i < exp.size(); ++i)
202  {
203  if (exp[i].getMSLevel() != 2)
204  {
205  continue;
206  }
207 
208  // remember which index in distance data ==> experiment index
209  index_mapping[data.size()] = i;
210 
211  // make cluster element
212  BaseFeature bf;
213  bf.setRT(exp[i].getRT());
214  const auto& pcs = exp[i].getPrecursors();
215  // keep the first Precursor
216  if (pcs.empty())
217  {
218  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
219  }
220  if (pcs.size() > 1)
221  {
222  OPENMS_LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
223  }
224  bf.setMZ(pcs[0].getMZ());
225  data.push_back(bf);
226  }
227  data_size = data.size();
228 
229  SpectraDistance_ llc;
230  llc.setParameters(param_.copy("precursor_method:", true));
231  SingleLinkage sl;
232  DistanceMatrix<float> dist; // will be filled
234 
235  // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
236  ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
237  }
238 
239  // extract the clusters
240  ClusterAnalyzer ca;
241  std::vector<std::vector<Size> > clusters;
242  // count number of real tree nodes (not the -1 ones):
243  Size node_count = 0;
244  for (Size ii = 0; ii < tree.size(); ++ii)
245  {
246  if (tree[ii].distance >= 1)
247  {
248  tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
249  }
250  if (tree[ii].distance != -1)
251  {
252  ++node_count;
253  }
254  }
255  ca.cut(data_size - node_count, tree, clusters);
256 
257  // convert to blocks
258  MergeBlocks spectra_to_merge;
259 
260  for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
261  {
262  if (clusters[i_outer].size() <= 1)
263  {
264  continue;
265  }
266  // init block with first cluster element
267  Size cl_index0 = clusters[i_outer][0];
268  spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
269  // add all other elements
270  for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
271  {
272  spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[clusters[i_outer][i_inner]]);
273  }
274  }
275 
276  // do it
277  mergeSpectra_(exp, spectra_to_merge, 2);
278 
279  exp.sortSpectra();
280  }
281 
290  static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
291  {
292  if (mz1 == mz2 || tol_ppm <= 0)
293  {
294  return true;
295  }
296 
297  const int min_c = 1;
298  const int max_iso_diff = 5; // maximum charge difference 5 is more than enough
299  const double max_charge_diff_ratio = 3.0; // maximum ratio between charges (large / small charge)
300 
301  for (int c1 = min_c; c1 <= max_c; ++c1)
302  {
303  double mass1 = (mz1 - Constants::PROTON_MASS_U) * c1;
304 
305  for (int c2 = min_c; c2 <= max_c; ++c2)
306  {
307  if (c1 / c2 > max_charge_diff_ratio)
308  {
309  continue;
310  }
311  if (c2 / c1 > max_charge_diff_ratio)
312  {
313  break;
314  }
315 
316  double mass2 = (mz2 - Constants::PROTON_MASS_U) * c2;
317 
318  if (fabs(mass1 - mass2) > max_iso_diff)
319  {
320  continue;
321  }
322  for (int i = -max_iso_diff; i <= max_iso_diff; ++i)
323  {
324  if (fabs(mass1 - mass2 + i * Constants::ISOTOPE_MASSDIFF_55K_U) < mass1 * tol_ppm * 1e-6)
325  {
326  return true;
327  }
328  }
329  }
330  }
331  return false;
332  }
333 
341  template <typename MapType>
342  void average(MapType& exp, const String& average_type, int ms_level = -1)
343  {
344  // MS level to be averaged
345  if (ms_level < 0)
346  {
347  ms_level = param_.getValue("average_gaussian:ms_level");
348  if (average_type == "tophat")
349  {
350  ms_level = param_.getValue("average_tophat:ms_level");
351  }
352  }
353 
354  // spectrum type (profile, centroid or automatic)
355  std::string spectrum_type = param_.getValue("average_gaussian:spectrum_type");
356  if (average_type == "tophat")
357  {
358  spectrum_type = std::string(param_.getValue("average_tophat:spectrum_type"));
359  }
360 
361  // parameters for Gaussian averaging
362  double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
363  double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
364  double cutoff(param_.getValue("average_gaussian:cutoff"));
365  double precursor_mass_ppm = param_.getValue("average_gaussian:precursor_mass_tol");
366  int precursor_max_charge = param_.getValue("average_gaussian:precursor_max_charge");
367 
368  // parameters for Top-Hat averaging
369  bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
370  double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
371  double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
372  int range_scans = static_cast<int>(range); // in case of unit scans, the param is used as integer
373  if ((range_scans % 2) == 0)
374  {
375  ++range_scans;
376  }
377  range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
378 
379  AverageBlocks spectra_to_average_over;
380 
381  // loop over RT
382  int n(0); // spectrum index
383  int cntr(0); // spectrum counter
384  for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
385  {
386  if (Int(it_rt->getMSLevel()) == ms_level)
387  {
388  int m; // spectrum index
389  int steps;
390  bool terminate_now;
391  typename MapType::const_iterator it_rt_2;
392 
393  // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
394  steps = 0;
395  m = n + 1;
396  it_rt_2 = it_rt + 1;
397  terminate_now = false;
398  while (it_rt_2 != exp.end() && !terminate_now)
399  {
400  if (Int(it_rt_2->getMSLevel()) == ms_level)
401  {
402  bool add = true;
403  // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
404  if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
405  it_rt_2->getPrecursors().size() > 0)
406  {
407  double mz1 = it_rt->getPrecursors()[0].getMZ();
408  double mz2 = it_rt_2->getPrecursors()[0].getMZ();
409  add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
410  }
411 
412  if (add)
413  {
414  double weight = 1;
415  if (average_type == "gaussian")
416  {
417  //factor * (rt_2 -rt)^2
418  double base = it_rt_2->getRT() - it_rt->getRT();
419  weight = std::exp(factor * base * base);
420  }
421  std::pair<Size, double> p(m, weight);
422  spectra_to_average_over[n].push_back(p);
423  }
424  ++steps;
425  }
426  if (average_type == "gaussian")
427  {
428  // Gaussian
429  double base = it_rt_2->getRT() - it_rt->getRT();
430  terminate_now = std::exp(factor * base * base) < cutoff;
431  }
432  else if (unit)
433  {
434  // Top-Hat with RT unit = scans
435  terminate_now = (steps > range_scans);
436  }
437  else
438  {
439  // Top-Hat with RT unit = seconds
440  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
441  }
442  ++m;
443  ++it_rt_2;
444  }
445 
446  // go backward
447  steps = 0;
448  m = n;
449  it_rt_2 = it_rt;
450  terminate_now = false;
451  while (it_rt_2 != exp.begin() && !terminate_now)
452  {
453  if (Int(it_rt_2->getMSLevel()) == ms_level)
454  {
455  bool add = true;
456  // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
457  if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
458  it_rt_2->getPrecursors().size() > 0)
459  {
460  double mz1 = it_rt->getPrecursors()[0].getMZ();
461  double mz2 = it_rt_2->getPrecursors()[0].getMZ();
462  add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
463  }
464  if (add)
465  {
466  double weight = 1;
467  if (average_type == "gaussian")
468  {
469  double base = it_rt_2->getRT() - it_rt->getRT();
470  weight = std::exp(factor * base * base);
471  }
472  std::pair<Size, double> p(m, weight);
473  spectra_to_average_over[n].push_back(p);
474  }
475  ++steps;
476  }
477  if (average_type == "gaussian")
478  {
479  // Gaussian
480  double base = it_rt_2->getRT() - it_rt->getRT();
481  terminate_now = std::exp(factor * base * base) < cutoff;
482  }
483  else if (unit)
484  {
485  // Top-Hat with RT unit = scans
486  terminate_now = (steps > range_scans);
487  }
488  else
489  {
490  // Top-Hat with RT unit = seconds
491  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
492  }
493  --m;
494  --it_rt_2;
495  }
496  ++cntr;
497  }
498  ++n;
499  }
500 
501  if (cntr == 0)
502  {
503  //return;
504  throw Exception::InvalidParameter(__FILE__,
505  __LINE__,
506  OPENMS_PRETTY_FUNCTION,
507  "Input mzML does not have any spectra of MS level specified by ms_level.");
508  }
509 
510  // normalize weights
511  for (AverageBlocks::iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
512  {
513  double sum(0.0);
514  for (const auto& weight: it->second)
515  {
516  sum += weight.second;
517  }
518 
519  for (auto& weight: it->second)
520  {
521  weight.second /= sum;
522  }
523  }
524 
525  // determine type of spectral data (profile or centroided)
527  if (spectrum_type == "automatic")
528  {
529  Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
530  type = exp[idx].getType(true);
531  }
532  else if (spectrum_type == "profile")
533  {
535  }
536  else if (spectrum_type == "centroid")
537  {
539  }
540  else
541  {
542  throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION, "Spectrum type has to be one of automatic, profile or centroid.");
543  }
544 
545  // generate new spectra
546  if (type == SpectrumSettings::CENTROID)
547  {
548  averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
549  }
550  else
551  {
552  averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
553  }
554 
555  exp.sortSpectra();
556  }
557 
558  // @}
559 
560 protected:
561 
572  template <typename MapType>
573  void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
574  {
575  double mz_binning_width(param_.getValue("mz_binning_width"));
576  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
577 
578  // merge spectra
579  MapType merged_spectra;
580 
581  std::map<Size, Size> cluster_sizes;
582  std::set<Size> merged_indices;
583 
584  // set up alignment
585  SpectrumAlignment sas;
586  Param p;
587  p.setValue("tolerance", mz_binning_width);
588  if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm"))
589  {
590  throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
591  }
592 
593  p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
594  sas.setParameters(p);
595  std::vector<std::pair<Size, Size> > alignment;
596 
597  Size count_peaks_aligned(0);
598  Size count_peaks_overall(0);
599 
600  // each BLOCK
601  for (auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
602  {
603  ++cluster_sizes[it->second.size() + 1]; // for stats
604 
605  typename MapType::SpectrumType consensus_spec = exp[it->first];
606  consensus_spec.setMSLevel(ms_level);
607 
608  merged_indices.insert(it->first);
609 
610  double rt_average = consensus_spec.getRT();
611  double precursor_mz_average = 0.0;
612  Size precursor_count(0);
613  if (!consensus_spec.getPrecursors().empty())
614  {
615  precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
616  ++precursor_count;
617  }
618 
619  count_peaks_overall += consensus_spec.size();
620 
621  String consensus_native_id = consensus_spec.getNativeID();
622 
623  // block elements
624  for (auto sit = it->second.begin(); sit != it->second.end(); ++sit)
625  {
626  consensus_spec.unify(exp[*sit]); // append meta info
627  merged_indices.insert(*sit);
628 
629  rt_average += exp[*sit].getRT();
630  if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
631  {
632  precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
633  ++precursor_count;
634  }
635 
636  // add native ID to consensus native ID, comma separated
637  consensus_native_id += ",";
638  consensus_native_id += exp[*sit].getNativeID();
639 
640  // merge data points
641  sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
642  count_peaks_aligned += alignment.size();
643  count_peaks_overall += exp[*sit].size();
644 
645  Size align_index(0);
646  Size spec_b_index(0);
647 
648  // sanity check for number of peaks
649  Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
650  for (auto pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
651  {
652  if (alignment.empty() || alignment[align_index].second != spec_b_index)
653  // ... add unaligned peak
654  {
655  consensus_spec.push_back(*pit);
656  }
657  // or add aligned peak height to ALL corresponding existing peaks
658  else
659  {
660  Size counter(0);
661  Size copy_of_align_index(align_index);
662 
663  while (!alignment.empty() &&
664  copy_of_align_index < alignment.size() &&
665  alignment[copy_of_align_index].second == spec_b_index)
666  {
667  ++copy_of_align_index;
668  ++counter;
669  } // Count the number of peaks which correspond to a single b peak.
670 
671  while (!alignment.empty() &&
672  align_index < alignment.size() &&
673  alignment[align_index].second == spec_b_index)
674  {
675  consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
676  (pit->getIntensity() / (double)counter)); // add the intensity divided by the number of peaks
677  ++align_index; // this aligned peak was explained, wait for next aligned peak ...
678  if (align_index == alignment.size())
679  {
680  alignment.clear(); // end reached -> avoid going into this block again
681  }
682  }
683  align_size = align_size + 1 - counter; //Decrease align_size by number of
684  }
685  ++spec_b_index;
686  }
687  consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
688  if (spec_a + spec_b - align_size != consensus_spec.size())
689  {
690  OPENMS_LOG_WARN << "wrong number of features after merge. Expected: " << spec_a + spec_b - align_size << " got: " << consensus_spec.size() << "\n";
691  }
692  }
693  rt_average /= it->second.size() + 1;
694  consensus_spec.setRT(rt_average);
695 
696  // set new consensus native ID
697  consensus_spec.setNativeID(consensus_native_id);
698 
699  if (ms_level >= 2)
700  {
701  if (precursor_count)
702  {
703  precursor_mz_average /= precursor_count;
704  }
705  auto& pcs = consensus_spec.getPrecursors();
706  pcs.resize(1);
707  pcs[0].setMZ(precursor_mz_average);
708  consensus_spec.setPrecursors(pcs);
709  }
710 
711  if (consensus_spec.empty())
712  {
713  continue;
714  }
715  else
716  {
717  merged_spectra.addSpectrum(std::move(consensus_spec));
718  }
719  }
720 
721  OPENMS_LOG_INFO << "Cluster sizes:\n";
722  for (const auto& cl_size : cluster_sizes)
723  {
724  OPENMS_LOG_INFO << " size " << cl_size.first << ": " << cl_size.second << "x\n";
725  }
726 
727  char buffer[200];
728  sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
729  (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
730  OPENMS_LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
731 
732  // remove all spectra that were within a cluster
733  typename MapType::SpectrumType empty_spec;
734  MapType exp_tmp;
735  for (Size i = 0; i < exp.size(); ++i)
736  {
737  if (merged_indices.count(i) == 0) // save unclustered ones
738  {
739  exp_tmp.addSpectrum(exp[i]);
740  exp[i] = empty_spec;
741  }
742  }
743 
744  //Meta_Data will not be cleared
745  exp.clear(false);
746  exp.getSpectra().insert(exp.end(), std::make_move_iterator(exp_tmp.begin()),
747  std::make_move_iterator(exp_tmp.end()));
748 
749  // ... and add consensus spectra
750  exp.getSpectra().insert(exp.end(), std::make_move_iterator(merged_spectra.begin()),
751  std::make_move_iterator(merged_spectra.end()));
752 
753  }
754 
775  template <typename MapType>
776  void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
777  {
778  MapType exp_tmp; // temporary experiment for averaged spectra
779 
780  double mz_binning_width(param_.getValue("mz_binning_width"));
781  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
782 
783  unsigned progress = 0;
784  std::stringstream progress_message;
785  progress_message << "averaging profile spectra of MS level " << ms_level;
786  startProgress(0, spectra_to_average_over.size(), progress_message.str());
787 
788  // loop over blocks
789  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
790  {
791  setProgress(++progress);
792 
793  // loop over spectra in blocks
794  std::vector<double> mz_positions_all; // m/z positions from all spectra
795  for (const auto& spec : it->second)
796  {
797  // loop over m/z positions
798  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[spec.first].begin(); it_mz < exp[spec.first].end(); ++it_mz)
799  {
800  mz_positions_all.push_back(it_mz->getMZ());
801  }
802  }
803 
804  sort(mz_positions_all.begin(), mz_positions_all.end());
805 
806  std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
807  std::vector<double> intensities;
808  double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
809  double delta_mz(mz_binning_width); // for m/z unit Da
810  for (const auto mz_pos : mz_positions_all)
811  {
812  if (mz_binning_unit == "ppm")
813  {
814  delta_mz = mz_binning_width * mz_pos / 1000000;
815  }
816 
817  if ((mz_pos - last_mz) > delta_mz)
818  {
819  mz_positions.push_back(mz_pos);
820  intensities.push_back(0.0);
821  last_mz = mz_pos;
822  }
823  }
824 
825  // loop over spectra in blocks
826  for (const auto& spec : it->second)
827  {
828  SplineInterpolatedPeaks spline(exp[spec.first]);
830 
831  // loop over m/z positions
832  for (Size i = spline.getPosMin(); i < mz_positions.size(); ++i)
833  {
834  if ((spline.getPosMin() < mz_positions[i]) && (mz_positions[i] < spline.getPosMax()))
835  {
836  intensities[i] += nav.eval(mz_positions[i]) * (spec.second); // spline-interpolated intensity * weight
837  }
838  }
839  }
840 
841  // update spectrum
842  typename MapType::SpectrumType average_spec = exp[it->first];
843  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
844 
845  // refill spectrum
846  for (Size i = 0; i < mz_positions.size(); ++i)
847  {
848  typename MapType::PeakType peak;
849  peak.setMZ(mz_positions[i]);
850  peak.setIntensity(intensities[i]);
851  average_spec.push_back(peak);
852  }
853 
854  // store spectrum temporarily
855  exp_tmp.addSpectrum(std::move(average_spec));
856  }
857 
858  endProgress();
859 
860  // loop over blocks
861  int n(0);
862  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
863  {
864  exp[it->first] = exp_tmp[n];
865  ++n;
866  }
867  }
868 
884  template <typename MapType>
885  void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
886  {
887  MapType exp_tmp; // temporary experiment for averaged spectra
888 
889  double mz_binning_width(param_.getValue("mz_binning_width"));
890  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
891 
892  unsigned progress = 0;
893  ProgressLogger logger;
894  std::stringstream progress_message;
895  progress_message << "averaging centroid spectra of MS level " << ms_level;
896  logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
897 
898  // loop over blocks
899  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
900  {
901  logger.setProgress(++progress);
902 
903  // collect peaks from all spectra
904  // loop over spectra in blocks
905  std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
906  for (const auto& weightedMZ: it->second)
907  {
908  // loop over m/z positions
909  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[weightedMZ.first].begin(); it_mz < exp[weightedMZ.first].end(); ++it_mz)
910  {
911  std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * weightedMZ.second)); // m/z, intensity * weight
912  mz_intensity_all.push_back(mz_intensity);
913  }
914  }
915 
916  sort(mz_intensity_all.begin(), mz_intensity_all.end());
917 
918  // generate new spectrum
919  std::vector<double> mz_new;
920  std::vector<double> intensity_new;
921  double last_mz = std::numeric_limits<double>::min();
922  double delta_mz = mz_binning_width;
923  double sum_mz(0);
924  double sum_intensity(0);
925  Size count(0);
926  for (const auto& mz_pos : mz_intensity_all)
927  {
928  if (mz_binning_unit == "ppm")
929  {
930  delta_mz = mz_binning_width * (mz_pos.first) / 1000000;
931  }
932 
933  if (((mz_pos.first - last_mz) > delta_mz) && (count > 0))
934  {
935  mz_new.push_back(sum_mz / count);
936  intensity_new.push_back(sum_intensity); // intensities already weighted
937 
938  sum_mz = 0;
939  sum_intensity = 0;
940 
941  last_mz = mz_pos.first;
942  count = 0;
943  }
944 
945  sum_mz += mz_pos.first;
946  sum_intensity += mz_pos.second;
947  ++count;
948  }
949  if (count > 0)
950  {
951  mz_new.push_back(sum_mz / count);
952  intensity_new.push_back(sum_intensity); // intensities already weighted
953  }
954 
955  // update spectrum
956  typename MapType::SpectrumType average_spec = exp[it->first];
957  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
958 
959  // refill spectrum
960  for (Size i = 0; i < mz_new.size(); ++i)
961  {
962  typename MapType::PeakType peak;
963  peak.setMZ(mz_new[i]);
964  peak.setIntensity(intensity_new[i]);
965  average_spec.push_back(peak);
966  }
967 
968  // store spectrum temporarily
969  exp_tmp.addSpectrum(std::move(average_spec));
970  }
971 
972  logger.endProgress();
973 
974  // loop over blocks
975  int n(0);
976  for (const auto& spectral_index : spectra_to_average_over)
977  {
978  exp[spectral_index.first] = std::move(exp_tmp[n]);
979  ++n;
980  }
981  }
982  };
983 }
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:444
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:449
A basic LC-MS feature.
Definition: BaseFeature.h:33
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:26
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:37
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:84
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
void setParameters(const Param &param)
Sets the parameters.
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:42
Illegal self operation exception.
Definition: Exception.h:345
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:316
Not all required information provided.
Definition: Exception.h:155
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Iterator begin() noexcept
Definition: MSExperiment.h:156
Size size() const noexcept
The number of spectra.
Definition: MSExperiment.h:121
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Iterator end()
Definition: MSExperiment.h:171
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Base::const_iterator const_iterator
Definition: MSExperiment.h:91
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:79
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
double getRT() const
void setMSLevel(UInt ms_level)
Sets the MS level.
void sortByPosition()
Lexicographically sorts the peaks by their position.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition: Param.h:44
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:28
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:84
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:93
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:172
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:178
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:190
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:184
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress(UInt64 bytes_processed=0) const
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:31
Definition: SpectraMerger.h:49
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:65
SpectraDistance_()
Definition: SpectraMerger.h:51
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:72
double mz_max_
Definition: SpectraMerger.h:91
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: SpectraMerger.h:59
double rt_max_
Definition: SpectraMerger.h:90
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:36
void average(MapType &exp, const String &average_type, int ms_level=-1)
average over neighbouring spectra
Definition: SpectraMerger.h:342
static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
check if the first and second mzs might be from the same mass
Definition: SpectraMerger.h:290
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:885
SpectraMerger(const SpectraMerger &source)
copy constructor
SpectraMerger()
default constructor
std::map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:101
~SpectraMerger() override
destructor
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:190
SpectraMerger & operator=(SpectraMerger &&source)=default
move-assignment operator
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:776
SpectraMerger & operator=(const SpectraMerger &source)
assignment operator
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:573
std::map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:98
SpectraMerger(SpectraMerger &&source)=default
move constructor
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:133
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:43
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:62
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:45
@ PROFILE
profile data
Definition: SpectrumSettings.h:48
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:47
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:84
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:34
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:29
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:81
const double ISOTOPE_MASSDIFF_55K_U
Definition: Constants.h:100
const double PROTON_MASS_U
Definition: Constants.h:90
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19