OpenMS
Loading...
Searching...
No Matches
PeakTypeEstimator.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow $
6// $Authors: Chris Bielow $
7// --------------------------------------------------------------------------
8
9#pragma once
10
12
13#include <cmath>
14#include <numeric>
15
16namespace OpenMS
17{
23 class OPENMS_DLLAPI PeakTypeEstimator
24 {
25public:
42 template <typename PeakConstIterator>
43 static SpectrumSettings::SpectrumType estimateType(const PeakConstIterator& begin, const PeakConstIterator& end)
44 {
45 typedef typename PeakConstIterator::value_type PeakT;
46 // abort if there are less than 5 peak in the iterator range
47 if (end - begin < 5)
48 {
49 return SpectrumSettings::UNKNOWN;
50 }
51
52 const int max_peaks = 5; // maximal number of peaks we are looking at
53 int profile_evidence = 0; // number of peaks found to be profile
54 int centroid_evidence = 0; // number of peaks found to be centroided
55
56 // copy data, since we need to modify
57 std::vector<PeakT> data(begin, end);
58 // total intensity of spectrum
59 double total_int = std::accumulate(begin, end, 0.0, [](double int_, const PeakT& p) { return int_ + p.getIntensity(); } );
60 double explained_int = 0;
61 // get the 5 highest peaks
62 for (int i = 0; i < max_peaks; ++i)
63 {
64 // stop if we explained +50% of all intensity
65 // (due to danger of interpreting noise - usually wrongly classified as centroided data)
66 if (explained_int > 0.5 * total_int) break;
67
68 double int_max = 0;
69 Size idx = std::numeric_limits<Size>::max();
70 // find highest peak position
71 for (Size i = 0; i < data.size(); ++i)
72 {
73 if (data[i].getIntensity() > int_max)
74 {
75 int_max = data[i].getIntensity();
76 idx = i;
77 }
78 }
79 // no more peaks
80 if (idx == std::numeric_limits<Size>::max()) break;
81
82 // check left and right peak shoulders and count number of sample points
83 typedef typename std::vector<PeakT>::iterator PeakIterator; // non-const version, since we need to modify the peaks
84 PeakIterator it_max = data.begin() + idx;
85 PeakIterator it = it_max;
86 double int_last = int_max;
87 while (it != data.begin()
88 && it->getIntensity() <= int_last // at most 100% of last sample point
89 && it->getIntensity() > 0
90 && (it->getIntensity() / int_last) > 0.1 // at least 10% of last sample point
91 && it->getMZ() + 1 > it_max->getMZ()) // at most 1 Th away
92 {
93 int_last = it->getIntensity();
94 explained_int += int_last;
95 it->setIntensity(0); // remove peak from future consideration
96 --it;
97 }
98 // if the current point is rising again, restore the intensity of the
99 // previous 'sink' point (because it could belong to a neighbour peak)
100 // e.g. imagine intensities: 1-2-3-2-1-2-4-2-1. We do not want to destroy the middle '1'
101 if (it->getIntensity() > int_last) (it+1)->setIntensity(int_last);
102
103 //std::cerr << " Peak candidate: " << it_max->getMZ() << " ...";
104 bool break_left = false;
105 if (it_max - it < 2+1) // 'it' does not fulfill the conditions, i.e. does not count
106 { // fewer than two sampling points on left shoulder
107 //std::cerr << " break left " << it_max - it << " points\n";
108 break_left = true;
109 // do not end loop here.. we still need to clean up the right side
110 }
111 it_max->setIntensity(int_max); // restore center intensity
112 explained_int -= int_max;
113 it = it_max;
114 int_last = int_max;
115 while (it != data.end()
116 && it->getIntensity() <= int_last // at most 100% of last sample point
117 && it->getIntensity() > 0
118 && (it->getIntensity() / int_last) > 0.1 // at least 10% of last sample point
119 && it->getMZ() - 1 < it_max->getMZ()) // at most 1 Th away
120 {
121 int_last = it->getIntensity();
122 explained_int += int_last;
123 it->setIntensity(0); // remove peak from future consideration
124 ++it;
125 }
126 // if the current point is rising again, restore the intensity of the
127 // previous 'sink' point (because it could belong to a neighbour peak)
128 // e.g. imagine intensities: 1-2-4-2-1-2-3-2-1. We do not want to destroy the middle '1'
129 // (note: the sequence is not identical to the one of the left shoulder)
130 if (it != data.end() && it->getIntensity() > int_last) (it-1)->setIntensity(int_last);
131
132 if (break_left || it - it_max < 2+1) // 'it' does not fulfill the conditions, i.e. does not count
133 { // fewer than two sampling points on right shoulder
134 //std::cerr << " break right " << it - it_max << " points\n";
135 ++centroid_evidence;
136 continue;
137 }
138 // peak has at least two sampling points on either side within 1 Th
139 ++profile_evidence;
140 //std::cerr << " PROFILE " << it - it_max << " points right\n";
141
142 }
143
144 float evidence_ratio = profile_evidence / float(profile_evidence + centroid_evidence);
145 //std::cerr << "--> Evidence ratio: " << evidence_ratio;
146
147 if (evidence_ratio > 0.75) // 80% are profile
148 {
149 //std::cerr << " PROFILE\n";
150 return SpectrumSettings::PROFILE;
151 }
152 else
153 {
154 //std::cerr << " CENTROID\n";
155 return SpectrumSettings::CENTROID;
156 }
157 }
244 };
245
246} // namespace OpenMS
247
Estimates if the data of a spectrum is raw data or peak data.
Definition PeakTypeEstimator.h:24
static SpectrumSettings::SpectrumType estimateType(const PeakConstIterator &begin, const PeakConstIterator &end)
Estimates the peak type of the peaks in the iterator range based on intensity characteristics of up t...
Definition PeakTypeEstimator.h:43
SpectrumType
Spectrum peak type.
Definition SpectrumSettings.h:50
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19