82 defaults_.setValue(
"rt_tolerance", 10.0,
"Maximal RT distance (in [s]) for two spectra's precursors.");
83 defaults_.setValue(
"mz_tolerance", 1.0,
"Maximal m/z distance (in Da) for two spectra's precursors.");
89 rt_max_ = (double) param_.getValue(
"rt_tolerance");
90 mz_max_ = (double) param_.getValue(
"mz_tolerance");
96 return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
103 double d_rt = fabs(first.
getRT() - second.
getRT());
104 double d_mz = fabs(first.
getMZ() - second.
getMZ());
106 if (d_rt > rt_max_ || d_mz > mz_max_)
112 double sim = getSimilarity(d_rt, d_mz);
152 template <
typename MapType>
155 IntList ms_levels = param_.getValue(
"block_method:ms_levels");
156 Int rt_block_size(param_.getValue(
"block_method:rt_block_size"));
157 double rt_max_length = (param_.getValue(
"block_method:rt_max_length"));
159 if (rt_max_length == 0)
161 rt_max_length = (std::numeric_limits<double>::max)();
164 for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
168 SignedSize block_size_count(rt_block_size + 1);
169 Size idx_spectrum(0);
172 if (
Int(it1->getMSLevel()) == *it_mslevel)
175 if (++block_size_count >= rt_block_size ||
176 exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
178 block_size_count = 0;
179 idx_block = idx_spectrum;
183 spectra_to_merge[idx_block].push_back(idx_spectrum);
190 if (block_size_count == 0)
192 spectra_to_merge[idx_block] = std::vector<Size>();
196 mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
203 template <
typename MapType>
209 std::vector<BinaryTreeNode> tree;
213 std::vector<BaseFeature> data;
215 for (
Size i = 0; i < exp.
size(); ++i)
217 if (exp[i].getMSLevel() != 2)
223 index_mapping[data.size()] = i;
227 bf.
setRT(exp[i].getRT());
228 std::vector<Precursor> pcs = exp[i].getPrecursors();
235 OPENMS_LOG_WARN <<
"More than one precursor found. Using first one!" << std::endl;
237 bf.
setMZ(pcs[0].getMZ());
240 data_size = data.size();
255 std::vector<std::vector<Size> > clusters;
258 for (
Size ii = 0; ii < tree.size(); ++ii)
260 if (tree[ii].distance >= 1)
262 tree[ii].distance = -1;
264 if (tree[ii].distance != -1)
269 ca.
cut(data_size - node_count, tree, clusters);
277 for (
Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
279 if (clusters[i_outer].size() <= 1)
284 Size cl_index0 = clusters[i_outer][0];
285 spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
287 for (
Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
289 Size cl_index = clusters[i_outer][i_inner];
290 spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[cl_index]);
295 mergeSpectra_(exp, spectra_to_merge, 2);
306 template <
typename MapType>
310 int ms_level = param_.getValue(
"average_gaussian:ms_level");
311 if (average_type ==
"tophat")
313 ms_level = param_.getValue(
"average_tophat:ms_level");
317 std::string spectrum_type = param_.getValue(
"average_gaussian:spectrum_type");
318 if (average_type ==
"tophat")
320 spectrum_type = std::string(param_.getValue(
"average_tophat:spectrum_type"));
324 double fwhm(param_.getValue(
"average_gaussian:rt_FWHM"));
325 double factor = -4 * log(2.0) / (fwhm * fwhm);
326 double cutoff(param_.getValue(
"average_gaussian:cutoff"));
329 bool unit(param_.getValue(
"average_tophat:rt_unit") ==
"scans");
330 double range(param_.getValue(
"average_tophat:rt_range"));
331 double range_seconds = range / 2;
332 int range_scans =
static_cast<int>(range);
333 if ((range_scans % 2) == 0)
337 range_scans = (range_scans - 1) / 2;
345 if (
Int(it_rt->getMSLevel()) == ms_level)
356 terminate_now =
false;
357 while (it_rt_2 != exp.
end() && !terminate_now)
359 if (
Int(it_rt_2->getMSLevel()) == ms_level)
362 if (average_type ==
"gaussian")
364 weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
366 std::pair<Size, double> p(m, weight);
367 spectra_to_average_over[n].push_back(p);
370 if (average_type ==
"gaussian")
373 terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
378 terminate_now = (steps > range_scans);
383 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
393 terminate_now =
false;
394 while (it_rt_2 != exp.
begin() && !terminate_now)
396 if (
Int(it_rt_2->getMSLevel()) == ms_level)
399 if (average_type ==
"gaussian")
401 weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
403 std::pair<Size, double> p(m, weight);
404 spectra_to_average_over[n].push_back(p);
407 if (average_type ==
"gaussian")
410 terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
415 terminate_now = (steps > range_scans);
420 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
434 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
439 for (std::vector<std::pair<Size, double> >::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
441 (*it2).second /=
sum;
447 if (spectrum_type ==
"automatic")
449 Size idx = spectra_to_average_over.begin()->first;
450 type = exp[idx].getType(
true);
452 else if (spectrum_type ==
"profile")
456 else if (spectrum_type ==
"centroid")
462 throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION,
"Spectrum type has to be one of automatic, profile or centroid.");
468 averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
472 averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
492 template <
typename MapType>
495 double mz_binning_width(param_.getValue(
"mz_binning_width"));
496 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
502 std::set<Size> merged_indices;
507 p.
setValue(
"tolerance", mz_binning_width);
508 if (!(mz_binning_unit ==
"Da" || mz_binning_unit ==
"ppm"))
513 p.
setValue(
"is_relative_tolerance", mz_binning_unit ==
"Da" ?
"false" :
"true");
515 std::vector<std::pair<Size, Size> > alignment;
517 Size count_peaks_aligned(0);
518 Size count_peaks_overall(0);
521 for (
auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
523 ++cluster_sizes[it->second.size() + 1];
529 merged_indices.insert(it->first);
532 double rt_average = consensus_spec.
getRT();
533 double precursor_mz_average = 0.0;
534 Size precursor_count(0);
537 precursor_mz_average = consensus_spec.
getPrecursors()[0].getMZ();
541 count_peaks_overall += consensus_spec.size();
544 for (
auto sit = it->second.begin(); sit != it->second.end(); ++sit)
546 consensus_spec.
unify(exp[*sit]);
547 merged_indices.insert(*sit);
549 rt_average += exp[*sit].getRT();
550 if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
552 precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
559 count_peaks_aligned += alignment.size();
560 count_peaks_overall += exp[*sit].
size();
563 Size spec_b_index(0);
566 Size spec_a = consensus_spec.size(), spec_b = exp[*sit].
size(), align_size = alignment.size();
567 for (
auto pit = exp[*sit].begin(); pit != exp[*sit].
end(); ++pit)
569 if (alignment.empty() || alignment[align_index].second != spec_b_index)
572 consensus_spec.push_back(*pit);
578 Size copy_of_align_index(align_index);
580 while (!alignment.empty() &&
581 copy_of_align_index < alignment.size() &&
582 alignment[copy_of_align_index].second == spec_b_index)
584 ++copy_of_align_index;
588 while (!alignment.empty() &&
589 align_index < alignment.size() &&
590 alignment[align_index].second == spec_b_index)
592 consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
593 (pit->getIntensity() / (
double)counter));
595 if (align_index == alignment.size())
600 align_size = align_size + 1 - counter;
605 if (spec_a + spec_b - align_size != consensus_spec.size())
607 OPENMS_LOG_WARN <<
"wrong number of features after merge. Expected: " << spec_a + spec_b - align_size <<
" got: " << consensus_spec.size() <<
"\n";
610 rt_average /= it->second.size() + 1;
611 consensus_spec.
setRT(rt_average);
617 precursor_mz_average /= precursor_count;
622 pcs[0].setMZ(precursor_mz_average);
626 if (consensus_spec.empty())
639 OPENMS_LOG_INFO <<
" size " << it->first <<
": " << it->second <<
"x\n";
643 sprintf(buffer,
"%d/%d (%.2f %%) of blocked spectra", (
int)count_peaks_aligned,
644 (
int)count_peaks_overall,
float(count_peaks_aligned) /
float(count_peaks_overall) * 100.);
650 for (
Size i = 0; i < exp.
size(); ++i)
652 if (merged_indices.count(i) == 0)
691 template <
typename MapType>
696 double mz_binning_width(param_.getValue(
"mz_binning_width"));
697 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
699 unsigned progress = 0;
700 std::stringstream progress_message;
701 progress_message <<
"averaging profile spectra of MS level " << ms_level;
702 startProgress(0, spectra_to_average_over.size(), progress_message.str());
707 setProgress(++progress);
710 std::vector<double> mz_positions_all;
711 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
716 mz_positions_all.push_back(it_mz->getMZ());
720 sort(mz_positions_all.begin(), mz_positions_all.end());
722 std::vector<double> mz_positions;
723 std::vector<double> intensities;
724 double last_mz = std::numeric_limits<double>::min();
725 double delta_mz(mz_binning_width);
726 for (std::vector<double>::iterator it_mz = mz_positions_all.begin(); it_mz < mz_positions_all.end(); ++it_mz)
728 if (mz_binning_unit ==
"ppm")
730 delta_mz = mz_binning_width * (*it_mz) / 1000000;
733 if (((*it_mz) - last_mz) > delta_mz)
735 mz_positions.push_back(*it_mz);
736 intensities.push_back(0.0);
742 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
748 for (
Size i = 0; i < mz_positions.size(); ++i)
752 intensities[i] += nav.
eval(mz_positions[i]) * (it2->second);
759 average_spec.
clear(
false);
763 for (
Size i = 0; i < mz_positions.size(); ++i)
766 peak.
setMZ(mz_positions[i]);
768 average_spec.push_back(peak);
782 exp[it->first] = exp_tmp[n];
804 template <
typename MapType>
809 double mz_binning_width(param_.getValue(
"mz_binning_width"));
810 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
812 unsigned progress = 0;
814 std::stringstream progress_message;
815 progress_message <<
"averaging centroid spectra of MS level " << ms_level;
816 logger.
startProgress(0, spectra_to_average_over.size(), progress_message.str());
825 std::vector<std::pair<double, double> > mz_intensity_all;
826 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
831 std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * it2->second));
832 mz_intensity_all.push_back(mz_intensity);
839 std::vector<double> mz_new;
840 std::vector<double> intensity_new;
841 double last_mz = std::numeric_limits<double>::min();
842 double delta_mz = mz_binning_width;
844 double sum_intensity(0);
846 for (std::vector<std::pair<double, double> >::const_iterator it_mz = mz_intensity_all.begin(); it_mz != mz_intensity_all.end(); ++it_mz)
848 if (mz_binning_unit ==
"ppm")
850 delta_mz = mz_binning_width * (it_mz->first) / 1000000;
853 if (((it_mz->first - last_mz) > delta_mz) && (count > 0))
855 mz_new.push_back(sum_mz / count);
856 intensity_new.push_back(sum_intensity);
861 last_mz = it_mz->first;
865 sum_mz += it_mz->first;
866 sum_intensity += it_mz->second;
871 mz_new.push_back(sum_mz / count);
872 intensity_new.push_back(sum_intensity);
877 average_spec.
clear(
false);
881 for (
Size i = 0; i < mz_new.size(); ++i)
884 peak.
setMZ(mz_new[i]);
886 average_spec.push_back(peak);
900 exp[it->first] = exp_tmp[n];
909 bool static compareByFirst(std::pair<double, double> i, std::pair<double, double> j)
911 return i.first < j.first;
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:465
A basic LC-MS feature.
Definition: BaseFeature.h:59
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:52
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:64
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:112
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
void setParameters(const Param ¶m)
Sets the parameters.
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:68
Illegal self operation exception.
Definition: Exception.h:372
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:73
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Iterator begin()
Definition: MSExperiment.h:150
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Iterator end()
Definition: MSExperiment.h:160
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Size size() const
Definition: MSExperiment.h:120
Base::const_iterator const_iterator
Definition: MSExperiment.h:118
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:106
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
void setMSLevel(UInt ms_level)
Sets the MS level.
void sortByPosition()
Lexicographically sorts the peaks by their position.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:52
Base::const_iterator ConstIterator
Definition: Map.h:81
Base::iterator Iterator
Definition: Map.h:80
Management and storage of parameters / INI files.
Definition: Param.h:70
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:104
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:113
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:196
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:208
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress() const
Ends the progress display.
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:57
Definition: SpectraMerger.h:77
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:93
SpectraDistance_()
Definition: SpectraMerger.h:79
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:100
double mz_max_
Definition: SpectraMerger.h:119
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: SpectraMerger.h:87
double rt_max_
Definition: SpectraMerger.h:118
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:64
static bool compareByFirst(std::pair< double, double > i, std::pair< double, double > j)
comparator for sorting peaks (m/z, intensity)
Definition: SpectraMerger.h:909
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:805
SpectraMerger(const SpectraMerger &source)
copy constructor
SpectraMerger()
default constructor
Map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:126
~SpectraMerger() override
destructor
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:204
Map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:129
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:692
SpectraMerger & operator=(const SpectraMerger &source)
assignment operator
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:493
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:153
void average(MapType &exp, const String &average_type)
average over neighbouring spectra
Definition: SpectraMerger.h:307
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:69
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:88
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:71
@ PROFILE
profile data
Definition: SpectrumSettings.h:74
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:73
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:110
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:60
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:107
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47