35 #ifndef OPENMS_FILTERING_TRANSFORMERS_SPECTRAMERGER_H 36 #define OPENMS_FILTERING_TRANSFORMERS_SPECTRAMERGER_H 84 defaults_.setValue(
"rt_tolerance", 10.0,
"Maximal RT distance (in [s]) for two spectra's precursors.");
85 defaults_.setValue(
"mz_tolerance", 1.0,
"Maximal m/z distance (in Da) for two spectra's precursors.");
91 rt_max_ = (
double) param_.getValue(
"rt_tolerance");
92 mz_max_ = (
double) param_.getValue(
"mz_tolerance");
100 return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
107 double d_rt = fabs(first.
getRT() - second.
getRT());
108 double d_mz = fabs(first.
getMZ() - second.
getMZ());
110 if (d_rt > rt_max_ || d_mz > mz_max_) {
return 0; }
113 double sim = getSimilarity(d_rt, d_mz);
153 template <
typename MapType>
156 IntList ms_levels = param_.getValue(
"block_method:ms_levels");
157 Int rt_block_size(param_.getValue(
"block_method:rt_block_size"));
158 double rt_max_length = (param_.getValue(
"block_method:rt_max_length"));
160 if (rt_max_length == 0)
162 rt_max_length = (std::numeric_limits<double>::max)();
165 for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
167 MergeBlocks spectra_to_merge;
169 SignedSize block_size_count(rt_block_size + 1);
170 Size idx_spectrum(0);
173 if (
Int(it1->getMSLevel()) == *it_mslevel)
176 if (++block_size_count >= rt_block_size ||
177 exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
179 block_size_count = 0;
180 idx_block = idx_spectrum;
184 spectra_to_merge[idx_block].push_back(idx_spectrum);
191 if (block_size_count == 0)
193 spectra_to_merge[idx_block] = std::vector<Size>();
197 mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
206 template <
typename MapType>
212 std::vector<BinaryTreeNode> tree;
216 std::vector<BaseFeature> data;
218 for (
Size i = 0; i < exp.
size(); ++i)
220 if (exp[i].getMSLevel() != 2)
continue;
223 index_mapping[data.size()] = i;
227 bf.
setRT(exp[i].getRT());
228 std::vector<Precursor> pcs = exp[i].getPrecursors();
230 if (pcs.size() > 1)
LOG_WARN <<
"More than one precursor found. Using first one!" << std::endl;
231 bf.
setMZ(pcs[0].getMZ());
234 data_size = data.size();
249 std::vector<std::vector<Size> > clusters;
252 for (
Size ii = 0; ii < tree.size(); ++ii)
254 if (tree[ii].distance >= 1) tree[ii].distance = -1;
255 if (tree[ii].distance != -1) ++node_count;
257 ca.cut(data_size - node_count, tree, clusters);
263 MergeBlocks spectra_to_merge;
265 for (
Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
267 if (clusters[i_outer].size() <= 1)
continue;
269 Size cl_index0 = clusters[i_outer][0];
270 spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
272 for (
Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
274 Size cl_index = clusters[i_outer][i_inner];
275 spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[cl_index]);
280 mergeSpectra_(exp, spectra_to_merge, 2);
293 template <
typename MapType>
297 int ms_level = param_.getValue(
"average_gaussian:ms_level");
298 if (average_type ==
"tophat")
300 ms_level = param_.getValue(
"average_tophat:ms_level");
304 String spectrum_type = param_.getValue(
"average_gaussian:spectrum_type");
305 if (average_type ==
"tophat")
307 spectrum_type = param_.getValue(
"average_tophat:spectrum_type");
311 double fwhm(param_.getValue(
"average_gaussian:rt_FWHM"));
312 double factor = -4 * log(2.0) / (fwhm * fwhm);
313 double cutoff(param_.getValue(
"average_gaussian:cutoff"));
316 bool unit(param_.getValue(
"average_tophat:rt_unit") ==
"scans");
317 double range(param_.getValue(
"average_tophat:rt_range"));
318 double range_seconds = range / 2;
319 int range_scans = range;
320 if ((range_scans % 2) == 0)
324 range_scans = (range_scans - 1) / 2;
326 AverageBlocks spectra_to_average_over;
332 if (
Int(it_rt->getMSLevel()) == ms_level)
343 terminate_now =
false;
344 while (it_rt_2 != exp.
end() && !terminate_now)
346 if (
Int(it_rt_2->getMSLevel()) == ms_level)
349 if (average_type ==
"gaussian")
351 weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
353 std::pair<Size, double> p(m, weight);
354 spectra_to_average_over[n].push_back(p);
357 if (average_type ==
"gaussian")
360 terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
365 terminate_now = (steps > range_scans);
370 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
380 terminate_now =
false;
381 while (it_rt_2 != exp.
begin() && !terminate_now)
383 if (
Int(it_rt_2->getMSLevel()) == ms_level)
386 if (average_type ==
"gaussian")
388 weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
390 std::pair<Size, double> p(m, weight);
391 spectra_to_average_over[n].push_back(p);
394 if (average_type ==
"gaussian")
397 terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
402 terminate_now = (steps > range_scans);
407 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
421 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
426 for (std::vector<std::pair<Size, double> >::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
428 (*it2).second /=
sum;
434 if (spectrum_type ==
"automatic")
436 Size idx = spectra_to_average_over.begin()->first;
437 type = exp[idx].getType();
443 else if (spectrum_type ==
"profile")
447 else if (spectrum_type ==
"centroid")
455 averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
459 averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
481 template <
typename MapType>
484 double mz_binning_width(param_.getValue(
"mz_binning_width"));
485 String mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
491 std::set<Size> merged_indices;
496 p.
setValue(
"tolerance", mz_binning_width);
499 p.
setValue(
"is_relative_tolerance", mz_binning_unit ==
"Da" ?
"false" :
"true");
501 std::vector<std::pair<Size, Size> > alignment;
503 Size count_peaks_aligned(0);
504 Size count_peaks_overall(0);
507 for (
Map<
Size, std::vector<Size> >::ConstIterator it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
510 ++cluster_sizes[it->second.size() + 1];
516 merged_indices.insert(it->first);
519 double rt_average = consensus_spec.
getRT();
520 double precursor_mz_average = 0.0;
521 Size precursor_count(0);
524 precursor_mz_average = consensus_spec.
getPrecursors()[0].getMZ();
528 count_peaks_overall += consensus_spec.size();
531 for (std::vector<Size>::const_iterator sit = it->second.begin(); sit != it->second.end(); ++sit)
533 consensus_spec.
unify(exp[*sit]);
534 merged_indices.insert(*sit);
536 rt_average += exp[*sit].getRT();
537 if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
539 precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
546 count_peaks_aligned += alignment.size();
547 count_peaks_overall += exp[*sit].
size();
550 Size spec_b_index(0);
553 Size spec_a = consensus_spec.size(), spec_b = exp[*sit].
size(), align_size = alignment.size();
557 if (alignment.size() > 0 && alignment[align_index].second == spec_b_index)
559 consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
560 pit->getIntensity());
562 if (align_index == alignment.size()) alignment.clear();
566 consensus_spec.push_back(*pit);
571 if (spec_a + spec_b - align_size != consensus_spec.size()) std::cerr <<
"\n\n ERRROR \n\n";
573 rt_average /= it->second.size() + 1;
574 consensus_spec.
setRT(rt_average);
578 if (precursor_count) precursor_mz_average /= precursor_count;
582 pcs[0].setMZ(precursor_mz_average);
586 if (consensus_spec.empty())
continue;
587 else merged_spectra.addSpectrum(consensus_spec);
593 LOG_INFO <<
" size " << it->first <<
": " << it->second <<
"x\n";
597 sprintf(buffer,
"%d/%d (%.2f %%) of blocked spectra", (
int)count_peaks_aligned,
598 (
int)count_peaks_overall,
float(count_peaks_aligned) /
float(count_peaks_overall) * 100.);
604 for (
Size i = 0; i < exp.
size(); ++i)
606 if (merged_indices.count(i) == 0)
621 exp.
getSpectra().insert(exp.
end(), merged_spectra.begin(), merged_spectra.end());
645 template <
typename MapType>
650 double mz_binning_width(param_.getValue(
"mz_binning_width"));
651 String mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
653 unsigned progress = 0;
654 std::stringstream progress_message;
655 progress_message <<
"averaging profile spectra of MS level " << ms_level;
656 startProgress(0, spectra_to_average_over.size(), progress_message.str());
661 setProgress(++progress);
664 std::vector<double> mz_positions_all;
665 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
670 mz_positions_all.push_back(it_mz->getMZ());
674 sort(mz_positions_all.begin(), mz_positions_all.end());
676 std::vector<double> mz_positions;
677 std::vector<double> intensities;
678 double last_mz = std::numeric_limits<double>::min();
679 double delta_mz(mz_binning_width);
680 for (std::vector<double>::iterator it_mz = mz_positions_all.begin(); it_mz < mz_positions_all.end(); ++it_mz)
682 if (mz_binning_unit ==
"ppm")
684 delta_mz = mz_binning_width * (*it_mz) / 1000000;
687 if (((*it_mz) - last_mz) > delta_mz)
689 mz_positions.push_back(*it_mz);
690 intensities.push_back(0.0);
696 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
702 for (
Size i = 0; i < mz_positions.size(); ++i)
704 if ((spline.
getMzMin() < mz_positions[i]) && (mz_positions[i] < spline.
getMzMax()))
706 intensities[i] += nav.
eval(mz_positions[i]) * (it2->second);
713 average_spec.
clear(
false);
717 for (
Size i = 0; i < mz_positions.size(); ++i)
720 peak.
setMZ(mz_positions[i]);
722 average_spec.push_back(peak);
736 exp[it->first] = exp_tmp[n];
758 template <
typename MapType>
763 double mz_binning_width(param_.getValue(
"mz_binning_width"));
764 String mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
766 unsigned progress = 0;
768 std::stringstream progress_message;
769 progress_message <<
"averaging centroid spectra of MS level " << ms_level;
770 logger.
startProgress(0, spectra_to_average_over.size(), progress_message.str());
779 std::vector<std::pair<double, double> > mz_intensity_all;
780 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
785 std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * it2->second));
786 mz_intensity_all.push_back(mz_intensity);
793 std::vector<double> mz_new;
794 std::vector<double> intensity_new;
795 double last_mz = std::numeric_limits<double>::min();
796 double delta_mz = mz_binning_width;
798 double sum_intensity(0);
800 for (std::vector<std::pair<double, double> >::const_iterator it_mz = mz_intensity_all.begin(); it_mz != mz_intensity_all.end(); ++it_mz)
802 if (mz_binning_unit ==
"ppm")
804 delta_mz = mz_binning_width * (it_mz->first) / 1000000;
807 if (((it_mz->first - last_mz) > delta_mz) && (count > 0))
809 mz_new.push_back(sum_mz / count);
810 intensity_new.push_back(sum_intensity);
815 last_mz = it_mz->first;
819 sum_mz += it_mz->first;
820 sum_intensity += it_mz->second;
825 mz_new.push_back(sum_mz / count);
826 intensity_new.push_back(sum_intensity);
831 average_spec.
clear(
false);
835 for (
Size i = 0; i < mz_new.size(); ++i)
838 peak.
setMZ(mz_new[i]);
840 average_spec.push_back(peak);
854 exp[it->first] = exp_tmp[n];
863 bool static compareByFirst(std::pair<double, double> i, std::pair<double, double> j)
865 return i.first < j.first;
871 #endif //OPENMS_FILTERING_TRANSFORMERS_SPECTRAMERGER_H
Map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:130
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
A more convenient string class.
Definition: String.h:57
Base::iterator Iterator
Definition: Map.h:81
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:122
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:203
#define LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:455
void sortByPosition()
Lexicographically sorts the peaks by their position.
Peak data (also called centroided data or stick data)
Definition: SpectrumSettings.h:74
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Definition: MSExperiment.h:831
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:52
void endProgress() const
Ends the progress display.
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:71
static bool compareByFirst(std::pair< double, double > i, std::pair< double, double > j)
comparator for sorting peaks (m/z, intensity)
Definition: SpectraMerger.h:863
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:482
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:68
double eval(double mz)
returns spline interpolated intensity at m/z (fast access since we can start search from lastPackage)...
unsigned int UInt
Unsigned integer type.
Definition: Types.h:95
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:154
Iterator begin()
Definition: MSExperiment.h:162
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:59
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:64
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:135
iterator class for access of spline packages
Definition: SplineSpectrum.h:102
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Definition: MSExperiment.h:649
Base::const_iterator const_iterator
Definition: MSExperiment.h:130
Raw data (also called profile data)
Definition: SpectrumSettings.h:75
SplineSpectrum::Navigator getNavigator()
returns an iterator for access of spline packages
Size size() const
Definition: MSExperiment.h:132
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:120
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:111
void setParameters(const Param ¶m)
Sets the parameters.
Unknown spectrum type.
Definition: SpectrumSettings.h:73
SpectraDistance_()
Definition: SpectraMerger.h:81
SpectrumSettings::SpectrumType estimateType(const PeakConstIterator &begin, const PeakConstIterator &end) const
Estimates the peak type of the peaks in the iterator range based on the variance of inter-peak distan...
Definition: PeakTypeEstimator.h:59
A basic LC-MS feature.
Definition: BaseFeature.h:56
#define LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:451
Iterator end()
Definition: MSExperiment.h:172
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
double getMzMax() const
returns the maximum m/z of the spectrum
Data structure for spline interpolation of MS1 spectra.
Definition: SplineSpectrum.h:57
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:215
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:197
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:87
double rt_max_
Definition: SpectraMerger.h:119
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:104
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:97
void setProgress(SignedSize value) const
Sets the current progress.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:55
void average(MapType &exp, String average_type)
average over neighbouring spectra
Definition: SpectraMerger.h:294
void setMSLevel(UInt ms_level)
Sets the MS level.
Definition: SpectraMerger.h:77
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:66
Base::const_iterator ConstIterator
Definition: Map.h:82
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Estimates if the data of a spectrum is raw data or peak data.
Definition: PeakTypeEstimator.h:50
Management and storage of parameters / INI files.
Definition: Param.h:75
Map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:127
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:759
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:209
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:58
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
Illegal self operation exception.
Definition: Exception.h:379
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:118
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
double mz_max_
Definition: SpectraMerger.h:120
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
void clear(bool clear_meta_data)
Clears all data and meta data.
Definition: MSExperiment.h:929
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
double getMzMin() const
returns the minimum m/z of the spectrum
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:646
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:207
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Definition: MSExperiment.h:837
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:64
int Int
Signed integer type.
Definition: Types.h:103
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:51
void updateMembers_()
This method is used to update extra member variables at the end of the setParameters() method...
Definition: SpectraMerger.h:89
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:108