64 defaults_.setValue(
"rt_tolerance", 10.0,
"Maximal RT distance (in [s]) for two spectra's precursors.");
65 defaults_.setValue(
"mz_tolerance", 1.0,
"Maximal m/z distance (in Da) for two spectra's precursors.");
71 rt_max_ = (double) param_.getValue(
"rt_tolerance");
72 mz_max_ = (double) param_.getValue(
"mz_tolerance");
78 return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
85 double d_rt = fabs(first.
getRT() - second.
getRT());
86 double d_mz = fabs(first.
getMZ() - second.
getMZ());
88 if (d_rt > rt_max_ || d_mz > mz_max_)
94 double sim = getSimilarity(d_rt, d_mz);
111 typedef std::map<Size, std::vector<std::pair<Size, double> > >
AverageBlocks;
142 template <
typename MapType>
145 IntList ms_levels = param_.getValue(
"block_method:ms_levels");
148 UInt rt_block_size(param_.getValue(
"block_method:rt_block_size"));
149 double rt_max_length = (param_.getValue(
"block_method:rt_max_length"));
151 if (rt_max_length == 0)
153 rt_max_length = (std::numeric_limits<double>::max)();
156 for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
160 UInt block_size_count(rt_block_size + 1);
161 Size idx_spectrum(0);
164 if (
Int(it1->getMSLevel()) == *it_mslevel)
167 if (++block_size_count >= rt_block_size ||
168 exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
170 block_size_count = 0;
171 idx_block = idx_spectrum;
175 spectra_to_merge[idx_block].push_back(idx_spectrum);
182 if (block_size_count == 0)
184 spectra_to_merge[idx_block] = std::vector<Size>();
188 mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
195 template <
typename MapType>
201 std::vector<BinaryTreeNode> tree;
202 std::map<Size, Size> index_mapping;
205 std::vector<BaseFeature> data;
207 for (
Size i = 0; i < exp.
size(); ++i)
209 if (exp[i].getMSLevel() != 2)
215 index_mapping[data.size()] = i;
219 bf.
setRT(exp[i].getRT());
220 const auto& pcs = exp[i].getPrecursors();
228 OPENMS_LOG_WARN <<
"More than one precursor found. Using first one!" << std::endl;
230 bf.
setMZ(pcs[0].getMZ());
233 data_size = data.size();
247 std::vector<std::vector<Size> > clusters;
250 for (
Size ii = 0; ii < tree.size(); ++ii)
252 if (tree[ii].distance >= 1)
254 tree[ii].distance = -1;
256 if (tree[ii].distance != -1)
261 ca.
cut(data_size - node_count, tree, clusters);
266 for (
Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
268 if (clusters[i_outer].size() <= 1)
273 Size cl_index0 = clusters[i_outer][0];
274 spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
276 for (
Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
278 spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[clusters[i_outer][i_inner]]);
283 mergeSpectra_(exp, spectra_to_merge, 2);
298 if (mz1 == mz2 || tol_ppm <= 0)
304 const int max_iso_diff = 5;
305 const double max_charge_diff_ratio = 3.0;
307 for (
int c1 = min_c; c1 <= max_c; ++c1)
309 double mass1 = (mz1 - Constants::PROTON_MASS_U) * c1;
311 for (
int c2 = min_c; c2 <= max_c; ++c2)
313 if (c1 / c2 > max_charge_diff_ratio)
317 if (c2 / c1 > max_charge_diff_ratio)
322 double mass2 = (mz2 - Constants::PROTON_MASS_U) * c2;
324 if (fabs(mass1 - mass2) > max_iso_diff)
328 for (
int i = -max_iso_diff; i <= max_iso_diff; ++i)
330 if (fabs(mass1 - mass2 + i * Constants::ISOTOPE_MASSDIFF_55K_U) < mass1 * tol_ppm * 1e-6)
347 template <
typename MapType>
353 ms_level = param_.getValue(
"average_gaussian:ms_level");
354 if (average_type ==
"tophat")
356 ms_level = param_.getValue(
"average_tophat:ms_level");
361 std::string spectrum_type = param_.getValue(
"average_gaussian:spectrum_type");
362 if (average_type ==
"tophat")
364 spectrum_type = std::string(param_.getValue(
"average_tophat:spectrum_type"));
368 double fwhm(param_.getValue(
"average_gaussian:rt_FWHM"));
369 double factor = -4 * log(2.0) / (fwhm * fwhm);
370 double cutoff(param_.getValue(
"average_gaussian:cutoff"));
371 double precursor_mass_ppm = param_.getValue(
"average_gaussian:precursor_mass_tol");
372 int precursor_max_charge = param_.getValue(
"average_gaussian:precursor_max_charge");
375 bool unit(param_.getValue(
"average_tophat:rt_unit") ==
"scans");
376 double range(param_.getValue(
"average_tophat:rt_range"));
377 double range_seconds = range / 2;
378 int range_scans =
static_cast<int>(range);
379 if ((range_scans % 2) == 0)
383 range_scans = (range_scans - 1) / 2;
392 if (
Int(it_rt->getMSLevel()) == ms_level)
403 terminate_now =
false;
404 while (it_rt_2 != exp.
end() && !terminate_now)
406 if (
Int(it_rt_2->getMSLevel()) == ms_level)
410 if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
411 it_rt_2->getPrecursors().size() > 0)
413 double mz1 = it_rt->getPrecursors()[0].getMZ();
414 double mz2 = it_rt_2->getPrecursors()[0].getMZ();
415 add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
421 if (average_type ==
"gaussian")
424 double base = it_rt_2->getRT() - it_rt->getRT();
425 weight = std::exp(factor * base * base);
427 std::pair<Size, double> p(m, weight);
428 spectra_to_average_over[n].push_back(p);
432 if (average_type ==
"gaussian")
435 double base = it_rt_2->getRT() - it_rt->getRT();
436 terminate_now = std::exp(factor * base * base) < cutoff;
441 terminate_now = (steps > range_scans);
446 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
456 terminate_now =
false;
457 while (it_rt_2 != exp.
begin() && !terminate_now)
459 if (
Int(it_rt_2->getMSLevel()) == ms_level)
463 if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
464 it_rt_2->getPrecursors().size() > 0)
466 double mz1 = it_rt->getPrecursors()[0].getMZ();
467 double mz2 = it_rt_2->getPrecursors()[0].getMZ();
468 add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
473 if (average_type ==
"gaussian")
475 double base = it_rt_2->getRT() - it_rt->getRT();
476 weight = std::exp(factor * base * base);
478 std::pair<Size, double> p(m, weight);
479 spectra_to_average_over[n].push_back(p);
483 if (average_type ==
"gaussian")
486 double base = it_rt_2->getRT() - it_rt->getRT();
487 terminate_now = std::exp(factor * base * base) < cutoff;
492 terminate_now = (steps > range_scans);
497 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
512 OPENMS_PRETTY_FUNCTION,
513 "Input mzML does not have any spectra of MS level specified by ms_level.");
517 for (AverageBlocks::iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
520 for (
const auto& weight: it->second)
522 sum += weight.second;
525 for (
auto& weight: it->second)
527 weight.second /= sum;
533 if (spectrum_type ==
"automatic")
535 Size idx = spectra_to_average_over.begin()->first;
536 type = exp[idx].getType(
true);
538 else if (spectrum_type ==
"profile")
540 type = SpectrumSettings::PROFILE;
542 else if (spectrum_type ==
"centroid")
544 type = SpectrumSettings::CENTROID;
548 throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION,
"Spectrum type has to be one of automatic, profile or centroid.");
552 if (type == SpectrumSettings::CENTROID)
554 averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
558 averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
578 template <
typename MapType>
581 double mz_binning_width(param_.getValue(
"mz_binning_width"));
582 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
587 std::map<Size, Size> cluster_sizes;
588 std::set<Size> merged_indices;
593 p.
setValue(
"tolerance", mz_binning_width);
594 if (!(mz_binning_unit ==
"Da" || mz_binning_unit ==
"ppm"))
599 p.
setValue(
"is_relative_tolerance", mz_binning_unit ==
"Da" ?
"false" :
"true");
601 std::vector<std::pair<Size, Size> > alignment;
603 Size count_peaks_aligned(0);
604 Size count_peaks_overall(0);
607 for (
auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
609 ++cluster_sizes[it->second.size() + 1];
614 merged_indices.insert(it->first);
616 double rt_average = consensus_spec.
getRT();
617 double precursor_mz_average = 0.0;
618 Size precursor_count(0);
621 precursor_mz_average = consensus_spec.
getPrecursors()[0].getMZ();
625 count_peaks_overall += consensus_spec.size();
630 for (
auto sit = it->second.begin(); sit != it->second.end(); ++sit)
632 consensus_spec.
unify(exp[*sit]);
633 merged_indices.insert(*sit);
635 rt_average += exp[*sit].getRT();
636 if (ms_level >= 2 && exp[*sit].getPrecursors().
size() > 0)
638 precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
643 consensus_native_id +=
",";
644 consensus_native_id += exp[*sit].getNativeID();
648 count_peaks_aligned += alignment.size();
649 count_peaks_overall += exp[*sit].
size();
652 Size spec_b_index(0);
655 Size spec_a = consensus_spec.size(), spec_b = exp[*sit].
size(), align_size = alignment.size();
656 for (
auto pit = exp[*sit].begin(); pit != exp[*sit].
end(); ++pit)
658 if (alignment.empty() || alignment[align_index].second != spec_b_index)
661 consensus_spec.push_back(*pit);
667 Size copy_of_align_index(align_index);
669 while (!alignment.empty() &&
670 copy_of_align_index < alignment.size() &&
671 alignment[copy_of_align_index].second == spec_b_index)
673 ++copy_of_align_index;
677 while (!alignment.empty() &&
678 align_index < alignment.size() &&
679 alignment[align_index].second == spec_b_index)
681 consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
682 (pit->getIntensity() / (
double)counter));
684 if (align_index == alignment.size())
689 align_size = align_size + 1 - counter;
694 if (spec_a + spec_b - align_size != consensus_spec.size())
696 OPENMS_LOG_WARN <<
"wrong number of features after merge. Expected: " << spec_a + spec_b - align_size <<
" got: " << consensus_spec.size() <<
"\n";
699 rt_average /= it->second.size() + 1;
700 consensus_spec.
setRT(rt_average);
709 precursor_mz_average /= precursor_count;
713 pcs[0].setMZ(precursor_mz_average);
717 if (consensus_spec.empty())
723 merged_spectra.
addSpectrum(std::move(consensus_spec));
728 for (
const auto& cl_size : cluster_sizes)
730 OPENMS_LOG_INFO <<
" size " << cl_size.first <<
": " << cl_size.second <<
"x\n";
734 sprintf(buffer,
"%d/%d (%.2f %%) of blocked spectra", (
int)count_peaks_aligned,
735 (
int)count_peaks_overall,
float(count_peaks_aligned) /
float(count_peaks_overall) * 100.);
741 for (
Size i = 0; i < exp.
size(); ++i)
743 if (merged_indices.count(i) == 0)
753 std::make_move_iterator(exp_tmp.
end()));
757 std::make_move_iterator(merged_spectra.
end()));
781 template <
typename MapType>
786 double mz_binning_width(param_.getValue(
"mz_binning_width"));
787 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
789 unsigned progress = 0;
790 std::stringstream progress_message;
791 progress_message <<
"averaging profile spectra of MS level " << ms_level;
792 startProgress(0, spectra_to_average_over.size(), progress_message.str());
795 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
797 setProgress(++progress);
800 std::vector<double> mz_positions_all;
801 for (
const auto& spec : it->second)
806 mz_positions_all.push_back(it_mz->getMZ());
810 sort(mz_positions_all.begin(), mz_positions_all.end());
812 std::vector<double> mz_positions;
813 std::vector<double> intensities;
814 double last_mz = std::numeric_limits<double>::min();
815 double delta_mz(mz_binning_width);
816 for (
const auto mz_pos : mz_positions_all)
818 if (mz_binning_unit ==
"ppm")
820 delta_mz = mz_binning_width * mz_pos / 1000000;
823 if ((mz_pos - last_mz) > delta_mz)
825 mz_positions.push_back(mz_pos);
826 intensities.push_back(0.0);
832 for (
const auto& spec : it->second)
838 for (
Size i = spline.
getPosMin(); i < mz_positions.size(); ++i)
842 intensities[i] += nav.
eval(mz_positions[i]) * (spec.second);
849 average_spec.
clear(
false);
852 for (
Size i = 0; i < mz_positions.size(); ++i)
855 peak.
setMZ(mz_positions[i]);
857 average_spec.push_back(peak);
868 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
870 exp[it->first] = exp_tmp[n];
890 template <
typename MapType>
895 double mz_binning_width(param_.getValue(
"mz_binning_width"));
896 std::string mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
898 unsigned progress = 0;
900 std::stringstream progress_message;
901 progress_message <<
"averaging centroid spectra of MS level " << ms_level;
902 logger.
startProgress(0, spectra_to_average_over.size(), progress_message.str());
905 for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
911 std::vector<std::pair<double, double> > mz_intensity_all;
912 for (
const auto& weightedMZ: it->second)
917 std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * weightedMZ.second));
918 mz_intensity_all.push_back(mz_intensity);
922 sort(mz_intensity_all.begin(), mz_intensity_all.end());
925 std::vector<double> mz_new;
926 std::vector<double> intensity_new;
927 double last_mz = std::numeric_limits<double>::min();
928 double delta_mz = mz_binning_width;
930 double sum_intensity(0);
932 for (
const auto& mz_pos : mz_intensity_all)
934 if (mz_binning_unit ==
"ppm")
936 delta_mz = mz_binning_width * (mz_pos.first) / 1000000;
939 if (((mz_pos.first - last_mz) > delta_mz) && (count > 0))
941 mz_new.push_back(sum_mz / count);
942 intensity_new.push_back(sum_intensity);
947 last_mz = mz_pos.first;
951 sum_mz += mz_pos.first;
952 sum_intensity += mz_pos.second;
957 mz_new.push_back(sum_mz / count);
958 intensity_new.push_back(sum_intensity);
963 average_spec.
clear(
false);
966 for (
Size i = 0; i < mz_new.size(); ++i)
969 peak.
setMZ(mz_new[i]);
971 average_spec.push_back(peak);
982 for (
const auto& spectral_index : spectra_to_average_over)
984 exp[spectral_index.first] = std::move(exp_tmp[n]);
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition LogStream.h:447
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition LogStream.h:452
A basic LC-MS feature.
Definition BaseFeature.h:34
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition ClusterAnalyzer.h:26
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Hierarchical clustering with generic clustering functions.
Definition ClusterHierarchical.h:37
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition ClusterHierarchical.h:84
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
void setParameters(const Param ¶m)
Sets the parameters.
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition DistanceMatrix.h:42
Illegal self operation exception.
Definition Exception.h:346
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition Exception.h:317
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Iterator begin() noexcept
Size size() const noexcept
The number of spectra.
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Base::const_iterator const_iterator
Definition MSExperiment.h:98
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition MSExperiment.h:86
void clear(bool clear_meta_data)
Clears all data and meta data.
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
void setMSLevel(UInt ms_level)
Sets the MS level.
void sortByPosition()
Lexicographically sorts the peaks by their position.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition Param.h:46
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition Peak1D.h:30
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition Peak1D.h:86
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition Peak1D.h:95
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition Peak2D.h:173
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition Peak2D.h:179
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition Peak2D.h:191
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition Peak2D.h:185
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress(UInt64 bytes_processed=0) const
SingleLinkage ClusterMethod.
Definition SingleLinkage.h:31
Definition SpectraMerger.h:59
double getSimilarity(const double d_rt, const double d_mz) const
Definition SpectraMerger.h:75
SpectraDistance_()
Definition SpectraMerger.h:61
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition SpectraMerger.h:82
double mz_max_
Definition SpectraMerger.h:101
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition SpectraMerger.h:69
double rt_max_
Definition SpectraMerger.h:100
Offers spectra merging and averaging algorithms to increase the quality of a spectrum.
Definition SpectraMerger.h:46
SpectraMerger & operator=(SpectraMerger &&source)=default
move-assignment operator
void average(MapType &exp, const String &average_type, int ms_level=-1)
average over neighbouring spectra
Definition SpectraMerger.h:348
static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
check if the first and second mzs might be from the same mass
Definition SpectraMerger.h:296
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition SpectraMerger.h:891
SpectraMerger(const SpectraMerger &source)
copy constructor
SpectraMerger()
default constructor
std::map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition SpectraMerger.h:111
~SpectraMerger() override
destructor
SpectraMerger & operator=(const SpectraMerger &source)
assignment operator
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition SpectraMerger.h:196
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition SpectraMerger.h:782
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition SpectraMerger.h:579
std::map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition SpectraMerger.h:108
SpectraMerger(SpectraMerger &&source)=default
move constructor
void mergeSpectraBlockWise(MapType &exp)
Definition SpectraMerger.h:143
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition SpectrumAlignment.h:43
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition SpectrumAlignment.h:62
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
SpectrumType
Spectrum peak type.
Definition SpectrumSettings.h:50
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
iterator class for access of spline packages
Definition SplineInterpolatedPeaks.h:84
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition SplineInterpolatedPeaks.h:34
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
A more convenient string class.
Definition String.h:34
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< Int > IntList
Vector of signed integers.
Definition ListUtils.h:29
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19