82 defaults_.setValue(
"rt_tolerance", 10.0,
"Maximal RT distance (in [s]) for two spectra's precursors.");
83 defaults_.setValue(
"mz_tolerance", 1.0,
"Maximal m/z distance (in Da) for two spectra's precursors.");
89 rt_max_ = (
double) param_.getValue(
"rt_tolerance");
90 mz_max_ = (
double) param_.getValue(
"mz_tolerance");
96 return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
103 double d_rt = fabs(first.
getRT() - second.
getRT());
104 double d_mz = fabs(first.
getMZ() - second.
getMZ());
106 if (d_rt > rt_max_ || d_mz > mz_max_)
112 double sim = getSimilarity(d_rt, d_mz);
152 template <
typename MapType>
155 IntList ms_levels = param_.getValue(
"block_method:ms_levels");
156 Int rt_block_size(param_.getValue(
"block_method:rt_block_size"));
157 double rt_max_length = (param_.getValue(
"block_method:rt_max_length"));
159 if (rt_max_length == 0)
161 rt_max_length = (std::numeric_limits<double>::max)();
164 for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
168 SignedSize block_size_count(rt_block_size + 1);
169 Size idx_spectrum(0);
172 if (
Int(it1->getMSLevel()) == *it_mslevel)
175 if (++block_size_count >= rt_block_size ||
176 exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
178 block_size_count = 0;
179 idx_block = idx_spectrum;
183 spectra_to_merge[idx_block].push_back(idx_spectrum);
190 if (block_size_count == 0)
192 spectra_to_merge[idx_block] = std::vector<Size>();
196 mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
203 template <
typename MapType>
209 std::vector<BinaryTreeNode> tree;
213 std::vector<BaseFeature> data;
215 for (
Size i = 0; i < exp.
size(); ++i)
217 if (exp[i].getMSLevel() != 2)
223 index_mapping[data.size()] = i;
227 bf.
setRT(exp[i].getRT());
228 std::vector<Precursor> pcs = exp[i].getPrecursors();
235 OPENMS_LOG_WARN <<
"More than one precursor found. Using first one!" << std::endl;
237 bf.
setMZ(pcs[0].getMZ());
240 data_size = data.size();
255 std::vector<std::vector<Size> > clusters;
258 for (
Size ii = 0; ii < tree.size(); ++ii)
260 if (tree[ii].distance >= 1)
262 tree[ii].distance = -1;
264 if (tree[ii].distance != -1)
269 ca.
cut(data_size - node_count, tree, clusters);
277 for (
Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
279 if (clusters[i_outer].size() <= 1)
284 Size cl_index0 = clusters[i_outer][0];
285 spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
287 for (
Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
289 Size cl_index = clusters[i_outer][i_inner];
290 spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[cl_index]);
295 mergeSpectra_(exp, spectra_to_merge, 2);
306 template <
typename MapType>
310 int ms_level = param_.getValue(
"average_gaussian:ms_level");
311 if (average_type ==
"tophat")
313 ms_level = param_.getValue(
"average_tophat:ms_level");
317 String spectrum_type = param_.getValue(
"average_gaussian:spectrum_type");
318 if (average_type ==
"tophat")
320 spectrum_type = param_.getValue(
"average_tophat:spectrum_type");
324 double fwhm(param_.getValue(
"average_gaussian:rt_FWHM"));
325 double factor = -4 * log(2.0) / (fwhm * fwhm);
326 double cutoff(param_.getValue(
"average_gaussian:cutoff"));
329 bool unit(param_.getValue(
"average_tophat:rt_unit") ==
"scans");
330 double range(param_.getValue(
"average_tophat:rt_range"));
331 double range_seconds = range / 2;
332 int range_scans = static_cast<int>(range);
333 if ((range_scans % 2) == 0)
337 range_scans = (range_scans - 1) / 2;
345 if (
Int(it_rt->getMSLevel()) == ms_level)
356 terminate_now =
false;
357 while (it_rt_2 != exp.
end() && !terminate_now)
359 if (
Int(it_rt_2->getMSLevel()) == ms_level)
362 if (average_type ==
"gaussian")
364 weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
366 std::pair<Size, double> p(m, weight);
367 spectra_to_average_over[n].push_back(p);
370 if (average_type ==
"gaussian")
373 terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
378 terminate_now = (steps > range_scans);
383 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
393 terminate_now =
false;
394 while (it_rt_2 != exp.
begin() && !terminate_now)
396 if (
Int(it_rt_2->getMSLevel()) == ms_level)
399 if (average_type ==
"gaussian")
401 weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
403 std::pair<Size, double> p(m, weight);
404 spectra_to_average_over[n].push_back(p);
407 if (average_type ==
"gaussian")
410 terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
415 terminate_now = (steps > range_scans);
420 terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
434 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
439 for (std::vector<std::pair<Size, double> >::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
441 (*it2).second /=
sum;
447 if (spectrum_type ==
"automatic")
449 Size idx = spectra_to_average_over.begin()->first;
450 type = exp[idx].getType(
true);
452 else if (spectrum_type ==
"profile")
456 else if (spectrum_type ==
"centroid")
462 throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION,
"Spectrum type has to be one of automatic, profile or centroid.");
468 averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
472 averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
492 template <
typename MapType>
495 double mz_binning_width(param_.getValue(
"mz_binning_width"));
496 String mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
502 std::set<Size> merged_indices;
507 p.
setValue(
"tolerance", mz_binning_width);
508 if (!(mz_binning_unit ==
"Da" || mz_binning_unit ==
"ppm"))
513 p.
setValue(
"is_relative_tolerance", mz_binning_unit ==
"Da" ?
"false" :
"true");
515 std::vector<std::pair<Size, Size> > alignment;
517 Size count_peaks_aligned(0);
518 Size count_peaks_overall(0);
521 for (
auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
523 ++cluster_sizes[it->second.size() + 1];
529 merged_indices.insert(it->first);
532 double rt_average = consensus_spec.
getRT();
533 double precursor_mz_average = 0.0;
534 Size precursor_count(0);
537 precursor_mz_average = consensus_spec.
getPrecursors()[0].getMZ();
541 count_peaks_overall += consensus_spec.size();
544 for (
auto sit = it->second.begin(); sit != it->second.end(); ++sit)
546 consensus_spec.
unify(exp[*sit]);
547 merged_indices.insert(*sit);
549 rt_average += exp[*sit].getRT();
550 if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
552 precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
559 count_peaks_aligned += alignment.size();
560 count_peaks_overall += exp[*sit].
size();
563 Size spec_b_index(0);
566 Size spec_a = consensus_spec.size(), spec_b = exp[*sit].
size(), align_size = alignment.size();
567 for (
auto pit = exp[*sit].begin(); pit != exp[*sit].
end(); ++pit)
569 if (alignment.size() == 0 || alignment[align_index].second != spec_b_index)
572 consensus_spec.push_back(*pit);
578 Size copy_of_align_index(align_index);
580 while (alignment.size() > 0 &&
581 copy_of_align_index < alignment.size() &&
582 alignment[copy_of_align_index].second == spec_b_index)
584 ++copy_of_align_index;
588 while (alignment.size() > 0 &&
589 align_index < alignment.size() &&
590 alignment[align_index].second == spec_b_index)
592 consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
593 (pit->getIntensity() / (
double)counter));
595 if (align_index == alignment.size())
600 align_size = align_size + 1 - counter;
605 if (spec_a + spec_b - align_size != consensus_spec.size())
607 OPENMS_LOG_WARN <<
"wrong number of features after merge. Expected: " << spec_a + spec_b - align_size <<
" got: " << consensus_spec.size() <<
"\n";
610 rt_average /= it->second.size() + 1;
611 consensus_spec.
setRT(rt_average);
617 precursor_mz_average /= precursor_count;
622 pcs[0].setMZ(precursor_mz_average);
626 if (consensus_spec.empty())
632 merged_spectra.addSpectrum(consensus_spec);
639 OPENMS_LOG_INFO <<
" size " << it->first <<
": " << it->second <<
"x\n";
643 sprintf(buffer,
"%d/%d (%.2f %%) of blocked spectra", (
int)count_peaks_aligned,
644 (
int)count_peaks_overall,
float(count_peaks_aligned) /
float(count_peaks_overall) * 100.);
650 for (
Size i = 0; i < exp.
size(); ++i)
652 if (merged_indices.count(i) == 0)
667 exp.
getSpectra().insert(exp.
end(), merged_spectra.begin(), merged_spectra.end());
691 template <
typename MapType>
696 double mz_binning_width(param_.getValue(
"mz_binning_width"));
697 String mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
699 unsigned progress = 0;
700 std::stringstream progress_message;
701 progress_message <<
"averaging profile spectra of MS level " << ms_level;
702 startProgress(0, spectra_to_average_over.size(), progress_message.str());
707 setProgress(++progress);
710 std::vector<double> mz_positions_all;
711 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
716 mz_positions_all.push_back(it_mz->getMZ());
720 sort(mz_positions_all.begin(), mz_positions_all.end());
722 std::vector<double> mz_positions;
723 std::vector<double> intensities;
724 double last_mz = std::numeric_limits<double>::min();
725 double delta_mz(mz_binning_width);
726 for (std::vector<double>::iterator it_mz = mz_positions_all.begin(); it_mz < mz_positions_all.end(); ++it_mz)
728 if (mz_binning_unit ==
"ppm")
730 delta_mz = mz_binning_width * (*it_mz) / 1000000;
733 if (((*it_mz) - last_mz) > delta_mz)
735 mz_positions.push_back(*it_mz);
736 intensities.push_back(0.0);
742 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
748 for (
Size i = 0; i < mz_positions.size(); ++i)
752 intensities[i] += nav.
eval(mz_positions[i]) * (it2->second);
759 average_spec.
clear(
false);
763 for (
Size i = 0; i < mz_positions.size(); ++i)
766 peak.
setMZ(mz_positions[i]);
768 average_spec.push_back(peak);
782 exp[it->first] = exp_tmp[n];
804 template <
typename MapType>
809 double mz_binning_width(param_.getValue(
"mz_binning_width"));
810 String mz_binning_unit(param_.getValue(
"mz_binning_width_unit"));
812 unsigned progress = 0;
814 std::stringstream progress_message;
815 progress_message <<
"averaging centroid spectra of MS level " << ms_level;
816 logger.
startProgress(0, spectra_to_average_over.size(), progress_message.str());
825 std::vector<std::pair<double, double> > mz_intensity_all;
826 for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
831 std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * it2->second));
832 mz_intensity_all.push_back(mz_intensity);
839 std::vector<double> mz_new;
840 std::vector<double> intensity_new;
841 double last_mz = std::numeric_limits<double>::min();
842 double delta_mz = mz_binning_width;
844 double sum_intensity(0);
846 for (std::vector<std::pair<double, double> >::const_iterator it_mz = mz_intensity_all.begin(); it_mz != mz_intensity_all.end(); ++it_mz)
848 if (mz_binning_unit ==
"ppm")
850 delta_mz = mz_binning_width * (it_mz->first) / 1000000;
853 if (((it_mz->first - last_mz) > delta_mz) && (count > 0))
855 mz_new.push_back(sum_mz / count);
856 intensity_new.push_back(sum_intensity);
861 last_mz = it_mz->first;
865 sum_mz += it_mz->first;
866 sum_intensity += it_mz->second;
871 mz_new.push_back(sum_mz / count);
872 intensity_new.push_back(sum_intensity);
877 average_spec.
clear(
false);
881 for (
Size i = 0; i < mz_new.size(); ++i)
884 peak.
setMZ(mz_new[i]);
886 average_spec.push_back(peak);
900 exp[it->first] = exp_tmp[n];
909 bool static compareByFirst(std::pair<double, double> i, std::pair<double, double> j)
911 return i.first < j.first;