89 double lm = intercept + rt_coef * diff_rt * diff_rt +
91 return 1.0 / (1.0 + exp(-lm));
110 return (rt - min_rt) / (max_rt - min_rt) * 100;
152 double feature_rt,
DoubleList& feature_intensities,
153 const std::set<std::string>& transition_ids = std::set<std::string>());
171 n_decoys_ = n_decoys;
172 n_transitions_ = n_transitions;
173 rt_trafo_ = rt_trafo;
188 glm_.intercept = intercept;
189 glm_.rt_coef = rt_coef;
190 glm_.int_coef = int_coef;
226 "There need to be at least 2 assays in the library for ConfidenceScoring.");
229 if (n_assays - 1 < n_decoys_)
232 <<
") is higher than the number of unrelated assays in the "
233 <<
"library (" << n_assays - 1 <<
"). "
234 <<
"Using all unrelated assays as decoys." << std::endl;
236 if (n_assays - 1 <= n_decoys_) n_decoys_ = 0;
238 decoy_index_.resize(n_assays);
239 for (
Size i = 0; i < n_assays; ++i) decoy_index_[i] = boost::numeric_cast<Int>(i);
245 const std::string& ref = library_.
getTransitions()[i].getPeptideRef();
246 transition_map_[ref].push_back(boost::numeric_cast<Int>(i));
250 rt_norm_.min_rt = std::numeric_limits<double>::infinity();
251 rt_norm_.max_rt = -std::numeric_limits<double>::infinity();
252 for (std::vector<TargetedExperiment::Peptide>::const_iterator it =
256 double current_rt = getAssayRT_(*it);
257 if (current_rt == -1.0)
continue;
258 rt_norm_.min_rt = std::min(rt_norm_.min_rt, current_rt);
259 rt_norm_.max_rt = std::max(rt_norm_.max_rt, current_rt);
264 startProgress(0, features.
size(),
"scoring features");
267 feat_it != features.
end(); ++feat_it)
270 <<
" (ID '" << feat_it->getUniqueId() <<
"')"<< std::endl;
271 scoreFeature_(*feat_it);
272 setProgress(feat_it - features.
begin());
#define OPENMS_LOG_DEBUG
Macro for debug information - includes file and line info.
Definition LogStream.h:591
#define OPENMS_LOG_WARN
Macro for warnings.
Definition LogStream.h:583
Confidence scoring for SRM/MRM/PRM features against a targeted assay library.
Definition ConfidenceScoring.h:56
void scoreMap(FeatureMap &features)
Score every feature in features in place, writing per-feature scores and an updated overall-quality v...
Definition ConfidenceScoring.h:219
void chooseDecoys_()
Permute decoy_index_ in place to pick a fresh random decoy sample for the next feature.
TargetedExperiment library_
Targeted-assay library: one peptide per assay, each with its transitions.
Definition ConfidenceScoring.h:114
Math::RandomShuffler shuffler_
Random shuffler used to draw decoy samples (seed depends on test mode — see ctor)
Definition ConfidenceScoring.h:126
IntList decoy_index_
Indexes into library_.getPeptides() used as decoys for the current feature.
Definition ConfidenceScoring.h:116
Size n_decoys_
Number of decoy assays to sample per feature (0 = use all unrelated assays as decoys)
Definition ConfidenceScoring.h:118
double getAssayRT_(const TargetedExperiment::Peptide &assay)
Read the (single) retention time from an assay's TargetedExperiment::Peptide; the assay is required t...
~ConfidenceScoring() override
Destructor.
Definition ConfidenceScoring.h:70
TransformationDescription rt_trafo_
Optional RT transformation applied to measured feature RTs before comparison with library RTs.
Definition ConfidenceScoring.h:124
double scoreAssay_(const TargetedExperiment::Peptide &assay, double feature_rt, DoubleList &feature_intensities, const std::set< std::string > &transition_ids=std::set< std::string >())
Score one feature against one candidate assay.
double manhattanDist_(DoubleList x, DoubleList y)
Manhattan (L1) distance between two equal-length vectors.
ConfidenceScoring(bool test_mode_=false)
Construct an empty scorer.
void scoreFeature_(Feature &feature)
Score one feature against its matching assay plus a random decoy sample; writes the per-assay scores ...
void initializeGlm(double intercept, double rt_coef, double int_coef)
Install the GLM coefficients fitted externally on a training set.
Definition ConfidenceScoring.h:186
Size n_transitions_
Number of top-intensity transitions to keep when computing the intensity-distance term (0 = keep all)
Definition ConfidenceScoring.h:122
std::map< std::string, IntList > transition_map_
Lookup table mapping each assay (peptide reference) ID to the indexes of its transitions in library_.getTransitions()
Definition ConfidenceScoring.h:120
void initialize(const TargetedExperiment &library, const Size n_decoys, const Size n_transitions, const TransformationDescription &rt_trafo)
Install the configuration needed before scoreMap() can run.
Definition ConfidenceScoring.h:168
A method or algorithm argument contains illegal values.
Definition Exception.h:630
size_t size() const noexcept
Definition ExposedVector.h:128
iterator begin() noexcept
Definition ExposedVector.h:104
iterator end() noexcept
Definition ExposedVector.h:108
A container for features.
Definition FeatureMap.h:78
iterator Iterator
Definition FeatureMap.h:85
An LC-MS feature.
Definition Feature.h:46
Definition MathFunctions.h:478
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Represents a peptide (amino acid sequence)
Definition TargetedExperimentHelper.h:335
A description of a targeted experiment containing precursor and product ions.
Definition TargetedExperiment.h:40
const std::vector< ReactionMonitoringTransition > & getTransitions() const
Returns the transition list.
const std::vector< Peptide > & getPeptides() const
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition Types.h:97
std::vector< Int > IntList
Vector of signed integers.
Definition ListUtils.h:29
std::vector< double > DoubleList
Vector of double precision real types.
Definition ListUtils.h:36
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Binomial GLM used to map (squared-normalised-RT-diff, intensity-distance) -> [0, 1] confidence.
Definition ConfidenceScoring.h:81
double rt_coef
GLM coefficient on the squared RT difference (units: 1/RT²)
Definition ConfidenceScoring.h:83
double int_coef
GLM coefficient on the Manhattan intensity distance.
Definition ConfidenceScoring.h:84
double operator()(double diff_rt, double dist_int) const
Evaluate the GLM at (diff_rt, dist_int); returns a probability in [0, 1].
Definition ConfidenceScoring.h:87
double intercept
GLM intercept term.
Definition ConfidenceScoring.h:82
Map RT values into the [0, 100] interval using min/max RT of the assay library.
Definition ConfidenceScoring.h:103
double min_rt
Smallest assay RT in the library; set by scoreMap()
Definition ConfidenceScoring.h:104
double max_rt
Largest assay RT in the library; set by scoreMap()
Definition ConfidenceScoring.h:105
double operator()(double rt) const
Map rt into [0, 100] using the cached min/max library RTs.
Definition ConfidenceScoring.h:108