OpenMS
Loading...
Searching...
No Matches
MessagePasserFactory.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Julianus Pfeuffer $
6// $Authors: Julianus Pfeuffer $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <cmath>
12#include <Evergreen/evergreen.hpp>
13
14typedef unsigned long int uiint;
15
16namespace OpenMS
17{
18 namespace Internal
19{
24 template <typename Label>
26 private:
27 //const int minInputsPAF = 3; //@todo could be used to decide when brute force is better.
28
31
34 std::map<int, double> chgLLhoods = {{1, 0.7}, {2, 0.9}, {3, 0.7}, {4, 0.5}, {5, 0.5}};
35
39 inline double notConditionalGivenSum(unsigned long summ) {
40 // use log for better precision
41 return std::pow(2., log2(1. - beta_) + summ * log2(1. - alpha_));
42 //return std::pow((1.0 - alpha_), summ) * (1.0 - beta_); // standard way
43 }
44
45 public:
47 evergreen::TableDependency<Label> createProteinFactor(Label id, int nrMissingPeps = 0);
49 evergreen::TableDependency<Label> createProteinFactor(Label id, double prior, int nrMissingPeps = 0);
50
54 evergreen::TableDependency<Label> createPeptideEvidenceFactor(Label id, double prob);
55
61 evergreen::TableDependency<Label> createRegularizingSumEvidenceFactor(size_t nr_parents, Label id, Label pep_id);
62
67 evergreen::TableDependency<Label> createSumEvidenceFactor(size_t nr_parents, Label id, Label pep_id);
68
69 //For extended model. @todo currently unused
70 evergreen::TableDependency<Label> createSumFactor(size_t nr_parents, Label nId);
71 evergreen::TableDependency<Label> createReplicateFactor(Label seqId, Label repId);
72 evergreen::TableDependency<Label> createChargeFactor(Label repId, Label chargeId, int chg);
73
75 evergreen::AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId);
77 evergreen::AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId);
79 evergreen::PseudoAdditiveDependency<Label> createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<evergreen::TableDependency <Label> > & deps);
80
89 MessagePasserFactory(double alpha, double beta, double gamma, double p, double pep_prior);
90
91
92
94 // TODO we could recollect the protIDs from the union of parents.
95 void fillVectorsOfMessagePassers(const std::vector<Label> & protIDs,
96 const std::vector<std::vector<Label>> & parentsOfPeps,
97 const std::vector<double> & pepEvidences,
98 evergreen::InferenceGraphBuilder<Label> & igb);
99
100 //void fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
101 // const std::vector<std::vector<Label>> & parentsOfPeps,
102 // const std::vector<double> & pepEvidences,
103 // evergreen::InferenceGraphBuilder<Label> & igb);
104 };
105
106 //IMPLEMENTATIONS:
107
108 template <typename Label>
109 MessagePasserFactory<Label>::MessagePasserFactory(double alpha, double beta, double gamma, double p, double pep_prior) {
110 assert(0. <= alpha && alpha <= 1.);
111 assert(0. <= beta && beta <= 1.);
112 assert(0. <= gamma && gamma <= 1.);
113 //Note: smaller than 1 might be possible but is untested right now.
114 assert(p >= 1.);
115 assert(0. < pep_prior && pep_prior < 1.);
116 alpha_ = alpha;
117 beta_ = beta;
118 gamma_ = gamma;
119 p_ = p;
120 pepPrior_ = pep_prior;
121 }
122
123 template <typename Label>
124 evergreen::TableDependency<Label> MessagePasserFactory<Label>::createProteinFactor(Label id, int nrMissingPeps) {
125 double prior = gamma_;
126 if (nrMissingPeps > 0)
127 {
128 double powFactor = std::pow(1.0 - alpha_, -nrMissingPeps);
129 prior = -prior/(prior * powFactor - prior - powFactor);
130 }
131 double table[] = {1.0 - prior, prior};
132 evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
133 return evergreen::TableDependency<Label>(lpmf,p_);
134 }
135
136 template <typename Label>
137 evergreen::TableDependency<Label> MessagePasserFactory<Label>::createProteinFactor(Label id, double prior, int nrMissingPeps) {
138 if (nrMissingPeps > 0)
139 {
140 double powFactor = std::pow(1.0 - alpha_, -nrMissingPeps);
141 prior = -prior/(prior * powFactor - prior - powFactor);
142 }
143 double table[] = {1.0 - prior, prior};
144 evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
145 return evergreen::TableDependency<Label>(lpmf,p_);
146 }
147
148 template <typename Label>
149 evergreen::TableDependency<Label> MessagePasserFactory<Label>::createPeptideEvidenceFactor(Label id, double prob) {
150 double table[] = {(1 - prob) * (1 - pepPrior_), prob * pepPrior_};
151 evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
152 return evergreen::TableDependency<Label>(lpmf,p_);
153 }
154
155
156 template <typename Label>
157 evergreen::TableDependency<Label> MessagePasserFactory<Label>::createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId) {
158 evergreen::Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
159 for (unsigned long i=0; i <= nrParents; ++i) {
160 double notConditional = notConditionalGivenSum(i);
161 unsigned long indexArr[2] = {i,0ul};
162 table[indexArr] = notConditional;
163 unsigned long indexArr2[2] = {i,1ul};
164 table[indexArr2] = 1.0 - notConditional;
165 }
166 //std::cout << table << std::endl;
167 evergreen::LabeledPMF<Label> lpmf({nId, pepId}, evergreen::PMF({0L,0L}, table));
168 //std::cout << lpmf << std::endl;
169 return evergreen::TableDependency<Label>(lpmf,p_);
170 }
171
172 template <typename Label>
173 evergreen::TableDependency<Label> MessagePasserFactory<Label>::createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId) {
174 evergreen::Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
175 unsigned long z[2]{0ul,0ul};
176 unsigned long z1[2]{0ul,1ul};
177 table[z] = 1. - beta_;
178 table[z1] = beta_;
179 for (unsigned long i=1; i <= nrParents; ++i) {
180 double notConditional = notConditionalGivenSum(i);
181 unsigned long indexArr[2] = {i,0ul};
182 table[indexArr] = notConditional / i;
183 unsigned long indexArr2[2] = {i,1ul};
184 table[indexArr2] = (1.0 - notConditional) / i;
185 }
186 //std::cout << table << std::endl;
187 evergreen::LabeledPMF<Label> lpmf({nId, pepId}, evergreen::PMF({0L,0L}, table));
188 //std::cout << lpmf << std::endl;
189 return evergreen::TableDependency<Label>(lpmf,p_);
190 }
191
192 template <typename Label>
193 evergreen::TableDependency<Label> MessagePasserFactory<Label>::createSumFactor(size_t nrParents, Label nId) {
194 evergreen::Tensor<double> table({nrParents+1});
195 for (unsigned long i=0; i <= nrParents; ++i) {
196 table[i] = 1.0/(nrParents+1.);
197 }
198 //std::cout << table << std::endl;
199 evergreen::LabeledPMF<Label> lpmf({nId}, evergreen::PMF({0L}, table));
200 //std::cout << lpmf << std::endl;
201 return evergreen::TableDependency<Label>(lpmf,p_);
202 }
203
204 template <typename Label>
205 evergreen::TableDependency<Label> MessagePasserFactory<Label>::createReplicateFactor(Label seqId, Label repId) {
206 using arr = unsigned long[2];
207 evergreen::Tensor<double> table({2,2});
208 table[arr{0,0}] = 0.999;
209 table[arr{0,1}] = 0.001;
210 table[arr{1,0}] = 0.1;
211 table[arr{1,1}] = 0.9;
212 //std::cout << table << std::endl;
213 evergreen::LabeledPMF<Label> lpmf({seqId,repId}, evergreen::PMF({0L,0L}, table));
214 //std::cout << lpmf << std::endl;
215 return evergreen::TableDependency<Label>(lpmf,p_);
216 }
217
218 template <typename Label>
219 evergreen::TableDependency<Label> MessagePasserFactory<Label>::createChargeFactor(Label repId, Label chgId, int chg) {
220 double chgPrior = chgLLhoods[chg];
221 using arr = unsigned long[2];
222 evergreen::Tensor<double> table({2,2});
223 table[arr{0,0}] = 0.999;
224 table[arr{0,1}] = 0.001;
225 table[arr{1,0}] = 0.1;
226 table[arr{1,1}] = chgPrior;
227 //std::cout << table << std::endl;
228 evergreen::LabeledPMF<Label> lpmf({repId,chgId}, evergreen::PMF({0L,0L}, table));
229 //std::cout << lpmf << std::endl;
230 return evergreen::TableDependency<Label>(lpmf,p_);
231 }
232
233 template <typename Label>
234 evergreen::AdditiveDependency<Label> MessagePasserFactory<Label>::createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId) {
235 std::vector<std::vector<Label>> parents;
236 std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
237 return evergreen::AdditiveDependency<Label>(parents, {nId}, p_);
238 }
239
240 template <typename Label>
241 evergreen::AdditiveDependency<Label> MessagePasserFactory<Label>::createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId) {
242 std::vector<std::vector<Label>> parents;
243 std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
244 return evergreen::AdditiveDependency<Label>(parents, {nId}, p_);
245 }
246
247 template <typename Label>
248 evergreen::PseudoAdditiveDependency<Label> MessagePasserFactory<Label>::createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<evergreen::TableDependency<Label>> & deps) {
249 std::vector<std::vector<Label>> parents;
250 std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
251 return evergreen::PseudoAdditiveDependency<Label>(parents, {nId}, deps, p_);
252 }
253
255 // TODO we could recollect the protIDs from the union of parents.
256 template <typename Label>
257 void MessagePasserFactory<Label>::fillVectorsOfMessagePassers(const std::vector<Label>& protIDs,
258 const std::vector<std::vector<Label>>& parentsOfPeps,
259 const std::vector<double>& pepEvidences,
260 evergreen::InferenceGraphBuilder<Label>& igb)
261 {
262 //TODO asserts could be loosened
263 assert(parentsOfPeps.size() == pepEvidences.size());
264 for (const std::vector<uiint>& parents : parentsOfPeps)
265 for (Label parent : parents)
266 assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
267
268 for (uiint pid : protIDs)
269 igb.insert_dependency(createProteinFactor(pid));
270
271 for (uiint j = 0; j < parentsOfPeps.size(); j++)
272 {
273 igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
274 igb.insert_dependency(createSumEvidenceFactor(parentsOfPeps[j],j,j));
275 igb.insert_dependency(createPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
276 }
277 }
278
279 /* unused but working
280 template <typename Label>
281 void MessagePasserFactory<Label>::fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
282 const std::vector<std::vector<Label>> & parentsOfPeps,
283 const std::vector<double> & pepEvidences,
284 evergreen::InferenceGraphBuilder<Label> & igb)
285 {
286 assert(parentsOfPeps.size() == pepEvidences.size());
287 for (std::vector<uiint> parents : parentsOfPeps)
288 for (uiint parent : parents)
289 assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
290
291 for (uiint pid : protIDs)
292 igb.insert_dependency(createProteinFactor(pid));
293
294 for (uiint j = 0; j < parentsOfPeps.size(); j++)
295 {
296 std::vector<evergreen::TableDependency<std::string> > deps;
297 auto pepdep = createSumEvidenceFactor(parentsOfPeps[j],j,j);
298 auto sumdep = createSumFactor(parentsOfPeps[j],j);
299 igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
300 igb.insert_dependency(pepdep);
301 deps.push_back(sumdep);
302 for (const auto& parent : parentsOfPeps[j]) {
303 deps.push_back(createProteinFactor(parent));
304 }
305
306 //igb.insert_dependency(createEmptyPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
307 igb.insert_dependency(createBFPeptideProbabilisticAdderFactor(parentsOfPeps[j],j,deps));
308 }
309 }
310 */
311
312 } // namespace Internal
313} // namespace OpenMS
unsigned long int uiint
Definition MessagePasserFactory.h:14
Definition MessagePasserFactory.h:25
evergreen::TableDependency< Label > createProteinFactor(Label id, int nrMissingPeps=0)
Protein Factor initialized with model prior (missing peps are experimental)
Definition MessagePasserFactory.h:124
double notConditionalGivenSum(unsigned long summ)
Definition MessagePasserFactory.h:39
evergreen::AdditiveDependency< Label > createPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId)
To sum up distributions for the number of parent proteins of a peptide with convolution trees.
Definition MessagePasserFactory.h:234
double alpha_
the model parameters
Definition MessagePasserFactory.h:30
evergreen::TableDependency< Label > createSumEvidenceFactor(size_t nr_parents, Label id, Label pep_id)
Definition MessagePasserFactory.h:157
MessagePasserFactory(double alpha, double beta, double gamma, double p, double pep_prior)
Constructor.
Definition MessagePasserFactory.h:109
double beta_
Definition MessagePasserFactory.h:30
evergreen::TableDependency< Label > createRegularizingSumEvidenceFactor(size_t nr_parents, Label id, Label pep_id)
Definition MessagePasserFactory.h:173
evergreen::TableDependency< Label > createChargeFactor(Label repId, Label chargeId, int chg)
Definition MessagePasserFactory.h:219
double gamma_
Definition MessagePasserFactory.h:30
double pepPrior_
Definition MessagePasserFactory.h:30
evergreen::TableDependency< Label > createReplicateFactor(Label seqId, Label repId)
Definition MessagePasserFactory.h:205
evergreen::TableDependency< Label > createSumFactor(size_t nr_parents, Label nId)
Definition MessagePasserFactory.h:193
void fillVectorsOfMessagePassers(const std::vector< Label > &protIDs, const std::vector< std::vector< Label > > &parentsOfPeps, const std::vector< double > &pepEvidences, evergreen::InferenceGraphBuilder< Label > &igb)
Works on a vector of protein indices (potentially not consecutive)
Definition MessagePasserFactory.h:257
evergreen::TableDependency< Label > createPeptideEvidenceFactor(Label id, double prob)
Definition MessagePasserFactory.h:149
double p_
Definition MessagePasserFactory.h:30
std::map< int, double > chgLLhoods
Definition MessagePasserFactory.h:34
evergreen::PseudoAdditiveDependency< Label > createBFPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId, const std::vector< evergreen::TableDependency< Label > > &deps)
To sum up distributions for the number of parent proteins of a peptide brute-force.
Definition MessagePasserFactory.h:248
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19