OpenMS
MessagePasserFactory.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Julianus Pfeuffer $
6 // $Authors: Julianus Pfeuffer $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <cmath>
12 #include <Evergreen/evergreen.hpp>
13 
14 typedef unsigned long int uiint;
15 
16 namespace OpenMS
17 {
18  namespace Internal
19 {
24  template <typename Label>
26  private:
27  //const int minInputsPAF = 3; //@todo could be used to decide when brute force is better.
28 
31 
34  std::map<int, double> chgLLhoods = {{1, 0.7}, {2, 0.9}, {3, 0.7}, {4, 0.5}, {5, 0.5}};
35 
39  inline double notConditionalGivenSum(unsigned long summ) {
40  // use log for better precision
41  return std::pow(2., log2(1. - beta_) + summ * log2(1. - alpha_));
42  //return std::pow((1.0 - alpha_), summ) * (1.0 - beta_); // standard way
43  }
44 
45  public:
47  evergreen::TableDependency<Label> createProteinFactor(Label id, int nrMissingPeps = 0);
49  evergreen::TableDependency<Label> createProteinFactor(Label id, double prior, int nrMissingPeps = 0);
50 
54  evergreen::TableDependency<Label> createPeptideEvidenceFactor(Label id, double prob);
55 
61  evergreen::TableDependency<Label> createRegularizingSumEvidenceFactor(size_t nr_parents, Label id, Label pep_id);
62 
67  evergreen::TableDependency<Label> createSumEvidenceFactor(size_t nr_parents, Label id, Label pep_id);
68 
69  //For extended model. @todo currently unused
70  evergreen::TableDependency<Label> createSumFactor(size_t nr_parents, Label nId);
71  evergreen::TableDependency<Label> createReplicateFactor(Label seqId, Label repId);
72  evergreen::TableDependency<Label> createChargeFactor(Label repId, Label chargeId, int chg);
73 
75  evergreen::AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId);
77  evergreen::AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId);
79  evergreen::PseudoAdditiveDependency<Label> createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<evergreen::TableDependency <Label> > & deps);
80 
89  MessagePasserFactory<Label>(double alpha, double beta, double gamma, double p, double pep_prior);
90 
91 
92 
94  // TODO we could recollect the protIDs from the union of parents.
95  void fillVectorsOfMessagePassers(const std::vector<Label> & protIDs,
96  const std::vector<std::vector<Label>> & parentsOfPeps,
97  const std::vector<double> & pepEvidences,
98  evergreen::InferenceGraphBuilder<Label> & igb);
99 
100  //void fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
101  // const std::vector<std::vector<Label>> & parentsOfPeps,
102  // const std::vector<double> & pepEvidences,
103  // evergreen::InferenceGraphBuilder<Label> & igb);
104  };
105 
106  //IMPLEMENTATIONS:
107 
108  template <typename Label>
109  MessagePasserFactory<Label>::MessagePasserFactory(double alpha, double beta, double gamma, double p, double pep_prior) {
110  assert(0. <= alpha && alpha <= 1.);
111  assert(0. <= beta && beta <= 1.);
112  assert(0. <= gamma && gamma <= 1.);
113  //Note: smaller than 1 might be possible but is untested right now.
114  assert(p >= 1.);
115  assert(0. < pep_prior && pep_prior < 1.);
116  alpha_ = alpha;
117  beta_ = beta;
118  gamma_ = gamma;
119  p_ = p;
120  pepPrior_ = pep_prior;
121  }
122 
123  template <typename Label>
124  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createProteinFactor(Label id, int nrMissingPeps) {
125  double prior = gamma_;
126  if (nrMissingPeps > 0)
127  {
128  double powFactor = std::pow(1.0 - alpha_, -nrMissingPeps);
129  prior = -prior/(prior * powFactor - prior - powFactor);
130  }
131  double table[] = {1.0 - prior, prior};
132  evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
133  return evergreen::TableDependency<Label>(lpmf,p_);
134  }
135 
136  template <typename Label>
137  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createProteinFactor(Label id, double prior, int nrMissingPeps) {
138  if (nrMissingPeps > 0)
139  {
140  double powFactor = std::pow(1.0 - alpha_, -nrMissingPeps);
141  prior = -prior/(prior * powFactor - prior - powFactor);
142  }
143  double table[] = {1.0 - prior, prior};
144  evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
145  return evergreen::TableDependency<Label>(lpmf,p_);
146  }
147 
148  template <typename Label>
149  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createPeptideEvidenceFactor(Label id, double prob) {
150  double table[] = {(1 - prob) * (1 - pepPrior_), prob * pepPrior_};
151  evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
152  return evergreen::TableDependency<Label>(lpmf,p_);
153  }
154 
155 
156  template <typename Label>
157  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId) {
158  evergreen::Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
159  for (unsigned long i=0; i <= nrParents; ++i) {
160  double notConditional = notConditionalGivenSum(i);
161  unsigned long indexArr[2] = {i,0ul};
162  table[indexArr] = notConditional;
163  unsigned long indexArr2[2] = {i,1ul};
164  table[indexArr2] = 1.0 - notConditional;
165  }
166  //std::cout << table << std::endl;
167  evergreen::LabeledPMF<Label> lpmf({nId, pepId}, evergreen::PMF({0L,0L}, table));
168  //std::cout << lpmf << std::endl;
169  return evergreen::TableDependency<Label>(lpmf,p_);
170  }
171 
172  template <typename Label>
173  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId) {
174  evergreen::Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
175  unsigned long z[2]{0ul,0ul};
176  unsigned long z1[2]{0ul,1ul};
177  table[z] = 1. - beta_;
178  table[z1] = beta_;
179  for (unsigned long i=1; i <= nrParents; ++i) {
180  double notConditional = notConditionalGivenSum(i);
181  unsigned long indexArr[2] = {i,0ul};
182  table[indexArr] = notConditional / i;
183  unsigned long indexArr2[2] = {i,1ul};
184  table[indexArr2] = (1.0 - notConditional) / i;
185  }
186  //std::cout << table << std::endl;
187  evergreen::LabeledPMF<Label> lpmf({nId, pepId}, evergreen::PMF({0L,0L}, table));
188  //std::cout << lpmf << std::endl;
189  return evergreen::TableDependency<Label>(lpmf,p_);
190  }
191 
192  template <typename Label>
193  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createSumFactor(size_t nrParents, Label nId) {
194  evergreen::Tensor<double> table({nrParents+1});
195  for (unsigned long i=0; i <= nrParents; ++i) {
196  table[i] = 1.0/(nrParents+1.);
197  }
198  //std::cout << table << std::endl;
199  evergreen::LabeledPMF<Label> lpmf({nId}, evergreen::PMF({0L}, table));
200  //std::cout << lpmf << std::endl;
201  return evergreen::TableDependency<Label>(lpmf,p_);
202  }
203 
204  template <typename Label>
205  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createReplicateFactor(Label seqId, Label repId) {
206  using arr = unsigned long[2];
207  evergreen::Tensor<double> table({2,2});
208  table[arr{0,0}] = 0.999;
209  table[arr{0,1}] = 0.001;
210  table[arr{1,0}] = 0.1;
211  table[arr{1,1}] = 0.9;
212  //std::cout << table << std::endl;
213  evergreen::LabeledPMF<Label> lpmf({seqId,repId}, evergreen::PMF({0L,0L}, table));
214  //std::cout << lpmf << std::endl;
215  return evergreen::TableDependency<Label>(lpmf,p_);
216  }
217 
218  template <typename Label>
219  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createChargeFactor(Label repId, Label chgId, int chg) {
220  double chgPrior = chgLLhoods[chg];
221  using arr = unsigned long[2];
222  evergreen::Tensor<double> table({2,2});
223  table[arr{0,0}] = 0.999;
224  table[arr{0,1}] = 0.001;
225  table[arr{1,0}] = 0.1;
226  table[arr{1,1}] = chgPrior;
227  //std::cout << table << std::endl;
228  evergreen::LabeledPMF<Label> lpmf({repId,chgId}, evergreen::PMF({0L,0L}, table));
229  //std::cout << lpmf << std::endl;
230  return evergreen::TableDependency<Label>(lpmf,p_);
231  }
232 
233  template <typename Label>
234  evergreen::AdditiveDependency<Label> MessagePasserFactory<Label>::createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId) {
235  std::vector<std::vector<Label>> parents;
236  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
237  return evergreen::AdditiveDependency<Label>(parents, {nId}, p_);
238  }
239 
240  template <typename Label>
241  evergreen::AdditiveDependency<Label> MessagePasserFactory<Label>::createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId) {
242  std::vector<std::vector<Label>> parents;
243  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
244  return evergreen::AdditiveDependency<Label>(parents, {nId}, p_);
245  }
246 
247  template <typename Label>
248  evergreen::PseudoAdditiveDependency<Label> MessagePasserFactory<Label>::createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<evergreen::TableDependency<Label>> & deps) {
249  std::vector<std::vector<Label>> parents;
250  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
251  return evergreen::PseudoAdditiveDependency<Label>(parents, {nId}, deps, p_);
252  }
253 
255  // TODO we could recollect the protIDs from the union of parents.
256  template <typename Label>
257  void MessagePasserFactory<Label>::fillVectorsOfMessagePassers(const std::vector<Label>& protIDs,
258  const std::vector<std::vector<Label>>& parentsOfPeps,
259  const std::vector<double>& pepEvidences,
260  evergreen::InferenceGraphBuilder<Label>& igb)
261  {
262  //TODO asserts could be loosened
263  assert(parentsOfPeps.size() == pepEvidences.size());
264  for (const std::vector<uiint>& parents : parentsOfPeps)
265  for (Label parent : parents)
266  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
267 
268  for (uiint pid : protIDs)
269  igb.insert_dependency(createProteinFactor(pid));
270 
271  for (uiint j = 0; j < parentsOfPeps.size(); j++)
272  {
273  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
274  igb.insert_dependency(createSumEvidenceFactor(parentsOfPeps[j],j,j));
275  igb.insert_dependency(createPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
276  }
277  }
278 
279  /* unused but working
280  template <typename Label>
281  void MessagePasserFactory<Label>::fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
282  const std::vector<std::vector<Label>> & parentsOfPeps,
283  const std::vector<double> & pepEvidences,
284  evergreen::InferenceGraphBuilder<Label> & igb)
285  {
286  assert(parentsOfPeps.size() == pepEvidences.size());
287  for (std::vector<uiint> parents : parentsOfPeps)
288  for (uiint parent : parents)
289  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
290 
291  for (uiint pid : protIDs)
292  igb.insert_dependency(createProteinFactor(pid));
293 
294  for (uiint j = 0; j < parentsOfPeps.size(); j++)
295  {
296  std::vector<evergreen::TableDependency<std::string> > deps;
297  auto pepdep = createSumEvidenceFactor(parentsOfPeps[j],j,j);
298  auto sumdep = createSumFactor(parentsOfPeps[j],j);
299  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
300  igb.insert_dependency(pepdep);
301  deps.push_back(sumdep);
302  for (const auto& parent : parentsOfPeps[j]) {
303  deps.push_back(createProteinFactor(parent));
304  }
305 
306  //igb.insert_dependency(createEmptyPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
307  igb.insert_dependency(createBFPeptideProbabilisticAdderFactor(parentsOfPeps[j],j,deps));
308  }
309  }
310  */
311 
312  } // namespace Internal
313 } // namespace OpenMS
unsigned long int uiint
Definition: MessagePasserFactory.h:14
Definition: MessagePasserFactory.h:25
evergreen::TableDependency< Label > createProteinFactor(Label id, int nrMissingPeps=0)
Protein Factor initialized with model prior (missing peps are experimental)
Definition: MessagePasserFactory.h:124
double notConditionalGivenSum(unsigned long summ)
Definition: MessagePasserFactory.h:39
evergreen::AdditiveDependency< Label > createPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId)
To sum up distributions for the number of parent proteins of a peptide with convolution trees.
Definition: MessagePasserFactory.h:234
double alpha_
the model parameters
Definition: MessagePasserFactory.h:30
evergreen::TableDependency< Label > createSumEvidenceFactor(size_t nr_parents, Label id, Label pep_id)
Definition: MessagePasserFactory.h:157
MessagePasserFactory(double alpha, double beta, double gamma, double p, double pep_prior)
Constructor.
Definition: MessagePasserFactory.h:109
double beta_
Definition: MessagePasserFactory.h:30
void fillVectorsOfMessagePassers(const std::vector< Label > &protIDs, const std::vector< std::vector< Label >> &parentsOfPeps, const std::vector< double > &pepEvidences, evergreen::InferenceGraphBuilder< Label > &igb)
Works on a vector of protein indices (potentially not consecutive)
Definition: MessagePasserFactory.h:257
evergreen::TableDependency< Label > createRegularizingSumEvidenceFactor(size_t nr_parents, Label id, Label pep_id)
Definition: MessagePasserFactory.h:173
evergreen::TableDependency< Label > createChargeFactor(Label repId, Label chargeId, int chg)
Definition: MessagePasserFactory.h:219
double gamma_
Definition: MessagePasserFactory.h:30
double pepPrior_
Definition: MessagePasserFactory.h:30
evergreen::TableDependency< Label > createReplicateFactor(Label seqId, Label repId)
Definition: MessagePasserFactory.h:205
evergreen::TableDependency< Label > createSumFactor(size_t nr_parents, Label nId)
Definition: MessagePasserFactory.h:193
evergreen::TableDependency< Label > createPeptideEvidenceFactor(Label id, double prob)
Definition: MessagePasserFactory.h:149
double p_
Definition: MessagePasserFactory.h:30
std::map< int, double > chgLLhoods
Definition: MessagePasserFactory.h:34
evergreen::PseudoAdditiveDependency< Label > createBFPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId, const std::vector< evergreen::TableDependency< Label > > &deps)
To sum up distributions for the number of parent proteins of a peptide brute-force.
Definition: MessagePasserFactory.h:248
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19