OpenMS
MessagePasserFactory.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Julianus Pfeuffer $
6 // $Authors: Julianus Pfeuffer $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <cmath>
12 #include <Evergreen/evergreen.hpp>
13 
14 typedef unsigned long int uiint;
15 
16 namespace OpenMS
17 {
18  namespace Internal
19 {
24  template <typename Label>
26  private:
27  //const int minInputsPAF = 3; //@todo could be used to decide when brute force is better.
28 
31 
34  std::map<int, double> chgLLhoods = {{1, 0.7}, {2, 0.9}, {3, 0.7}, {4, 0.5}, {5, 0.5}};
35 
39  inline double notConditionalGivenSum(unsigned long summ) {
40  // use log for better precision
41  return std::pow(2., log2(1. - beta_) + summ * log2(1. - alpha_));
42  //return std::pow((1.0 - alpha_), summ) * (1.0 - beta_); // standard way
43  }
44 
45  public:
47  evergreen::TableDependency<Label> createProteinFactor(Label id, int nrMissingPeps = 0);
49  evergreen::TableDependency<Label> createProteinFactor(Label id, double prior, int nrMissingPeps = 0);
50 
53  evergreen::TableDependency<Label> createPeptideEvidenceFactor(Label id, double prob);
54 
58  evergreen::TableDependency<Label> createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId);
59 
62  evergreen::TableDependency<Label> createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId);
63 
64  //For extended model. @todo currently unused
65  evergreen::TableDependency<Label> createSumFactor(size_t nrParents, Label nId);
66  evergreen::TableDependency<Label> createReplicateFactor(Label seqId, Label repId);
67  evergreen::TableDependency<Label> createChargeFactor(Label repId, Label chargeId, int chg);
68 
70  evergreen::AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId);
72  evergreen::AdditiveDependency<Label> createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId);
74  evergreen::PseudoAdditiveDependency<Label> createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<evergreen::TableDependency <Label> > & deps);
75 
84  MessagePasserFactory<Label>(double alpha, double beta, double gamma, double p, double pep_prior);
85 
86 
87 
89  // TODO we could recollect the protIDs from the union of parents.
90  void fillVectorsOfMessagePassers(const std::vector<Label> & protIDs,
91  const std::vector<std::vector<Label>> & parentsOfPeps,
92  const std::vector<double> & pepEvidences,
93  evergreen::InferenceGraphBuilder<Label> & igb);
94 
95  //void fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
96  // const std::vector<std::vector<Label>> & parentsOfPeps,
97  // const std::vector<double> & pepEvidences,
98  // evergreen::InferenceGraphBuilder<Label> & igb);
99  };
100 
101  //IMPLEMENTATIONS:
102 
103  template <typename Label>
104  MessagePasserFactory<Label>::MessagePasserFactory(double alpha, double beta, double gamma, double p, double pep_prior) {
105  assert(0. <= alpha && alpha <= 1.);
106  assert(0. <= beta && beta <= 1.);
107  assert(0. <= gamma && gamma <= 1.);
108  //Note: smaller than 1 might be possible but is untested right now.
109  assert(p >= 1.);
110  assert(0. < pep_prior && pep_prior < 1.);
111  alpha_ = alpha;
112  beta_ = beta;
113  gamma_ = gamma;
114  p_ = p;
115  pepPrior_ = pep_prior;
116  }
117 
118  template <typename Label>
119  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createProteinFactor(Label id, int nrMissingPeps) {
120  double prior = gamma_;
121  if (nrMissingPeps > 0)
122  {
123  double powFactor = std::pow(1.0 - alpha_, -nrMissingPeps);
124  prior = -prior/(prior * powFactor - prior - powFactor);
125  }
126  double table[] = {1.0 - prior, prior};
127  evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
128  return evergreen::TableDependency<Label>(lpmf,p_);
129  }
130 
131  template <typename Label>
132  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createProteinFactor(Label id, double prior, int nrMissingPeps) {
133  if (nrMissingPeps > 0)
134  {
135  double powFactor = std::pow(1.0 - alpha_, -nrMissingPeps);
136  prior = -prior/(prior * powFactor - prior - powFactor);
137  }
138  double table[] = {1.0 - prior, prior};
139  evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
140  return evergreen::TableDependency<Label>(lpmf,p_);
141  }
142 
143  template <typename Label>
144  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createPeptideEvidenceFactor(Label id, double prob) {
145  double table[] = {(1 - prob) * (1 - pepPrior_), prob * pepPrior_};
146  evergreen::LabeledPMF<Label> lpmf({id}, evergreen::PMF({0L}, evergreen::Tensor<double>::from_array(table)));
147  return evergreen::TableDependency<Label>(lpmf,p_);
148  }
149 
150 
151  template <typename Label>
152  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId) {
153  evergreen::Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
154  for (unsigned long i=0; i <= nrParents; ++i) {
155  double notConditional = notConditionalGivenSum(i);
156  unsigned long indexArr[2] = {i,0ul};
157  table[indexArr] = notConditional;
158  unsigned long indexArr2[2] = {i,1ul};
159  table[indexArr2] = 1.0 - notConditional;
160  }
161  //std::cout << table << std::endl;
162  evergreen::LabeledPMF<Label> lpmf({nId, pepId}, evergreen::PMF({0L,0L}, table));
163  //std::cout << lpmf << std::endl;
164  return evergreen::TableDependency<Label>(lpmf,p_);
165  }
166 
167  template <typename Label>
168  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId) {
169  evergreen::Tensor<double> table({static_cast<unsigned long>(nrParents + 1) , 2});
170  unsigned long z[2]{0ul,0ul};
171  unsigned long z1[2]{0ul,1ul};
172  table[z] = 1. - beta_;
173  table[z1] = beta_;
174  for (unsigned long i=1; i <= nrParents; ++i) {
175  double notConditional = notConditionalGivenSum(i);
176  unsigned long indexArr[2] = {i,0ul};
177  table[indexArr] = notConditional / i;
178  unsigned long indexArr2[2] = {i,1ul};
179  table[indexArr2] = (1.0 - notConditional) / i;
180  }
181  //std::cout << table << std::endl;
182  evergreen::LabeledPMF<Label> lpmf({nId, pepId}, evergreen::PMF({0L,0L}, table));
183  //std::cout << lpmf << std::endl;
184  return evergreen::TableDependency<Label>(lpmf,p_);
185  }
186 
187  template <typename Label>
188  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createSumFactor(size_t nrParents, Label nId) {
189  evergreen::Tensor<double> table({nrParents+1});
190  for (unsigned long i=0; i <= nrParents; ++i) {
191  table[i] = 1.0/(nrParents+1.);
192  }
193  //std::cout << table << std::endl;
194  evergreen::LabeledPMF<Label> lpmf({nId}, evergreen::PMF({0L}, table));
195  //std::cout << lpmf << std::endl;
196  return evergreen::TableDependency<Label>(lpmf,p_);
197  }
198 
199  template <typename Label>
200  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createReplicateFactor(Label seqId, Label repId) {
201  using arr = unsigned long[2];
202  evergreen::Tensor<double> table({2,2});
203  table[arr{0,0}] = 0.999;
204  table[arr{0,1}] = 0.001;
205  table[arr{1,0}] = 0.1;
206  table[arr{1,1}] = 0.9;
207  //std::cout << table << std::endl;
208  evergreen::LabeledPMF<Label> lpmf({seqId,repId}, evergreen::PMF({0L,0L}, table));
209  //std::cout << lpmf << std::endl;
210  return evergreen::TableDependency<Label>(lpmf,p_);
211  }
212 
213  template <typename Label>
214  evergreen::TableDependency<Label> MessagePasserFactory<Label>::createChargeFactor(Label repId, Label chgId, int chg) {
215  double chgPrior = chgLLhoods[chg];
216  using arr = unsigned long[2];
217  evergreen::Tensor<double> table({2,2});
218  table[arr{0,0}] = 0.999;
219  table[arr{0,1}] = 0.001;
220  table[arr{1,0}] = 0.1;
221  table[arr{1,1}] = chgPrior;
222  //std::cout << table << std::endl;
223  evergreen::LabeledPMF<Label> lpmf({repId,chgId}, evergreen::PMF({0L,0L}, table));
224  //std::cout << lpmf << std::endl;
225  return evergreen::TableDependency<Label>(lpmf,p_);
226  }
227 
228  template <typename Label>
229  evergreen::AdditiveDependency<Label> MessagePasserFactory<Label>::createPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId) {
230  std::vector<std::vector<Label>> parents;
231  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
232  return evergreen::AdditiveDependency<Label>(parents, {nId}, p_);
233  }
234 
235  template <typename Label>
236  evergreen::AdditiveDependency<Label> MessagePasserFactory<Label>::createPeptideProbabilisticAdderFactor(const std::vector<Label> & parentProteinIDs, Label nId) {
237  std::vector<std::vector<Label>> parents;
238  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
239  return evergreen::AdditiveDependency<Label>(parents, {nId}, p_);
240  }
241 
242  template <typename Label>
243  evergreen::PseudoAdditiveDependency<Label> MessagePasserFactory<Label>::createBFPeptideProbabilisticAdderFactor(const std::set<Label> & parentProteinIDs, Label nId, const std::vector<evergreen::TableDependency<Label>> & deps) {
244  std::vector<std::vector<Label>> parents;
245  std::transform(parentProteinIDs.begin(), parentProteinIDs.end(), std::back_inserter(parents), [](const Label& l){return std::vector<Label>{l};});
246  return evergreen::PseudoAdditiveDependency<Label>(parents, {nId}, deps, p_);
247  }
248 
250  // TODO we could recollect the protIDs from the union of parents.
251  template <typename Label>
252  void MessagePasserFactory<Label>::fillVectorsOfMessagePassers(const std::vector<Label>& protIDs,
253  const std::vector<std::vector<Label>>& parentsOfPeps,
254  const std::vector<double>& pepEvidences,
255  evergreen::InferenceGraphBuilder<Label>& igb)
256  {
257  //TODO asserts could be loosened
258  assert(parentsOfPeps.size() == pepEvidences.size());
259  for (const std::vector<uiint>& parents : parentsOfPeps)
260  for (Label parent : parents)
261  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
262 
263  for (uiint pid : protIDs)
264  igb.insert_dependency(createProteinFactor(pid));
265 
266  for (uiint j = 0; j < parentsOfPeps.size(); j++)
267  {
268  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
269  igb.insert_dependency(createSumEvidenceFactor(parentsOfPeps[j],j,j));
270  igb.insert_dependency(createPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
271  }
272  }
273 
274  /* unused but working
275  template <typename Label>
276  void MessagePasserFactory<Label>::fillVectorsOfMessagePassersBruteForce(const std::vector<Label> & protIDs,
277  const std::vector<std::vector<Label>> & parentsOfPeps,
278  const std::vector<double> & pepEvidences,
279  evergreen::InferenceGraphBuilder<Label> & igb)
280  {
281  assert(parentsOfPeps.size() == pepEvidences.size());
282  for (std::vector<uiint> parents : parentsOfPeps)
283  for (uiint parent : parents)
284  assert(std::find(protIDs.begin(), protIDs.end(), parent) != protIDs.end());
285 
286  for (uiint pid : protIDs)
287  igb.insert_dependency(createProteinFactor(pid));
288 
289  for (uiint j = 0; j < parentsOfPeps.size(); j++)
290  {
291  std::vector<evergreen::TableDependency<std::string> > deps;
292  auto pepdep = createSumEvidenceFactor(parentsOfPeps[j],j,j);
293  auto sumdep = createSumFactor(parentsOfPeps[j],j);
294  igb.insert_dependency(createPeptideEvidenceFactor(j,pepEvidences[j]));
295  igb.insert_dependency(pepdep);
296  deps.push_back(sumdep);
297  for (const auto& parent : parentsOfPeps[j]) {
298  deps.push_back(createProteinFactor(parent));
299  }
300 
301  //igb.insert_dependency(createEmptyPeptideProbabilisticAdderFactor(parentsOfPeps[j],j));
302  igb.insert_dependency(createBFPeptideProbabilisticAdderFactor(parentsOfPeps[j],j,deps));
303  }
304  }
305  */
306 
307  } // namespace Internal
308 } // namespace OpenMS
unsigned long int uiint
Definition: MessagePasserFactory.h:14
Definition: MessagePasserFactory.h:25
evergreen::TableDependency< Label > createProteinFactor(Label id, int nrMissingPeps=0)
Protein Factor initialized with model prior (missing peps are experimental)
Definition: MessagePasserFactory.h:119
double notConditionalGivenSum(unsigned long summ)
Definition: MessagePasserFactory.h:39
evergreen::AdditiveDependency< Label > createPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId)
To sum up distributions for the number of parent proteins of a peptide with convolution trees.
Definition: MessagePasserFactory.h:229
double alpha_
the model parameters
Definition: MessagePasserFactory.h:30
MessagePasserFactory(double alpha, double beta, double gamma, double p, double pep_prior)
Constructor.
Definition: MessagePasserFactory.h:104
double beta_
Definition: MessagePasserFactory.h:30
void fillVectorsOfMessagePassers(const std::vector< Label > &protIDs, const std::vector< std::vector< Label >> &parentsOfPeps, const std::vector< double > &pepEvidences, evergreen::InferenceGraphBuilder< Label > &igb)
Works on a vector of protein indices (potentially not consecutive)
Definition: MessagePasserFactory.h:252
evergreen::TableDependency< Label > createChargeFactor(Label repId, Label chargeId, int chg)
Definition: MessagePasserFactory.h:214
double gamma_
Definition: MessagePasserFactory.h:30
double pepPrior_
Definition: MessagePasserFactory.h:30
evergreen::TableDependency< Label > createReplicateFactor(Label seqId, Label repId)
Definition: MessagePasserFactory.h:200
evergreen::TableDependency< Label > createSumFactor(size_t nrParents, Label nId)
Definition: MessagePasserFactory.h:188
evergreen::TableDependency< Label > createPeptideEvidenceFactor(Label id, double prob)
Definition: MessagePasserFactory.h:144
double p_
Definition: MessagePasserFactory.h:30
std::map< int, double > chgLLhoods
Definition: MessagePasserFactory.h:34
evergreen::TableDependency< Label > createRegularizingSumEvidenceFactor(size_t nrParents, Label nId, Label pepId)
Definition: MessagePasserFactory.h:168
evergreen::TableDependency< Label > createSumEvidenceFactor(size_t nrParents, Label nId, Label pepId)
Definition: MessagePasserFactory.h:152
evergreen::PseudoAdditiveDependency< Label > createBFPeptideProbabilisticAdderFactor(const std::set< Label > &parentProteinIDs, Label nId, const std::vector< evergreen::TableDependency< Label > > &deps)
To sum up distributions for the number of parent proteins of a peptide brute-force.
Definition: MessagePasserFactory.h:243
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22