OpenMS
Loading...
Searching...
No Matches
IntegerMassDecomposer.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Anton Pervukhin <Anton.Pervukhin@CeBiTec.Uni-Bielefeld.DE> $
7// --------------------------------------------------------------------------
8//
9
10#pragma once
11
12#include <vector>
13#include <utility>
14
17
19
20namespace OpenMS
21{
22
23 namespace ims
24 {
25
42 template <typename ValueType = long unsigned int,
43 typename DecompositionValueType = unsigned int>
45 public MassDecomposer<ValueType, DecompositionValueType>
46 {
47public:
50
53
56
59
61 typedef typename decomposition_type::size_type size_type;
62
68 explicit IntegerMassDecomposer(const Weights & alphabet);
69
76 bool exist(value_type mass) override;
77
85
93
103
104private:
105
109 typedef std::vector<std::pair<size_type, decomposition_value_type> > witness_vector_type;
110
114 typedef std::vector<value_type> residues_table_row_type;
115
119 typedef std::vector<residues_table_row_type> residues_table_type;
120
125
132
138
145
150
156
160 void fillExtendedResidueTable_(const Weights & _alphabet, residues_table_row_type & _lcms,
161 residues_table_row_type & _mass_in_lcms, const value_type _infty,
162 witness_vector_type & _witness_vector, residues_table_type & _ertable);
163
172 void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex,
173 decomposition_type decomposition, decompositions_type & decompositionsStore);
174 };
175
176
177 template <typename ValueType, typename DecompositionValueType>
179 const Weights & alphabet) :
180 alphabet_(alphabet)
181 {
182
183 lcms_.resize(alphabet.size());
184 mass_in_lcms_.resize(alphabet.size());
185
186 infty_ = alphabet.getWeight(0) * alphabet.getWeight(alphabet.size() - 1);
187
189
190 }
191
192 template <typename ValueType, typename DecompositionValueType>
194 const Weights & _alphabet, residues_table_row_type & _lcms, residues_table_row_type & _mass_in_lcms,
195 const value_type _infty, witness_vector_type & _witnessVector, residues_table_type & _ertable)
196 {
197
198 if (_alphabet.size() < 2)
199 {
200 return;
201 }
202 // caches the most often used mass - smallest mass
203 value_type smallestMass = _alphabet.getWeight(0), secondMass = _alphabet.getWeight(1);
204
205 // initializes table: infinity everywhere except in the first field of every column
206 _ertable.reserve(_alphabet.size());
207 _ertable.assign(_alphabet.size(), std::vector<value_type>(smallestMass, _infty));
208
209 for (size_type i = 0; i < _alphabet.size(); ++i)
210 {
211 _ertable[i][0] = 0;
212 }
213
214 // initializes witness vector
215 _witnessVector.resize(smallestMass);
216
217 // fills second column (the first one is already correct)
218 size_type it_inc = secondMass % smallestMass, witness = 1;
219 //typename residues_table_row_type::iterator it = _ertable[1].begin() + it_inc;
220 value_type mass = secondMass;
221 // initializes counter to create a witness vector
222 decomposition_value_type counter = 0;
223 size_type it_i = it_inc;
224 while (it_i != 0)
225 {
226 _ertable[1][it_i] = mass;
227 mass += secondMass;
228 ++counter;
229 _witnessVector[it_i] = std::make_pair(witness, counter);
230 //std::cerr << "BLA: " << counter << " " << &_ertable[1][0] << " " << it - _ertable[1].begin() << " " << _ertable[1].size() << std::endl;
231 it_i += it_inc;
232 if (it_i >= _ertable[1].size())
233 {
234 it_i -= _ertable[1].size();
235 }
236 }
237 // fills cache variables for i==1
238 value_type tmp_d = Math::gcd(smallestMass, secondMass);
239 _lcms[1] = secondMass * smallestMass / tmp_d;
240 _mass_in_lcms[1] = smallestMass / tmp_d;
241
242 // fills remaining table. i is the column index.
243 for (size_type i = 2; i < _alphabet.size(); ++i)
244 {
245 // caches often used i-th alphabet mass
246 value_type currentMass = _alphabet.getWeight(i);
247
248 value_type d = Math::gcd(smallestMass, currentMass);
249
250 // fills cache for various variables.
251 // note that values for i==0 are never assigned since they're unused anyway.
252 _lcms[i] = currentMass * smallestMass / d;
253 _mass_in_lcms[i] = smallestMass / d;
254
255 // Nijenhuis' improvement: Is currentMass composable with smaller alphabet?
256 if (currentMass >= _ertable[i - 1][currentMass % smallestMass])
257 {
258 _ertable[i] = _ertable[i - 1];
259 continue;
260 }
261
262 const residues_table_row_type & prev_column = _ertable[i - 1];
263 residues_table_row_type & cur_column = _ertable[i];
264
265 if (d == 1)
266 {
267 // This loop is for the case that the gcd is 1. The optimization used below
268 // is not applicable here.
269
270 // p_inc is used to change residue (p) efficiently
271 size_type p_inc = currentMass % smallestMass;
272
273 // n is the value that will be written into the table
274 value_type n = 0;
275 // current residue (in paper variable 'r' is used)
276 size_type p = 0;
277 // counter for creation of witness vector
278 decomposition_value_type local_counter = 0;
279
280 for (size_type m = smallestMass; m > 0; --m)
281 {
282 n += currentMass;
283 p += p_inc;
284 ++local_counter;
285 if (p >= smallestMass)
286 {
287 p -= smallestMass;
288 }
289 if (n > prev_column[p])
290 {
291 n = prev_column[p];
292 local_counter = 0;
293 }
294 else
295 {
296 _witnessVector[p] = std::make_pair(i, local_counter);
297 }
298 cur_column[p] = n;
299 }
300 }
301 else
302 {
303 // If we're here, the gcd is not 1. We can use the following cache-optimized
304 // version of the algorithm. The trick is to put the iteration over all
305 // residue classes into the _inner_ loop.
306 //
307 // One could see it as going through one column in blocks which are gcd entries long.
308 size_type cur = currentMass % smallestMass;
309 size_type prev = 0;
310 size_type p_inc = cur - d;
311 // counters for creation of one witness vector
312 std::vector<decomposition_value_type> counters(smallestMass);
313
314 // copies first block from prev_column to cur_column
315 for (size_type j = 1; j < d; ++j)
316 {
317 cur_column[j] = prev_column[j];
318 }
319
320 // first loop: goes through all blocks, updating cur_column for the first time.
321 for (size_type m = smallestMass / d; m > 1; m--)
322 {
323 // r: current residue class
324 for (size_type r = 0; r < d; r++)
325 {
326
327 ++counters[cur];
328 if (cur_column[prev] + currentMass > prev_column[cur])
329 {
330 cur_column[cur] = prev_column[cur];
331 counters[cur] = 0;
332 }
333 else
334 {
335 cur_column[cur] = cur_column[prev] + currentMass;
336 _witnessVector[cur] = std::make_pair(i, counters[cur]);
337 }
338
339 prev++;
340 cur++;
341 }
342
343 prev = cur - d;
344
345 // this does: cur = (cur + currentMass) % smallestMass - d;
346 cur += p_inc;
347 if (cur >= smallestMass)
348 {
349 cur -= smallestMass;
350 }
351 }
352
353 // second loop:
354 bool cont = true;
355 while (cont)
356 {
357 cont = false;
358 prev++;
359 cur++;
360 ++counters[cur];
361 for (size_type r = 1; r < d; ++r)
362 {
363 if (cur_column[prev] + currentMass < cur_column[cur])
364 {
365 cur_column[cur] = cur_column[prev] + currentMass;
366 cont = true;
367 _witnessVector[cur] = std::make_pair(i, counters[cur]);
368 }
369 else
370 {
371 counters[cur] = 0;
372 }
373 prev++;
374 cur++;
375 }
376
377 prev = cur - d;
378
379 cur += p_inc;
380 if (cur >= smallestMass)
381 {
382 cur -= smallestMass;
383 }
384 }
385 }
386
387 }
388 }
389
390 template <typename ValueType, typename DecompositionValueType>
392 exist(value_type mass)
393 {
394
395 value_type residue = ertable_.back().at(mass % alphabet_.getWeight(0));
396 return residue != infty_ && mass >= residue;
397 }
398
399 template <typename ValueType, typename DecompositionValueType>
402 {
403
404 decomposition_type decomposition;
405 if (!this->exist(mass))
406 {
407 return decomposition;
408 }
409
410 decomposition.reserve(alphabet_.size());
411 decomposition.resize(alphabet_.size());
412
413 // initial mass residue: in FIND-ONE algorithm in paper corresponds variable "r"
414 value_type r = mass % alphabet_.getWeight(0);
415 value_type m = ertable_.back().at(r);
416
417 decomposition.at(0) = static_cast<decomposition_value_type>
418 ((mass - m) / alphabet_.getWeight(0));
419
420 while (m != 0)
421 {
422 size_type i = witness_vector_.at(r).first;
423 decomposition_value_type j = witness_vector_.at(r).second;
424 decomposition.at(i) += j;
425 if (m < j * alphabet_.getWeight(i))
426 {
427 break;
428 }
429 m -= j * alphabet_.getWeight(i);
430 r = m % alphabet_.getWeight(0);
431 }
432 return decomposition;
433 }
434
435 template <typename ValueType, typename DecompositionValueType>
438 {
439 decompositions_type decompositionsStore;
440 decomposition_type decomposition(alphabet_.size());
441 collectDecompositionsRecursively_(mass, alphabet_.size() - 1, decomposition, decompositionsStore);
442 return decompositionsStore;
443 }
444
445 template <typename ValueType, typename DecompositionValueType>
448 decomposition_type decomposition, decompositions_type & decompositionsStore)
449 {
450 if (alphabetMassIndex == 0)
451 {
452 value_type numberOfMasses0 = mass / alphabet_.getWeight(0);
453 if (numberOfMasses0 * alphabet_.getWeight(0) == mass)
454 {
455 decomposition[0] = static_cast<decomposition_value_type>(numberOfMasses0);
456 decompositionsStore.push_back(decomposition);
457 }
458 return;
459 }
460
461 // tested: caching these values gives us 15% better performance, at least
462 // with aminoacid-mono.masses
463 const value_type lcm = lcms_[alphabetMassIndex];
464 const value_type mass_in_lcm = mass_in_lcms_[alphabetMassIndex]; // this is alphabet mass divided by gcd
465
466 value_type mass_mod_alphabet0 = mass % alphabet_.getWeight(0); // trying to avoid modulo
467 const value_type mass_mod_decrement = alphabet_.getWeight(alphabetMassIndex) % alphabet_.getWeight(0);
468
469 for (value_type i = 0; i < mass_in_lcm; ++i)
470 {
471 // here is the conversion from value_type to decomposition_value_type
472 decomposition[alphabetMassIndex] = static_cast<decomposition_value_type>(i);
473
474 // this check is needed because mass could have unsigned type and after reduction on i*alphabetMass will be still be positive but huge
475 // and that will end up in infinite loop
476 if (mass < i * alphabet_.getWeight(alphabetMassIndex))
477 {
478 break;
479 }
480
481 // r: current residue class. will stay the same in the following loop
482 value_type r = ertable_[alphabetMassIndex - 1][mass_mod_alphabet0];
483
484 // TODO: if infty was std::numeric_limits<...>... the following 'if' would not be necessary
485 if (r != infty_)
486 {
487 for (value_type m = mass - i * alphabet_.getWeight(alphabetMassIndex); m >= r; m -= lcm)
488 {
489 // the condition of the 'for' loop (m >= r) and decrementing the mass
490 // in steps of the lcm ensures that m is decomposable. Therefore
491 // the recursion will result in at least one witness.
492 collectDecompositionsRecursively_(m, alphabetMassIndex - 1, decomposition, decompositionsStore);
493 decomposition[alphabetMassIndex] += mass_in_lcm;
494 // this check is needed because mass could have unsigned type and after reduction on i*alphabetMass will be still be positive but huge
495 // and that will end up in infinite loop
496 if (m < lcm)
497 {
498 break;
499 }
500 }
501 }
502 // subtle way of changing the modulo, instead of plain calculation it from (mass - i*currentAlphabetMass) % alphabetMass0 every time
503 if (mass_mod_alphabet0 < mass_mod_decrement)
504 {
505 mass_mod_alphabet0 += alphabet_.getWeight(0) - mass_mod_decrement;
506 }
507 else
508 {
509 mass_mod_alphabet0 -= mass_mod_decrement;
510 }
511 }
512
513 }
514
522 template <typename ValueType, typename DecompositionValueType>
524 DecompositionValueType>::getNumberOfDecompositions(value_type mass)
525 {
526 return static_cast<typename IntegerMassDecomposer<ValueType, DecompositionValueType>::decomposition_value_type>(getAllDecompositions(mass).size());
527 }
528
529 } // namespace ims
530} // namespace OpenMS
531
Implements MassDecomposer interface using algorithm and data structures described in paper "Efficient...
Definition IntegerMassDecomposer.h:46
MassDecomposer< ValueType, DecompositionValueType >::decompositions_type decompositions_type
Type of container for many decompositions.
Definition IntegerMassDecomposer.h:58
residues_table_row_type lcms_
Definition IntegerMassDecomposer.h:137
Weights alphabet_
Definition IntegerMassDecomposer.h:124
value_type infty_
Definition IntegerMassDecomposer.h:149
IntegerMassDecomposer(const Weights &alphabet)
Definition IntegerMassDecomposer.h:178
MassDecomposer< ValueType, DecompositionValueType >::decomposition_type decomposition_type
Type of decomposition.
Definition IntegerMassDecomposer.h:55
std::vector< residues_table_row_type > residues_table_type
Definition IntegerMassDecomposer.h:119
std::vector< std::pair< size_type, decomposition_value_type > > witness_vector_type
Definition IntegerMassDecomposer.h:109
void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex, decomposition_type decomposition, decompositions_type &decompositionsStore)
Definition IntegerMassDecomposer.h:447
decompositions_type getAllDecompositions(value_type mass) override
Definition IntegerMassDecomposer.h:437
MassDecomposer< ValueType, DecompositionValueType >::decomposition_value_type decomposition_value_type
Type of decomposition value.
Definition IntegerMassDecomposer.h:52
MassDecomposer< ValueType, DecompositionValueType >::value_type value_type
Type of value to be decomposed.
Definition IntegerMassDecomposer.h:49
std::vector< value_type > residues_table_row_type
Definition IntegerMassDecomposer.h:114
residues_table_row_type mass_in_lcms_
Definition IntegerMassDecomposer.h:144
decomposition_type getDecomposition(value_type mass) override
Definition IntegerMassDecomposer.h:401
decomposition_type::size_type size_type
Type of decomposition's size.
Definition IntegerMassDecomposer.h:61
witness_vector_type witness_vector_
Definition IntegerMassDecomposer.h:155
bool exist(value_type mass) override
Definition IntegerMassDecomposer.h:392
residues_table_type ertable_
Definition IntegerMassDecomposer.h:131
decomposition_value_type getNumberOfDecompositions(value_type mass) override
Definition IntegerMassDecomposer.h:524
void fillExtendedResidueTable_(const Weights &_alphabet, residues_table_row_type &_lcms, residues_table_row_type &_mass_in_lcms, const value_type _infty, witness_vector_type &_witness_vector, residues_table_type &_ertable)
Definition IntegerMassDecomposer.h:193
An interface to handle decomposing of integer values/masses over a set of integer weights (alphabet).
Definition MassDecomposer.h:42
DecompositionValueType decomposition_value_type
Definition MassDecomposer.h:52
ValueType value_type
Definition MassDecomposer.h:47
std::vector< decomposition_type > decompositions_type
Definition MassDecomposer.h:62
std::vector< decomposition_value_type > decomposition_type
Definition MassDecomposer.h:57
Represents a set of weights (double values and scaled with a certain precision their integer counterp...
Definition Weights.h:42
size_type size() const
Definition Weights.h:98
weight_type getWeight(size_type i) const
Definition Weights.h:109
T gcd(T a, T b)
Returns the greatest common divisor (gcd) of two numbers by applying the Euclidean algorithm.
Definition MathFunctions.h:310
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19