nightly/html/IntegerMassDecomposer_8h_source.html

// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin

// SPDX-License-Identifier: BSD-3-Clause

//

// --------------------------------------------------------------------------

// $Maintainer: Timo Sachsenberg $

// $Authors: Anton Pervukhin <Anton.Pervukhin@CeBiTec.Uni-Bielefeld.DE> $

// --------------------------------------------------------------------------

//


#pragma once


#include <vector>

#include <utility>


#include <OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/Weights.h>

#include <OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/MassDecomposer.h>


#include <OpenMS/MATH/MathFunctions.h>


namespace OpenMS

{


  namespace ims

  {


    /**
      @brief Decomposes integer masses over an alphabet of integer weights.

      Implements the MassDecomposer interface. On construction an extended
      residue table and a witness vector are precomputed from the alphabet;
      the query methods below only read those tables.

      @tparam ValueType Type of the masses to be decomposed.
      @tparam DecompositionValueType Type of the per-weight counts stored in a decomposition.
    */
    template <typename ValueType = long unsigned int,
              typename DecompositionValueType = unsigned int>
    class IntegerMassDecomposer :
      public MassDecomposer<ValueType, DecompositionValueType>
    {
public:
      /// Type of value to be decomposed.
      using value_type = typename MassDecomposer<ValueType, DecompositionValueType>::value_type;

      /// Type of decomposition value.
      using decomposition_value_type = typename MassDecomposer<ValueType, DecompositionValueType>::decomposition_value_type;

      /// Type of decomposition.
      using decomposition_type = typename MassDecomposer<ValueType, DecompositionValueType>::decomposition_type;

      /// Type of container for many decompositions.
      using decompositions_type = typename MassDecomposer<ValueType, DecompositionValueType>::decompositions_type;

      /// Type of decomposition's size.
      using size_type = typename decomposition_type::size_type;

      /// Copies @p alphabet and precomputes all lookup tables from it.
      explicit IntegerMassDecomposer(const Weights & alphabet);

      /// Returns true if @p mass can be decomposed over the alphabet.
      bool exist(value_type mass) override;

      /// Returns one decomposition of @p mass, or an empty container if none exists.
      decomposition_type getDecomposition(value_type mass) override;

      /// Returns every decomposition of @p mass.
      decompositions_type getAllDecompositions(value_type mass) override;

      /// Returns the number of decompositions of @p mass (computed by enumerating all of them).
      decomposition_value_type getNumberOfDecompositions(value_type mass) override;

private:
      /// Per residue class: (alphabet index, count) pair consumed by getDecomposition().
      using witness_vector_type = std::vector<std::pair<size_type, decomposition_value_type> >;

      /// One column of the residue table, indexed by residue modulo the first weight.
      using residues_table_row_type = std::vector<value_type>;

      /// The residue table: one column per alphabet entry.
      using residues_table_type = std::vector<residues_table_row_type>;

      /// Weights over which masses are decomposed.
      Weights alphabet_;

      /// Extended residue table filled by fillExtendedResidueTable_().
      residues_table_type ertable_;

      /// Per alphabet index i >= 1: least common multiple of weight i and weight 0.
      residues_table_row_type lcms_;

      /// Per alphabet index i >= 1: weight 0 divided by gcd(weight 0, weight i).
      residues_table_row_type mass_in_lcms_;

      /// Sentinel stored in ertable_ for residue classes without a decomposition.
      value_type infty_;

      /// Witnesses used to reconstruct a single decomposition.
      witness_vector_type witness_vector_;

      /// Fills the residue table, the witness vector and the lcm caches from @p _alphabet.
      void fillExtendedResidueTable_(const Weights & _alphabet, residues_table_row_type & _lcms,
                                     residues_table_row_type & _mass_in_lcms, const value_type _infty,
                                     witness_vector_type & _witness_vector, residues_table_type & _ertable);

      /// Appends to @p decompositionsStore all decompositions of @p mass that use weights 0..alphabetMassIndex.
      void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex,
                                             decomposition_type decomposition, decompositions_type & decompositionsStore);
    };


    /**
      @brief Builds a decomposer for the given alphabet.

      Stores a copy of @p alphabet, sizes the lcm caches to one entry per weight,
      sets the "unreachable" sentinel to the product of the first and the last
      weight, and then fills all lookup tables.

      NOTE(review): the first and last weights are read unconditionally, so
      @p alphabet presumably must not be empty - confirm with callers.

      @param alphabet Weights over which masses will be decomposed.
    */
    template <typename ValueType, typename DecompositionValueType>
    IntegerMassDecomposer<ValueType, DecompositionValueType>::IntegerMassDecomposer(
      const Weights & alphabet) :
      alphabet_(alphabet),
      lcms_(alphabet.size()),
      mass_in_lcms_(alphabet.size()),
      infty_(alphabet.getWeight(0) * alphabet.getWeight(alphabet.size() - 1))
    {
      // all members the table builder writes to exist by now; ertable_ and
      // witness_vector_ start out empty and are sized by the builder itself
      fillExtendedResidueTable_(alphabet, lcms_, mass_in_lcms_, infty_, witness_vector_, ertable_);
    }


    /**
      @brief Fills the extended residue table, the witness vector and the lcm caches.

      The table has one column per alphabet entry and one row per residue class
      modulo the first weight. Entry [i][r] is either @p _infty or the threshold
      mass for residue class r when only weights 0..i are used: exist() reports a
      mass as decomposable when it is at least the entry of the last column.

      For every residue class the witness vector stores an (alphabet index, count)
      pair; getDecomposition() subtracts count times that weight to walk towards
      residue 0.

      A one-letter alphabet gets a fully initialised single column (0 for residue
      0, @p _infty elsewhere), so the query methods never see an empty table.

      NOTE(review): the first weight is treated as the smallest one (as in the
      original comments); presumably the alphabet is sorted ascending - confirm.

      @param _alphabet Weights to build the tables for.
      @param _lcms Output, entry i >= 1: weight(i) * weight(0) / gcd(weight(0), weight(i)). Entry 0 is not written.
      @param _mass_in_lcms Output, entry i >= 1: weight(0) / gcd(weight(0), weight(i)). Entry 0 is not written.
      @param _infty Sentinel written for residue classes without a known decomposition.
      @param _witnessVector Output, resized to weight(0) entries.
      @param _ertable Output, the residue table.
    */
    template <typename ValueType, typename DecompositionValueType>
    void IntegerMassDecomposer<ValueType, DecompositionValueType>::fillExtendedResidueTable_(
      const Weights & _alphabet, residues_table_row_type & _lcms, residues_table_row_type & _mass_in_lcms,
      const value_type _infty, witness_vector_type & _witnessVector, residues_table_type & _ertable)
    {
      // nothing to build without any weight
      if (_alphabet.size() == 0)
      {
        return;
      }

      // caches the most often used mass - smallest mass
      const value_type smallestMass = _alphabet.getWeight(0);

      // a zero weight defines no residue classes; the rows would have no field 0 to write to
      if (smallestMass == 0)
      {
        return;
      }

      // initializes table: infinity everywhere except in the first field of every column
      _ertable.assign(_alphabet.size(), std::vector<value_type>(smallestMass, _infty));

      for (size_type i = 0; i < _alphabet.size(); ++i)
      {
        _ertable[i][0] = 0;
      }

      // initializes witness vector
      _witnessVector.resize(smallestMass);

      // one-letter alphabet: the first column is already correct and there is
      // nothing else to fill. Returning only now (instead of before the
      // initialization) keeps the table non-empty for exist()/getDecomposition().
      if (_alphabet.size() < 2)
      {
        return;
      }

      const value_type secondMass = _alphabet.getWeight(1);

      // fills second column (the first one is already correct)
      size_type it_inc = secondMass % smallestMass;
      size_type witness = 1;
      value_type mass = secondMass;
      // initializes counter to create a witness vector
      decomposition_value_type counter = 0;
      size_type it_i = it_inc;
      // walks the residues k * secondMass (mod smallestMass) until residue 0 is reached again
      while (it_i != 0)
      {
        _ertable[1][it_i] = mass;
        mass += secondMass;
        ++counter;
        _witnessVector[it_i] = std::make_pair(witness, counter);
        it_i += it_inc;
        if (it_i >= _ertable[1].size())
        {
          it_i -= _ertable[1].size();
        }
      }

      // fills cache variables for i==1
      value_type tmp_d = Math::gcd(smallestMass, secondMass);
      _lcms[1] = secondMass * smallestMass / tmp_d;
      _mass_in_lcms[1] = smallestMass / tmp_d;

      // fills remaining table. i is the column index.
      for (size_type i = 2; i < _alphabet.size(); ++i)
      {
        // caches often used i-th alphabet mass
        value_type currentMass = _alphabet.getWeight(i);

        value_type d = Math::gcd(smallestMass, currentMass);

        // fills cache for various variables.
        // note that values for i==0 are never assigned since they're unused anyway.
        _lcms[i] = currentMass * smallestMass / d;
        _mass_in_lcms[i] = smallestMass / d;

        // Nijenhuis' improvement: Is currentMass composable with smaller alphabet?
        if (currentMass >= _ertable[i - 1][currentMass % smallestMass])
        {
          // the new weight adds nothing: the column is a copy of the previous one
          _ertable[i] = _ertable[i - 1];
          continue;
        }

        const residues_table_row_type & prev_column = _ertable[i - 1];
        residues_table_row_type & cur_column = _ertable[i];

        if (d == 1)
        {
          // This loop is for the case that the gcd is 1. The optimization used below
          // is not applicable here.

          // p_inc is used to change residue (p) efficiently
          size_type p_inc = currentMass % smallestMass;

          // n is the value that will be written into the table
          value_type n = 0;
          // current residue (in paper variable 'r' is used)
          size_type p = 0;
          // counter for creation of witness vector
          decomposition_value_type local_counter = 0;

          for (size_type m = smallestMass; m > 0; --m)
          {
            n += currentMass;
            p += p_inc;
            ++local_counter;
            if (p >= smallestMass)
            {
              p -= smallestMass;
            }
            if (n > prev_column[p])
            {
              // the previous column is better: keep its value (and its witness)
              n = prev_column[p];
              local_counter = 0;
            }
            else
            {
              _witnessVector[p] = std::make_pair(i, local_counter);
            }
            cur_column[p] = n;
          }
        }
        else
        {
          // If we're here, the gcd is not 1. We can use the following cache-optimized
          // version of the algorithm. The trick is to put the iteration over all
          // residue classes into the _inner_ loop.
          //
          // One could see it as going through one column in blocks which are gcd entries long.
          size_type cur = currentMass % smallestMass;
          size_type prev = 0;
          size_type p_inc = cur - d;
          // counters for creation of one witness vector
          std::vector<decomposition_value_type> counters(smallestMass);

          // copies first block from prev_column to cur_column
          for (size_type j = 1; j < d; ++j)
          {
            cur_column[j] = prev_column[j];
          }

          // first loop: goes through all blocks, updating cur_column for the first time.
          for (size_type m = smallestMass / d; m > 1; m--)
          {
            // r: current residue class
            for (size_type r = 0; r < d; r++)
            {
              ++counters[cur];
              if (cur_column[prev] + currentMass > prev_column[cur])
              {
                cur_column[cur] = prev_column[cur];
                counters[cur] = 0;
              }
              else
              {
                cur_column[cur] = cur_column[prev] + currentMass;
                _witnessVector[cur] = std::make_pair(i, counters[cur]);
              }

              prev++;
              cur++;
            }

            prev = cur - d;

            // this does: cur = (cur + currentMass) % smallestMass - d;
            cur += p_inc;
            if (cur >= smallestMass)
            {
              cur -= smallestMass;
            }
          }

          // second loop: repeats passes over the blocks until a pass changes nothing
          bool cont = true;
          while (cont)
          {
            cont = false;
            prev++;
            cur++;
            ++counters[cur];
            for (size_type r = 1; r < d; ++r)
            {
              if (cur_column[prev] + currentMass < cur_column[cur])
              {
                cur_column[cur] = cur_column[prev] + currentMass;
                cont = true;
                _witnessVector[cur] = std::make_pair(i, counters[cur]);
              }
              else
              {
                counters[cur] = 0;
              }
              prev++;
              cur++;
            }

            prev = cur - d;

            cur += p_inc;
            if (cur >= smallestMass)
            {
              cur -= smallestMass;
            }
          }
        }
      }
    }


    /**
      @brief Tells whether @p mass has at least one decomposition over the alphabet.

      Looks up the last column of the residue table at the residue of @p mass
      modulo the first weight. The mass is decomposable when that entry is not
      the infinity sentinel and does not exceed @p mass.

      @param mass Mass to be checked.
      @return true if a decomposition exists, false otherwise.
    */
    template <typename ValueType, typename DecompositionValueType>
    bool IntegerMassDecomposer<ValueType, DecompositionValueType>::
    exist(value_type mass)
    {
      const residues_table_row_type & final_column = ertable_.back();
      const value_type threshold = final_column.at(mass % alphabet_.getWeight(0));

      // residue class not reachable at all
      if (threshold == infty_)
      {
        return false;
      }
      return threshold <= mass;
    }


    /**
      @brief Reconstructs one decomposition of @p mass from the witness vector.

      @param mass Mass to be decomposed.
      @return A container with one count per alphabet entry, or an empty
              container if exist() reports that @p mass is not decomposable.
    */
    template <typename ValueType, typename DecompositionValueType>
    typename IntegerMassDecomposer<ValueType, DecompositionValueType>::decomposition_type
    IntegerMassDecomposer<ValueType, DecompositionValueType>::getDecomposition(value_type mass)
    {
      decomposition_type result;
      if (!this->exist(mass))
      {
        return result;
      }

      // one (zero-initialised) count per alphabet entry
      result.resize(alphabet_.size());

      // current residue class; corresponds to variable "r" of the FIND-ONE algorithm in the paper
      value_type residue = mass % alphabet_.getWeight(0);
      // table entry of that residue class; still has to be explained by the other weights
      value_type remaining = ertable_.back().at(residue);

      // everything above the table entry is covered by copies of the first weight
      result.at(0) = static_cast<decomposition_value_type>
                     ((mass - remaining) / alphabet_.getWeight(0));

      while (remaining != 0)
      {
        const typename witness_vector_type::value_type & witness = witness_vector_.at(residue);
        const size_type letter = witness.first;
        const decomposition_value_type copies = witness.second;

        result.at(letter) += copies;

        const auto consumed = copies * alphabet_.getWeight(letter);
        // guards the unsigned subtraction below against wrapping around
        if (remaining < consumed)
        {
          break;
        }
        remaining -= consumed;
        residue = remaining % alphabet_.getWeight(0);
      }
      return result;
    }


    /**
      @brief Enumerates every decomposition of @p mass over the alphabet.

      Starts the recursive collection at the last alphabet entry with an
      all-zero working decomposition.

      @param mass Mass to be decomposed.
      @return All decompositions found; empty if there is none.
    */
    template <typename ValueType, typename DecompositionValueType>
    typename IntegerMassDecomposer<ValueType, DecompositionValueType>::decompositions_type
    IntegerMassDecomposer<ValueType, DecompositionValueType>::getAllDecompositions(value_type mass)
    {
      decompositions_type collected;

      // working copy handed down the recursion: one zero count per alphabet entry
      decomposition_type blank(alphabet_.size());
      const size_type last_index = alphabet_.size() - 1;

      collectDecompositionsRecursively_(mass, last_index, blank, collected);
      return collected;
    }


    /**
      @brief Collects all decompositions of @p mass that use only weights 0..alphabetMassIndex.

      For index 0 the mass must be an exact multiple of the first weight. For
      larger indices the count of the current weight is varied, and for every
      count the residue table of the previous column decides which remaining
      masses are worth recursing into.

      @param mass Mass still to be explained.
      @param alphabetMassIndex Index of the largest weight that may be used.
      @param decomposition Working decomposition (taken by value; entries at indices <= alphabetMassIndex are overwritten).
      @param decompositionsStore Output container; complete decompositions are appended.
    */
    template <typename ValueType, typename DecompositionValueType>
    void IntegerMassDecomposer<ValueType, DecompositionValueType>::
    collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex,
                                      decomposition_type decomposition, decompositions_type & decompositionsStore)
    {
      const auto first_weight = alphabet_.getWeight(0);

      // recursion anchor: only the first weight is left
      if (alphabetMassIndex == 0)
      {
        const value_type copies = mass / first_weight;
        if (copies * first_weight == mass)
        {
          decomposition[0] = static_cast<decomposition_value_type>(copies);
          decompositionsStore.push_back(decomposition);
        }
        return;
      }

      const auto current_weight = alphabet_.getWeight(alphabetMassIndex);

      // tested: caching these values gives us 15% better performance, at least
      // with aminoacid-mono.masses
      const value_type period = lcms_[alphabetMassIndex];
      // number of copies of the current weight that make up one lcm
      const value_type copies_per_period = mass_in_lcms_[alphabetMassIndex];

      // residue of (mass - count * current_weight) modulo the first weight,
      // maintained incrementally to avoid a modulo in every iteration
      value_type residue = mass % first_weight;
      const value_type residue_step = current_weight % first_weight;

      for (value_type count = 0; count < copies_per_period; ++count)
      {
        // here is the conversion from value_type to decomposition_value_type
        decomposition[alphabetMassIndex] = static_cast<decomposition_value_type>(count);

        // the mass may be unsigned: subtracting more than is left would wrap
        // around to a huge positive value and end in an infinite loop
        const auto used = count * current_weight;
        if (mass < used)
        {
          break;
        }

        // table entry of the current residue class; stays the same in the inner loop
        const value_type threshold = ertable_[alphabetMassIndex - 1][residue];

        // TODO: if infty was std::numeric_limits<...>... the following 'if' would not be necessary
        if (threshold != infty_)
        {
          value_type remaining = mass - used;
          // remaining >= threshold together with steps of one lcm ensures that
          // remaining is decomposable, so every recursion yields at least one witness
          while (remaining >= threshold)
          {
            collectDecompositionsRecursively_(remaining, alphabetMassIndex - 1, decomposition, decompositionsStore);
            decomposition[alphabetMassIndex] += copies_per_period;

            // same wrap-around protection as above, this time for the lcm step
            if (remaining < period)
            {
              break;
            }
            remaining -= period;
          }
        }

        // moves on to the residue of the next count without recomputing the modulo
        if (residue >= residue_step)
        {
          residue -= residue_step;
        }
        else
        {
          residue += first_weight - residue_step;
        }
      }
    }


    /**
      @brief Counts the decompositions of @p mass.

      The count is obtained by enumerating all decompositions and taking the
      size of the result, converted to decomposition_value_type.

      @param mass Mass to be decomposed.
      @return Number of decompositions of @p mass.
    */
    template <typename ValueType, typename DecompositionValueType>
    typename IntegerMassDecomposer<ValueType, DecompositionValueType>::decomposition_value_type
    IntegerMassDecomposer<ValueType, DecompositionValueType>::getNumberOfDecompositions(value_type mass)
    {
      const decompositions_type all = this->getAllDecompositions(mass);
      return static_cast<decomposition_value_type>(all.size());
    }


  } // namespace ims

} // namespace OpenMS


MassDecomposer.h

MathFunctions.h

Weights.h

OpenMS::ims::IntegerMassDecomposer
Implements MassDecomposer interface using algorithm and data structures described in paper "Efficient Mass Decomposition".
Definition IntegerMassDecomposer.h:46

OpenMS::ims::IntegerMassDecomposer::decompositions_type
MassDecomposer< ValueType, DecompositionValueType >::decompositions_type decompositions_type
Type of container for many decompositions.
Definition IntegerMassDecomposer.h:58

OpenMS::ims::IntegerMassDecomposer::lcms_
residues_table_row_type lcms_
Definition IntegerMassDecomposer.h:137

OpenMS::ims::IntegerMassDecomposer::alphabet_
Weights alphabet_
Definition IntegerMassDecomposer.h:124

OpenMS::ims::IntegerMassDecomposer::infty_
value_type infty_
Definition IntegerMassDecomposer.h:149

OpenMS::ims::IntegerMassDecomposer::IntegerMassDecomposer
IntegerMassDecomposer(const Weights &alphabet)
Definition IntegerMassDecomposer.h:178

OpenMS::ims::IntegerMassDecomposer::decomposition_type
MassDecomposer< ValueType, DecompositionValueType >::decomposition_type decomposition_type
Type of decomposition.
Definition IntegerMassDecomposer.h:55

OpenMS::ims::IntegerMassDecomposer::residues_table_type
std::vector< residues_table_row_type > residues_table_type
Definition IntegerMassDecomposer.h:119

OpenMS::ims::IntegerMassDecomposer::witness_vector_type
std::vector< std::pair< size_type, decomposition_value_type > > witness_vector_type
Definition IntegerMassDecomposer.h:109

OpenMS::ims::IntegerMassDecomposer::collectDecompositionsRecursively_
void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex, decomposition_type decomposition, decompositions_type &decompositionsStore)
Definition IntegerMassDecomposer.h:447

OpenMS::ims::IntegerMassDecomposer::getAllDecompositions
decompositions_type getAllDecompositions(value_type mass) override
Definition IntegerMassDecomposer.h:437

OpenMS::ims::IntegerMassDecomposer::decomposition_value_type
MassDecomposer< ValueType, DecompositionValueType >::decomposition_value_type decomposition_value_type
Type of decomposition value.
Definition IntegerMassDecomposer.h:52

OpenMS::ims::IntegerMassDecomposer::value_type
MassDecomposer< ValueType, DecompositionValueType >::value_type value_type
Type of value to be decomposed.
Definition IntegerMassDecomposer.h:49

OpenMS::ims::IntegerMassDecomposer::residues_table_row_type
std::vector< value_type > residues_table_row_type
Definition IntegerMassDecomposer.h:114

OpenMS::ims::IntegerMassDecomposer::mass_in_lcms_
residues_table_row_type mass_in_lcms_
Definition IntegerMassDecomposer.h:144

OpenMS::ims::IntegerMassDecomposer::getDecomposition
decomposition_type getDecomposition(value_type mass) override
Definition IntegerMassDecomposer.h:401

OpenMS::ims::IntegerMassDecomposer::size_type
decomposition_type::size_type size_type
Type of decomposition's size.
Definition IntegerMassDecomposer.h:61

OpenMS::ims::IntegerMassDecomposer::witness_vector_
witness_vector_type witness_vector_
Definition IntegerMassDecomposer.h:155

OpenMS::ims::IntegerMassDecomposer::exist
bool exist(value_type mass) override
Definition IntegerMassDecomposer.h:392

OpenMS::ims::IntegerMassDecomposer::ertable_
residues_table_type ertable_
Definition IntegerMassDecomposer.h:131

OpenMS::ims::IntegerMassDecomposer::getNumberOfDecompositions
decomposition_value_type getNumberOfDecompositions(value_type mass) override
Definition IntegerMassDecomposer.h:524

OpenMS::ims::IntegerMassDecomposer::fillExtendedResidueTable_
void fillExtendedResidueTable_(const Weights &_alphabet, residues_table_row_type &_lcms, residues_table_row_type &_mass_in_lcms, const value_type _infty, witness_vector_type &_witness_vector, residues_table_type &_ertable)
Definition IntegerMassDecomposer.h:193

OpenMS::ims::MassDecomposer
An interface to handle decomposing of integer values/masses over a set of integer weights (alphabet).
Definition MassDecomposer.h:42

OpenMS::ims::MassDecomposer::decomposition_value_type
DecompositionValueType decomposition_value_type
Definition MassDecomposer.h:52

OpenMS::ims::MassDecomposer::value_type
ValueType value_type
Definition MassDecomposer.h:47

OpenMS::ims::MassDecomposer::decompositions_type
std::vector< decomposition_type > decompositions_type
Definition MassDecomposer.h:62

OpenMS::ims::MassDecomposer::decomposition_type
std::vector< decomposition_value_type > decomposition_type
Definition MassDecomposer.h:57

OpenMS::ims::Weights
Represents a set of weights (double values and, scaled with a certain precision, their integer counterparts).
Definition Weights.h:42

OpenMS::ims::Weights::size
size_type size() const
Definition Weights.h:98

OpenMS::ims::Weights::getWeight
weight_type getWeight(size_type i) const
Definition Weights.h:109

OpenMS::Math::gcd
T gcd(T a, T b)
Returns the greatest common divisor (gcd) of two numbers by applying the Euclidean algorithm.
Definition MathFunctions.h:310

OpenMS
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19