// Extracted from: 2.6.0/html/GaussFilterAlgorithm_8h_source.html

// --------------------------------------------------------------------------

//                   OpenMS -- Open-Source Mass Spectrometry

// --------------------------------------------------------------------------

// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,

// ETH Zurich, and Freie Universitaet Berlin 2002-2020.

//

// This software is released under a three-clause BSD license:

//  * Redistributions of source code must retain the above copyright

//    notice, this list of conditions and the following disclaimer.

//  * Redistributions in binary form must reproduce the above copyright

//    notice, this list of conditions and the following disclaimer in the

//    documentation and/or other materials provided with the distribution.

//  * Neither the name of any author or any participating institution

//    may be used to endorse or promote products derived from this software

//    without specific prior written permission.

// For a full list of authors, refer to the file AUTHORS.

// --------------------------------------------------------------------------

// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING

// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,

// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR

// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF

// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

//

// --------------------------------------------------------------------------

// $Maintainer: Hannes Roest $

// $Authors: Eva Lange $

// --------------------------------------------------------------------------


#pragma once


#include <OpenMS/CONCEPT/Types.h>
#include <OpenMS/CONCEPT/Constants.h>
#include <OpenMS/INTERFACES/DataStructures.h>
#include <OpenMS/INTERFACES/ISpectrumAccess.h>

#include <cmath>
#include <cstddef>
#include <iterator>
#include <vector>


namespace OpenMS

{

// #define DEBUG_FILTERING


  class OPENMS_DLLAPI GaussFilterAlgorithm

  {

public:

    GaussFilterAlgorithm();


    virtual ~GaussFilterAlgorithm();


    bool filter(OpenMS::Interfaces::SpectrumPtr spectrum)

    {

      // create new arrays for mz / intensity data and set their size

      OpenMS::Interfaces::BinaryDataArrayPtr intensity_array(new OpenMS::Interfaces::BinaryDataArray);

      OpenMS::Interfaces::BinaryDataArrayPtr mz_array(new OpenMS::Interfaces::BinaryDataArray);

      mz_array->data.resize(spectrum->getMZArray()->data.size());

      intensity_array->data.resize(spectrum->getMZArray()->data.size());


      // apply the filter

      bool ret_val = filter(

          spectrum->getMZArray()->data.begin(),

          spectrum->getMZArray()->data.end(),

          spectrum->getIntensityArray()->data.begin(),

          mz_array->data.begin(), intensity_array->data.begin()

          );

      // set the data of the spectrum to the new mz / int arrays

      spectrum->setMZArray(mz_array);

      spectrum->setIntensityArray(intensity_array);

      return ret_val;

    }


    bool filter(OpenMS::Interfaces::ChromatogramPtr chromatogram)

    {

      // create new arrays for rt / intensity data and set their size

      OpenMS::Interfaces::BinaryDataArrayPtr intensity_array(new OpenMS::Interfaces::BinaryDataArray);

      OpenMS::Interfaces::BinaryDataArrayPtr rt_array(new OpenMS::Interfaces::BinaryDataArray);

      rt_array->data.resize(chromatogram->getTimeArray()->data.size());

      intensity_array->data.resize(chromatogram->getTimeArray()->data.size());


      // apply the filter

      bool ret_val = filter(

          chromatogram->getTimeArray()->data.begin(),

          chromatogram->getTimeArray()->data.end(),

          chromatogram->getIntensityArray()->data.begin(),

          rt_array->data.begin(), intensity_array->data.begin()

          );

      // set the data of the chromatogram to the new rt / int arrays

      chromatogram->setTimeArray(rt_array);

      chromatogram->setIntensityArray(intensity_array);

      return ret_val;

    }


    template <typename ConstIterT, typename IterT>

    bool filter(

        ConstIterT mz_in_start,

        ConstIterT mz_in_end,

        ConstIterT int_in_start,

        IterT mz_out,

        IterT int_out)

    {

      bool found_signal = false;


      ConstIterT mz_it = mz_in_start;

      ConstIterT int_it = int_in_start;

      for (; mz_it != mz_in_end; mz_it++, int_it++)

      {

        // if ppm tolerance is used, calculate a reasonable width value for this m/z

        if (use_ppm_tolerance_)

        {

          initialize((*mz_it) * ppm_tolerance_ * 10e-6, spacing_, ppm_tolerance_, use_ppm_tolerance_ );

        }


        double new_int = integrate_(mz_it, int_it, mz_in_start, mz_in_end);


        // store new intensity and m/z into output iterator

        *mz_out = *mz_it;

        *int_out = new_int;

        ++mz_out;

        ++int_out;


        if (fabs(new_int) > 0) found_signal = true;

      }

      return found_signal;

    }


    void initialize(double gaussian_width, double spacing, double ppm_tolerance, bool use_ppm_tolerance);


protected:


    std::vector<double> coeffs_;

    double sigma_;

    double spacing_;


    // tolerance in ppm

    bool use_ppm_tolerance_;

    double ppm_tolerance_;


    template <typename InputPeakIterator>

    double integrate_(InputPeakIterator x /* mz */, InputPeakIterator y /* int */, InputPeakIterator first, InputPeakIterator last)

    {

      double v = 0.;

      // norm the gaussian kernel area to one

      double norm = 0.;

      Size middle = coeffs_.size();


      double start_pos = (( (*x) - (middle * spacing_)) > (*first)) ? ((*x) - (middle * spacing_)) : (*first);

      double end_pos = (( (*x) + (middle * spacing_)) < (*(last - 1))) ? ((*x) + (middle * spacing_)) : (*(last - 1));


      InputPeakIterator help_x = x;

      InputPeakIterator help_y = y;

#ifdef DEBUG_FILTERING


      std::cout << "integrate from middle to start_pos " << *help_x << " until " << start_pos << std::endl;

#endif


      //integrate from middle to start_pos

      while ((help_x != first) && (*(help_x - 1) > start_pos))

      {

        // search for the corresponding datapoint of help in the gaussian (take the left most adjacent point)

        double distance_in_gaussian = fabs(*x - *help_x);

        Size left_position = (Size)floor(distance_in_gaussian / spacing_);


        // search for the true left adjacent data point (because of rounding errors)

        for (int j = 0; ((j < 3) &&  (distance(first, help_x - j) >= 0)); ++j)

        {

          if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))

          {

            left_position -= j;

            break;

          }


          if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))

          {

            left_position += j;

            break;

          }

        }


        // interpolate between the left and right data points in the gaussian to get the true value at position distance_in_gaussian

        Size right_position = left_position + 1;

        double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;

        // check if the right data point in the gaussian exists

        double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]

                                  : coeffs_[left_position];

#ifdef DEBUG_FILTERING


        std::cout << "distance_in_gaussian " << distance_in_gaussian << std::endl;

        std::cout << " right_position " << right_position << std::endl;

        std::cout << " left_position " << left_position << std::endl;

        std::cout << "coeffs_ at left_position "  <<  coeffs_[left_position] << std::endl;

        std::cout << "coeffs_ at right_position "  <<  coeffs_[right_position] << std::endl;

        std::cout << "interpolated value left " << coeffs_right << std::endl;

#endif


        // search for the corresponding datapoint for (help-1) in the gaussian (take the left most adjacent point)

        distance_in_gaussian = fabs((*x) - (*(help_x - 1)));

        left_position = (Size)floor(distance_in_gaussian / spacing_);


        // search for the true left adjacent data point (because of rounding errors)

        for (UInt j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)

        {

          if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))

          {

            left_position -= j;

            break;

          }


          if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))

          {

            left_position += j;

            break;

          }

        }


        // start the interpolation for the true value in the gaussian

        right_position = left_position + 1;

        d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;

        double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]

                                 : coeffs_[left_position];

#ifdef DEBUG_FILTERING


        std::cout << " help_x-1 " << *(help_x - 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;

        std::cout << " right_position " << right_position << std::endl;

        std::cout << " left_position " << left_position << std::endl;

        std::cout << "coeffs_ at left_position " <<  coeffs_[left_position] << std::endl;

        std::cout << "coeffs_ at right_position " <<   coeffs_[right_position] << std::endl;

        std::cout << "interpolated value right " << coeffs_left << std::endl;


        std::cout << " intensity " << fabs(*(help_x - 1) - (*help_x)) / 2. << " * " << *(help_y - 1) << " * " << coeffs_left << " + " << *help_y << "* " << coeffs_right

                  << std::endl;

#endif


        norm += fabs((*(help_x - 1)) - (*help_x)) / 2. * (coeffs_left + coeffs_right);


        v += fabs((*(help_x - 1)) - (*help_x)) / 2. * (*(help_y - 1) * coeffs_left + (*help_y) * coeffs_right);

        --help_x;

        --help_y;

      }


      //integrate from middle to end_pos

      help_x = x;

      help_y = y;

#ifdef DEBUG_FILTERING


      std::cout << "integrate from middle to endpos " << *help_x << " until " << end_pos << std::endl;

#endif


      while ((help_x != (last - 1)) && (*(help_x + 1) < end_pos))

      {

        // search for the corresponding datapoint for help in the gaussian (take the left most adjacent point)

        double distance_in_gaussian = fabs((*x) - (*help_x));

        int left_position = (UInt)floor(distance_in_gaussian / spacing_);


        // search for the true left adjacent data point (because of rounding errors)

        for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)

        {

          if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))

          {

            left_position -= j;

            break;

          }


          if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))

          {

            left_position += j;

            break;

          }

        }

        // start the interpolation for the true value in the gaussian

        Size right_position = left_position + 1;

        double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;

        double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]

                                 : coeffs_[left_position];


#ifdef DEBUG_FILTERING


        std::cout << " help " << *help_x << " distance_in_gaussian " << distance_in_gaussian << std::endl;

        std::cout << " left_position " << left_position << std::endl;

        std::cout << "coeffs_ at right_position " <<  coeffs_[left_position] << std::endl;

        std::cout << "coeffs_ at left_position " <<  coeffs_[right_position] << std::endl;

        std::cout << "interpolated value left " << coeffs_left << std::endl;

#endif


        // search for the corresponding datapoint for (help+1) in the gaussian (take the left most adjacent point)

        distance_in_gaussian = fabs((*x) - (*(help_x + 1)));

        left_position = (UInt)floor(distance_in_gaussian / spacing_);


        // search for the true left adjacent data point (because of rounding errors)

        for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)

        {

          if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))

          {

            left_position -= j;

            break;

          }


          if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))

          {

            left_position += j;

            break;

          }

        }


        // start the interpolation for the true value in the gaussian

        right_position = left_position + 1;

        d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;

        double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]

                                  : coeffs_[left_position];

#ifdef DEBUG_FILTERING


        std::cout << " (help + 1) " << *(help_x + 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;

        std::cout << " left_position " << left_position << std::endl;

        std::cout << "coeffs_ at right_position " <<   coeffs_[left_position] << std::endl;

        std::cout << "coeffs_ at left_position " <<  coeffs_[right_position] << std::endl;

        std::cout << "interpolated value right " << coeffs_right << std::endl;


        std::cout << " intensity " <<  fabs(*help_x - *(help_x + 1)) / 2.

                  << " * " << *help_y << " * " << coeffs_left << " + " << *(help_y + 1)

                  << "* " << coeffs_right

                  << std::endl;

#endif

        norm += fabs((*help_x) - (*(help_x + 1)) ) / 2. * (coeffs_left + coeffs_right);


        v += fabs((*help_x) - (*(help_x + 1)) ) / 2. * ((*help_y) * coeffs_left + (*(help_y + 1)) * coeffs_right);

        ++help_x;

        ++help_y;

      }


      if (v > 0)

      {

        return v / norm;

      }

      else

      {

        return 0;

      }

    }


  };


} // namespace OpenMS