Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
MapAlignmentAlgorithmIdentification.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Eva Lange, Clemens Groepl, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_ANALYSIS_MAPMATCHING_MAPALIGNMENTALGORITHMIDENTIFICATION_H
36 #define OPENMS_ANALYSIS_MAPMATCHING_MAPALIGNMENTALGORITHMIDENTIFICATION_H
37 
47 
48 #include <cmath> // for "abs"
49 #include <limits> // for "max"
50 #include <map>
51 
52 namespace OpenMS
53 {
72  public DefaultParamHandler,
73  public ProgressLogger
74  {
75 public:
78 
81 
82  // Set a reference for the alignment
83  template <typename DataType> void setReference(DataType& data)
84  {
85  reference_.clear();
86  if (data.empty()) return; // empty input resets the reference
87  SeqToList rt_data;
88  bool sorted = getRetentionTimes_(data, rt_data);
89  computeMedians_(rt_data, reference_, sorted);
90  if (reference_.empty())
91  {
92  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Could not extract retention time information from the reference file");
93  }
94  }
95 
103  template <typename DataType>
104  void align(std::vector<DataType>& data,
105  std::vector<TransformationDescription>& transformations,
106  Int reference_index = -1)
107  {
108  checkParameters_(data.size());
109  startProgress(0, 3, "aligning maps");
110 
111  reference_index_ = reference_index;
112  // is reference one of the input files?
113  bool use_internal_reference = (reference_index >= 0);
114  if (use_internal_reference)
115  {
116  if (reference_index >= data.size())
117  {
118  throw Exception::IndexOverflow(__FILE__, __LINE__,
119  OPENMS_PRETTY_FUNCTION, reference_index,
120  data.size());
121  }
122  setReference(data[reference_index]);
123  }
124 
125  // one set of RT data for each input map, except reference (if any):
126  std::vector<SeqToList> rt_data(data.size() - use_internal_reference);
127  bool all_sorted = true;
128  for (Size i = 0, j = 0; i < data.size(); ++i)
129  {
130  if ((reference_index >= 0) && (i == Size(reference_index)))
131  {
132  continue; // skip reference map, if any
133  }
134  all_sorted &= getRetentionTimes_(data[i], rt_data[j++]);
135  }
136  setProgress(1);
137 
138  computeTransformations_(rt_data, transformations, all_sorted);
139  setProgress(2);
140 
141  setProgress(3);
142  endProgress();
143  }
144 
145 protected:
146 
/// Type to store retention times given for individual peptide sequences
/// (key: peptide sequence as string, value: list of retention times)
148  typedef std::map<String, DoubleList> SeqToList;
149 
/// Type to store one representative retention time per peptide sequence
/// (key: peptide sequence as string, value: retention time)
151  typedef std::map<String, double> SeqToValue;
152 
155 
/// Reference retention times (per peptide sequence); cleared and refilled by setReference()
157  SeqToValue reference_;
158 
161 
/// Fills @p medians based on the per-sequence retention time lists in @p rt_data.
/// setReference() uses this to fill "reference_", passing the flag returned by
/// getRetentionTimes_() as @p sorted.
/// NOTE(review): implementation not visible here - going by the name, this
/// presumably stores the median RT per peptide sequence, and @p sorted
/// presumably signals that the RT lists are already sorted; confirm against
/// the implementation file.
171  void computeMedians_(SeqToList& rt_data, SeqToValue& medians,
172  bool sorted = false);
173 
/// Overload of getRetentionTimes_() for a list of peptide identifications;
/// results go into @p rt_data. The feature/consensus map overload in this
/// header calls it for the peptide IDs attached to features and for
/// unassigned peptide IDs.
/// NOTE(review): implementation not visible here - callers forward the return
/// value as a "sorted" flag, so it presumably tells whether the collected RT
/// lists are sorted; confirm against the implementation file.
182  bool getRetentionTimes_(std::vector<PeptideIdentification>& peptides,
183  SeqToList& rt_data);
184 
/// Overload of getRetentionTimes_() for a peak map (PeakMap); results go into
/// @p rt_data.
/// NOTE(review): implementation not visible here - presumably the peptide IDs
/// annotated to the spectra are used, and the return value has the same
/// "sorted" meaning as for the other overloads; confirm against the
/// implementation file.
193  bool getRetentionTimes_(PeakMap& experiment, SeqToList& rt_data);
194 
207  template <typename MapType>
208  bool getRetentionTimes_(MapType& features, SeqToList& rt_data)
209  {
210  bool use_feature_rt = param_.getValue("use_feature_rt").toBool();
211  for (typename MapType::Iterator feat_it = features.begin();
212  feat_it != features.end(); ++feat_it)
213  {
214  if (use_feature_rt)
215  {
216  // find the peptide ID closest in RT to the feature centroid:
217  String sequence;
218  double rt_distance = std::numeric_limits<double>::max();
219  bool any_hit = false;
220  for (std::vector<PeptideIdentification>::iterator pep_it =
221  feat_it->getPeptideIdentifications().begin(); pep_it !=
222  feat_it->getPeptideIdentifications().end(); ++pep_it)
223  {
224  if (!pep_it->getHits().empty())
225  {
226  any_hit = true;
227  double current_distance = fabs(pep_it->getRT() -
228  feat_it->getRT());
229  if (current_distance < rt_distance)
230  {
231  pep_it->sort();
232  sequence = pep_it->getHits()[0].getSequence().toString();
233  rt_distance = current_distance;
234  }
235  }
236  }
237 
238  if (any_hit) rt_data[sequence].push_back(feat_it->getRT());
239  }
240  else
241  {
242  getRetentionTimes_(feat_it->getPeptideIdentifications(), rt_data);
243  }
244  }
245 
246  if (!use_feature_rt &&
247  param_.getValue("use_unassigned_peptides").toBool())
248  {
249  getRetentionTimes_(features.getUnassignedPeptideIdentifications(),
250  rt_data);
251  }
252 
253  // remove duplicates (can occur if a peptide ID was assigned to several
254  // features due to overlap or annotation tolerance):
255  for (SeqToList::iterator rt_it = rt_data.begin(); rt_it != rt_data.end();
256  ++rt_it)
257  {
258  DoubleList& rt_values = rt_it->second;
259  sort(rt_values.begin(), rt_values.end());
260  DoubleList::iterator it = unique(rt_values.begin(), rt_values.end());
261  rt_values.resize(it - rt_values.begin());
262  }
263  return true; // RTs were already sorted for duplicate detection
264  }
265 
/// Called by align() with the RT data collected from all input maps except
/// the reference (one entry in @p rt_data per map) and with the caller's
/// output vector as @p transforms. align() passes "true" for @p sorted only
/// if every getRetentionTimes_() call returned true.
/// NOTE(review): implementation not visible here - presumably computes one
/// retention time transformation per entry of @p rt_data; confirm against the
/// implementation file.
273  void computeTransformations_(std::vector<SeqToList>& rt_data,
274  std::vector<TransformationDescription>&
275  transforms, bool sorted = false);
276 
/// Called at the very start of align() with the number of input maps as @p runs.
/// NOTE(review): implementation not visible here - presumably validates the
/// algorithm parameters against the number of runs; confirm against the
/// implementation file.
284  void checkParameters_(const Size runs);
285 
/// NOTE(review): not called anywhere in the visible part of this header and
/// its implementation is not visible here; going by the name it presumably
/// obtains the reference retention times - confirm against the
/// implementation file.
292  void getReference_();
293 
294 private:
295 
298 
301 
302  };
303 
304 } // namespace OpenMS
305 
306 #endif // OPENMS_ANALYSIS_MAPMATCHING_MAPALIGNMENTALGORITHMIDENTIFICATION_H
A more convenient string class.
Definition: String.h:57
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:66
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:116
Int overflow exception.
Definition: Exception.h:255
Iterator begin()
Definition: MSExperiment.h:162
bool getRetentionTimes_(MapType &features, SeqToList &rt_data)
Collect retention time data ("RT" MetaInfo) from peptide IDs contained in feature maps or consensus maps.
Definition: MapAlignmentAlgorithmIdentification.h:208
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Int reference_index_
Index of input file to use as reference (if any)
Definition: MapAlignmentAlgorithmIdentification.h:154
Iterator end()
Definition: MSExperiment.h:172
A map alignment algorithm based on peptide identifications from MS2 spectra.
Definition: MapAlignmentAlgorithmIdentification.h:71
void align(std::vector< DataType > &data, std::vector< TransformationDescription > &transformations, Int reference_index=-1)
Align feature maps, consensus maps, peak maps, or peptide identifications.
Definition: MapAlignmentAlgorithmIdentification.h:104
std::map< String, DoubleList > SeqToList
Type to store retention times given for individual peptide sequences.
Definition: MapAlignmentAlgorithmIdentification.h:148
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:82
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:128
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
Size min_run_occur_
Minimum number of runs a peptide must occur in.
Definition: MapAlignmentAlgorithmIdentification.h:160
SeqToValue reference_
Reference retention times (per peptide sequence)
Definition: MapAlignmentAlgorithmIdentification.h:157
int Int
Signed integer type.
Definition: Types.h:103
void setReference(DataType &data)
Definition: MapAlignmentAlgorithmIdentification.h:83
std::map< String, double > SeqToValue
Type to store one representative retention time per peptide sequence.
Definition: MapAlignmentAlgorithmIdentification.h:151
Not all required information provided.
Definition: Exception.h:196

OpenMS / TOPP release 2.3.0 Documentation generated on Tue Jan 9 2018 18:22:01 using doxygen 1.8.13