OpenMS  2.4.0
ProteinIdentification.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Nico Pfeifer, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
41 #include <set>
42 
43 namespace OpenMS
44 {
45  class PeptideIdentification;
46 
61  class OPENMS_DLLAPI ProteinIdentification :
62  public MetaInfoInterface
63  {
64 public:
67 
/// Bundles multiple (e.g. indistinguishable) proteins in a group.
71  struct OPENMS_DLLAPI ProteinGroup
72  {
/// Probability of this group.
/// NOTE: for groups stored as "indistinguishable proteins" the probability is meaningless.
74  double probability;
75 
/// Accessions of (indistinguishable) proteins that belong to the same group.
/// NOTE: for groups stored as "indistinguishable proteins", accessions[0] is the "group leader".
77  std::vector<String> accessions;
78 
/// Default constructor (initial member values are set in the implementation file; not visible here).
79  ProteinGroup();
80 
/// Equality operator.
/// NOTE(review): presumably compares both probability and accessions -- confirm against the implementation.
82  bool operator==(const ProteinGroup& rhs) const;
83 
84  /**
85  @brief Comparison operator (for sorting)
86 
87  This operator is intended for sorting protein groups in a "best first" manner. That means higher probabilities are "less" than lower probabilities (!); smaller groups are "less" than larger groups; everything else being equal, accessions are compared lexicographically.
88  */
89  bool operator<(const ProteinGroup& rhs) const;
90  };
91 
94  {
97  SIZE_OF_PEAKMASSTYPE
98  };
100  static const std::string NamesOfPeakMassType[SIZE_OF_PEAKMASSTYPE];
101 
/// Search parameters of the DB search.
/// Inherits from MetaInfoInterface, so arbitrary meta information (Type-Name-Value tuples) can be attached.
103  struct OPENMS_DLLAPI SearchParameters :
104  public MetaInfoInterface
105  {
// NOTE: this listing omits further members declared at header lines 106-110 and 113-118
// (db, db_version, taxonomy, charges, mass_type, missed_cleavages, fragment/precursor
// mass tolerances with their ppm flags, digestion_enzyme).
/// Used fixed modifications.
111  std::vector<String> fixed_modifications;
/// Allowed variable modifications.
112  std::vector<String> variable_modifications;
119 
121 
/// Equality operator.
122  bool operator==(const SearchParameters& rhs) const;
123 
/// Inequality operator.
124  bool operator!=(const SearchParameters& rhs) const;
125 
126  };
127 
128 
134  virtual ~ProteinIdentification();
138  ProteinIdentification& operator=(const ProteinIdentification& source);
140  bool operator==(const ProteinIdentification& rhs) const;
142  bool operator!=(const ProteinIdentification& rhs) const;
144 
146 
147  const std::vector<ProteinHit> & getHits() const;
150  std::vector<ProteinHit> & getHits();
152  void insertHit(const ProteinHit & input);
153 
159  void setHits(const std::vector<ProteinHit>& hits);
160 
162  std::vector<ProteinHit>::iterator findHit(const String& accession);
163 
165  const std::vector<ProteinGroup>& getProteinGroups() const;
167  std::vector<ProteinGroup>& getProteinGroups();
169  void insertProteinGroup(const ProteinGroup & group);
170 
172  const std::vector<ProteinGroup>& getIndistinguishableProteins() const;
174  std::vector<ProteinGroup>& getIndistinguishableProteins();
176  void insertIndistinguishableProteins(const ProteinGroup& group);
177 
179  double getSignificanceThreshold() const;
181  void setSignificanceThreshold(double value);
183  const String& getScoreType() const;
185  void setScoreType(const String& type);
187  bool isHigherScoreBetter() const;
189  void setHigherScoreBetter(bool higher_is_better);
191  void sort();
193  void assignRanks();
201  void computeCoverage(const std::vector<PeptideIdentification>& pep_ids);
203 
205 
206  const DateTime& getDateTime() const;
209  void setDateTime(const DateTime& date);
211  void setSearchEngine(const String& search_engine);
213  const String& getSearchEngine() const;
215  void setSearchEngineVersion(const String& search_engine_version);
217  const String& getSearchEngineVersion() const;
219  void setSearchParameters(const SearchParameters& search_parameters);
221  const SearchParameters& getSearchParameters() const;
223  SearchParameters& getSearchParameters();
225  const String& getIdentifier() const;
227  void setIdentifier(const String& id);
229  void setPrimaryMSRunPath(const StringList& s);
231  void getPrimaryMSRunPath(StringList& toFill) const;
233 
234 protected:
236 
243 
245 
248  std::vector<ProteinHit> protein_hits_;
249  std::vector<ProteinGroup> protein_groups_;
251  std::vector<ProteinGroup> indistinguishable_proteins_;
254  };
255 
256 } //namespace OpenMS
Representation of a protein identification run.
Definition: ProteinIdentification.h:61
String db
The used database.
Definition: ProteinIdentification.h:106
A more convenient string class.
Definition: String.h:57
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:111
String db_version
The database version.
Definition: ProteinIdentification.h:107
double probability
Probability of this group.
Definition: ProteinIdentification.h:74
Definition: ProteinIdentification.h:96
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition: ProteinIdentification.h:114
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
String search_engine_version_
Definition: ProteinIdentification.h:239
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition: ProteinIdentification.h:116
bool operator==(_Iterator< _Val, _Ref, _Ptr > const &, _Iterator< _Val, _Ref, _Ptr > const &)
Definition: KDTree.h:806
ProteinHit HitType
Hit type definition.
Definition: ProteinIdentification.h:66
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Search parameters of the DB search.
Definition: ProteinIdentification.h:103
bool operator<(const MultiplexDeltaMasses &dm1, const MultiplexDeltaMasses &dm2)
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:117
String taxonomy
The taxonomy restriction.
Definition: ProteinIdentification.h:108
PeakMassType
Peak mass type.
Definition: ProteinIdentification.h:93
std::vector< ProteinHit > protein_hits_
Definition: ProteinIdentification.h:248
Protease digestion_enzyme
Detailed cleavage site information (from ProteaseDB)
Definition: ProteinIdentification.h:118
String protein_score_type_
Definition: ProteinIdentification.h:246
double protein_significance_threshold_
Definition: ProteinIdentification.h:252
std::vector< String > accessions
Accessions of (indistinguishable) proteins that belong to the same group.
Definition: ProteinIdentification.h:77
String id_
Definition: ProteinIdentification.h:237
UInt missed_cleavages
The number of allowed missed cleavages.
Definition: ProteinIdentification.h:113
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:55
std::vector< ProteinGroup > protein_groups_
Definition: ProteinIdentification.h:249
String search_engine_
Definition: ProteinIdentification.h:238
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
Representation of a protein hit.
Definition: ProteinHit.h:53
bool higher_score_better_
Definition: ProteinIdentification.h:247
DateTime date_
Definition: ProteinIdentification.h:241
Definition: ProteinIdentification.h:95
bool operator!=(_Iterator< _Val, _Ref, _Ptr > const &, _Iterator< _Val, _Ref, _Ptr > const &)
Definition: KDTree.h:824
DateTime Class.
Definition: DateTime.h:54
bool fragment_mass_tolerance_ppm
Mass tolerance unit of fragment ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:115
std::vector< ProteinGroup > indistinguishable_proteins_
Indistinguishable proteins: accessions[0] is "group leader", probability is meaningless.
Definition: ProteinIdentification.h:251
PeakMassType mass_type
Mass type of the peaks.
Definition: ProteinIdentification.h:110
SearchParameters search_parameters_
Definition: ProteinIdentification.h:240
String charges
The allowed charges for the search.
Definition: ProteinIdentification.h:109
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:71
Representation of a digestion enzyme for proteins (protease)
Definition: DigestionEnzymeProtein.h:48
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:112