OpenMS  2.4.0
ProteinResolver.h
Go to the documentation of this file.
// --------------------------------------------------------------------------
// OpenMS -- Open-Source Mass Spectrometry
// --------------------------------------------------------------------------
// Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
// ETH Zurich, and Freie Universitaet Berlin 2002-2018.
//
// This software is released under a three-clause BSD license:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of any author or any participating institution
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
// For a full list of authors, refer to the file AUTHORS.
// --------------------------------------------------------------------------
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
// INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// --------------------------------------------------------------------------
// $Maintainer: Timo Sachsenberg $
// $Authors: David Wojnar $
// --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 
44 namespace OpenMS
45 {
56  class OPENMS_DLLAPI ProteinResolver :
57  public DefaultParamHandler
58  {
59 
60 public:
61 
62  //default constructor
64 
65  //copy constructor
66  ProteinResolver(const ProteinResolver & rhs);
67 
68  //assignment operator
69  ProteinResolver & operator=(const ProteinResolver & rhs);
70 
71  //destructor
72  ~ProteinResolver() override;
73 
74 
75  struct ProteinEntry;
76  struct PeptideEntry;
77  struct ISDGroup;
78  struct MSDGroup;
79  struct ResolverResult;
80 
82  struct ProteinEntry
83  {
84  std::list<PeptideEntry *> peptides;
85  bool traversed;
87  enum type {primary, secondary, primary_indistinguishable, secondary_indistinguishable} protein_type;
88  double weight; //monoisotopic
89  float coverage; //in percent
90  //if Protein is indistinguishable all his fellows are in the list indis
91  std::list<ProteinEntry *> indis;
93  Size msd_group; //index
94  Size isd_group; //index
96  };
97 
99  struct PeptideEntry
100  {
101  std::list<ProteinEntry *> proteins;
102  bool traversed;
107  Size msd_group; //index
108  Size isd_group; //index
110  float intensity;
112  };
113 
115  struct MSDGroup
116  {
117  std::list<ProteinEntry *> proteins;
118  std::list<PeptideEntry *> peptides;
124  float intensity; // intensity of the MSD Group. Defined as the median of the peptide intensities.
125  };
126 
127  struct ISDGroup
128  {
129  std::list<ProteinEntry *> proteins;
130  std::list<PeptideEntry *> peptides;
132  std::list<Size> msd_groups;
133  };
134 
136  {
138  std::vector<ISDGroup> * isds;
139  std::vector<MSDGroup> * msds;
140  std::vector<ProteinEntry> * protein_entries;
141  std::vector<PeptideEntry> * peptide_entries;
142  std::vector<Size> * reindexed_peptides;
143  std::vector<Size> * reindexed_proteins;
144  enum type {PeptideIdent, Consensus} input_type;
145  std::vector<PeptideIdentification> * peptide_identification;
147  };
148 
156  void resolveConsensus(ConsensusMap & consensus);
157 
165  void resolveID(std::vector<PeptideIdentification> & peptide_identifications);
166 
177  // void writeProteinsAndPeptidesmzTab(std::vector<ProteinEntry>& protein_nodes, std::vector<PeptideEntry>& peptide_nodes, std::vector<Size>& reindexed_proteins, std::vector<Size>& reindexed_peptides, std::vector<PeptideIdentification>& peptide_identifications, String& output );
186  // void writePeptideTable(std::vector<PeptideEntry> & peptides, std::vector<Size> & reindexed_peptides, std::vector<PeptideIdentification> & identifications, String & output_file); // not implemented
195  // void writePeptideTable(std::vector<PeptideEntry> & peptides, std::vector<Size> & reindexed_peptides, ConsensusMap & consensus, String & output_file); // not implemented
203  // void writeProteinTable(std::vector<ProteinEntry> & proteins, std::vector<Size> & reindexed_proteins, String & output_file); // not implemented
211  // void writeProteinGroups(std::vector<ISDGroup> & isd_groups, std::vector<MSDGroup> & msd_groups, String & output_file); // not implemented
212 
219  void countTargetDecoy(std::vector<MSDGroup> & msd_groups, ConsensusMap & consensus);
220 
227  void countTargetDecoy(std::vector<MSDGroup> & msd_groups, std::vector<PeptideIdentification> & peptide_nodes);
228 
229  void clearResult();
230 
231  void setProteinData(std::vector<FASTAFile::FASTAEntry> & protein_data);
232 
233  const std::vector<ResolverResult> & getResults();
234 
236  static const PeptideIdentification & getPeptideIdentification(const ConsensusMap & consensus, const PeptideEntry * peptide);
237  static const PeptideHit & getPeptideHit(const ConsensusMap & consensus, const PeptideEntry * peptide);
238  static const PeptideIdentification & getPeptideIdentification(const std::vector<PeptideIdentification> & peptide_nodes, const PeptideEntry * peptide);
239  static const PeptideHit & getPeptideHit(const std::vector<PeptideIdentification> & peptide_nodes, const PeptideEntry * peptide);
240 
241 private:
242 
243  std::vector<ResolverResult> resolver_result_;
244  std::vector<FASTAFile::FASTAEntry> protein_data_;
245 
246  void computeIntensityOfMSD_(std::vector<MSDGroup> & msd_groups);
247 
249  void traverseProtein_(ProteinEntry * prot_node, MSDGroup & group);
250  void traversePeptide_(PeptideEntry * pep_node, MSDGroup & group);
252  Size findPeptideEntry_(String seq, std::vector<PeptideEntry> & nodes);
254  Size binarySearchNodes_(String & seq, std::vector<PeptideEntry> & nodes, Size start, Size end);
256  Size includeMSMSPeptides_(std::vector<PeptideIdentification> & peptide_identifications, std::vector<PeptideEntry> & peptide_nodes);
259  Size includeMSMSPeptides_(ConsensusMap & consensus, std::vector<PeptideEntry> & peptide_nodes);
261  void reindexingNodes_(std::vector<MSDGroup> & msd_groups, std::vector<Size> & reindexed_proteins, std::vector<Size> & reindexed_peptides);
263  void primaryProteins_(std::vector<PeptideEntry> & peptide_nodes, std::vector<Size> & reindexed_peptides);
264  void buildingMSDGroups_(std::vector<MSDGroup> & msd_groups, std::vector<ISDGroup> & isd_groups);
265  void buildingISDGroups_(std::vector<ProteinEntry> & protein_nodes, std::vector<PeptideEntry> & peptide_nodes,
266  std::vector<ISDGroup> & isd_groups);
267  // disabled/buggy
268  //ProteinResolver::indistinguishableProteins(vector<MSDGroup>& msd_groups);
269 
270  }; // class
271 
272 } // namespace
273 
std::list< ProteinEntry * > proteins
Definition: ProteinResolver.h:129
OpenMS::ProteinResolver::ISDGroup
Definition: ProteinResolver.h:127
std::list< ProteinEntry * > proteins
Definition: ProteinResolver.h:117
Size index
Definition: ProteinResolver.h:106
std::vector< Size > * reindexed_proteins
Definition: ProteinResolver.h:143
Size msd_group
Definition: ProteinResolver.h:107
A more convenient string class.
Definition: String.h:57
ISDGroup * isd_group
Definition: ProteinResolver.h:120
Size index
Definition: ProteinResolver.h:131
std::vector< PeptideIdentification > * peptide_identification
Definition: ProteinResolver.h:145
Size peptide_hit
Definition: ProteinResolver.h:105
std::vector< PeptideEntry > * peptide_entries
Definition: ProteinResolver.h:141
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:84
Size isd_group
Definition: ProteinResolver.h:94
String identifier
Definition: ProteinResolver.h:137
representation of an msd group. Contains peptides, proteins and a pointer to its ISD group ...
Definition: ProteinResolver.h:115
std::vector< FASTAFile::FASTAEntry > protein_data_
Definition: ProteinResolver.h:244
Size number_of_decoy
Definition: ProteinResolver.h:121
Size index
Definition: ProteinResolver.h:92
A container for consensus elements.
Definition: ConsensusMap.h:75
Size peptide_identification
Definition: ProteinResolver.h:104
float coverage
Definition: ProteinResolver.h:89
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
std::vector< Size > * reindexed_peptides
Definition: ProteinResolver.h:142
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:118
std::list< ProteinEntry * > indis
Definition: ProteinResolver.h:91
float intensity
Definition: ProteinResolver.h:110
Size index
Definition: ProteinResolver.h:119
Size number_of_target_plus_decoy
Definition: ProteinResolver.h:123
bool traversed
Definition: ProteinResolver.h:102
Helper class for peptide and protein quantification based on feature data annotated with IDs...
Definition: ProteinResolver.h:56
Size isd_group
Definition: ProteinResolver.h:108
Size number_of_experimental_peptides
Definition: ProteinResolver.h:95
std::vector< ProteinEntry > * protein_entries
Definition: ProteinResolver.h:140
type
Definition: ProteinResolver.h:144
String origin
Definition: ProteinResolver.h:111
Size msd_group
Definition: ProteinResolver.h:93
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:130
std::list< Size > msd_groups
Definition: ProteinResolver.h:132
Representation of a peptide hit.
Definition: PeptideHit.h:54
represents a protein from FASTA file
Definition: ProteinResolver.h:82
bool experimental
Definition: ProteinResolver.h:109
FASTAFile::FASTAEntry * fasta_entry
Definition: ProteinResolver.h:86
OpenMS::ProteinResolver::ResolverResult
Definition: ProteinResolver.h:135
ConsensusMap * consensus_map
Definition: ProteinResolver.h:146
String sequence
Definition: ProteinResolver.h:103
Size number_of_target
Definition: ProteinResolver.h:122
std::vector< ResolverResult > resolver_result_
Definition: ProteinResolver.h:243
std::vector< MSDGroup > * msds
Definition: ProteinResolver.h:139
bool traversed
Definition: ProteinResolver.h:85
std::vector< ISDGroup > * isds
Definition: ProteinResolver.h:138
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
represents a peptide. First in silico. If experimental is set to true it is MS/MS derived...
Definition: ProteinResolver.h:99
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
std::list< ProteinEntry * > proteins
Definition: ProteinResolver.h:101
type
Definition: ProteinResolver.h:87
float intensity
Definition: ProteinResolver.h:124
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
double weight
Definition: ProteinResolver.h:88