OpenMS
IDMergerAlgorithm.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Julianus Pfeuffer $
6 // $Authors: Julianus Pfeuffer $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
16 
17 #include <unordered_set>
18 
19 namespace OpenMS
20 {
21 
22  //TODO add params for checking consistency (i.e. how strict to check)
23  //TODO add another subclass that does score-aware merging? (i.e. only keep best per peptide[sequence])
24 
// Creates a new protein ID run into which other runs can be inserted.
// Derives publicly from DefaultParamHandler and ProgressLogger.
// NOTE(review): this listing carries embedded line numbers and several declaration
// lines are absent from it; where only the tail of a declaration is visible, the
// comment in front of it names the member it appears to belong to.
35  class OPENMS_DLLAPI IDMergerAlgorithm:
36  public DefaultParamHandler,
37  public ProgressLogger
38  {
39  public:
// Constructor; runIdentifier defaults to "merged". Marked explicit, so a String is
// not implicitly converted into an IDMergerAlgorithm.
// NOTE(review): presumably runIdentifier is the basis for the new run identifier -- confirm in the .cpp.
40  explicit IDMergerAlgorithm (const String& runIdentifier = "merged");
41 
// insertRuns: two overloads taking the protein runs and the peptide IDs to insert.
// The first takes both vectors as rvalue references, the second as const references.
44  void insertRuns(std::vector<ProteinIdentification>&& prots,
45  std::vector<PeptideIdentification>&& peps);
46  void insertRuns(const std::vector<ProteinIdentification>& prots,
47  const std::vector<PeptideIdentification>& peps);
48 
49  //TODO add methods to just insert prots or just peps. Especially makes sense if you do re-indexing anyway,
50  // then you do not need the proteins. But then we need origin information. Either externally in form of a
51  // String or StringList (like the one from ProteinID.getPrimaryMSRunPath). Or by having the file annotated
52  // at the PeptideID (with getBasename maybe?)
53  // Current solution would be to clear the ProteinIdentification if you do not need the proteins and add all the
54  // necessary information about origin(s) to this ProteinIdentification.
55 
// Tail of: void returnResultsAndClear(ProteinIdentification& prots, std::vector<PeptideIdentification>& peps)
// Returns the merged results and resets/clears all internal data. Both parameters are
// non-const references, i.e. they act as outputs.
58  std::vector<PeptideIdentification>& peps);
59 
60  private:
61 
// NOTE(review): the declarations of getNewIdentifier_() const ("Returns the new identifier.
// The initial identifier plus a timestamp.") and of the static copySearchParams_(from, to)
// ("Copies over search parameters.") are not visible in this listing; presumably they sit
// in the gaps around here -- confirm against the real header.
64 
67 
// Tail of: bool checkOldRunConsistency_(protRuns, experiment_type) const
// Overload without a reference run.
74  const std::vector<ProteinIdentification>& protRuns,
75  const String& experiment_type) const;
76 
// Tail of: bool checkOldRunConsistency_(protRuns, ref, experiment_type) const
// Overload that additionally receives a reference ProteinIdentification (ref).
84  const std::vector<ProteinIdentification>& protRuns,
85  const ProteinIdentification& ref,
86  const String& experiment_type) const;
87 
// Tail of: void insertProteinIDs_(std::vector<ProteinIdentification>&& old_protRuns)
91  std::vector<ProteinIdentification>&& old_protRuns
92  );
93 
// Tail of: void updateAndMovePepIDs_(pepIDs, runID_to_runIdx, originFiles, annotate_origin)
// pepIDs is taken as an rvalue reference; the lookup structures are read-only (const references).
98  std::vector<PeptideIdentification>&& pepIDs,
99  const std::map<String, Size>& runID_to_runIdx,
100  const std::vector<StringList>& originFiles,
101  bool annotate_origin
102  );
103 
104 
// Tail of: void movePepIDsAndRefProteinsToResultFaster_(pepIDs, old_protRuns)
// Both vectors are taken as rvalue references.
106  std::vector<PeptideIdentification>&& pepIDs,
107  std::vector<ProteinIdentification>&& old_protRuns
108  );
109 
// NOTE(review): the member "ProteinIdentification prot_result_" (the resulting new Protein IDs)
// is not visible in this listing; presumably it is declared just above this point -- confirm.
112 
// The resulting new Peptide IDs.
114  std::vector<PeptideIdentification> pep_result_;
115 
// Hash functor for ProteinHit: hashes only the accession string.
116  static size_t accessionHash_(const ProteinHit& p){
117  return std::hash<String>()(p.getAccession());
118  }
// Equality functor for ProteinHit: two hits are equal iff their accessions are equal.
// Consistent with accessionHash_, which also looks at the accession only.
119  static bool accessionEqual_(const ProteinHit& p1, const ProteinHit& p2){
120  return p1.getAccession() == p2.getAccession();
121  }
// Function-pointer types matching the signatures of accessionHash_ / accessionEqual_.
122  using hash_type = std::size_t (*)(const ProteinHit&);
123  using equal_type = bool (*)(const ProteinHit&, const ProteinHit&);
// Set of protein hits whose hasher and key-equality are supplied as function pointers.
// NOTE(review): because hasher/equality are plain function pointers, the set has to be
// constructed with the actual functions (presumably accessionHash_ / accessionEqual_);
// a default-constructed set would hold null pointers -- verify in the constructor.
124  std::unordered_set<ProteinHit, hash_type, equal_type> collected_protein_hits_;
125 
// Flag, initialised to false.
// NOTE(review): presumably records whether any run has been inserted yet -- confirm in the .cpp.
127  bool filled_ = false;
128 
// Keeps track of the mzML origins of spectra (maps an origin String to an index).
130  std::map<String, Size> file_origin_to_idx_;
131 
// NOTE(review): the member "String id_" (the new identifier string) is not visible in this
// listing; presumably it is declared just before the closing brace -- confirm.
134  };
135 } // namespace OpenMS
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
Creates a new Protein ID run into which other runs can be inserted. Creates union of protein hits but...
Definition: IDMergerAlgorithm.h:38
ProteinIdentification prot_result_
the resulting new Protein IDs
Definition: IDMergerAlgorithm.h:111
void movePepIDsAndRefProteinsToResultFaster_(std::vector< PeptideIdentification > &&pepIDs, std::vector< ProteinIdentification > &&old_protRuns)
bool(*)(const ProteinHit &, const ProteinHit &) equal_type
Definition: IDMergerAlgorithm.h:123
void insertProteinIDs_(std::vector< ProteinIdentification > &&old_protRuns)
void returnResultsAndClear(ProteinIdentification &prots, std::vector< PeptideIdentification > &peps)
Return the merged results and reset/clear all internal data.
bool checkOldRunConsistency_(const std::vector< ProteinIdentification > &protRuns, const ProteinIdentification &ref, const String &experiment_type) const
void insertRuns(const std::vector< ProteinIdentification > &prots, const std::vector< PeptideIdentification > &peps)
std::size_t(*)(const ProteinHit &) hash_type
Definition: IDMergerAlgorithm.h:122
static size_t accessionHash_(const ProteinHit &p)
Definition: IDMergerAlgorithm.h:116
std::vector< PeptideIdentification > pep_result_
the resulting new Peptide IDs
Definition: IDMergerAlgorithm.h:114
void insertRuns(std::vector< ProteinIdentification > &&prots, std::vector< PeptideIdentification > &&peps)
static bool accessionEqual_(const ProteinHit &p1, const ProteinHit &p2)
Definition: IDMergerAlgorithm.h:119
std::map< String, Size > file_origin_to_idx_
to keep track of the mzML origins of spectra
Definition: IDMergerAlgorithm.h:130
static void copySearchParams_(const ProteinIdentification &from, ProteinIdentification &to)
Copies over search parameters.
std::unordered_set< ProteinHit, hash_type, equal_type > collected_protein_hits_
Definition: IDMergerAlgorithm.h:124
String getNewIdentifier_() const
Returns the new identifier. The initial identifier plus a timestamp.
void updateAndMovePepIDs_(std::vector< PeptideIdentification > &&pepIDs, const std::map< String, Size > &runID_to_runIdx, const std::vector< StringList > &originFiles, bool annotate_origin)
String id_
the new identifier string
Definition: IDMergerAlgorithm.h:133
bool checkOldRunConsistency_(const std::vector< ProteinIdentification > &protRuns, const String &experiment_type) const
IDMergerAlgorithm(const String &runIdentifier="merged")
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:50
A more convenient string class.
Definition: String.h:34
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19