OpenMS
Loading...
Searching...
No Matches
IDMergerAlgorithm.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Julianus Pfeuffer $
6// $Authors: Julianus Pfeuffer $
7// --------------------------------------------------------------------------
8
9#pragma once
10
18
19#include <map>
20#include <unordered_set>
21
22namespace OpenMS
23{
24
25 //TODO add params for checking consistency (i.e. how strict to check)
26 //TODO add another subclass that does score-aware merging? (i.e. only keep best per peptide[sequence])
27
55 class OPENMS_DLLAPI IDMergerAlgorithm:
57 public ProgressLogger
58 {
59 public:
68 explicit IDMergerAlgorithm (const String& runIdentifier = "merged", bool addTimeStampToID = true);
69
84 void insertRuns(std::vector<ProteinIdentification>&& prots,
86
100 void insertRuns(const std::vector<ProteinIdentification>& prots,
101 const PeptideIdentificationList& peps);
102
103 //TODO add methods to just insert prots or just peps. Especially makes sense if you do re-indexing anyway,
104 // then you do not need the proteins. But then we need origin information. Either externally in form of a
105 // String or StringList (like the one from ProteinID.getPrimaryMSRunPath). Or by having the file annotated
106 // at the PeptideID (with getBasename maybe?)
107 // Current solution would be to clear the ProteinIdentification if you do not need the proteins and add all the
108 // necessary information about origin(s) to this ProteinIdentification.
109
127
128 private:
129
139 String getNewIdentifier_(bool addTimeStampToID) const;
140
150
165 const std::vector<ProteinIdentification>& protRuns,
166 const String& experiment_type) const;
167
183 const std::vector<ProteinIdentification>& protRuns,
184 const ProteinIdentification& ref,
185 const String& experiment_type) const;
186
195 std::vector<ProteinIdentification>&& old_protRuns
196 );
197
211 const std::map<String, Size>& runID_to_runIdx,
212 const std::vector<StringList>& originFiles,
213 bool annotate_origin
214 );
215
227 std::vector<ProteinIdentification>&& old_protRuns
228 );
229
232
235
242 static size_t accessionHash_(const ProteinHit& p){
243 return std::hash<String>()(p.getAccession());
244 }
245
253 static bool accessionEqual_(const ProteinHit& p1, const ProteinHit& p2){
254 return p1.getAccession() == p2.getAccession();
255 }
256
258 using hash_type = std::size_t (*)(const ProteinHit&);
259
261 using equal_type = bool (*)(const ProteinHit&, const ProteinHit&);
262
264 std::unordered_set<ProteinHit, hash_type, equal_type> collected_protein_hits_;
265
267 bool filled_ = false;
268
270 std::map<String, Size> file_origin_to_idx_;
271
274
277 };
278} // namespace OpenMS
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Algorithm for merging multiple protein and peptide identification runs.
Definition IDMergerAlgorithm.h:58
ProteinIdentification prot_result_
The resulting merged protein identification.
Definition IDMergerAlgorithm.h:231
bool(*)(const ProteinHit &, const ProteinHit &) equal_type
Type alias for the equality function.
Definition IDMergerAlgorithm.h:261
void insertRuns(std::vector< ProteinIdentification > &&prots, PeptideIdentificationList &&peps)
Insert runs using move semantics.
bool fixed_identifier_
Flag indicating whether the identifier should be fixed (i.e., not contain a timestamp)
Definition IDMergerAlgorithm.h:276
void insertProteinIDs_(std::vector< ProteinIdentification > &&old_protRuns)
Insert protein identifications into the merged result.
bool checkOldRunConsistency_(const std::vector< ProteinIdentification > &protRuns, const ProteinIdentification &ref, const String &experiment_type) const
Check consistency of search engines and settings against a reference.
std::size_t(*)(const ProteinHit &) hash_type
Type alias for the hash function.
Definition IDMergerAlgorithm.h:258
static size_t accessionHash_(const ProteinHit &p)
Hash function for protein hits based on accession.
Definition IDMergerAlgorithm.h:242
static bool accessionEqual_(const ProteinHit &p1, const ProteinHit &p2)
Equality function for protein hits based on accession.
Definition IDMergerAlgorithm.h:253
IDMergerAlgorithm(const String &runIdentifier="merged", bool addTimeStampToID=true)
Constructor for the IDMergerAlgorithm.
String getNewIdentifier_(bool addTimeStampToID) const
Generate a new identifier for the merged run.
void insertRuns(const std::vector< ProteinIdentification > &prots, const PeptideIdentificationList &peps)
Insert runs using copy semantics.
std::map< String, Size > file_origin_to_idx_
Mapping to keep track of the mzML origins of spectra.
Definition IDMergerAlgorithm.h:270
void returnResultsAndClear(ProteinIdentification &prots, PeptideIdentificationList &peps)
Return the merged results and reset internal state.
static void copySearchParams_(const ProteinIdentification &from, ProteinIdentification &to)
Copy search parameters between protein identifications.
std::unordered_set< ProteinHit, hash_type, equal_type > collected_protein_hits_
Set of collected protein hits using custom hash and equality functions.
Definition IDMergerAlgorithm.h:264
void movePepIDsAndRefProteinsToResultFaster_(PeptideIdentificationList &&pepIDs, std::vector< ProteinIdentification > &&old_protRuns)
Optimized method to move peptide IDs and reference proteins to result.
String id_
The new identifier string for the merged run.
Definition IDMergerAlgorithm.h:273
void updateAndMovePepIDs_(PeptideIdentificationList &&pepIDs, const std::map< String, Size > &runID_to_runIdx, const std::vector< StringList > &originFiles, bool annotate_origin)
Update peptide ID references and move them to the result.
bool checkOldRunConsistency_(const std::vector< ProteinIdentification > &protRuns, const String &experiment_type) const
Check consistency of search engines and settings across runs.
PeptideIdentificationList pep_result_
The resulting merged peptide identifications.
Definition IDMergerAlgorithm.h:234
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Representation of a protein hit.
Definition ProteinHit.h:35
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition ProteinIdentification.h:54
A more convenient string class.
Definition String.h:34
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19