OpenMS
Loading...
Searching...
No Matches
IDMergerAlgorithm.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Julianus Pfeuffer $
6// $Authors: Julianus Pfeuffer $
7// --------------------------------------------------------------------------
8
9#pragma once
10
17
18#include <map>
19#include <unordered_set>
20
21namespace OpenMS
22{
23
24 //TODO add params for checking consistency (i.e. how strict to check)
25 //TODO add another subclass that does score-aware merging? (i.e. only keep best per peptide[sequence])
26
/// @brief Algorithm for merging multiple protein and peptide identification runs.
///
/// Runs are added through the insertRuns() overloads declared in this class.
/// NOTE(review): listing line 55 is absent between the two lines below. The member
/// summary accompanying this listing mentions DefaultParamHandler ("a base class for
/// all classes handling default parameters"), so that is presumably the first base
/// class -- confirm against the real header.
54 class OPENMS_DLLAPI IDMergerAlgorithm:
56 public ProgressLogger
57 {
58 public:
/// @brief Constructor for the IDMergerAlgorithm.
/// @param runIdentifier    Defaults to "merged". Presumably the base text of the identifier
///                         given to the merged run -- TODO confirm in the implementation.
/// @param addTimeStampToID Defaults to true. Presumably controls whether a timestamp is appended
///                         to the identifier (see getNewIdentifier_) -- TODO confirm.
/// Declared explicit, so a std::string is never implicitly converted into a merger object.
67 explicit IDMergerAlgorithm (const std::string& runIdentifier = "merged", bool addTimeStampToID = true);
68
/// @brief Insert runs using move semantics.
/// @param prots Protein identification runs, taken by rvalue reference.
/// NOTE(review): the closing line of this declaration (listing line 84) is absent here.
/// The member summary accompanying this listing gives the second parameter as
/// `PeptideIdentificationList&& peps`.
83 void insertRuns(std::vector<ProteinIdentification>&& prots,
85
/// @brief Insert runs using copy semantics.
/// @param prots Protein identification runs; taken by const reference, so the caller's data is left untouched.
/// @param peps  Peptide identifications; taken by const reference.
99 void insertRuns(const std::vector<ProteinIdentification>& prots,
100 const PeptideIdentificationList& peps);
101
102 //TODO Add methods to insert only proteins or only peptides. This makes sense especially if you re-index anyway,
103 // because then you do not need the proteins. But then we need origin information: either passed externally as a
104 // std::string or StringList (like the one from ProteinID.getPrimaryMSRunPath), or by having the file annotated
105 // at the PeptideID (with getBasename maybe?).
106 // The current solution is to clear the ProteinIdentification if you do not need the proteins and to add all the
107 // necessary information about the origin(s) to this ProteinIdentification.
108
126
127 private:
128
/// @brief Generate a new identifier for the merged run.
/// @param addTimeStampToID Presumably selects whether a timestamp becomes part of the identifier -- TODO confirm in the implementation.
/// @return The identifier as a std::string. Const member: it does not modify the merger.
138 std::string getNewIdentifier_(bool addTimeStampToID) const;
139
149
// Check consistency of search engines and settings across runs.
// NOTE(review): the opening line of this declaration (listing line 163) is absent here. The member summary
// accompanying this listing gives it as `bool checkOldRunConsistency_(` with the two parameters below.
164 const std::vector<ProteinIdentification>& protRuns,
165 const std::string& experiment_type) const;
166
// Check consistency of search engines and settings against a reference.
// NOTE(review): the opening line of this declaration (listing line 181) is absent here. The member summary
// accompanying this listing gives it as `bool checkOldRunConsistency_(` with the three parameters below;
// `ref` is the reference run the others are compared against.
182 const std::vector<ProteinIdentification>& protRuns,
183 const ProteinIdentification& ref,
184 const std::string& experiment_type) const;
185
// Insert protein identifications into the merged result.
// NOTE(review): the opening line of this declaration (listing line 193) is absent here. The member summary
// accompanying this listing gives it as `void insertProteinIDs_(` with the single rvalue parameter below.
194 std::vector<ProteinIdentification>&& old_protRuns
195 );
196
// Update peptide ID references and move them to the result.
// NOTE(review): the opening lines of this declaration (listing lines 208-209) are absent here. The member
// summary accompanying this listing gives them as
// `void updateAndMovePepIDs_(PeptideIdentificationList&& pepIDs,` followed by the parameters below.
// runID_to_runIdx maps a run identifier string to an index; presumably that index addresses originFiles -- TODO confirm.
210 const std::map<std::string, Size>& runID_to_runIdx,
211 const std::vector<StringList>& originFiles,
212 bool annotate_origin
213 );
214
// Optimized method to move peptide IDs and reference proteins to result.
// NOTE(review): the opening lines of this declaration (listing lines 224-225) are absent here. The member
// summary accompanying this listing gives them as
// `void movePepIDsAndRefProteinsToResultFaster_(PeptideIdentificationList&& pepIDs,` followed by the parameter below.
226 std::vector<ProteinIdentification>&& old_protRuns
227 );
228
231
234
241 static size_t accessionHash_(const ProteinHit& p){
242 return std::hash<std::string>()(p.getAccession());
243 }
244
252 static bool accessionEqual_(const ProteinHit& p1, const ProteinHit& p2){
253 return p1.getAccession() == p2.getAccession();
254 }
255
/// Type alias for the hash function: a plain function pointer taking a ProteinHit (signature matches accessionHash_).
257 using hash_type = std::size_t (*)(const ProteinHit&);
258
/// Type alias for the equality function: a plain function pointer taking two ProteinHits (signature matches accessionEqual_).
260 using equal_type = bool (*)(const ProteinHit&, const ProteinHit&);
261
/// Set of collected protein hits using custom hash and equality functions.
/// Because hash_type and equal_type are function pointers, the set must be constructed with actual
/// function addresses; presumably accessionHash_ and accessionEqual_ are supplied by the constructor -- TODO confirm.
263 std::unordered_set<ProteinHit, hash_type, equal_type> collected_protein_hits_;
264
/// Starts out false. NOTE(review): presumably set once runs have been inserted -- confirm in the implementation.
266 bool filled_ = false;
267
/// Mapping to keep track of the mzML origins of spectra (origin string -> index).
269 std::map<std::string, Size> file_origin_to_idx_;
270
/// The new identifier string for the merged run.
272 std::string id_;
273
276 };
277} // namespace OpenMS
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Algorithm for merging multiple protein and peptide identification runs.
Definition IDMergerAlgorithm.h:57
ProteinIdentification prot_result_
The resulting merged protein identification.
Definition IDMergerAlgorithm.h:230
void updateAndMovePepIDs_(PeptideIdentificationList &&pepIDs, const std::map< std::string, Size > &runID_to_runIdx, const std::vector< StringList > &originFiles, bool annotate_origin)
Update peptide ID references and move them to the result.
bool(*)(const ProteinHit &, const ProteinHit &) equal_type
Type alias for the equality function.
Definition IDMergerAlgorithm.h:260
void insertRuns(std::vector< ProteinIdentification > &&prots, PeptideIdentificationList &&peps)
Insert runs using move semantics.
bool fixed_identifier_
Flag indicating whether the identifier should be fixed (i.e., not contain a timestamp)
Definition IDMergerAlgorithm.h:275
void insertProteinIDs_(std::vector< ProteinIdentification > &&old_protRuns)
Insert protein identifications into the merged result.
std::string getNewIdentifier_(bool addTimeStampToID) const
Generate a new identifier for the merged run.
bool checkOldRunConsistency_(const std::vector< ProteinIdentification > &protRuns, const std::string &experiment_type) const
Check consistency of search engines and settings across runs.
std::size_t(*)(const ProteinHit &) hash_type
Type alias for the hash function.
Definition IDMergerAlgorithm.h:257
static size_t accessionHash_(const ProteinHit &p)
Hash function for protein hits based on accession.
Definition IDMergerAlgorithm.h:241
bool checkOldRunConsistency_(const std::vector< ProteinIdentification > &protRuns, const ProteinIdentification &ref, const std::string &experiment_type) const
Check consistency of search engines and settings against a reference.
IDMergerAlgorithm(const std::string &runIdentifier="merged", bool addTimeStampToID=true)
Constructor for the IDMergerAlgorithm.
static bool accessionEqual_(const ProteinHit &p1, const ProteinHit &p2)
Equality function for protein hits based on accession.
Definition IDMergerAlgorithm.h:252
void insertRuns(const std::vector< ProteinIdentification > &prots, const PeptideIdentificationList &peps)
Insert runs using copy semantics.
void returnResultsAndClear(ProteinIdentification &prots, PeptideIdentificationList &peps)
Return the merged results and reset internal state.
static void copySearchParams_(const ProteinIdentification &from, ProteinIdentification &to)
Copy search parameters between protein identifications.
std::unordered_set< ProteinHit, hash_type, equal_type > collected_protein_hits_
Set of collected protein hits using custom hash and equality functions.
Definition IDMergerAlgorithm.h:263
std::map< std::string, Size > file_origin_to_idx_
Mapping to keep track of the mzML origins of spectra.
Definition IDMergerAlgorithm.h:269
void movePepIDsAndRefProteinsToResultFaster_(PeptideIdentificationList &&pepIDs, std::vector< ProteinIdentification > &&old_protRuns)
Optimized method to move peptide IDs and reference proteins to result.
std::string id_
The new identifier string for the merged run.
Definition IDMergerAlgorithm.h:272
PeptideIdentificationList pep_result_
The resulting merged peptide identifications.
Definition IDMergerAlgorithm.h:233
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Representation of a protein hit.
Definition ProteinHit.h:34
const std::string & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition ProteinIdentification.h:55
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19