|
OpenMS
2.6.0
|
Go to the documentation of this file.
152 static const std::array<std::string, (
Size)Unmatched::SIZE_OF_UNMATCHED> names_of_unmatched;
159 SIZE_OF_MISSING_DECOY
161 static const std::array<std::string, (
Size)MissingDecoy::SIZE_OF_MISSING_DECOY> names_of_missing_decoy;
171 inline ExitCodes run(std::vector<FASTAFile::FASTAEntry>& proteins, std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids)
174 return run<TFI_Vector>(protein_container, prot_ids, pep_ids);
216 if (decoy_string_.empty())
224 OPENMS_LOG_WARN <<
"Unable to determine decoy string automatically (not enough decoys were detected)! Using default " << (r.is_prefix ?
"prefix" :
"suffix") <<
" decoy string '" << r.name <<
"'\n"
225 <<
"If you think that this is incorrect, please provide a decoy_string and its position manually!" << std::endl;
227 prefix_ = r.is_prefix;
228 decoy_string_ = r.name;
230 OPENMS_LOG_INFO <<
"Using " << (prefix_ ?
"prefix" :
"suffix") <<
" decoy string '" << decoy_string_ <<
"'" << std::endl;
237 if (!enzyme_name_.empty() && (enzyme_name_.compare(AUTO_MODE) != 0))
241 else if (!prot_ids.empty() && prot_ids[0].getSearchParameters().digestion_enzyme.getName() !=
"unknown_enzyme")
243 OPENMS_LOG_INFO <<
"Info: using '" << prot_ids[0].getSearchParameters().digestion_enzyme.getName() <<
"' as enzyme (obtained from idXML) for digestion." << std::endl;
244 enzyme.
setEnzyme(&prot_ids[0].getSearchParameters().digestion_enzyme);
248 OPENMS_LOG_WARN <<
"Warning: Enzyme name neither given nor deduceable from input. Defaulting to Trypsin!" << std::endl;
252 bool xtandem_fix_parameters =
true;
253 bool msgfplus_fix_parameters =
true;
256 for (
const auto& prot_id : prot_ids)
258 String search_engine = prot_id.getOriginalSearchEngineName();
260 OPENMS_LOG_INFO <<
"Peptide identification engine: " << search_engine << std::endl;
261 if (search_engine !=
"XTANDEM") { xtandem_fix_parameters =
false; }
262 if (!(search_engine ==
"MSGFPLUS" || search_engine ==
"MS-GF+")) { msgfplus_fix_parameters =
false; }
266 if (msgfplus_fix_parameters && enzyme.
getEnzymeName() ==
"Trypsin")
268 OPENMS_LOG_WARN <<
"MSGFPlus detected but enzyme cutting rules were set to Trypsin. Correcting to Trypsin/P to copy with special cutting rule in MSGFPlus." << std::endl;
274 if (!enzyme_specificity_.empty() && (enzyme_specificity_.compare(AUTO_MODE) != 0))
280 enzyme.
setSpecificity(prot_ids[0].getSearchParameters().enzyme_term_specificity);
285 OPENMS_LOG_WARN <<
"Warning: Enzyme specificity neither given nor present in the input file. Defaulting to 'full'!" << std::endl;
293 const size_t PROTEIN_CACHE_SIZE = 4e5;
295 this->startProgress(0, 1,
"Load first DB chunk");
296 proteins.cacheChunk(PROTEIN_CACHE_SIZE);
299 if (proteins.empty())
301 OPENMS_LOG_ERROR <<
"Error: An empty database was provided. Mapping makes no sense. Aborting..." << std::endl;
302 return DATABASE_EMPTY;
307 OPENMS_LOG_WARN <<
"Warning: An empty set of peptide identifications was provided. Output will be empty as well." << std::endl;
308 if (!keep_unreferenced_proteins_)
311 for (std::vector<ProteinIdentification>::iterator it = prot_ids.begin();
312 it != prot_ids.end(); ++it)
314 it->getHits().clear();
317 return PEPTIDE_IDS_EMPTY;
322 std::vector<bool> protein_is_decoy;
323 std::vector<std::string> protein_accessions;
325 bool invalid_protein_sequence =
false;
332 bool has_illegal_AAs(
false);
334 for (std::vector<PeptideIdentification>::const_iterator it1 = pep_ids.begin(); it1 != pep_ids.end(); ++it1)
337 const std::vector<PeptideHit>& hits = it1->getHits();
338 for (std::vector<PeptideHit>::const_iterator it2 = hits.begin(); it2 != hits.end(); ++it2)
344 String seq = it2->getSequence().toUnmodifiedString().
remove(
'*');
347 OPENMS_LOG_ERROR <<
"Peptide sequence '" << it2->getSequence() <<
"' contains one or more ambiguous amino acids (B|J|Z|X).\n";
348 has_illegal_AAs =
true;
354 appendValue(pep_DB, seq.c_str());
359 OPENMS_LOG_ERROR <<
"One or more peptides contained illegal amino acids. This is not allowed!"
360 <<
"\nPlease either remove the peptide or replace it with one of the unambiguous ones (while allowing for ambiguous AA's to match the protein)." << std::endl;;
363 OPENMS_LOG_INFO <<
"Mapping " << length(pep_DB) <<
" peptides to " << (proteins.size() == PROTEIN_CACHE_SIZE ?
"? (unknown number of)" :
String(proteins.size())) <<
" proteins." << std::endl;
365 if (length(pep_DB) == 0)
367 OPENMS_LOG_WARN <<
"Warning: Peptide identifications have no hits inside! Output will be empty as well." << std::endl;
368 return PEPTIDE_IDS_EMPTY;
374 OPENMS_LOG_INFO <<
"Searching with up to " << aaa_max_ <<
" ambiguous amino acid(s) and " << mm_max_ <<
" mismatch(es)!" << std::endl;
385 uint16_t count_j_proteins(0);
386 bool has_active_data =
true;
387 const std::string jumpX(aaa_max_ + mm_max_ + 1,
'X');
389 this->startProgress(0, proteins.size() == PROTEIN_CACHE_SIZE ? std::numeric_limits<SignedSize>::max() : proteins.size(),
"Aho-Corasick");
390 std::atomic<int> progress_prots(0);
402 #pragma omp barrier // all threads need to be here, since we are about to swap protein data
405 DEBUG_ONLY std::cerr <<
" activating cache ...\n";
406 has_active_data = proteins.activateCache();
407 protein_accessions.resize(proteins.getChunkOffset() + proteins.chunkSize());
410 if (!has_active_data)
break;
415 DEBUG_ONLY std::cerr <<
"Filling Protein Cache ...";
416 proteins.cacheChunk(PROTEIN_CACHE_SIZE);
417 protein_is_decoy.resize(proteins.getChunkOffset() + prot_count);
420 const String& seq = proteins.chunkAt(i).identifier;
421 protein_is_decoy[i + proteins.getChunkOffset()] = (prefix_ ? seq.
hasPrefix(decoy_string_) : seq.
hasSuffix(decoy_string_));
425 DEBUG_ONLY std::cerr <<
" starting for loop \n";
427 #pragma omp for schedule(dynamic, 100) nowait
431 if (omp_get_thread_num() == 0)
433 this->setProgress(progress_prots);
436 prot = proteins.chunkAt(i).sequence;
440 if (prot.
has(
'[') || prot.
has(
'('))
442 invalid_protein_sequence =
true;
461 Size prot_idx = i + proteins.getChunkOffset();
470 size_t offset = -1, start = 0;
471 while ((offset = prot.find(jumpX, offset + 1)) != std::string::npos)
474 addHits_(fuzzyAC, pattern, pep_DB, prot.
substr(start, offset + jumpX.size() - start), prot, prot_idx, (
int)start, func_threads);
476 while (offset + jumpX.size() < prot.size() && prot[offset + jumpX.size()] ==
'X') ++offset;
481 if (start < prot.size())
483 addHits_(fuzzyAC, pattern, pep_DB, prot.
substr(start), prot, prot_idx, (
int)start, func_threads);
488 addHits_(fuzzyAC, pattern, pep_DB, prot, prot, prot_idx, 0, func_threads);
493 protein_accessions[prot_idx] = proteins.chunkAt(i).identifier;
494 acc_to_prot_thread[protein_accessions[prot_idx]] = prot_idx;
501 #pragma omp critical(PeptideIndexer_joinAC)
506 func.
merge(func_threads);
508 acc_to_prot.insert(acc_to_prot_thread.begin(), acc_to_prot_thread.end());
509 acc_to_prot_thread.clear();
515 std::cout <<
"Merge took: " << s.
toString() <<
"\n";
517 std::cout << mu.
delta(
"Aho-Corasick") <<
"\n\n";
523 <<
" ... rejected by enzyme filter: " << func.
filter_rejected << std::endl;
525 if (count_j_proteins)
527 OPENMS_LOG_WARN <<
"PeptideIndexer found " << count_j_proteins <<
" protein sequences in your database containing the amino acid 'J'."
528 <<
"To match 'J' in a protein, an ambiguous amino acid placeholder for I/L will be used.\n"
529 <<
"This costs runtime and eats into the 'aaa_max' limit, leaving less opportunity for B/Z/X matches.\n"
530 <<
"If you want 'J' to be treated as unambiguous, enable '-IL_equivalent'!" << std::endl;
540 for (
Size run_idx = 0; run_idx < prot_ids.size(); ++run_idx)
542 runid_to_runidx[prot_ids[run_idx].getIdentifier()] = run_idx;
546 Size stats_matched_unique(0);
547 Size stats_matched_multi(0);
548 Size stats_unmatched(0);
549 Size stats_count_m_t(0);
550 Size stats_count_m_d(0);
551 Size stats_count_m_td(0);
556 for (std::vector<PeptideIdentification>::iterator it1 = pep_ids.begin(); it1 != pep_ids.end(); ++it1)
559 Size run_idx = runid_to_runidx[it1->getIdentifier()];
561 std::vector<PeptideHit>& hits = it1->getHits();
563 for (std::vector<PeptideHit>::iterator it_hit = hits.begin(); it_hit != hits.end(); )
566 it_hit->setPeptideEvidences(std::vector<PeptideEvidence>());
571 bool matches_target(
false);
572 bool matches_decoy(
false);
574 std::set<Size> prot_indices;
576 for (std::set<PeptideProteinMatchInformation>::const_iterator it_i = func.
pep_to_prot[pep_idx].begin();
579 prot_indices.insert(it_i->protein_index);
580 const String& accession = protein_accessions[it_i->protein_index];
581 PeptideEvidence pe(accession, it_i->position, it_i->position + (
int)it_hit->getSequence().size() - 1, it_i->AABefore, it_i->AAAfter);
582 it_hit->addPeptideEvidence(pe);
584 runidx_to_protidx[run_idx].insert(it_i->protein_index);
586 if (protein_is_decoy[it_i->protein_index])
588 matches_decoy =
true;
592 matches_target =
true;
597 if (matches_decoy && matches_target)
599 it_hit->setMetaValue(
"target_decoy",
"target+decoy");
602 else if (matches_target)
604 it_hit->setMetaValue(
"target_decoy",
"target");
607 else if (matches_decoy)
609 it_hit->setMetaValue(
"target_decoy",
"decoy");
614 if (prot_indices.size() == 1)
616 it_hit->setMetaValue(
"protein_references",
"unique");
617 ++stats_matched_unique;
619 else if (prot_indices.size() > 1)
621 it_hit->setMetaValue(
"protein_references",
"non-unique");
622 ++stats_matched_multi;
627 if (stats_unmatched < 15)
OPENMS_LOG_INFO <<
"Unmatched peptide: " << it_hit->getSequence() <<
"\n";
628 else if (stats_unmatched == 15)
OPENMS_LOG_INFO <<
"Unmatched peptide: ...\n";
629 if (unmatched_action_ == Unmatched::REMOVE)
631 it_hit = hits.erase(it_hit);
636 it_hit->setMetaValue(
"protein_references",
"unmatched");
645 Size total_peptides = stats_count_m_t + stats_count_m_d + stats_count_m_td + stats_unmatched;
649 OPENMS_LOG_INFO <<
" unmatched : " << stats_unmatched <<
" (" << stats_unmatched * 100 / total_peptides <<
" %)\n";
651 OPENMS_LOG_INFO <<
" match to target DB only: " << stats_count_m_t <<
" (" << stats_count_m_t * 100 / total_peptides <<
" %)\n";
652 OPENMS_LOG_INFO <<
" match to decoy DB only : " << stats_count_m_d <<
" (" << stats_count_m_d * 100 / total_peptides <<
" %)\n";
653 OPENMS_LOG_INFO <<
" match to both : " << stats_count_m_td <<
" (" << stats_count_m_td * 100 / total_peptides <<
" %)\n";
656 OPENMS_LOG_INFO <<
" no match (to 0 protein) : " << stats_unmatched <<
"\n";
657 OPENMS_LOG_INFO <<
" unique match (to 1 protein) : " << stats_matched_unique <<
"\n";
658 OPENMS_LOG_INFO <<
" non-unique match (to >1 protein): " << stats_matched_multi << std::endl;
661 Size stats_matched_proteins(0), stats_matched_new_proteins(0), stats_orphaned_proteins(0), stats_proteins_target(0), stats_proteins_decoy(0);
664 for (
Size run_idx = 0; run_idx < prot_ids.size(); ++run_idx)
666 std::set<Size> masterset = runidx_to_protidx[run_idx];
668 std::vector<ProteinHit>& phits = prot_ids[run_idx].getHits();
671 std::vector<ProteinHit> orphaned_hits;
672 for (std::vector<ProteinHit>::iterator p_hit = phits.begin(); p_hit != phits.end(); ++p_hit)
674 const String& acc = p_hit->getAccession();
675 if (!acc_to_prot.
has(acc))
677 ++stats_orphaned_proteins;
678 if (keep_unreferenced_proteins_)
680 p_hit->setMetaValue(
"target_decoy",
"");
681 orphaned_hits.push_back(*p_hit);
686 phits = orphaned_hits;
691 phits.reserve(phits.size() + masterset.size());
692 for (std::set<Size>::const_iterator it = masterset.begin(); it != masterset.end(); ++it)
697 if (write_protein_sequence_ || write_protein_description_)
699 proteins.readAt(fe, *it);
700 if (write_protein_sequence_)
704 if (write_protein_description_)
709 if (protein_is_decoy[*it])
712 ++stats_proteins_decoy;
717 ++stats_proteins_target;
719 phits.push_back(hit);
720 ++stats_matched_new_proteins;
722 stats_matched_proteins += phits.size();
729 OPENMS_LOG_INFO <<
" total proteins searched: " << proteins.size() <<
"\n";
730 OPENMS_LOG_INFO <<
" matched proteins : " << stats_matched_proteins <<
" (" << stats_matched_new_proteins <<
" new)\n";
731 if (stats_matched_proteins)
733 OPENMS_LOG_INFO <<
" matched target proteins: " << stats_proteins_target <<
" (" << stats_proteins_target * 100 / stats_matched_proteins <<
" %)\n";
734 OPENMS_LOG_INFO <<
" matched decoy proteins : " << stats_proteins_decoy <<
" (" << stats_proteins_decoy * 100 / stats_matched_proteins <<
" %)\n";
736 OPENMS_LOG_INFO <<
" orphaned proteins : " << stats_orphaned_proteins << (keep_unreferenced_proteins_ ?
" (all kept)" :
" (all removed)\n");
741 bool has_error =
false;
743 if (invalid_protein_sequence)
745 OPENMS_LOG_ERROR <<
"Error: One or more protein sequences contained the characters '[' or '(', which are illegal in protein sequences."
746 <<
"\nPeptide hits might be masked by these characters (which usually indicate presence of modifications).\n";
750 if ((stats_count_m_d + stats_count_m_td) == 0)
752 String msg(
"No peptides were matched to the decoy portion of the database! Did you provide the correct concatenated database? Are your 'decoy_string' (=" +
String(decoy_string_) +
") and 'decoy_string_position' (=" +
String(param_.getValue(
"decoy_string_position")) +
") settings correct?");
753 if (missing_decoy_action_ == MissingDecoy::IS_ERROR)
755 OPENMS_LOG_ERROR <<
"Error: " << msg <<
"\nSet 'missing_decoy_action' to 'warn' if you are sure this is ok!\nAborting ..." << std::endl;
758 else if (missing_decoy_action_ == MissingDecoy::WARN)
760 OPENMS_LOG_WARN <<
"Warn: " << msg <<
"\nSet 'missing_decoy_action' to 'error' if you want to elevate this to an error!" << std::endl;
767 if (stats_unmatched > 0)
769 OPENMS_LOG_ERROR <<
"PeptideIndexer found unmatched peptides, which could not be associated to a protein.\n";
770 if (unmatched_action_ == Unmatched::IS_ERROR)
773 <<
"Potential solutions:\n"
774 <<
" - check your FASTA database is identical to the search DB (or use 'auto')\n"
775 <<
" - set 'enzyme:specificity' and 'enzyme:name' to 'auto' to match the parameters of the search engine\n"
776 <<
" - increase 'aaa_max' to allow more ambiguous amino acids\n"
777 <<
" - as a last resort: use the 'unmatched_action' option to accept or even remove unmatched peptides\n"
778 <<
" (note that unmatched peptides cannot be used for FDR calculation or quantification)\n";
781 else if (unmatched_action_ == Unmatched::WARN)
783 OPENMS_LOG_ERROR <<
" Warning: " << stats_unmatched <<
" unmatched hits have been found, but were not removed!\n"
784 <<
"These are not annotated with target/decoy information and might lead to issues with downstream tools (such as FDR).\n"
785 <<
"Switch to '" << names_of_unmatched[(
Size)Unmatched::REMOVE] <<
"' if you want to avoid these problems.\n";
787 else if (unmatched_action_ == Unmatched::REMOVE)
789 OPENMS_LOG_ERROR <<
" Warning: " << stats_unmatched <<
" unmatched hits have been removed!\n"
790 <<
"Make sure that these hits are actually a violation of the cutting rules by inspecting the database!\n";
791 if (xtandem_fix_parameters)
OPENMS_LOG_ERROR <<
"Since the results are from X!Tandem, this is probably ok (check anyways).\n";
802 OPENMS_LOG_ERROR <<
"Result files will be written, but PeptideIndexer will exit with an error code." << std::endl;
803 return UNEXPECTED_RESULT;
808 const String& getDecoyString()
const;
810 bool isPrefix()
const;
821 const std::tuple<const Size&, const Int&, const char&, const char&>
tie()
const
823 return std::tie(protein_index,
position, AABefore, AAAfter);
827 return tie() < other.
tie();
831 return tie() == other.
tie();
838 typedef std::map<OpenMS::Size, std::set<PeptideProteinMatchInformation> >
MapType;
849 pep_to_prot(), filter_passed(0), filter_rejected(0), enzyme_(enzyme), xtandem_(xtandem)
855 if (pep_to_prot.empty())
861 for (FoundProteinFunctor::MapType::const_iterator it = other.
pep_to_prot.begin(); it != other.
pep_to_prot.end(); ++it)
863 this->pep_to_prot[it->first].insert(other.
pep_to_prot[it->first].begin(), other.
pep_to_prot[it->first].end());
888 (
position + len_pep >= seq_prot.size()) ?
892 pep_to_prot[idx_pep].insert(match);
910 const seqan::Peptide& tmp_pep = pep_DB[fuzzyAC.
getHitDBIndex()];
915 void updateMembers_()
override;
918 bool prefix_{
false };
923 bool write_protein_sequence_{
false };
924 bool write_protein_description_{
false };
925 bool keep_unreferenced_proteins_{
false };
927 bool IL_equivalent_{
false };
MissingDecoy
Definition: PeptideIndexing.h:154
Base class for TOPP applications.
Definition: TOPPBase.h:144
static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY]
Names of the Specificity.
Definition: EnzymaticDigestion.h:77
void addHits_(AhoCorasickAmbiguous &fuzzyAC, const AhoCorasickAmbiguous::FuzzyACPattern &pattern, const AhoCorasickAmbiguous::PeptideDB &pep_DB, const String &prot, const String &full_prot, SignedSize idx_prot, Int offset, FoundProteinFunctor &func_threads) const
Definition: PeptideIndexing.h:905
Definition: PeptideIndexing.h:139
A convenience class to report either absolute or delta (between two timepoints) RAM usage.
Definition: SysInfo.h:83
void setDescription(const String &description)
sets the description of the protein
Definition: EnzymaticDigestion.h:71
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
void setSequence(const String &sequence)
sets the protein sequence
void setEnzyme(const String &name)
Sets the enzyme for the digestion (by name)
OpenMS::Size filter_passed
Definition: PeptideIndexing.h:840
bool findNext(const FuzzyACPattern &pattern)
Enumerate hits.
Definition: AhoCorasickAmbiguous.h:1037
void setProtein(const String &protein_sequence)
Reset to new protein sequence. All previous data is forgotten.
Definition: AhoCorasickAmbiguous.h:1024
ProteaseDigestion enzyme_
Definition: PeptideIndexing.h:844
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
bool has(const Key &key) const
Test whether the map contains the given key.
Definition: Map.h:108
String sequence
Definition: FASTAFile.h:80
A more convenient string class.
Definition: String.h:59
void addHit(const OpenMS::Size idx_pep, const OpenMS::Size idx_prot, const OpenMS::Size len_pep, const OpenMS::String &seq_prot, OpenMS::Int position)
Definition: PeptideIndexing.h:874
Unmatched
Action to take when peptide hits could not be matched.
Definition: PeptideIndexing.h:145
String description
Definition: FASTAFile.h:79
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Representation of a protein hit.
Definition: ProteinHit.h:58
Int getHitProteinPosition()
Offset into protein sequence where hit was found.
Definition: AhoCorasickAmbiguous.h:1057
String< AAcid, Alloc< void > > AAString
Definition: AhoCorasickAmbiguous.h:206
Extended Aho-Corasick algorithm capable of matching ambiguous amino acids in the pattern (i....
Definition: AhoCorasickAmbiguous.h:970
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
ExitCodes run(std::vector< FASTAFile::FASTAEntry > &proteins, std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids)
forward for old interface and pyOpenMS; use run<T>() for more control
Definition: PeptideIndexing.h:171
String delta(const String &event="delta")
#define DEBUG_ONLY
Definition: AhoCorasickAmbiguous.h:46
static void initPattern(const PeptideDB &pep_db, const int aaa_max, const int mm_max, FuzzyACPattern &pattern)
Construct a trie from a set of peptide sequences (which are to be found in a protein).
Definition: AhoCorasickAmbiguous.h:991
#define OPENMS_LOG_WARN
Macro to be used if a warning, i.e. a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
static Specificity getSpecificityByName(const String &name)
OpenMS::Size filter_rejected
Definition: PeptideIndexing.h:841
FASTA entry type (identifier, description and sequence)
Definition: FASTAFile.h:76
Definition: PeptideIndexing.h:141
bool hasPrefix(const String &string) const
true if String begins with string, false otherwise
MapType pep_to_prot
Definition: PeptideIndexing.h:839
fully enzyme specific, e.g., tryptic (ends with KR, AA-before is KR), or peptide is at protein termin...
Definition: EnzymaticDigestion.h:70
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:347
void setAccession(const String &accession)
sets the accession of the protein
static const char N_TERMINAL_AA
Definition: PeptideEvidence.h:60
ExitCodes run(FASTAContainer< T > &proteins, std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids)
Re-index peptide identifications honoring enzyme cutting rules, ambiguous amino acids and target/deco...
Definition: PeptideIndexing.h:213
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
bool toBool() const
Conversion to bool.
Size getHitDBIndex()
Get index of hit into peptide database of the pattern.
Definition: AhoCorasickAmbiguous.h:1047
void merge(FoundProteinFunctor &other)
Definition: PeptideIndexing.h:853
FoundProteinFunctor(const ProteaseDigestion &enzyme, bool xtandem)
Definition: PeptideIndexing.h:848
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
FASTAContainer<TFI_Vector> simply takes an existing vector of FASTAEntries and provides the same inte...
Definition: FASTAContainer.h:243
double getClockTime() const
void reset()
Clear the stop watch but keep running.
bool has(Byte byte) const
true if String contains the byte, false otherwise
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
void start()
Start the stop watch.
::seqan::Pattern< PeptideDB, ::seqan::FuzzyAC > FuzzyACPattern
Definition: AhoCorasickAmbiguous.h:974
void stop()
Stop the stop watch (can be resumed later). If the stop watch was not running an exception is thrown.
void setParameters(const Param &param)
Sets the parameters.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
const Param & getParameters() const
Non-mutable access to the parameters.
Not implemented exception.
Definition: Exception.h:436
::seqan::StringSet<::seqan::AAString > PeptideDB
Definition: AhoCorasickAmbiguous.h:973
String toString() const
get a compact representation of the current time status.
ExitCodes
Exit codes.
Definition: PeptideIndexing.h:135
void after()
record data for the second timepoint
This class is used to determine the current process' CPU (user and/or kernel) and wall time.
Definition: StopWatch.h:65
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
std::map< OpenMS::Size, std::set< PeptideProteinMatchInformation > > MapType
Definition: PeptideIndexing.h:838
bool update(const Param &p_outdated, const bool add_unknown=false)
Rescue parameter values from p_outdated to current param.
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
String getEnzymeName() const
Returns the name of the enzyme used for the digestion.
static char const *const AUTO_MODE
name of enzyme/specificity which signals that the enzyme/specificity should be taken from meta inform...
Definition: PeptideIndexing.h:132
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
Logger::LogStream OpenMS_Log_debug
Global static instance of a LogStream to capture messages classified as debug output....
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
Class for the enzymatic digestion of proteins.
Definition: ProteaseDigestion.h:60
static String findDatabase(const String &db_name)
Definition: PeptideIndexing.h:138
Refreshes the protein references for all peptide hits in a vector of PeptideIdentifications and adds ...
Definition: PeptideIndexing.h:126
static bool readable(const String &file)
Return true if the file exists and is readable.
static const char C_TERMINAL_AA
Definition: PeptideEvidence.h:61
bool xtandem_
Definition: PeptideIndexing.h:845
Management and storage of parameters / INI files.
Definition: Param.h:73
Definition: PeptideIndexing.h:835
String & remove(char what)
Remove all occurrences of the character what.
bool isAmbiguous(AAcid c)
Definition: AhoCorasickAmbiguous.h:578
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
#define OPENMS_LOG_INFO
Macro to be used if a piece of information, e.g. a status, should be reported.
Definition: LogStream.h:465
void setSpecificity(Specificity spec)
Sets the specificity for the digestion (default is SPEC_FULL).
static String & toUpper(String &this_s)
Definition: StringUtils.h:874
Definition: PeptideIndexing.h:137
Definition: PeptideIndexing.h:140
Size< TNeedle >::Type position(const PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:561
static Result findDecoyString(FASTAContainer< T > &proteins)
Heuristic to determine the decoy string given a set of protein names.
Definition: FASTAContainer.h:359
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
template parameter for vector-based FASTA access
Definition: FASTAContainer.h:82
Used to load and store idXML files.
Definition: IdXMLFile.h:63
FASTAContainer<TFI_File> will make FASTA entries available chunk-wise from start to end by loading it...
Definition: FASTAContainer.h:93