32class TheoreticalSpectrumGenerator;
81 bool fdr_applied =
false;
82 double achieved_psm_fdr = -1.0;
87 bool score_stats_valid =
false;
88 double hyperscore_min = 0.0;
89 double hyperscore_median = 0.0;
90 double hyperscore_max = 0.0;
91 bool prec_tol_valid =
false;
92 double prec_err_median = 0.0, prec_err_mad = 0.0, prec_err_recommended = 0.0;
93 bool frag_tol_valid =
false;
94 double frag_err_median = 0.0, frag_err_mad = 0.0, frag_err_recommended = 0.0;
95 double seconds_search = 0.0;
96 double seconds_calibration = 0.0;
97 double seconds_fdr = 0.0;
112 double precursor_tol_lower = 0.0, precursor_tol_upper = 0.0;
114 double fragment_tol = 0.0;
116 Int min_charge = 0, max_charge = 0;
118 std::vector<std::string>
fixed_mods, variable_mods, ion_series;
119 bool open_search =
false;
120 bool calibration_enabled =
false;
121 bool snes_mode =
false;
122 bool chunked =
false;
124 double psm_fdr_threshold = 0.0, protein_fdr_threshold = 0.0;
129 double seconds_index_build = 0.0;
130 double seconds_total = 0.0;
148 bool is_open_search =
false;
186 bool decoy_is_prefix =
true;
188 bool have_decoys =
false;
205 std::vector<FASTAFile::FASTAEntry>
db;
217 bool release_fragment_index_after_scoring =
false;
225 bool decoy_is_prefix =
true;
228 bool have_decoys =
false;
253 const std::string& in_db,
254 std::vector<ProteinIdentification>& prot_ids,
283 const std::string& decoy_string,
284 bool decoy_is_prefix,
333 const std::string& in_db,
334 const std::string& output_base_name =
"")
const;
354 const std::vector<FASTAFile::FASTAEntry>& fasta_db,
355 std::vector<ProteinIdentification>& prot_ids,
400 std::vector<ProteinIdentification>& prot_ids,
414 const std::vector<FASTAFile::FASTAEntry>& fasta_db,
415 const std::string& output_base_name =
"")
const;
444 const std::vector<std::string>& in_spectra_files,
445 const std::vector<FASTAFile::FASTAEntry>& fasta_db,
446 const std::vector<std::string>& output_base_names = {},
447 const std::string& aggregate_base_name =
"")
const;
460 const std::vector<std::string>& in_spectra_files,
461 const std::string& in_db,
462 const std::vector<std::string>& output_base_names = {},
463 const std::string& aggregate_base_name =
"")
const;
478 double delta_mass = 0.0;
479 float prefix_fraction = 0;
480 float suffix_fraction = 0;
481 float mean_error = 0.0f;
482 int isotope_error = 0;
483 uint16_t applied_charge = 0;
484 uint16_t matched_prefix_ions = 0;
485 uint16_t matched_suffix_ions = 0;
495 static void preprocessSpectra_(
PeakMap& exp,
double fragment_mass_tolerance,
bool fragment_mass_tolerance_unit_ppm,
bool deisotope_requested,
Size peaks_keep_n,
Int peaks_window_top);
514 bool generate{
false};
515 bool strip_existing{
false};
516 bool have_decoys{
false};
518 bool is_prefix{
true};
520 bool strip_is_prefix{
true};
532 const std::vector<FASTAFile::FASTAEntry>& db)
const;
553 const std::vector<FASTAFile::FASTAEntry>& fasta_db,
570 const std::vector<FASTAFile::FASTAEntry>& full_db)
const;
587 std::vector<FASTAFile::FASTAEntry>& full_db,
589 std::vector<ProteinIdentification>& protein_ids,
603 const std::vector<FASTAFile::FASTAEntry>& db,
605 double effective_fragment_tol,
606 bool fragment_mass_tolerance_unit_ppm,
607 bool open_search_mode,
608 std::vector<std::vector<AnnotatedHit_>>& annotated_hits,
609 const std::string& progress_label)
const;
636 std::vector<std::vector<ProSEAlgorithm::AnnotatedHit_> >& annotated_hits,
637 std::vector<ProteinIdentification>& protein_ids,
642 Int peptide_missed_cleavages,
643 double precursor_mass_tolerance,
644 double fragment_mass_tolerance,
645 const std::string& precursor_mass_tolerance_unit_ppm,
646 const std::string& fragment_mass_tolerance_unit_ppm,
647 const Int precursor_min_charge,
648 const Int precursor_max_charge,
649 const std::string& enzyme,
650 const std::string& database_name)
const;
655 mutable double precursor_mass_tolerance_lower_{10.0};
656 mutable double precursor_mass_tolerance_upper_{10.0};
657 std::string precursor_mass_tolerance_unit_{
"ppm"};
672 bool deisotope_requested_{
true};
674 Int peaks_window_top_{20};
687 double fdr_psm_{0.0};
688 double fdr_protein_{0.0};
701 bool add_a_ions_{
false};
702 bool add_b_ions_{
true};
703 bool add_c_ions_{
false};
704 bool add_x_ions_{
false};
705 bool add_y_ions_{
true};
706 bool add_z_ions_{
false};
710 bool calibration_enabled_{
false};
711 double calibration_subset_ratio_{0.1};
712 Size calibration_min_psms_{50};
723 double precursor_shift{0};
724 double precursor_spread{0};
727 double fragment_tolerance{0};
728 double fragment_shift{0};
729 bool extreme_bias{
false};
752 mutable double last_mod_match_tolerance_used_{-1.0};
766 if (precursor_mass_tolerance_lower_ <= 0.0)
return precursor_mass_tolerance_upper_;
767 if (precursor_mass_tolerance_upper_ <= 0.0)
return precursor_mass_tolerance_lower_;
768 return std::min(precursor_mass_tolerance_lower_, precursor_mass_tolerance_upper_);
785 const std::vector<FASTAFile::FASTAEntry>& db)
const;
790 const std::string& marker,
bool is_prefix);
812 const std::vector<ProteinIdentification>& protein_ids,
828 const std::string& enzyme,
854 const std::vector<std::pair<std::string, std::vector<std::string>>>& manifest,
863 return FragmentIndex::isOpenSearchMode(precursor_mass_tolerance_lower_,
864 precursor_mass_tolerance_upper_,
865 precursor_mass_tolerance_unit_ ==
"ppm");
Representation of a peptide/protein sequence.
Definition AASequence.h:88
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Specificity
when querying for valid digestion products, this determines if the specificity of the two peptide end...
Definition EnzymaticDigestion.h:42
Generates from a set of Fasta files a 2D-datastructure which stores all theoretical masses of all b a...
Definition FragmentIndex.h:35
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
Combined result of open search modification analysis.
Definition OpenSearchModificationAnalysis.h:104
Management and storage of parameters / INI files.
Definition Param.h:46
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Fragment-index-based peptide database search algorithm (experimental).
Definition ProSEAlgorithm.h:51
ExitCodes search(PeakMap &spectra, const std::vector< FASTAFile::FASTAEntry > &fasta_db, std::vector< ProteinIdentification > &prot_ids, PeptideIdentificationList &pep_ids) const
In-memory search: search spectra against a protein database without file I/O.
std::string enzyme_
Definition ProSEAlgorithm.h:682
static std::string renderRunSummaryYaml(const MultiFileSearchResult &mfres, const std::vector< std::pair< std::string, std::vector< std::string > > > &manifest, Size files_failed, Size files_total)
FragmentIndex fragment_index
Definition ProSEAlgorithm.h:206
std::string fragment_tol_unit
Definition ProSEAlgorithm.h:115
Size peptide_max_size_
Definition ProSEAlgorithm.h:693
static void updateFinalStats(RunStatistics &stats, const PeptideIdentificationList &peptide_ids, const std::string &enzyme, bool fdr_applied)
SearchResult searchWithModificationAnalysis(const std::string &in_spectra, const std::string &in_db, const std::string &output_base_name="") const
Search with comprehensive results including modification analysis tables.
Size precursor_max_charge_
Definition ProSEAlgorithm.h:660
Size precursor_min_charge_
Definition ProSEAlgorithm.h:659
std::string decoy_prefix_
Definition ProSEAlgorithm.h:685
RunStatistics stats
Definition ProSEAlgorithm.h:149
Size report_top_hits_
Definition ProSEAlgorithm.h:699
Size modifications_max_variable_mods_per_peptide_
Definition ProSEAlgorithm.h:680
static void renderModificationSummary(const OpenSearchModificationAnalysis::OpenSearchAnalysisResult &mod_analysis, std::ostream &os)
SearchResult searchWithModificationAnalysis(PeakMap &spectra, const std::vector< FASTAFile::FASTAEntry > &fasta_db, const std::string &output_base_name="") const
In-memory search with modification analysis: no file I/O required.
std::string peptide_motif_
Definition ProSEAlgorithm.h:697
std::string input_file
spectrum file this run searched (basename or path)
Definition ProSEAlgorithm.h:76
std::map< Int, Size > charge_histogram
precursor charge -> PSM count
Definition ProSEAlgorithm.h:85
StringList modifications_fixed_
Definition ProSEAlgorithm.h:676
CalibrationResult_ runCalibrationPass_(PeakMap &spectra, FragmentIndex &fragment_index, const std::vector< FASTAFile::FASTAEntry > &db) const
Run a fast calibration pass on a subset of spectra to estimate mass accuracy.
static void capturePreFdrStats_(const PeptideIdentificationList &peptide_ids, RunStatistics &stats)
Param fragmentIndexParameters_() const
ProSE parameters made safe to hand to a FragmentIndex.
void postProcessHits_(const PeakMap &exp, std::vector< std::vector< ProSEAlgorithm::AnnotatedHit_ > > &annotated_hits, std::vector< ProteinIdentification > &protein_ids, PeptideIdentificationList &peptide_ids, Size top_hits, const StringList &modifications_fixed, const StringList &modifications_variable, Int peptide_missed_cleavages, double precursor_mass_tolerance, double fragment_mass_tolerance, const std::string &precursor_mass_tolerance_unit_ppm, const std::string &fragment_mass_tolerance_unit_ppm, const Int precursor_min_charge, const Int precursor_max_charge, const std::string &enzyme, const std::string &database_name) const
Filter and annotate search results.
std::vector< ProteinIdentification > protein_ids
Definition ProSEAlgorithm.h:145
Size peptide_min_size_
Definition ProSEAlgorithm.h:692
std::vector< FASTAFile::FASTAEntry > buildDecoyAugmentedDB_(const std::vector< FASTAFile::FASTAEntry > &fasta_db, const DecoyStrategy_ &strategy) const
Build the searched database according to strategy.
SearchResult aggregate
Definition ProSEAlgorithm.h:179
IntList precursor_isotopes_
Definition ProSEAlgorithm.h:662
std::string database_file
FASTA path (empty for in-memory db)
Definition ProSEAlgorithm.h:110
MultiFileSearchResult searchWithModificationAnalysis(const std::vector< std::string > &in_spectra_files, const std::string &in_db, const std::vector< std::string > &output_base_names={}, const std::string &aggregate_base_name="") const
Multi-file search with modification analysis (FASTA file path).
std::string strip_string
marker of pre-existing decoys to strip
Definition ProSEAlgorithm.h:519
MultiFileSearchResult searchWithModificationAnalysis(const std::vector< std::string > &in_spectra_files, const std::vector< FASTAFile::FASTAEntry > &fasta_db, const std::vector< std::string > &output_base_names={}, const std::string &aggregate_base_name="") const
Multi-file search with modification analysis (in-memory FASTA).
std::map< Size, Size > missed_cleavage_histogram
missed cleavages -> PSM count
Definition ProSEAlgorithm.h:86
RunStatistics last_run_stats_
Definition ProSEAlgorithm.h:743
std::string fragment_mass_tolerance_unit_
Definition ProSEAlgorithm.h:666
StringList annotate_psm_
Definition ProSEAlgorithm.h:690
static void applyCompleteSetProteinFDR(std::vector< ProteinIdentification > &protein_ids, PeptideIdentificationList &peptide_ids, const std::string &decoy_string, bool decoy_is_prefix, double protein_fdr)
Finalize protein-level FDR on a COMPLETE protein set (a single input file, or a merged cross-file agg...
OpenSearchModificationAnalysis::OpenSearchAnalysisResult modification_analysis
Definition ProSEAlgorithm.h:147
ExitCodes search(const std::string &in_spectra, const std::string &in_db, std::vector< ProteinIdentification > &prot_ids, PeptideIdentificationList &pep_ids) const
Search spectra in a spectrum file (mzML or Bruker .d) against a protein database using an FI-backed w...
std::vector< FASTAFile::FASTAEntry > db
Definition ProSEAlgorithm.h:205
static bool accessionHasDecoyMarker_(const std::string &accession, const std::string &marker, bool is_prefix)
static void renderRunSummary(const RunStatistics &stats, const SharedSearchStats &shared, const OpenSearchModificationAnalysis::OpenSearchAnalysisResult &mod_analysis, bool is_open_search, std::ostream &os)
ExitCodes search(PeakMap &spectra, SearchContext &ctx, std::vector< ProteinIdentification > &prot_ids, PeptideIdentificationList &pep_ids) const
In-memory search using a pre-built SearchContext.
SharedSearchStats shared
Definition ProSEAlgorithm.h:192
bool isOpenSearchMode_() const
Helper function to determine if open search should be used based on tolerance.
Definition ProSEAlgorithm.h:861
ExitCodes searchChunked_(PeakMap &spectra, std::vector< FASTAFile::FASTAEntry > &full_db, const DecoyStrategy_ &strategy, std::vector< ProteinIdentification > &protein_ids, PeptideIdentificationList &peptide_ids) const
Chunked database search implementation.
std::string decoy_string
Definition ProSEAlgorithm.h:184
static void preprocessSpectra_(PeakMap &exp, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm, bool deisotope_requested, Size peaks_keep_n, Int peaks_window_top)
filter, deisotope, decharge spectra
DecoyStrategy_ resolveDecoyStrategy_(const std::vector< FASTAFile::FASTAEntry > &db) const
Decide how to obtain/recognise decoys for db.
std::vector< FASTAFile::FASTAEntry > buildCalibrationSample_(const std::vector< FASTAFile::FASTAEntry > &full_db) const
Build a strided protein sample for chunked calibration.
std::string decoy_mode
"generated" | "external" | "none (target-only)"
Definition ProSEAlgorithm.h:123
PeptideIdentificationList peptide_ids
Definition ProSEAlgorithm.h:146
std::string enzyme
Definition ProSEAlgorithm.h:111
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
ExitCodes
Exit codes.
Definition ProSEAlgorithm.h:57
StringList modifications_variable_
Definition ProSEAlgorithm.h:678
static double maxRetainedScore_(const PeptideIdentificationList &peptide_ids)
std::vector< SearchResult > per_file
Definition ProSEAlgorithm.h:178
double fragment_mass_tolerance_
Definition ProSEAlgorithm.h:664
SearchContext prepareContext(const std::vector< FASTAFile::FASTAEntry > &fasta_db) const
Build a SearchContext (decoy-augmented database + FragmentIndex) for reuse.
void scoreSpectraAgainstIndex_(const PeakMap &spectra, FragmentIndex &fi, const std::vector< FASTAFile::FASTAEntry > &db, const TheoreticalSpectrumGenerator &spectrum_generator, double effective_fragment_tol, bool fragment_mass_tolerance_unit_ppm, bool open_search_mode, std::vector< std::vector< AnnotatedHit_ > > &annotated_hits, const std::string &progress_label) const
Score all spectra against one FragmentIndex.
Size peptide_missed_cleavages_
Definition ProSEAlgorithm.h:694
CalibrationResult_ last_calibration_result_
Definition ProSEAlgorithm.h:736
double computeModMatchTolerance_() const
Definition ProSEAlgorithm.h:764
void collectRunStatistics_(const PeakMap &spectra, const std::vector< ProteinIdentification > &protein_ids, const PeptideIdentificationList &peptide_ids, RunStatistics &stats) const
std::vector< std::string > fixed_mods
Definition ProSEAlgorithm.h:118
std::string precursor_tol_unit
Definition ProSEAlgorithm.h:113
DecoyMode_
How decoys are obtained/recognised for a search (parameter "decoys").
Definition ProSEAlgorithm.h:499
Result of a calibration pass.
Definition ProSEAlgorithm.h:722
Resolved decoy handling for one concrete input database.
Definition ProSEAlgorithm.h:513
Multi-file search result bundle.
Definition ProSEAlgorithm.h:177
Per-run identification statistics for the end-of-search report.
Definition ProSEAlgorithm.h:75
Prepared per-database state shared across multiple spectrum files.
Definition ProSEAlgorithm.h:204
Comprehensive search result including modification analysis.
Definition ProSEAlgorithm.h:143
Configuration, database and fragment-index facts shared across all input files of one ProSE invocatio...
Definition ProSEAlgorithm.h:109
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Generates theoretical spectra for peptides with various options.
Definition TheoreticalSpectrumGenerator.h:45
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< Int > IntList
Vector of signed integers.
Definition ListUtils.h:29
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Slimmer structure as storing all scored candidates in PeptideHit objects takes too much space.
Definition ProSEAlgorithm.h:470
static bool hasBetterScore(const AnnotatedHit_ &a, const AnnotatedHit_ &b)
Definition ProSEAlgorithm.h:487
double score
main score
Definition ProSEAlgorithm.h:477
AASequence sequence
Definition ProSEAlgorithm.h:471