94 void getMultipleSpectra(std::map<Int, MSSpectrum>& spectra,
const NASequence& oligo,
const std::set<Int>& charges,
Int base_charge = 1)
97 void updateMembers_()
103 void addFragmentPeaks_(
MSSpectrum& spectrum,
const std::vector<double>& fragment_masses,
const String& ion_type,
double offset,
double intensity,
Size start = 0)
106 void addAMinusBPeaks_(
MSSpectrum& spectrum,
const std::vector<double>& fragment_masses,
const NASequence& oligo,
Size start = 0)
112 void addChargedSpectrum_(
MSSpectrum& spectrum,
const MSSpectrum& uncharged_spectrum,
Int charge,
bool add_precursor)
double aB_intensity_
Protease digestion_enzyme
The cleavage site information in detail (from ProteaseDB)
NASequence sequence
void setProgress(SignedSize value) const
Sets the current progress.
Base class for TOPP applications.
File adapter for MzTab files.
double fragment_mass_tolerance
Definition: DBSearchParam.h:61
void getMultipleSpectra(std::map< Int, MSSpectrum > &spectra, const NASequence &oligo, const std::set< Int > &charges, Int base_charge=1) const
Generates spectra in multiple charge states for an oligonucleotide sequence.
Normalizes the peak intensities spectrum-wise.
Definition: Normalizer.h:57
bool add_z_ions_
Definition: NucleicAcidSpectrumGenerator.h:121
double z_intensity_
Definition: NucleicAcidSpectrumGenerator.h:134
double d_intensity_
Definition: NucleicAcidSpectrumGenerator.h:130
static double median(IteratorType begin, IteratorType end, bool sorted=false)
Calculates the median of a range of values.
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
Size findNearest(CoordinateType mz) const
Binary search for the peak nearest to a specific m/z.
PeakAnnotationSteps peak_annotations
Definition: MoleculeQueryMatch.h:71
Size scan_index
Definition: NucleicAcidSearchEngine.cpp:257
Representation of a nucleic acid sequence.
Definition: NASequence.h:62
bool modifyStrings(bool modify)
Switch modification of strings (quoting/replacing of separators) on/off.
static ProteaseDB * getInstance()
this member function serves as a replacement of the constructor
double a_intensity_
Definition: NucleicAcidSpectrumGenerator.h:127
IdentificationData::ScoreTypeRef applyToQueryMatches(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref) const
Calculate FDR on the level of molecule-query matches (e.g. peptide-spectrum matches) for "general" id...
void store(const String &filename, const PeakMap &experiment, bool compact=false)
stores the experiment data in a MascotGenericFile that can be used as input for MASCOT shell executio...
InputFileRef registerInputFile(const String &file)
Register an input file.
static String basename(const String &file)
Returns the basename of the file (without the path).
Invalid value exception.
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
This class serves for reading in and writing FASTA files.
PrecursorInfo(Size scan_index, Int charge, Size isotope, const String &adduct)
Definition: NucleicAcidSearchEngine.cpp:262
bool fragment_tolerance_ppm
Definition: DBSearchParam.h:63
WindowMower augments the highest peaks in a sliding or jumping window.
Definition: WindowMower.h:54
void store(const String &filename, const PeakMap &map) const
Stores a map in an MzML file.
Representation of a ribonucleotide (modified or unmodified)
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:121
void preprocessSpectra_(PeakMap &exp, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm, bool single_charge_spectra, bool negative_mode, Int min_charge, Int max_charge, bool include_unknown_charge)
Definition: NucleicAcidSearchEngine.cpp:531
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
const MzTabNucleicAcidSectionRows & getNucleicAcidSectionRows() const
File adapter for MzML files.
A more convenient string class.
static void applyVariableModifications(const std::set< ConstRibonucleotidePtr > &var_mods, const NASequence &seq, Size max_variable_mods_per_NASequence, std::vector< NASequence > &all_modified_NASequences, bool keep_original=true)
Applies variable modifications to a single NASequence. If keep_original is set the original (e....
Iterator begin()
Definition: MSExperiment.h:157
void deisotopeAndSingleChargeMSSpectrum_(MSSpectrum &in, Int min_charge, Int max_charge, double fragment_tolerance, bool fragment_unit_ppm, bool keep_only_deisotoped=false, Size min_isopeaks=3, Size max_isopeaks=10, bool make_single_charged=true)
Definition: NucleicAcidSearchEngine.cpp:375
IdentificationData::IdentifiedOligoRef oligo_ref
Definition: NucleicAcidSearchEngine.cpp:273
void digest(const NASequence &rna, std::vector< NASequence > &output, Size min_length=0, Size max_length=0) const
Performs the enzymatic digestion of a (potentially modified) RNA.
QueryMatchRef registerMoleculeQueryMatch(const MoleculeQueryMatch &match)
Register a molecule-query match (e.g. peptide-spectrum match)
void filterPeakSpectrum(PeakSpectrum &spectrum)
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
In-Memory representation of a mass spectrometry experiment.
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void getAllNames(std::vector< String > &all_names) const
returns all the enzyme names (does NOT include synonym names)
static void removeDecoys(IdentificationData &id_data)
void addMSLevel(int level)
adds a desired MS level for peaks to load
size_t Size
Size type e.g. used as variable which can hold result of size()
bool add_c_ions_
Definition: NucleicAcidSpectrumGenerator.h:116
void setEnzyme(const DigestionEnzyme *enzyme) override
Sets the enzyme for the digestion.
static RibonucleotideDB * getInstance()
replacement for constructor (singleton pattern)
Int charge
Definition: NucleicAcidSearchEngine.cpp:258
ProcessingSoftwareRef registerDataProcessingSoftware(const DataProcessingSoftware &software)
Register data processing software.
enum MassType mass_type
Definition: DBSearchParam.h:49
Data model of MzTab files. Please see the official MzTab specification at https://code....
const MoleculeQueryMatches & getMoleculeQueryMatches() const
Return the registered molecule-query matches (immutable)
bool precursor_tolerance_ppm
Definition: DBSearchParam.h:62
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
Size max_size_
Definition: NucleicAcidSearchEngine.cpp:328
Information about a score type.
std::vector< Int > IntList
Vector of signed integers.
const InputFiles & getInputFiles() const
Return the registered input files (immutable)
bool add_first_prefix_ion_
Definition: NucleicAcidSpectrumGenerator.h:123
Information about software used for data processing.
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
set< ConstRibonucleotidePtr > getModifications_(const set< String > &mod_names)
Definition: NucleicAcidSearchEngine.cpp:283
const double PROTON_MASS_U
Macro to be used if a warning, i.e. a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
void endProgress() const
Ends the progress display.
IdentifiedOligoRef registerIdentifiedOligo(const IdentifiedOligo &oligo)
Register an identified RNA oligonucleotide.
String database
Definition: DBSearchParam.h:51
void setMissedCleavages(Size missed_cleavages)
Sets the number of missed cleavages for the digestion (default is 0). This setting is ignored when lo...
const MzTabOligonucleotideSectionRows & getOligonucleotideSectionRows() const
void setVersion(const String &version)
Sets the software version.
Size size() const
Definition: MSExperiment.h:127
NLargest removes all but the n largest peaks.
Definition: NLargest.h:54
A base class for all classes handling default parameters.
ProcessingStepRef registerDataProcessingStep(const DataProcessingStep &step)
Register a data processing step.
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
bool fragment_mass_tolerance_ppm
Mass tolerance unit of fragment ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:233
static DateTime now()
Returns the current date and time.
bool add_x_ions_
Definition: NucleicAcidSpectrumGenerator.h:119
Parameters specific to a database search step.
double precursor_error_ppm
Definition: NucleicAcidSearchEngine.cpp:275
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Precursor meta information.
const DigestionEnzymeType * getEnzyme(const String &name) const
Definition: DigestionEnzymeDB.h:98
void calculateAndFilterFDR_(IdentificationData &id_data, bool only_top_hits)
Definition: NucleicAcidSearchEngine.cpp:841
Main OpenMS namespace.
HasInvalidLength(Size min_size, Size max_size)
Definition: NucleicAcidSearchEngine.cpp:330
void postProcessHits_(const PeakMap &exp, vector< HitsByScore > &annotated_hits, IdentificationData &id_data, bool negative_mode)
Definition: NucleicAcidSearchEngine.cpp:738
String adduct
Definition: NucleicAcidSearchEngine.cpp:260
void filterPeakMap(PeakMap &exp) const
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:230
double calculatePrecursorMass_(double mz, Int charge, Int isotope, double adduct_mass, bool negative_mode)
Definition: NucleicAcidSearchEngine.cpp:656
const DigestionEnzyme * digestion_enzyme
Definition: DBSearchParam.h:66
PeakMassType mass_type
Mass type of the peaks.
Definition: ProteinIdentification.h:228
Base class for all classes that want to report their progress.
bool add_metainfo_
Definition: NucleicAcidSpectrumGenerator.h:124
static void filterQueryMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
ScoreTypeRef registerScoreType(const ScoreType &score)
Register a score type.
void load(const String &filename, PeakMap &map)
Loads a map from a MzML file. Spectra and chromatograms are sorted by default (this can be disabled u...
bool add_b_ions_
Definition: NucleicAcidSpectrumGenerator.h:115
Stream class for writing to comma/tab/...-separated values files.
vector< String > fragment_ion_codes_
Definition: NucleicAcidSearchEngine.cpp:154
void generateLFQInput_(IdentificationData &id_data, const String &out_file)
Definition: NucleicAcidSearchEngine.cpp:869
String db
The used database.
Definition: ProteinIdentification.h:224
const DataQueries & getDataQueries() const
Return the registered data queries (immutable)
Definition: IdentificationData.h:336
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
std::vector< ScoreTypeRef > assigned_scores
Definition: DataProcessingSoftware.h:57
const MzTabOSMSectionRows & getOSMSectionRows() const
Search query, e.g. spectrum or feature.
map< String, String > ambiguous_mods_
Definition: NucleicAcidSearchEngine.cpp:155
CoordinateType getMZ() const
Non-mutable access to m/z.
Definition: Peak1D.h:115
bool add_precursor_peaks_
Definition: NucleicAcidSpectrumGenerator.h:125
bool isAmbiguous() const
Return whether this is an "ambiguous" modification (representing isobaric modifications on the base/r...
Macro for general debugging information.
Definition: LogStream.h:470
void setHigherScoreBetter(bool value)
sets the peptide score orientation
void store(const String &filename, const MzTab &mz_tab) const
void setCurrentProcessingStep(ProcessingStepRef step_ref)
Set a data processing step that will apply to all subsequent "register..." calls.
Meta data for a search hit (e.g. peptide-spectrum match).
SearchParamRef registerDBSearchParam(const DBSearchParam &param)
Register database search parameters.
std::set< String > variable_mods
Definition: DBSearchParam.h:58
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
Definition: NucleicAcidSearchEngine.cpp:271
static void exportIDs(const IdentificationData &id_data, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, bool export_oligonucleotides=false)
Export to legacy peptide/protein identifications.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setParameters(const Param &param)
Sets the parameters.
Size min_size_
Definition: NucleicAcidSearchEngine.cpp:327
const Param & getDefaults() const
Non-mutable access to the default parameters.
QString toQString() const
Conversion to Qt QString.
bool add_a_ions_
Definition: NucleicAcidSpectrumGenerator.h:114
void registerIDMetaData_(IdentificationData &id_data, const String &in_mzml, const vector< String > &primary_files, const IdentificationData::DBSearchParam &search_param)
Definition: NucleicAcidSearchEngine.cpp:807
static String path(const String &file)
Returns the path of the file (without the file name).
void setCharge(Int charge)
sets the charge of the peptide
const Param & getParameters() const
Non-mutable access to the parameters.
Size max_length
Definition: DBSearchParam.h:69
Read/write Mascot generic files (MGF).
bool find(TFinder &finder, const Pattern< TNeedle, FuzzyAC > &me, PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:884
std::set< Int > charges
Definition: DBSearchParam.h:55
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition: ProteinIdentification.h:232
bool add_aB_ions_
Definition: NucleicAcidSpectrumGenerator.h:122
A 1-dimensional raw data point or peak.
std::vector< String > StringList
Vector of String.
void sortByPosition()
Lexicographically sorts the peaks by their position.
Size min_length
Definition: DBSearchParam.h:68
double b_intensity_
Definition: NucleicAcidSpectrumGenerator.h:128
SeqType sequence
Definition: IdentifiedSequence.h:54
bool add_w_ions_
Definition: NucleicAcidSpectrumGenerator.h:118
void setSequence(const AASequence &sequence)
sets the peptide sequence
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:229
const ScoreTypes & getScoreTypes() const
Return the registered score types (immutable)
Definition: IdentificationData.h:330
vector< PeptideHit::PeakAnnotation > annotations
Definition: NucleicAcidSearchEngine.cpp:276
This class handles csv files. Currently only loading is implemented.
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
Calculates false discovery rates (FDR) from identifications.
int main(int argc, const char **argv)
double x_intensity_
Definition: NucleicAcidSpectrumGenerator.h:132
int main(int argc, const char **argv)
Definition: NucleicAcidSearchEngine.cpp:1378
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition: ProteinIdentification.h:234
Size missed_cleavages
Definition: DBSearchParam.h:67
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Representation of spectrum identification results and associated data.
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
bool add_d_ions_
Definition: NucleicAcidSpectrumGenerator.h:117
double c_intensity_
Definition: NucleicAcidSpectrumGenerator.h:129
double precursor_mass_tolerance
Definition: DBSearchParam.h:60
Represents the peptide hits for a spectrum.
void cleanup(bool require_query_match=true, bool require_identified_sequence=true, bool require_parent_match=true, bool require_parent_group=false, bool require_match_group=false)
Clean up the data structure after filtering parts of it.
String substr(size_t pos=0, size_t n=npos) const
Wrapper for the STL substr() method. Returns a String object with its contents initialized to a subst...
static void load(const String &filename, std::vector< FASTAEntry > &data)
loads a FASTA file given by 'filename' and stores the information in 'data'
ProcessingStepRef getCurrentProcessingStep()
EmpiricalFormula parseAdduct_(const String &adduct)
Definition: NucleicAcidSearchEngine.cpp:338
void filterPeakMap(PeakMap &exp)
Iterator end()
Definition: MSExperiment.h:167
const PrecursorInfo * precursor_ref
Definition: NucleicAcidSearchEngine.cpp:277
Identified sequence (peptide or oligonucleotide)
Options for loading files containing peak data.
void getPrimaryMSRunPath(StringList &toFill) const
get the file path to the first MS run
Definition: MetaData.h:74
bool operator()(const NASequence &s)
Definition: NucleicAcidSearchEngine.cpp:334
ThresholdMower removes all peaks below a threshold.
static MzTab exportMzTab(const IdentificationData &id_data)
Export to mzTab format.
Exit codes.
void setName(const String &name)
Sets the name.
void addScore(ScoreTypeRef score_type, double score, const boost::optional< ProcessingStepRef > &processing_step_opt=boost::none)
Add a score (possibly connected to a processing step)
Definition: ScoredProcessingResult.h:97
std::string toString() const
Definition: MetaData.h:75
ExitCodes main_(int, const char **) override
The actual "main" method. main_() is invoked by main().
Definition: NucleicAcidSearchEngine.cpp:922
void clearMSLevels()
clears the MS levels
bool precursor_mass_tolerance_ppm
Mass tolerance unit of precursor ions (true: ppm, false: Dalton)
Definition: ProteinIdentification.h:235
Management and storage of parameters / INI files.
String & remove(char what)
Remove all occurrences of the character what.
Generates theoretical spectra for nucleic acid sequences.
const IdentifiedOligos & getIdentifiedOligos() const
Return the registered identified oligonucleotides (immutable)
Definition: IdentificationData.h:366
static void importSequences(IdentificationData &id_data, const std::vector< FASTAFile::FASTAEntry > &fasta, IdentificationData::MoleculeType type=IdentificationData::MoleculeType::PROTEIN, const String &decoy_pattern="")
Import FASTA sequences as parent molecules.
bool add_y_ions_
Definition: NucleicAcidSpectrumGenerator.h:120
Size isotope
Definition: NucleicAcidSearchEngine.cpp:259
bool add_all_precursor_charges_
Definition: NucleicAcidSpectrumGenerator.h:126
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:113
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
Data processing step that is applied to the data (e.g. database search, PEP calculation,...
void setCharge(Int charge)
Mutable access to the charge.
void calculateCoverages(bool check_molecule_length=false)
Calculate sequence coverages of parent molecules.
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:465
void setMSLevels(const std::vector< Int > &levels)
sets the desired MS levels for peaks to load
double y_intensity_
Definition: NucleicAcidSpectrumGenerator.h:133
void resolveAmbiguousMods_(HitsByScore &hits)
Definition: NucleicAcidSearchEngine.cpp:677
Element could not be found exception.
const double C13C12_MASSDIFF_U
Class for the enzymatic digestion of RNAs.
Size getNrSpectra() const
get the total number of spectra available
The representation of a 1D spectrum.
Search parameters of the DB search.
double w_intensity_
Definition: NucleicAcidSpectrumGenerator.h:131
void setMZ(double mz)
sets the MZ of the MS2 spectrum
enum MoleculeType molecule_type
Definition: DBSearchParam.h:48
ExitCodes main(int argc, const char **argv)
Main routine of all TOPP applications.
double precursor_intensity_
Definition: NucleicAcidSpectrumGenerator.h:136
void filterPeakSpectrum(PeakSpectrum &spectrum)
multimap< double, AnnotatedHit, greater< double > > HitsByScore
Definition: NucleicAcidSearchEngine.cpp:280
std::pair< ScoreTypeRef, bool > findScoreType(const String &score_name) const
Look up a score type by name.
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
void setScoreType(const String &type)
sets the peptide score type
void registerOptionsAndFlags_()
Sets the valid command line options (with argument) and flags (without argument).
Definition: NucleicAcidSearchEngine.cpp:159
Used to load and store idXML files.
DataQueryRef registerDataQuery(const DataQuery &query)
Register a data query (e.g. MS2 spectrum or feature)
bool resolve_ambiguous_mods_
Definition: NucleicAcidSearchEngine.cpp:156
void setScore(double score)
sets the PSM score
Representation of a peptide hit.
