OpenMS
Loading...
Searching...
No Matches
FeatureLinkerBase.cpp
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Marc Sturm, Clemens Groepl, Steffen Sass $
7// --------------------------------------------------------------------------
8
16
18
20
21#include <iomanip> // setw
22
23using namespace OpenMS;
24using namespace std;
25
26//-------------------------------------------------------------
27//Doxygen docu
28//-------------------------------------------------------------
29
37// We do not want this class to show up in the docu:
39
// Common base class for feature linker tools. It registers the shared command
// line options (registerOptionsAndFlags_) and implements the shared workflow in
// common_main_(): read the inputs, hand them to a FeatureGroupingAlgorithm and
// report statistics about the resulting ConsensusMap.
// Inherits tool infrastructure from TOPPBase and progress reporting from ProgressLogger.
//
// NOTE(review): this listing extract is missing several numbered lines
// (60, 145, 161, 169, 240, 272, 281, 319, 323, 327, 330). Statements on those
// lines are not visible here; comments below hedge wherever they matter.
40class TOPPFeatureLinkerBase :
41 public TOPPBase,
42 public ProgressLogger
43{
44
45public:
  // Forwards name, description and the "official" flag unchanged to TOPPBase.
46 TOPPFeatureLinkerBase(String name, String description, bool official = true) :
47 TOPPBase(name, description, official)
48 {
49 }
50
51protected:
  // Registers the options visible below:
  //   in     - required list of input files (featureXML or consensusXML)
  //   out    - required output file (consensusXML)
  //   design - optional experimental design file (tsv)
  //   keep_subelements - flag, relevant for consensusXML input only
  // NOTE(review): listing line 60 (between "design" and "keep_subelements") is
  // absent from this extract.
52 void registerOptionsAndFlags_() override // only for "unlabeled" algorithms!
53 {
54 registerInputFileList_("in", "<files>", ListUtils::create<String>(""), "input files separated by blanks", true);
55 setValidFormats_("in", ListUtils::create<String>("featureXML,consensusXML"));
56 registerOutputFile_("out", "<file>", "", "Output file", true);
57 setValidFormats_("out", ListUtils::create<String>("consensusXML"));
58 registerInputFile_("design", "<file>", "", "input file containing the experimental design", false);
59 setValidFormats_("design", ListUtils::create<String>("tsv"));
61 registerFlag_("keep_subelements", "For consensusXML input only: If set, the sub-features of the inputs are transferred to the output.");
62 }
63
  // Shared main routine of the feature linkers.
  //
  // algorithm: grouping algorithm to run; it receives the "algorithm:" parameter
  //            subsection and is invoked through group() (and transferSubelements()
  //            when keep_subelements is set for consensusXML input).
  // labeled:   if true, "in" is read as a single string option instead of a list,
  //            the "design" option is not read, and column header 0 is duplicated
  //            into header 1 with labels "light"/"heavy".
  //
  // Returns ILLEGAL_PARAMETERS when the inputs differ in file type, when a design
  // file is combined with consensusXML input, or from the fraction check at
  // listing lines 146-149; otherwise EXECUTION_OK.
64 ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm,
65 bool labeled = false)
66 {
67 //-------------------------------------------------------------
68 // parameter handling
69 //-------------------------------------------------------------
70 StringList ins;
71 if (labeled)
72 {
73 ins.push_back(getStringOption_("in"));
74 }
75 else
76 {
77 ins = getStringList_("in");
78 }
79 String out = getStringOption_("out");
80
81 //-------------------------------------------------------------
82 // check for valid input
83 //-------------------------------------------------------------
84 // check if all input files have the correct type
  // The type of the first input is the reference every other input is compared to.
  // NOTE(review): ins[0] is read without an emptiness check; this presumably relies
  // on "in" being a required option - confirm an empty list cannot reach this point.
85 FileTypes::Type file_type = FileHandler::getType(ins[0]);
86 for (Size i = 0; i < ins.size(); ++i)
87 {
88 if (FileHandler::getType(ins[i]) != file_type)
89 {
90 writeLogError_("Error: All input files must be of the same type!");
91 return ILLEGAL_PARAMETERS;
92 }
93 }
94
95 //-------------------------------------------------------------
96 // set up algorithm
97 //-------------------------------------------------------------
  // Only the entries below the "algorithm:" prefix are passed on (prefix removed).
98 Param algorithm_param = getParam_().copy("algorithm:", true);
99 writeDebug_("Used algorithm parameters", algorithm_param, 3);
100 algorithm->setParameters(algorithm_param);
101
102 //-------------------------------------------------------------
103 // perform grouping
104 //-------------------------------------------------------------
105 // load input
106 ConsensusMap out_map;
  // Collects the primary MS run paths of all inputs.
  // NOTE(review): apart from the labeled-mode push_back below, no visible line in
  // this extract reads ms_run_locations - check whether it is still needed.
107 StringList ms_run_locations;
108
109 String design_file;
110
111 // TODO: support design in labeled feature linker
112 if (!labeled)
113 {
114 design_file = getStringOption_("design");
115 }
116
117 if (file_type == FileTypes::CONSENSUSXML && !design_file.empty())
118 {
  // NOTE(review): "als input" in the message below looks like a typo for
  // "as input"; the runtime string is left untouched here.
119 writeLogError_("Error: Using fractionated design with consensusXML als input is not supported!");
120 return ILLEGAL_PARAMETERS;
121 }
122
123 if (file_type == FileTypes::FEATUREXML)
124 {
125 OPENMS_LOG_INFO << "Linking " << ins.size() << " featureXMLs." << endl;
126
127 //-------------------------------------------------------------
128 // Extract (optional) fraction identifiers and associate with featureXMLs
129 //-------------------------------------------------------------
130
131 // determine map of fractions to MS files
  // Within this function only the number of fractions (frac2files.size()) and the
  // number of files per fraction (frac2files[i].size()) are used later on.
132 map<unsigned, vector<String>> frac2files;
133
134 if (!design_file.empty())
135 {
136 // parse design file and determine fractions
137 ExperimentalDesign ed = ExperimentalDesignFile::load(design_file, false);
138
139 // determine if design defines more than one fraction
140 frac2files = ed.getFractionToMSFilesMapping();
141
142 writeDebug_(String("Grouping ") + String(ed.getNumberOfFractions()) + " fractions.", 3);
143
144 // check if all fractions have the same number of MS runs associated
  // NOTE(review): listing line 145 (the condition guarding this block) is missing
  // from the extract; presumably a check based on
  // ExperimentalDesign::sameNrOfMSFilesPerFraction() - confirm against the original file.
146 {
147 writeLogError_("Error: Number of runs must match for every fraction!");
148 return ILLEGAL_PARAMETERS;
149 }
150 }
151 else // no design file given
152 {
153 for (Size i = 0; i != ins.size(); ++i)
154 {
  // Placeholder names "file<i>"; they only make fraction 1 hold one entry per input.
155 frac2files[1].emplace_back(String("file") + String(i)); // associate each run with fraction 1
156 }
157 }
158
159 vector<FeatureMap > maps(ins.size());
160 FileHandler f;
  // NOTE(review): listing line 161 is missing; `param` used below is presumably
  // declared there (likely a FeatureFileOptions obtained from f) - confirm.
162
163 // to save memory don't load convex hulls and subordinates
164 param.setLoadSubordinates(false);
165 param.setLoadConvexHull(false);
166 f.setFeatOptions(param);
167
168 Size progress = 0;
  // NOTE(review): listing line 169 is missing from the extract.
170 startProgress(0, ins.size(), "reading input");
171 for (Size i = 0; i < ins.size(); ++i)
172 {
173 FeatureMap tmp;
174 f.loadFeatures(ins[i], tmp, {FileTypes::FEATUREXML});
175
176 StringList ms_runs;
177 tmp.getPrimaryMSRunPath(ms_runs);
178
179 // associate mzML file with map i in consensusXML
  // Anything other than exactly one MS run only triggers a warning; in that case
  // the column header's filename is not set here.
180 if (ms_runs.size() > 1 || ms_runs.empty())
181 {
182 OPENMS_LOG_WARN << "Exactly one MS run should be associated with a FeatureMap. "
183 << ms_runs.size()
184 << " provided." << endl;
185 }
186 else
187 {
188 out_map.getColumnHeaders()[i].filename = ms_runs.front();
189 }
190 out_map.getColumnHeaders()[i].size = tmp.size();
191 out_map.getColumnHeaders()[i].unique_id = tmp.getUniqueId();
192
193 // copy over information on the primary MS run
194 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
195
196 // to save memory, remove convex hulls, subordinates:
  // All meta values are cleared as well; only the two adduct-related values are
  // saved beforehand and written back afterwards if non-empty.
197 for (Feature& ft : tmp)
198 {
199 String adduct;
200 String group;
201 //exception: adduct information
202 if (ft.metaValueExists(Constants::UserParam::DC_CHARGE_ADDUCTS))
203 {
204 adduct = ft.getMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS);
205 }
206 if (ft.metaValueExists(Constants::UserParam::ADDUCT_GROUP))
207 {
208 group = ft.getMetaValue(Constants::UserParam::ADDUCT_GROUP);
209 }
210 ft.getSubordinates().clear();
211 ft.getConvexHulls().clear();
212 ft.clearMetaInfo();
213 if (!adduct.empty())
214 {
215 ft.setMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS, adduct);
216 }
217 if (!group.empty())
218 {
  // NOTE(review): the value was read via Constants::UserParam::ADDUCT_GROUP but is
  // written back under the literal key "Group" - verify that both name the same key.
219 ft.setMetaValue("Group", group);
220 }
221
222 }
223
  // NOTE(review): this copy-assigns tmp, which is not used again in this
  // iteration; a move would presumably suffice - confirm before changing.
224 maps[i] = tmp;
225 maps[i].updateRanges();
226
  // Post-increment: the reported values run from 0 to ins.size() - 1.
227 setProgress(progress++);
228 }
229 endProgress();
230
231 // exception for "labeled" algorithms: copy file descriptions
232 if (labeled)
233 {
234 out_map.getColumnHeaders()[1] = out_map.getColumnHeaders()[0];
235 out_map.getColumnHeaders()[0].label = "light";
236 out_map.getColumnHeaders()[1].label = "heavy";
  // NOTE(review): indexes element 0 unconditionally; if the input had no MS run
  // path (warning branch above) ms_run_locations would be empty here - verify.
237 ms_run_locations.push_back(ms_run_locations[0]);
238 }
239
241 // invoke feature grouping algorithm
242
243 if (frac2files.size() == 1) // group one fraction
244 {
245 algorithm->group(maps, out_map);
246 }
247 else // group multiple fractions
248 {
249 writeDebug_(String("Stored in ") + String(maps.size()) + " maps.", 3);
  // Fractions are assumed to be keyed 1..n; frac2files[i] uses operator[] for the lookup.
  // NOTE(review): the inner loop always indexes maps from 0, independent of the
  // fraction i, so every fraction is grouped from the same leading maps. See the
  // TODO below and confirm whether a per-fraction offset is intended.
250 for (Size i = 1; i <= frac2files.size(); ++i)
251 {
252 vector<FeatureMap> fraction_maps;
253 // TODO FRACTIONS: here we assume that the order of featureXML is from fraction 1..n
254 // we should check if these are shuffled and error / warn
255 for (size_t feature_map_index = 0; feature_map_index != frac2files[i].size(); ++feature_map_index)
256 {
257 fraction_maps.push_back(maps[feature_map_index]);
258 }
259 algorithm->group(fraction_maps, out_map);
260 }
261 }
262 }
263 else
264 {
  // Branch for every input type other than featureXML (consensusXML per the
  // valid formats registered for "in").
265 //TODO isn't it better to have this option/functionality in the FeatureGroupingAlgorithm class?
266 // Otherwise everyone has to remember e.g. to annotate the old map_index etc.
267 bool keep_subelements = getFlag_("keep_subelements");
268 vector<ConsensusMap> maps(ins.size());
269 FileHandler f;
270 for (Size i = 0; i < ins.size(); ++i)
271 {
  // NOTE(review): listing line 272 is missing; presumably it loads ins[i] into
  // maps[i] (FileHandler::loadConsensusFeatures is referenced by this file) - confirm.
273 maps[i].updateRanges();
274 // copy over information on the primary MS run
275 StringList ms_runs;
276 maps[i].getPrimaryMSRunPath(ms_runs);
277 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
278 if (keep_subelements)
279 {
  // Callable that copies an existing "map_index" meta value to "old_map_index" and
  // warns when it is absent. NOTE(review): listing line 281 (the lambda header
  // declaring `p`) is missing from the extract.
280 auto saveOldMapIndex =
282 {
283 if (p.metaValueExists("map_index"))
284 {
285 p.setMetaValue("old_map_index", p.getMetaValue("map_index"));
286 }
287 else
288 {
  // NOTE(review): the two adjacent string literals below are concatenated without
  // a separating space, so the message reads "... assign aconsistent one."
289 OPENMS_LOG_WARN << "Warning: map_index not found in PeptideID. The tool will not be able to assign a"
290 "consistent one. Check the settings of previous tools." << std::endl;
291 }
292 };
293 maps[i].applyFunctionOnPeptideIDs(saveOldMapIndex, true);
294 }
295 }
296 // group
297 algorithm->group(maps, out_map);
298
299 // set file descriptions:
300
301 if (!keep_subelements)
302 {
  // Each input consensus map becomes one column of the output.
303 for (Size i = 0; i < ins.size(); ++i)
304 {
305 out_map.getColumnHeaders()[i].filename = ins[i];
306 out_map.getColumnHeaders()[i].size = maps[i].size();
307 out_map.getColumnHeaders()[i].unique_id = maps[i].getUniqueId();
308 }
309 }
310 else
311 {
312 // components of the output map are not the input maps themselves, but
313 // the components of the input maps:
314 algorithm->transferSubelements(maps, out_map);
315 }
316 }
317
  // NOTE(review): the statements for the next four steps (listing lines 319, 323,
  // 327 and 330) are missing or truncated in this extract; only their comments
  // and the first half of the addDataProcessing_ call are visible.
318 // assign unique ids
320
321 // annotate output with data processing info
322 addDataProcessing_(out_map,
324
325
326 // sort list of peptide identifications in each consensus feature by map index
328
329 // write output
331
332 // some statistics
  // Histogram: number of sub-elements per consensus feature -> number of such features.
333 map<Size, UInt> num_consfeat_of_size;
334 for (const ConsensusFeature& cf : out_map)
335 {
336 ++num_consfeat_of_size[cf.size()];
337 }
338
339 OPENMS_LOG_INFO << "Number of consensus features:" << endl;
  // Reverse iteration prints the largest consensus feature size first.
340 for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin();
341 i != num_consfeat_of_size.rend(); ++i)
342 {
343 OPENMS_LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6)
344 << i->second << endl;
345 }
346 OPENMS_LOG_INFO << " total: " << setw(6) << out_map.size() << endl;
347
348 return EXECUTION_OK;
349 }
350
351};
352
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition LogStream.h:447
#define OPENMS_LOG_INFO
Macro if information, e.g. a status, should be reported.
Definition LogStream.h:452
A consensus feature spanning multiple LC-MS/MS experiments.
Definition ConsensusFeature.h:45
A container for consensus elements.
Definition ConsensusMap.h:68
Size applyMemberFunction(Size(Type::*member_function)())
Applies a member function of Type to the container itself and all consensus features....
Definition ConsensusMap.h:283
void sortPeptideIdentificationsByMapIndex()
Sorts PeptideIdentifications of consensus features with respect to their map index.
const ColumnHeaders & getColumnHeaders() const
Non-mutable access to the file descriptions.
@ FEATURE_GROUPING
Feature grouping
Definition DataProcessing.h:48
void setParameters(const Param &param)
Sets the parameters.
static ExperimentalDesign load(const String &tsv_file, bool require_spectra_files)
Loads an experimental design from a tab-separated file.
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition ExperimentalDesign.h:109
unsigned getNumberOfFractions() const
bool sameNrOfMSFilesPerFraction() const
std::map< unsigned int, std::vector< String > > getFractionToMSFilesMapping() const
return fraction index to file paths (ordered by fraction_group)
size_t size() const noexcept
Definition ExposedVector.h:128
Options for loading files containing features.
Definition FeatureFileOptions.h:21
void setLoadConvexHull(bool convex)
void setLoadSubordinates(bool sub)
Base class for all feature grouping algorithms.
Definition FeatureGroupingAlgorithm.h:25
void transferSubelements(const std::vector< ConsensusMap > &maps, ConsensusMap &out) const
Transfers subelements (grouped features) from input consensus maps to the result consensus map.
virtual void group(const std::vector< FeatureMap > &maps, ConsensusMap &out)=0
Applies the algorithm. The features in the input maps are grouped and the output is written to the co...
A container for features.
Definition FeatureMap.h:82
void getPrimaryMSRunPath(StringList &toFill) const
get the file path to the first MS run
void updateRanges() override
An LC-MS feature.
Definition Feature.h:46
Facilitates file handling by file type recognition.
Definition FileHandler.h:46
void loadConsensusFeatures(const String &filename, ConsensusMap &map, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Loads a file into a ConsensusMap.
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
FeatureFileOptions & getFeatOptions()
Mutable access to the feature file options for loading/storing.
void storeConsensusFeatures(const String &filename, const ConsensusMap &map, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Store a ConsensusFeatureMap.
void loadFeatures(const String &filename, FeatureMap &map, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Loads a file into a FeatureMap.
void setFeatOptions(const FeatureFileOptions &)
set feature file options for loading/storing
Management and storage of parameters / INI files.
Definition Param.h:46
Param copy(const std::string &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition PeptideIdentification.h:64
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
void setProgress(SignedSize value) const
Sets the current progress.
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress(UInt64 bytes_processed=0) const
@ CMD
Command line progress.
Definition ProgressLogger.h:44
A more convenient string class.
Definition String.h:34
Base class for TOPP applications.
Definition TOPPBase.h:122
Param const & getParam_() const
Return all parameters relevant to this TOPP tool.
void registerInputFileList_(const String &name, const String &argument, const StringList &default_value, const String &description, bool required=true, bool advanced=false, const StringList &tags=StringList())
Registers a list of input files option.
void addEmptyLine_()
Adds an empty line between registered variables in the documentation.
void registerInputFile_(const String &name, const String &argument, const String &default_value, const String &description, bool required=true, bool advanced=false, const StringList &tags=StringList())
Registers an input file option.
bool getFlag_(const String &name) const
Returns the value of a previously registered flag.
void writeDebug_(const String &text, UInt min_level) const
Writes a string to the log file and to OPENMS_LOG_DEBUG if the debug level is at least min_level.
void setValidFormats_(const String &name, const std::vector< String > &formats, const bool force_OpenMS_format=true)
Sets the formats for an input/output file option or for all members of an input/output file list.
void registerFlag_(const String &name, const String &description, bool advanced=false)
Registers a flag.
StringList getStringList_(const String &name) const
Returns the value of a previously registered StringList.
String getStringOption_(const String &name) const
Returns the value of a previously registered string option (use getOutputDirOption() for output direc...
virtual void registerOptionsAndFlags_()=0
Sets the valid command line options (with argument) and flags (without argument).
DataProcessing getProcessingInfo_(DataProcessing::ProcessingAction action) const
Returns the data processing information.
void registerOutputFile_(const String &name, const String &argument, const String &default_value, const String &description, bool required=true, bool advanced=false)
Registers an output file option.
@ ILLEGAL_PARAMETERS
Definition TOPPBase.h:140
@ EXECUTION_OK
Definition TOPPBase.h:134
void addDataProcessing_(ConsensusMap &map, const DataProcessing &dp) const
Data processing setter for consensus maps.
void writeLogError_(const String &text) const
Writes a string to the log file and to OPENMS_LOG_ERROR.
UInt64 getUniqueId() const
Non-mutable access to unique id - returns the unique id.
Definition UniqueIdInterface.h:78
Size setUniqueId()
Assigns a new, valid unique id. Always returns 1.
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< String > StringList
Vector of String.
Definition ListUtils.h:44
const std::string ADDUCT_GROUP
Definition Constants.h:564
const std::string DC_CHARGE_ADDUCTS
Definition Constants.h:574
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
STL namespace.
Type
Actual file types enum.
Definition FileTypes.h:31
@ CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition FileTypes.h:39
@ FEATUREXML
OpenMS feature file (.featureXML)
Definition FileTypes.h:37