OpenMS
Loading...
Searching...
No Matches
FeatureLinkerBase.cpp
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Marc Sturm, Clemens Groepl, Steffen Sass $
7// --------------------------------------------------------------------------
8
18
20
22
23#include <iomanip> // setw
24
25using namespace OpenMS;
26using namespace std;
27
28//-------------------------------------------------------------
29//Doxygen docu
30//-------------------------------------------------------------
31
39// We do not want this class to show up in the docu:
41
// Common base for the feature linker tools: combines the TOPPBase tool
// interface with ProgressLogger and provides the shared option registration
// and the shared main routine (common_main_) that runs a FeatureGroupingAlgorithm.
// Plain '//' comments are used on purpose so the class stays out of the generated docs.
// NOTE(review): this listing is an extracted, line-numbered copy of the source; several
// source lines are absent (flagged individually below) and must be checked against the
// real file before any code change is made.
42class TOPPFeatureLinkerBase :
43 public TOPPBase,
44 public ProgressLogger
45{
46
47public:
 // Forwards the tool name, description and "official" flag unchanged to TOPPBase.
48 TOPPFeatureLinkerBase(String name, String description, bool official = true) :
49 TOPPBase(name, description, official)
50 {
51 }
52
53protected:
 // Registers the command line interface shared by the unlabeled linkers:
 //  - "in":     required list of input files (featureXML or consensusXML)
 //  - "out":    required output file (consensusXML)
 //  - "design": optional experimental design file (tsv)
 //  - "keep_subelements": flag, only meaningful for consensusXML input
54 void registerOptionsAndFlags_() override // only for "unlabeled" algorithms!
55 {
56 registerInputFileList_("in", "<files>", ListUtils::create<String>(""), "input files separated by blanks", true);
57 setValidFormats_("in", ListUtils::create<String>("featureXML,consensusXML"));
58 registerOutputFile_("out", "<file>", "", "Output file", true);
59 setValidFormats_("out", ListUtils::create<String>("consensusXML"));
60 registerInputFile_("design", "<file>", "", "input file containing the experimental design", false);
61 setValidFormats_("design", ListUtils::create<String>("tsv"));
 // NOTE(review): listing line 62 is absent here (presumably an addEmptyLine_() call — confirm).
63 registerFlag_("keep_subelements", "For consensusXML input only: If set, the sub-features of the inputs are transferred to the output.");
64 }
65
 // Shared main routine of the linker tools.
 //
 // Reads the inputs, configures 'algorithm' from the "algorithm:" parameter subtree,
 // runs the grouping into one ConsensusMap and logs a histogram of consensus feature sizes.
 //
 // algorithm: grouping algorithm to run; it is dereferenced unconditionally, so it must not be null.
 // labeled:   if true, "in" is read as a single string option (not a list), "design" is not read,
 //            and column header 0 is duplicated into header 1 with labels "light"/"heavy".
 //
 // Returns ILLEGAL_PARAMETERS if the inputs have differing file types, if a design file is
 // combined with consensusXML input, or if the per-fraction run count check fails;
 // returns EXECUTION_OK otherwise.
66 ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm,
67 bool labeled = false)
68 {
69 //-------------------------------------------------------------
70 // parameter handling
71 //-------------------------------------------------------------
72 StringList ins;
73 if (labeled)
74 {
75 ins.push_back(getStringOption_("in"));
76 }
77 else
78 {
79 ins = getStringList_("in");
80 }
81 String out = getStringOption_("out");
82
83 //-------------------------------------------------------------
84 // check for valid input
85 //-------------------------------------------------------------
86 // check if all input files have the correct type
 // The type of the first input is the reference every other input is compared against.
 // NOTE(review): ins[0] is read without checking that 'ins' is non-empty; this relies on
 // "in" being a required option — confirm that an empty list cannot reach this point.
87 FileTypes::Type file_type = FileHandler::getType(ins[0]);
88 for (Size i = 0; i < ins.size(); ++i)
89 {
90 if (FileHandler::getType(ins[i]) != file_type)
91 {
92 writeLogError_("Error: All input files must be of the same type!");
93 return ILLEGAL_PARAMETERS;
94 }
95 }
96
97 //-------------------------------------------------------------
98 // set up algorithm
99 //-------------------------------------------------------------
 // Only the entries below "algorithm:" are handed to the algorithm, with the prefix removed.
100 Param algorithm_param = getParam_().copy("algorithm:", true);
101 writeDebug_("Used algorithm parameters", algorithm_param, 3);
102 algorithm->setParameters(algorithm_param);
103
104 //-------------------------------------------------------------
105 // perform grouping
106 //-------------------------------------------------------------
107 // load input
108 ConsensusMap out_map;
109 StringList ms_run_locations;
110
 // Stays empty in labeled mode, which disables all design handling below.
111 String design_file;
112
113 // TODO: support design in labeled feature linker
114 if (!labeled)
115 {
116 design_file = getStringOption_("design");
117 }
118
119 if (file_type == FileTypes::CONSENSUSXML && !design_file.empty())
120 {
 // NOTE(review): the message below contains "als input" (likely meant "as input");
 // it is a runtime string, so it is left untouched here.
121 writeLogError_("Error: Using fractionated design with consensusXML als input is not supported!");
122 return ILLEGAL_PARAMETERS;
123 }
124
125 if (file_type == FileTypes::FEATUREXML)
126 {
127 OPENMS_LOG_INFO << "Linking " << ins.size() << " featureXMLs." << endl;
128
129 //-------------------------------------------------------------
130 // Extract (optional) fraction identifiers and associate with featureXMLs
131 //-------------------------------------------------------------
132
133 // determine map of fractions to MS files
134 map<unsigned, vector<String>> frac2files;
135
136 if (!design_file.empty())
137 {
138 // parse design file and determine fractions
139 ExperimentalDesign ed = ExperimentalDesignFile::load(design_file, false);
140
141 // determine if design defines more than one fraction
142 frac2files = ed.getFractionToMSFilesMapping();
143
144 writeDebug_(String("Grouping ") + String(ed.getNumberOfFractions()) + " fractions.", 3);
145
146 // check if all fractions have the same number of MS runs associated
 // NOTE(review): listing line 147 (the condition guarding the block below) is absent here;
 // presumably a negated ed.sameNrOfMSFilesPerFraction() test — confirm.
148 {
149 writeLogError_("Error: Number of runs must match for every fraction!");
150 return ILLEGAL_PARAMETERS;
151 }
152 }
153 else // no design file given
154 {
 // Without a design, all inputs go into a single fraction (key 1) under placeholder
 // names "file0", "file1", ...; only the number of entries per fraction is used later.
155 for (Size i = 0; i != ins.size(); ++i)
156 {
157 frac2files[1].emplace_back(String("file") + String(i)); // associate each run with fraction 1
158 }
159 }
160
161 vector<FeatureMap > maps(ins.size());
162 FileHandler f;
 // NOTE(review): listing line 163 is absent here; it must declare 'param', which is used
 // below (presumably initialised from f.getFeatOptions() — confirm).
164
165 // to save memory don't load convex hulls and subordinates
166 param.setLoadSubordinates(false);
167 param.setLoadConvexHull(false);
168 f.setFeatOptions(param);
169
170 Size progress = 0;
 // NOTE(review): listing line 171 is absent here (presumably selects the progress log type — confirm).
172 startProgress(0, ins.size(), "reading input");
173 for (Size i = 0; i < ins.size(); ++i)
174 {
175 FeatureMap tmp;
176 f.loadFeatures(ins[i], tmp, {FileTypes::FEATUREXML});
177
178 StringList ms_runs;
179 tmp.getPrimaryMSRunPath(ms_runs);
180
181 // associate mzML file with map i in consensusXML
 // A filename is only recorded for exactly one run path; zero or several just warn.
182 if (ms_runs.size() > 1 || ms_runs.empty())
183 {
184 OPENMS_LOG_WARN << "Exactly one MS run should be associated with a FeatureMap. "
185 << ms_runs.size()
186 << " provided." << endl;
187 }
188 else
189 {
190 out_map.getColumnHeaders()[i].filename = ms_runs.front();
191 }
192 out_map.getColumnHeaders()[i].size = tmp.size();
193 out_map.getColumnHeaders()[i].unique_id = tmp.getUniqueId();
194
195 // copy over information on the primary MS run
196 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
197
198 // to save memory, remove convex hulls, subordinates:
 // All meta values are dropped as well; only the two adduct-related values are saved
 // beforehand and written back afterwards (and only if they were non-empty).
199 for (Feature& ft : tmp)
200 {
201 String adduct;
202 String group;
203 //exception: adduct information
204 if (ft.metaValueExists(Constants::UserParam::DC_CHARGE_ADDUCTS))
205 {
206 adduct = ft.getMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS);
207 }
208 if (ft.metaValueExists(Constants::UserParam::ADDUCT_GROUP))
209 {
210 group = ft.getMetaValue(Constants::UserParam::ADDUCT_GROUP);
211 }
212 ft.getSubordinates().clear();
213 ft.getConvexHulls().clear();
214 ft.clearMetaInfo();
215 if (!adduct.empty())
216 {
217 ft.setMetaValue(Constants::UserParam::DC_CHARGE_ADDUCTS, adduct);
218 }
219 if (!group.empty())
220 {
 // NOTE(review): the value was read via Constants::UserParam::ADDUCT_GROUP but is written
 // under the literal key "Group"; presumably the constant equals "Group" — confirm.
221 ft.setMetaValue("Group", group);
222 }
223
224 }
225
226 maps[i] = tmp;
227 maps[i].updateRanges();
228
229 setProgress(progress++);
230 }
231 endProgress();
232
233 // exception for "labeled" algorithms: copy file descriptions
234 if (labeled)
235 {
236 out_map.getColumnHeaders()[1] = out_map.getColumnHeaders()[0];
237 out_map.getColumnHeaders()[0].label = "light";
238 out_map.getColumnHeaders()[1].label = "heavy";
 // NOTE(review): ms_run_locations is empty if the input map carried no MS run path (that
 // case only warns above), so indexing element 0 here looks unguarded — confirm.
239 ms_run_locations.push_back(ms_run_locations[0]);
240 }
241
 // NOTE(review): listing line 242 is absent here; its content cannot be determined from this view.
243 // invoke feature grouping algorithm
244
245 if (frac2files.size() == 1) // group one fraction
246 {
247 algorithm->group(maps, out_map);
248 }
249 else // group multiple fractions
250 {
251 writeDebug_(String("Stored in ") + String(maps.size()) + " maps.", 3);
 // Iterates fraction keys 1..n via operator[], i.e. it assumes the keys are contiguous
 // and start at 1.
252 for (Size i = 1; i <= frac2files.size(); ++i)
253 {
254 vector<FeatureMap> fraction_maps;
255 // TODO FRACTIONS: here we assume that the order of featureXML is from fraction 1..n
256 // we should check if these are shuffled and error / warn
 // NOTE(review): feature_map_index restarts at 0 for every fraction, so each fraction is
 // given the same leading maps[0..k-1]; looks like a per-fraction offset is missing — confirm.
257 for (size_t feature_map_index = 0; feature_map_index != frac2files[i].size(); ++feature_map_index)
258 {
259 fraction_maps.push_back(maps[feature_map_index]);
260 }
261 algorithm->group(fraction_maps, out_map);
262 }
263 }
264 }
265 else
266 {
267 //TODO isn't it better to have this option/functionality in the FeatureGroupingAlgorithm class?
268 // Otherwise everyone has to remember e.g. to annotate the old map_index etc.
269 bool keep_subelements = getFlag_("keep_subelements");
270 vector<ConsensusMap> maps(ins.size());
271 FileHandler f;
272 for (Size i = 0; i < ins.size(); ++i)
273 {
 // NOTE(review): listing line 274 is absent here; presumably it loads ins[i] into maps[i]
 // (e.g. via f.loadConsensusFeatures) — confirm.
275 maps[i].updateRanges();
276 // copy over information on the primary MS run
277 StringList ms_runs;
278 maps[i].getPrimaryMSRunPath(ms_runs);
279 ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
280 if (keep_subelements)
281 {
 // Copies each peptide identification's "map_index" into "old_map_index" before grouping.
 // NOTE(review): listing line 283 (the lambda head that declares 'p') is absent here.
282 auto saveOldMapIndex =
284 {
285 if (p.metaValueExists("map_index"))
286 {
287 p.setMetaValue("old_map_index", p.getMetaValue("map_index"));
288 }
289 else
290 {
 // NOTE(review): the two adjacent literals below concatenate without a space, so the
 // logged text reads "...assign aconsistent one." — a space is missing.
291 OPENMS_LOG_WARN << "Warning: map_index not found in PeptideID. The tool will not be able to assign a"
292 "consistent one. Check the settings of previous tools." << std::endl;
293 }
294 };
295 maps[i].applyFunctionOnPeptideIDs(saveOldMapIndex, true);
296 }
297 }
298 // group
299 algorithm->group(maps, out_map);
300
301 // set file descriptions:
302
303 if (!keep_subelements)
304 {
 // Each input consensus map becomes one column of the output, described by its file path.
305 for (Size i = 0; i < ins.size(); ++i)
306 {
307 out_map.getColumnHeaders()[i].filename = ins[i];
308 out_map.getColumnHeaders()[i].size = maps[i].size();
309 out_map.getColumnHeaders()[i].unique_id = maps[i].getUniqueId();
310 }
311 }
312 else
313 {
314 // components of the output map are not the input maps themselves, but
315 // the components of the input maps:
316 algorithm->transferSubelements(maps, out_map);
317 }
318 }
319
 // NOTE(review): the statements on listing lines 321, 325, 329 and 332 are absent below
 // (unique id assignment, the second argument of addDataProcessing_, the identification
 // sort and the output write, judging by the surviving comments) — restore from the real source.
320 // assign unique ids
322
323 // annotate output with data processing info
324 addDataProcessing_(out_map,
326
327
328 // sort list of peptide identifications in each consensus feature by map index
330
331 // write output
333
334 // some statistics
 // Histogram: number of consensus features per feature size, reported largest size first.
335 map<Size, UInt> num_consfeat_of_size;
336 for (const ConsensusFeature& cf : out_map)
337 {
338 ++num_consfeat_of_size[cf.size()];
339 }
340
341 OPENMS_LOG_INFO << "Number of consensus features:" << endl;
342 for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin();
343 i != num_consfeat_of_size.rend(); ++i)
344 {
345 OPENMS_LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6)
346 << i->second << endl;
347 }
348 OPENMS_LOG_INFO << " total: " << setw(6) << out_map.size() << endl;
349
350 return EXECUTION_OK;
351 }
352
353};
354
#define OPENMS_LOG_WARN
Macro for warnings.
Definition LogStream.h:550
#define OPENMS_LOG_INFO
Macro for information/status messages.
Definition LogStream.h:554
A consensus feature spanning multiple LC-MS/MS experiments.
Definition ConsensusFeature.h:45
A container for consensus elements.
Definition ConsensusMap.h:69
Size applyMemberFunction(Size(Type::*member_function)())
Applies a member function of Type to the container itself and all consensus features....
Definition ConsensusMap.h:290
void sortPeptideIdentificationsByMapIndex()
Sorts PeptideIdentifications of consensus features with respect to their map index.
const ColumnHeaders & getColumnHeaders() const
Non-mutable access to the file descriptions.
@ FEATURE_GROUPING
Feature grouping
Definition DataProcessing.h:48
void setParameters(const Param &param)
Sets the parameters.
static ExperimentalDesign load(const String &tsv_file, bool require_spectra_files)
Loads an experimental design from a tab-separated file.
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition ExperimentalDesign.h:109
unsigned getNumberOfFractions() const
bool sameNrOfMSFilesPerFraction() const
std::map< unsigned int, std::vector< String > > getFractionToMSFilesMapping() const
return fraction index to file paths (ordered by fraction_group)
size_t size() const noexcept
Definition ExposedVector.h:128
Options for loading files containing features.
Definition FeatureFileOptions.h:21
void setLoadConvexHull(bool convex)
void setLoadSubordinates(bool sub)
Base class for all feature grouping algorithms.
Definition FeatureGroupingAlgorithm.h:25
void transferSubelements(const std::vector< ConsensusMap > &maps, ConsensusMap &out) const
Transfers subelements (grouped features) from input consensus maps to the result consensus map.
virtual void group(const std::vector< FeatureMap > &maps, ConsensusMap &out)=0
Applies the algorithm. The features in the input maps are grouped and the output is written to the co...
A container for features.
Definition FeatureMap.h:82
void getPrimaryMSRunPath(StringList &toFill) const
get the file path to the first MS run
void updateRanges() override
An LC-MS feature.
Definition Feature.h:46
Facilitates file handling by file type recognition.
Definition FileHandler.h:45
void loadConsensusFeatures(const String &filename, ConsensusMap &map, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Loads a file into a ConsensusMap.
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
FeatureFileOptions & getFeatOptions()
Mutable access to the feature file options for loading/storing.
void storeConsensusFeatures(const String &filename, const ConsensusMap &map, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Store a ConsensusFeatureMap.
void loadFeatures(const String &filename, FeatureMap &map, const std::vector< FileTypes::Type > allowed_types={}, ProgressLogger::LogType log=ProgressLogger::NONE)
Loads a file into a FeatureMap.
void setFeatOptions(const FeatureFileOptions &)
set feature file options for loading/storing
Management and storage of parameters / INI files.
Definition Param.h:46
Param copy(const std::string &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition PeptideIdentification.h:66
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
void setProgress(SignedSize value) const
Sets the current progress.
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress(UInt64 bytes_processed=0) const
@ CMD
Command line progress.
Definition ProgressLogger.h:44
A more convenient string class.
Definition String.h:34
Base class for TOPP applications.
Definition TOPPBase.h:122
Param const & getParam_() const
Return all parameters relevant to this TOPP tool.
void registerInputFileList_(const String &name, const String &argument, const StringList &default_value, const String &description, bool required=true, bool advanced=false, const StringList &tags=StringList())
Registers a list of input files option.
void addEmptyLine_()
Adds an empty line between registered variables in the documentation.
void registerInputFile_(const String &name, const String &argument, const String &default_value, const String &description, bool required=true, bool advanced=false, const StringList &tags=StringList())
Registers an input file option.
bool getFlag_(const String &name) const
Returns the value of a previously registered flag.
void writeDebug_(const String &text, UInt min_level) const
Writes a string to the log file and to OPENMS_LOG_DEBUG if the debug level is at least min_level.
void setValidFormats_(const String &name, const std::vector< String > &formats, const bool force_OpenMS_format=true)
Sets the formats for an input/output file option or for all members of an input/output file list.
void registerFlag_(const String &name, const String &description, bool advanced=false)
Registers a flag.
StringList getStringList_(const String &name) const
Returns the value of a previously registered StringList.
String getStringOption_(const String &name) const
Returns the value of a previously registered string option (use getOutputDirOption() for output direc...
virtual void registerOptionsAndFlags_()=0
Sets the valid command line options (with argument) and flags (without argument).
DataProcessing getProcessingInfo_(DataProcessing::ProcessingAction action) const
Returns the data processing information.
void registerOutputFile_(const String &name, const String &argument, const String &default_value, const String &description, bool required=true, bool advanced=false)
Registers an output file option.
@ ILLEGAL_PARAMETERS
Definition TOPPBase.h:140
@ EXECUTION_OK
Definition TOPPBase.h:134
void addDataProcessing_(ConsensusMap &map, const DataProcessing &dp) const
Data processing setter for consensus maps.
void writeLogError_(const String &text) const
Writes a string to the log file and to OPENMS_LOG_ERROR.
UInt64 getUniqueId() const
Non-mutable access to unique id - returns the unique id.
Definition UniqueIdInterface.h:78
Size setUniqueId()
Assigns a new, valid unique id. Always returns 1.
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< String > StringList
Vector of String.
Definition ListUtils.h:44
const std::string ADDUCT_GROUP
Definition Constants.h:569
const std::string DC_CHARGE_ADDUCTS
Definition Constants.h:579
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
STL namespace.
Type
Actual file types enum.
Definition FileTypes.h:31
@ CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition FileTypes.h:39
@ FEATUREXML
OpenMS feature file (.featureXML)
Definition FileTypes.h:37