OpenMS  2.5.0
Classes | Public Types | Public Member Functions | Static Public Member Functions | Private Member Functions | Private Attributes | List of all members
IDBoostGraph Class Reference

Creates and maintains a boost graph based on the OpenMS ID datastructures. More...

#include <OpenMS/ANALYSIS/ID/IDBoostGraph.h>

Classes

class  dfs_ccsplit_visitor
 A boost dfs visitor that copies connected components into a vector of graphs. More...
 
class  GetPosteriorVisitor
 
class  LabelVisitor
 Visits nodes in the boost graph (ptrs to an ID Object) and depending on their type creates a label. More...
 
class  PrintAddressVisitor
 
class  SetPosteriorVisitor
 

Public Types

typedef boost::variant< ProteinHit *, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit * > IDPointer
 placeholder for peptides with the same parent proteins or protein groups More...
 
typedef boost::variant< const ProteinHit *, const ProteinGroup *, const PeptideCluster *, const Peptide, const RunIndex, const Charge, const PeptideHit * > IDPointerConst
 
typedef boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > Graph
 
typedef std::vector< Graph > Graphs
 
typedef boost::adjacency_list< boost::setS, boost::vecS, boost::undirectedS, IDPointer > GraphConst
 
typedef boost::graph_traits< Graph >::vertex_descriptor vertex_t
 
typedef boost::graph_traits< Graph >::edge_descriptor edge_t
 
typedef std::set< IDBoostGraph::vertex_t > ProteinNodeSet
 
typedef std::set< IDBoostGraph::vertex_t > PeptideNodeSet
 

Public Member Functions

 IDBoostGraph (ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, bool use_run_info, const boost::optional< const ExperimentalDesign > &ed=boost::optional< const ExperimentalDesign >())
 Constructors. More...
 
 IDBoostGraph (ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_run_info, bool use_unassigned_ids, const boost::optional< const ExperimentalDesign > &ed=boost::optional< const ExperimentalDesign >())
 
void applyFunctorOnCCs (const std::function< unsigned long(Graph &)> &functor)
 Applies the given functor to the connected components (your functor object has to inherit from std::function or be a lambda) More...
 
void applyFunctorOnCCsST (const std::function< void(Graph &)> &functor)
 Applies the given functor to the connected components, single-threaded (your functor object has to inherit from std::function or be a lambda) More...
 
void clusterIndistProteinsAndPeptides ()
 
void clusterIndistProteinsAndPeptidesAndExtendGraph ()
 (under development) As above but adds charge, replicate and sequence layer of nodes (untested) More...
 
void annotateIndistProteins (bool addSingletons=true)
 
void calculateAndAnnotateIndistProteins (bool addSingletons=true)
 
void computeConnectedComponents ()
 Splits the initialized graph into connected components and clears it. More...
 
Size getNrConnectedComponents ()
 Zero means the graph was not split yet. More...
 
const Graph & getComponent (Size cc)
 
ProteinIdentification & getProteinIDs ()
 

Static Public Member Functions

static void printGraph (std::ostream &out, const Graph &fg)
 

Private Member Functions

vertex_t addVertexWithLookup_ (IDPointer &ptr, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map)
 
void annotateIndistProteins_ (const Graph &fg, bool addSingletons)
 internal function to annotate the underlying ID structures based on the given Graph More...
 
void calculateAndAnnotateIndistProteins_ (const Graph &fg, bool addSingletons)
 
void buildGraph_ (ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms)
 
void buildGraph_ (ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids)
 
void addPeptideIDWithAssociatedProteins_ (PeptideIdentification &spectrum, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, const std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms)
 Used during building. More...
 
void addPeptideAndAssociatedProteinsWithRunInfo_ (PeptideIdentification &spectrum, std::unordered_map< unsigned, unsigned > &indexToPrefractionationGroup, std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &vertex_map, std::unordered_map< std::string, ProteinHit * > &accession_map, Size use_top_psms)
 
void buildGraphWithRunInfo_ (ProteinIdentification &proteins, ConsensusMap &cmap, Size use_top_psms, bool use_unassigned_ids, const ExperimentalDesign &ed)
 
void buildGraphWithRunInfo_ (ProteinIdentification &proteins, std::vector< PeptideIdentification > &idedSpectra, Size use_top_psms, const ExperimentalDesign &ed)
 
void getUpstreamNodesNonRecursive (std::queue< vertex_t > &q, Graph graph, int lvl, bool stop_at_first, std::vector< vertex_t > &result)
 
void resolveGraphPeptideCentric_ (Graph &fg)
 
template<class NodeType >
void getDownstreamNodes (vertex_t start, Graph graph, std::vector< NodeType > &result)
 
template<class NodeType >
void getUpstreamNodes (vertex_t start, Graph graph, std::vector< NodeType > &result)
 

Private Attributes

ProteinIdentification & protIDs_
 
Graph g
 the initial boost Graph (will be cleared when split into CCs) More...
 
Graphs ccs_
 the Graph split into connected components More...
 
std::unordered_map< vertex_t, Size > pepHitVtx_to_run_
 
Size nrPrefractionationGroups_ = 0
 

Detailed Description

Creates and maintains a boost graph based on the OpenMS ID datastructures.

For finding connected components and applying functions to them. Currently assumes that all PeptideIdentifications are from the ProteinID run that is given. Please make sure this is right. VERY IMPORTANT NOTE: If you add Visitors here, make sure they do not touch members of the underlying ID objects that are responsible for the graph structure. E.g. the (protein/peptide)_hits vectors or the lists in ProteinGroups. You can set information like scores or metavalues, though.

Member Typedef Documentation

◆ edge_t

typedef boost::graph_traits<Graph>::edge_descriptor edge_t

◆ Graph

typedef boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, IDPointer> Graph

◆ GraphConst

typedef boost::adjacency_list<boost::setS, boost::vecS, boost::undirectedS, IDPointer> GraphConst

◆ Graphs

typedef std::vector<Graph> Graphs

◆ IDPointer

typedef boost::variant<ProteinHit*, ProteinGroup, PeptideCluster, Peptide, RunIndex, Charge, PeptideHit*> IDPointer

placeholder for peptides with the same parent proteins or protein groups

Further node types held by the variant: indistinguishable protein groups; a (currently unmodified) peptide sequence; the run in which a PSM was observed; the charge state in which a PSM was observed.

◆ IDPointerConst

typedef boost::variant<const ProteinHit*, const ProteinGroup*, const PeptideCluster*, const Peptide, const RunIndex, const Charge, const PeptideHit*> IDPointerConst

◆ PeptideNodeSet

◆ ProteinNodeSet

◆ vertex_t

typedef boost::graph_traits<Graph>::vertex_descriptor vertex_t

Constructor & Destructor Documentation

◆ IDBoostGraph() [1/2]

IDBoostGraph ( ProteinIdentification &  proteins,
std::vector< PeptideIdentification > &  idedSpectra,
Size  use_top_psms,
bool  use_run_info,
const boost::optional< const ExperimentalDesign > &  ed = boost::optional< const ExperimentalDesign >() 
)

Constructors.

◆ IDBoostGraph() [2/2]

IDBoostGraph ( ProteinIdentification &  proteins,
ConsensusMap &  cmap,
Size  use_top_psms,
bool  use_run_info,
bool  use_unassigned_ids,
const boost::optional< const ExperimentalDesign > &  ed = boost::optional< const ExperimentalDesign >() 
)

Member Function Documentation

◆ addPeptideAndAssociatedProteinsWithRunInfo_()

void addPeptideAndAssociatedProteinsWithRunInfo_ ( PeptideIdentification &  spectrum,
std::unordered_map< unsigned, unsigned > &  indexToPrefractionationGroup,
std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &  vertex_map,
std::unordered_map< std::string, ProteinHit * > &  accession_map,
Size  use_top_psms 
)
private

◆ addPeptideIDWithAssociatedProteins_()

void addPeptideIDWithAssociatedProteins_ ( PeptideIdentification &  spectrum,
std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &  vertex_map,
const std::unordered_map< std::string, ProteinHit * > &  accession_map,
Size  use_top_psms 
)
private

Used during building.

◆ addVertexWithLookup_()

vertex_t addVertexWithLookup_ ( IDPointer &  ptr,
std::unordered_map< IDPointer, vertex_t, boost::hash< IDPointer >> &  vertex_map 
)
private

Helper function that adds a vertex if it is not present yet; otherwise it returns the vertex that is already present. Needs a temporary, filled vertex_map that is modifiable.

◆ annotateIndistProteins()

void annotateIndistProteins ( bool  addSingletons = true)

Annotate indistinguishable proteins by adding the groups to the underlying ProteinIdentification::ProteinGroups object. This has no effect on the graph itself.

Precondition
Graph must contain ProteinGroup nodes (e.g. with clusterIndistProteinsAndPeptides). Otherwise it does nothing and you should use calculateAndAnnotateIndistProteins instead.
Parameters
addSingletons  if you want to annotate groups with just one protein entry

◆ annotateIndistProteins_()

void annotateIndistProteins_ ( const Graph &  fg,
bool  addSingletons 
)
private

internal function to annotate the underlying ID structures based on the given Graph

◆ applyFunctorOnCCs()

void applyFunctorOnCCs ( const std::function< unsigned long(Graph &)> &  functor)

Applies the given functor to the connected components (your functor object has to inherit from std::function or be a lambda)

◆ applyFunctorOnCCsST()

void applyFunctorOnCCsST ( const std::function< void(Graph &)> &  functor)

Applies the given functor to the connected components, single-threaded (your functor object has to inherit from std::function or be a lambda)

◆ buildGraph_() [1/2]

void buildGraph_ ( ProteinIdentification &  proteins,
ConsensusMap &  cmap,
Size  use_top_psms,
bool  use_unassigned_ids 
)
private

◆ buildGraph_() [2/2]

void buildGraph_ ( ProteinIdentification &  proteins,
std::vector< PeptideIdentification > &  idedSpectra,
Size  use_top_psms 
)
private

Initialize and store the graph. IMPORTANT: Once the graph is built, editing members like (protein/peptide)_hits_ will invalidate it!

Parameters
proteins  ProteinIdentification object storing IDs and groups
idedSpectra  vector of PeptideIdentifications with links to the proteins and PSMs in its PeptideHits
use_top_psms  Nr of top PSMs used per spectrum (<= 0 means all)
Todo:
we could include building the graph in important "main" functions like inferPosteriors to make the methods safer, but it is also nice to be able to reuse the graph

◆ buildGraphWithRunInfo_() [1/2]

void buildGraphWithRunInfo_ ( ProteinIdentification &  proteins,
ConsensusMap &  cmap,
Size  use_top_psms,
bool  use_unassigned_ids,
const ExperimentalDesign &  ed 
)
private

Initialize and store the graph. Also stores run information to later group peptides more efficiently. IMPORTANT: Once the graph is built, editing members like (protein/peptide)_hits_ will invalidate it!

Parameters
use_top_psms  Nr of top PSMs used per spectrum (<= 0 means all)
Todo:
we could include building the graph in important "main" functions like inferPosteriors to make the methods safer, but it is also nice to be able to reuse the graph

◆ buildGraphWithRunInfo_() [2/2]

void buildGraphWithRunInfo_ ( ProteinIdentification &  proteins,
std::vector< PeptideIdentification > &  idedSpectra,
Size  use_top_psms,
const ExperimentalDesign &  ed 
)
private

◆ calculateAndAnnotateIndistProteins()

void calculateAndAnnotateIndistProteins ( bool  addSingletons = true)

Annotate indistinguishable proteins by adding the groups to the underlying ProteinIdentification::ProteinGroups object. This has no effect on the graph itself.

Parameters
addSingletons  if you want to annotate groups with just one protein entry

◆ calculateAndAnnotateIndistProteins_()

void calculateAndAnnotateIndistProteins_ ( const Graph &  fg,
bool  addSingletons 
)
private

◆ clusterIndistProteinsAndPeptides()

void clusterIndistProteinsAndPeptides ( )

Add intermediate nodes to the graph that represent indistinguishable protein groups and peptides with the same parents. This will save computation time and avoid oscillations later on.

◆ clusterIndistProteinsAndPeptidesAndExtendGraph()

void clusterIndistProteinsAndPeptidesAndExtendGraph ( )

(under development) As above but adds charge, replicate and sequence layer of nodes (untested)

◆ computeConnectedComponents()

void computeConnectedComponents ( )

Splits the initialized graph into connected components and clears it.

◆ getComponent()

const Graph& getComponent ( Size  cc)

◆ getDownstreamNodes()

void getDownstreamNodes ( vertex_t  start,
Graph  graph,
std::vector< NodeType > &  result 
)
inline private

◆ getNrConnectedComponents()

Size getNrConnectedComponents ( )

Zero means the graph was not split yet.

◆ getProteinIDs()

ProteinIdentification& getProteinIDs ( )

◆ getUpstreamNodes()

void getUpstreamNodes ( vertex_t  start,
Graph  graph,
std::vector< NodeType > &  result 
)
inline private

◆ getUpstreamNodesNonRecursive()

void getUpstreamNodesNonRecursive ( std::queue< vertex_t > &  q,
Graph  graph,
int  lvl,
bool  stop_at_first,
std::vector< vertex_t > &  result 
)
private

◆ printGraph()

static void printGraph ( std::ostream &  out,
const Graph &  fg 
)
static

◆ resolveGraphPeptideCentric_()

void resolveGraphPeptideCentric_ ( Graph &  fg)
private

Member Data Documentation

◆ ccs_

Graphs ccs_
private

the Graph split into connected components

◆ g

Graph g
private

the initial boost Graph (will be cleared when split into CCs)

◆ nrPrefractionationGroups_

Size nrPrefractionationGroups_ = 0
private

This basically stores the number of different values in pepHitVtx_to_run_. A prefractionation group (previously called run) is a unique combination of all non-fractionation-related entries in the experimental design, i.e. one (sub-)experiment before fractionation.

◆ pepHitVtx_to_run_

std::unordered_map<vertex_t, Size> pepHitVtx_to_run_
private

If a graph is built with run information, this will store the run each peptide hit vertex belongs to. Important for extending the graph.

◆ protIDs_

ProteinIdentification& protIDs_
private