OpenMS
|
Representation of a protein identification run. More...
#include <OpenMS/METADATA/ProteinIdentification.h>
Classes | |
struct | Mapping |
two way mapping from ms-run-path to protID|pepID-identifier More... | |
class | ProteinGroup |
Bundles multiple (e.g. indistinguishable) proteins in a group. More... | |
struct | SearchParameters |
Search parameters of the DB search. More... | |
Public Types | |
enum | PeakMassType { MONOISOTOPIC , AVERAGE , SIZE_OF_PEAKMASSTYPE } |
Peak mass type. More... | |
typedef ProteinHit | HitType |
Hit type definition. More... | |
Public Member Functions | |
Constructors, destructors, assignment operator | |
ProteinIdentification () | |
Default constructor. More... | |
ProteinIdentification (const ProteinIdentification &)=default | |
Copy constructor. More... | |
ProteinIdentification (ProteinIdentification &&)=default | |
Move constructor. More... | |
virtual | ~ProteinIdentification () |
Destructor. More... | |
ProteinIdentification & | operator= (const ProteinIdentification &)=default |
Assignment operator. More... | |
ProteinIdentification & | operator= (ProteinIdentification &&)=default |
Move assignment operator. More... | |
bool | operator== (const ProteinIdentification &rhs) const |
Equality operator. More... | |
bool | operator!= (const ProteinIdentification &rhs) const |
Inequality operator. More... | |
Protein hit information (public members) | |
const std::vector< ProteinHit > & | getHits () const |
Returns the protein hits. More... | |
std::vector< ProteinHit > & | getHits () |
Returns the protein hits (mutable) More... | |
void | insertHit (const ProteinHit &input) |
Appends a protein hit. More... | |
void | insertHit (ProteinHit &&input) |
Appends a protein hit. More... | |
void | setHits (const std::vector< ProteinHit > &hits) |
Sets the protein hits. More... | |
std::vector< ProteinHit >::iterator | findHit (const String &accession) |
Finds a protein hit by accession (returns past-the-end iterator if not found) More... | |
const std::vector< ProteinGroup > & | getProteinGroups () const |
Returns the protein groups. More... | |
std::vector< ProteinGroup > & | getProteinGroups () |
Returns the protein groups (mutable) More... | |
void | insertProteinGroup (const ProteinGroup &group) |
Appends a new protein group. More... | |
const std::vector< ProteinGroup > & | getIndistinguishableProteins () const |
Returns the indistinguishable proteins. More... | |
std::vector< ProteinGroup > & | getIndistinguishableProteins () |
Returns the indistinguishable proteins (mutable) More... | |
void | insertIndistinguishableProteins (const ProteinGroup &group) |
Appends new indistinguishable proteins. More... | |
void | fillIndistinguishableGroupsWithSingletons () |
Appends singleton groups (with the current score) for every yet ungrouped protein hit. More... | |
double | getSignificanceThreshold () const |
Returns the protein significance threshold value. More... | |
void | setSignificanceThreshold (double value) |
Sets the protein significance threshold value. More... | |
const String & | getScoreType () const |
Returns the protein score type. More... | |
void | setScoreType (const String &type) |
Sets the protein score type. More... | |
bool | isHigherScoreBetter () const |
Returns true if a higher score represents a better score. More... | |
void | setHigherScoreBetter (bool higher_is_better) |
Sets the orientation of the score (is higher better?) More... | |
void | sort () |
Sorts the protein hits according to their score. More... | |
void | assignRanks () |
Sorts the protein hits by score and assigns ranks (best score has rank 1) More... | |
void | computeCoverage (const std::vector< PeptideIdentification > &pep_ids) |
Compute the coverage (in percent) of all ProteinHits given PeptideHits. More... | |
void | computeCoverage (const ConsensusMap &cmap, bool use_unassigned_ids) |
void | computeModifications (const std::vector< PeptideIdentification > &pep_ids, const StringList &skip_modifications) |
Compute the modifications of all ProteinHits given PeptideHits. More... | |
void | computeModifications (const ConsensusMap &cmap, const StringList &skip_modifications, bool use_unassigned_ids) |
General information | |
const DateTime & | getDateTime () const |
Returns the date of the protein identification run. More... | |
void | setDateTime (const DateTime &date) |
Sets the date of the protein identification run. More... | |
void | setSearchEngine (const String &search_engine) |
Sets the search engine type. More... | |
const String & | getSearchEngine () const |
Returns the type of search engine used. More... | |
const String | getOriginalSearchEngineName () const |
Return the type of search engine that was first applied (e.g., before percolator or consensusID) or "Unknown". More... | |
void | setSearchEngineVersion (const String &search_engine_version) |
Sets the search engine version. More... | |
const String & | getSearchEngineVersion () const |
Returns the search engine version. More... | |
void | setInferenceEngine (const String &search_engine) |
Sets the inference engine type. More... | |
const String | getInferenceEngine () const |
Returns the type of inference engine used. More... | |
void | setInferenceEngineVersion (const String &inference_engine_version) |
Sets the inference engine version. More... | |
const String | getInferenceEngineVersion () const |
Returns the inference engine version. More... | |
void | setSearchParameters (const SearchParameters &search_parameters) |
Sets the search parameters. More... | |
void | setSearchParameters (SearchParameters &&search_parameters) |
Sets the search parameters (move) More... | |
const SearchParameters & | getSearchParameters () const |
Returns the search parameters. More... | |
SearchParameters & | getSearchParameters () |
Returns the search parameters (mutable) More... | |
const String & | getIdentifier () const |
Returns the identifier. More... | |
void | setIdentifier (const String &id) |
Sets the identifier. More... | |
void | setPrimaryMSRunPath (const StringList &s, bool raw=false) |
void | setPrimaryMSRunPath (const StringList &s, MSExperiment &e) |
Sets the file path to the primary MS run, but tries to use the mzML annotated in the MSExperiment. More... | |
void | addPrimaryMSRunPath (const String &s, bool raw=false) |
void | addPrimaryMSRunPath (const StringList &s, bool raw=false) |
void | getPrimaryMSRunPath (StringList &output, bool raw=false) const |
Size | nrPrimaryMSRunPaths (bool raw=false) const |
Returns the number of primary MS runs involved in this ID run. More... | |
bool | hasInferenceData () const |
bool | hasInferenceEngineAsSearchEngine () const |
Checks if the search engine name matches an inference engine known to OpenMS. More... | |
bool | peptideIDsMergeable (const ProteinIdentification &id_run, const String &experiment_type) const |
std::vector< std::pair< String, String > > | getSearchEngineSettingsAsPairs (const String &se="") const |
void | copyMetaDataOnly (const ProteinIdentification &) |
Copies only metadata (no protein hits or protein groups) More... | |
Public Member Functions inherited from MetaInfoInterface | |
MetaInfoInterface () | |
Constructor. More... | |
MetaInfoInterface (const MetaInfoInterface &rhs) | |
Copy constructor. More... | |
MetaInfoInterface (MetaInfoInterface &&) noexcept | |
Move constructor. More... | |
~MetaInfoInterface () | |
Destructor. More... | |
MetaInfoInterface & | operator= (const MetaInfoInterface &rhs) |
Assignment operator. More... | |
MetaInfoInterface & | operator= (MetaInfoInterface &&) noexcept |
Move assignment operator. More... | |
void | swap (MetaInfoInterface &rhs) |
Swap contents. More... | |
bool | operator== (const MetaInfoInterface &rhs) const |
Equality operator. More... | |
bool | operator!= (const MetaInfoInterface &rhs) const |
Inequality operator. More... | |
const DataValue & | getMetaValue (const String &name) const |
Returns the value corresponding to a string, or DataValue::EMPTY if not found. More... | |
DataValue | getMetaValue (const String &name, const DataValue &default_value) const |
Returns the value corresponding to a string, or a default value (e.g.: DataValue::EMPTY) if not found More... | |
const DataValue & | getMetaValue (UInt index) const |
Returns the value corresponding to the index, or DataValue::EMPTY if not found. More... | |
DataValue | getMetaValue (UInt index, const DataValue &default_value) const |
Returns the value corresponding to the index, or a default value (e.g.: DataValue::EMPTY) if not found More... | |
bool | metaValueExists (const String &name) const |
Returns whether an entry with the given name exists. More... | |
bool | metaValueExists (UInt index) const |
Returns whether an entry with the given index exists. More... | |
void | setMetaValue (const String &name, const DataValue &value) |
Sets the DataValue corresponding to a name. More... | |
void | setMetaValue (UInt index, const DataValue &value) |
Sets the DataValue corresponding to an index. More... | |
void | removeMetaValue (const String &name) |
Removes the DataValue corresponding to name if it exists. More... | |
void | removeMetaValue (UInt index) |
Removes the DataValue corresponding to index if it exists. More... | |
void | addMetaValues (const MetaInfoInterface &from) |
function to copy all meta values from one object to this one More... | |
void | getKeys (std::vector< String > &keys) const |
Fills the given vector with a list of all keys for which a value is set. More... | |
void | getKeys (std::vector< UInt > &keys) const |
Fills the given vector with a list of all keys for which a value is set. More... | |
bool | isMetaEmpty () const |
Returns if the MetaInfo is empty. More... | |
void | clearMetaInfo () |
Removes all meta values. More... | |
Static Public Attributes | |
static const std::string | NamesOfPeakMassType [SIZE_OF_PEAKMASSTYPE] |
Names corresponding to peak mass types. More... | |
Protected Attributes | |
General information (search engine, parameters and database) | |
String | id_ |
String | search_engine_ |
String | search_engine_version_ |
SearchParameters | search_parameters_ |
DateTime | date_ |
Protected Attributes inherited from MetaInfoInterface | |
MetaInfo * | meta_ |
Pointer to the MetaInfo object (0 by default) More... | |
Protein hit information (protected members) | |
String | protein_score_type_ |
bool | higher_score_better_ |
std::vector< ProteinHit > | protein_hits_ |
std::vector< ProteinGroup > | protein_groups_ |
std::vector< ProteinGroup > | indistinguishable_proteins_ |
Indistinguishable proteins: accessions [0] is "group leader", probability is meaningless. More... | |
double | protein_significance_threshold_ |
void | computeCoverageFromEvidenceMapping_ (const std::unordered_map< String, std::set< PeptideEvidence >> &map) |
void | fillEvidenceMapping_ (std::unordered_map< String, std::set< PeptideEvidence > > &map_acc_2_evidence, const std::vector< PeptideIdentification > &pep_ids) const |
void | fillModMapping_ (const std::vector< PeptideIdentification > &pep_ids, const StringList &skip_modifications, std::unordered_map< String, std::set< std::pair< Size, ResidueModification >>> &prot2mod) const |
Additional Inherited Members | |
Static Public Member Functions inherited from MetaInfoInterface | |
static MetaInfoRegistry & | metaRegistry () |
Returns a reference to the MetaInfoRegistry. More... | |
Protected Member Functions inherited from MetaInfoInterface | |
void | createIfNotExists_ () |
Creates the MetaInfo object if it does not exist. More... | |
Representation of a protein identification run.
This class stores the general information and the protein hits of a protein identification run.
The actual peptide hits are stored in PeptideIdentification instances that are part of spectra or features.
In order to be able to connect the ProteinIdentification and the corresponding peptide identifications, both classes have a string identifier. We recommend using the search engine name and the date as identifier. Setting this identifier is especially important when there are several protein identification runs for a map, i.e. several ProteinIdentification instances.
typedef ProteinHit HitType |
Hit type definition.
enum PeakMassType |
Default constructor.
|
default |
Copy constructor.
|
default |
Move constructor.
|
virtual |
Destructor.
void addPrimaryMSRunPath | ( | const String & | s, |
bool | raw = false |
||
) |
void addPrimaryMSRunPath | ( | const StringList & | s, |
bool | raw = false |
||
) |
void assignRanks | ( | ) |
Sorts the protein hits by score and assigns ranks (best score has rank 1)
void computeCoverage | ( | const ConsensusMap & | cmap, |
bool | use_unassigned_ids | ||
) |
void computeCoverage | ( | const std::vector< PeptideIdentification > & | pep_ids | ) |
Compute the coverage (in percent) of all ProteinHits given PeptideHits.
Exception::MissingInformation | if ProteinHits do not have sequence information |
Does not return anything but stores the coverage inside the ProteinHit objects
|
private |
void computeModifications | ( | const ConsensusMap & | cmap, |
const StringList & | skip_modifications, | ||
bool | use_unassigned_ids | ||
) |
void computeModifications | ( | const std::vector< PeptideIdentification > & | pep_ids, |
const StringList & | skip_modifications | ||
) |
Compute the modifications of all ProteinHits given PeptideHits.
For every protein accession, the pair of position and modification is returned. Because fixed modifications might not be of interest, a list can be provided to skip those.
void copyMetaDataOnly | ( | const ProteinIdentification & | ) |
Copies only metadata (no protein hits or protein groups)
|
private |
void fillIndistinguishableGroupsWithSingletons | ( | ) |
Appends singleton groups (with the current score) for every yet ungrouped protein hit.
|
private |
std::vector<ProteinHit>::iterator findHit | ( | const String & | accession | ) |
Finds a protein hit by accession (returns past-the-end iterator if not found)
const DateTime& getDateTime | ( | ) | const |
Returns the date of the protein identification run.
std::vector<ProteinHit>& getHits | ( | ) |
Returns the protein hits (mutable)
const std::vector<ProteinHit>& getHits | ( | ) | const |
Returns the protein hits.
const String& getIdentifier | ( | ) | const |
Returns the identifier.
std::vector<ProteinGroup>& getIndistinguishableProteins | ( | ) |
Returns the indistinguishable proteins (mutable)
const std::vector<ProteinGroup>& getIndistinguishableProteins | ( | ) | const |
Returns the indistinguishable proteins.
const String getInferenceEngine | ( | ) | const |
Returns the type of inference engine used.
const String getInferenceEngineVersion | ( | ) | const |
Returns the inference engine version.
const String getOriginalSearchEngineName | ( | ) | const |
Return the type of search engine that was first applied (e.g., before percolator or consensusID) or "Unknown".
void getPrimaryMSRunPath | ( | StringList & | output, |
bool | raw = false |
||
) | const |
Get the file paths to the primary MS runs
[out] | output | The file paths |
raw | Get raw files (or equivalent) instead of mzMLs |
std::vector<ProteinGroup>& getProteinGroups | ( | ) |
Returns the protein groups (mutable)
const std::vector<ProteinGroup>& getProteinGroups | ( | ) | const |
Returns the protein groups.
const String& getScoreType | ( | ) | const |
Returns the protein score type.
const String& getSearchEngine | ( | ) | const |
Returns the type of search engine used.
std::vector<std::pair<String,String> > getSearchEngineSettingsAsPairs | ( | const String & | se = "" | ) | const |
Collects all search engine settings registered for the given search engine se. If se is empty, the main search engine is used, otherwise it will also search the metavalues.
const String& getSearchEngineVersion | ( | ) | const |
Returns the search engine version.
SearchParameters& getSearchParameters | ( | ) |
Returns the search parameters (mutable)
const SearchParameters& getSearchParameters | ( | ) | const |
Returns the search parameters.
double getSignificanceThreshold | ( | ) | const |
Returns the protein significance threshold value.
bool hasInferenceData | ( | ) | const |
Checks if this object has inference data. Looks for "InferenceEngine" metavalue. If not, falls back to old behaviour of reading the search engine name.
bool hasInferenceEngineAsSearchEngine | ( | ) | const |
Checks if the search engine name matches an inference engine known to OpenMS.
void insertHit | ( | const ProteinHit & | input | ) |
Appends a protein hit.
void insertHit | ( | ProteinHit && | input | ) |
Appends a protein hit.
void insertIndistinguishableProteins | ( | const ProteinGroup & | group | ) |
Appends new indistinguishable proteins.
void insertProteinGroup | ( | const ProteinGroup & | group | ) |
Appends a new protein group.
bool isHigherScoreBetter | ( | ) | const |
Returns true if a higher score represents a better score.
Size nrPrimaryMSRunPaths | ( | bool | raw = false | ) | const |
Returns the number of primary MS runs involved in this ID run.
bool operator!= | ( | const ProteinIdentification & | rhs | ) | const |
Inequality operator.
|
default |
Assignment operator.
|
default |
Move assignment operator.
bool operator== | ( | const ProteinIdentification & | rhs | ) | const |
Equality operator.
bool peptideIDsMergeable | ( | const ProteinIdentification & | id_run, |
const String & | experiment_type | ||
) | const |
Checks if the peptide IDs of this IDRun are mergeable with another id_run given an experiment_type. Checks search engine and search engine settings.
void setDateTime | ( | const DateTime & | date | ) |
Sets the date of the protein identification run.
void setHigherScoreBetter | ( | bool | higher_is_better | ) |
Sets the orientation of the score (is higher better?)
void setHits | ( | const std::vector< ProteinHit > & | hits | ) |
Sets the protein hits.
Use IDFilter::updateProteinGroups to update the groupings.
void setIdentifier | ( | const String & | id | ) |
Sets the identifier.
void setInferenceEngine | ( | const String & | search_engine | ) |
Sets the inference engine type.
void setInferenceEngineVersion | ( | const String & | inference_engine_version | ) |
Sets the inference engine version.
void setPrimaryMSRunPath | ( | const StringList & | s, |
bool | raw = false |
||
) |
Set the file paths to the primary MS runs (usually the mzML files obtained after data conversion from raw files)
s | The file paths |
raw | Store paths to the raw files (or equivalent) rather than mzMLs |
void setPrimaryMSRunPath | ( | const StringList & | s, |
MSExperiment & | e | ||
) |
Sets the file path to the primary MS run, but tries to use the mzML annotated in the MSExperiment.
void setScoreType | ( | const String & | type | ) |
Sets the protein score type.
void setSearchEngine | ( | const String & | search_engine | ) |
Sets the search engine type.
void setSearchEngineVersion | ( | const String & | search_engine_version | ) |
Sets the search engine version.
void setSearchParameters | ( | const SearchParameters & | search_parameters | ) |
Sets the search parameters.
void setSearchParameters | ( | SearchParameters && | search_parameters | ) |
Sets the search parameters (move)
void setSignificanceThreshold | ( | double | value | ) |
Sets the protein significance threshold value.
void sort | ( | ) |
Sorts the protein hits according to their score.
|
protected |
|
protected |
|
protected |
|
protected |
Indistinguishable proteins: accessions[0] is "group leader", probability is meaningless.
|
static |
Names corresponding to peak mass types.
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |