OpenMS
|
Used to load and store PepXML files. More...
#include <OpenMS/FORMAT/PepXMLFile.h>
Classes | |
struct | AminoAcidModification |
Public Member Functions | |
PepXMLFile () | |
Constructor. More... | |
~PepXMLFile () override | |
Destructor. More... | |
void | load (const String &filename, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const String &experiment_name, const SpectrumMetaDataLookup &lookup) |
Loads peptide sequences with modifications out of a PepXML file. More... | |
void | load (const String &filename, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, const String &experiment_name="") |
load function with empty defaults for some parameters (see above) More... | |
void | store (const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids, const String &mz_file="", const String &mz_name="", bool peptideprophet_analyzed=false, double rt_tolerance=0.01) |
Stores idXML as PepXML file. More... | |
void | keepNativeSpectrumName (bool keep) |
Whether we should keep the native spectrum name of the pepXML. More... | |
void | setPreferredFixedModifications (const std::vector< const ResidueModification * > &mods) |
sets the preferred fixed modifications More... | |
void | setPreferredVariableModifications (const std::vector< const ResidueModification * > &mods) |
sets the preferred variable modifications More... | |
void | setParseUnknownScores (bool parse_unknown_scores) |
sets if during load, unknown scores should be parsed More... | |
Public Member Functions inherited from XMLFile | |
XMLFile () | |
Default constructor. More... | |
XMLFile (const String &schema_location, const String &version) | |
Constructor that sets the schema location. More... | |
virtual | ~XMLFile () |
Destructor. More... | |
bool | isValid (const String &filename, std::ostream &os) |
Checks if a file validates against the XML schema. More... | |
const String & | getVersion () const |
return the version of the schema More... | |
Protected Member Functions | |
void | endElement (const XMLCh *const, const XMLCh *const, const XMLCh *const qname) override |
Docu in base class. More... | |
void | startElement (const XMLCh *const, const XMLCh *const, const XMLCh *const qname, const xercesc::Attributes &attributes) override |
Docu in base class. More... | |
Protected Member Functions inherited from XMLHandler | |
void | writeUserParam_ (const String &tag_name, std::ostream &os, const MetaInfoInterface &meta, UInt indent) const |
Writes the content of MetaInfoInterface to the file. More... | |
Int | asInt_ (const String &in) const |
Conversion of a String to an integer value. More... | |
Int | asInt_ (const XMLCh *in) const |
Conversion of a Xerces string to an integer value. More... | |
UInt | asUInt_ (const String &in) const |
Conversion of a String to an unsigned integer value. More... | |
double | asDouble_ (const String &in) const |
Conversion of a String to a double value. More... | |
float | asFloat_ (const String &in) const |
Conversion of a String to a float value. More... | |
bool | asBool_ (const String &in) const |
Conversion of a string to a boolean value. More... | |
DateTime | asDateTime_ (String date_string) const |
Conversion of a xs:datetime string to a DateTime value. More... | |
bool | equal_ (const XMLCh *a, const XMLCh *b) const |
Returns if two Xerces strings are equal. More... | |
SignedSize | cvStringToEnum_ (const Size section, const String &term, const char *message, const SignedSize result_on_error=0) |
String | attributeAsString_ (const xercesc::Attributes &a, const char *name) const |
Converts an attribute to a String. More... | |
Int | attributeAsInt_ (const xercesc::Attributes &a, const char *name) const |
Converts an attribute to an Int. More... | |
double | attributeAsDouble_ (const xercesc::Attributes &a, const char *name) const |
Converts an attribute to a double. More... | |
DoubleList | attributeAsDoubleList_ (const xercesc::Attributes &a, const char *name) const |
Converts an attribute to a DoubleList. More... | |
IntList | attributeAsIntList_ (const xercesc::Attributes &a, const char *name) const |
Converts an attribute to an IntList. More... | |
StringList | attributeAsStringList_ (const xercesc::Attributes &a, const char *name) const |
Converts an attribute to a StringList. More... | |
bool | optionalAttributeAsString_ (String &value, const xercesc::Attributes &a, const char *name) const |
Assigns the attribute content to the String value if the attribute is present. More... | |
bool | optionalAttributeAsInt_ (Int &value, const xercesc::Attributes &a, const char *name) const |
Assigns the attribute content to the Int value if the attribute is present. More... | |
bool | optionalAttributeAsUInt_ (UInt &value, const xercesc::Attributes &a, const char *name) const |
Assigns the attribute content to the UInt value if the attribute is present. More... | |
bool | optionalAttributeAsDouble_ (double &value, const xercesc::Attributes &a, const char *name) const |
Assigns the attribute content to the double value if the attribute is present. More... | |
bool | optionalAttributeAsDoubleList_ (DoubleList &value, const xercesc::Attributes &a, const char *name) const |
Assigns the attribute content to the DoubleList value if the attribute is present. More... | |
bool | optionalAttributeAsStringList_ (StringList &value, const xercesc::Attributes &a, const char *name) const |
Assigns the attribute content to the StringList value if the attribute is present. More... | |
bool | optionalAttributeAsIntList_ (IntList &value, const xercesc::Attributes &a, const char *name) const |
Assigns the attribute content to the IntList value if the attribute is present. More... | |
String | attributeAsString_ (const xercesc::Attributes &a, const XMLCh *name) const |
Converts an attribute to a String. More... | |
Int | attributeAsInt_ (const xercesc::Attributes &a, const XMLCh *name) const |
Converts an attribute to an Int. More... | |
double | attributeAsDouble_ (const xercesc::Attributes &a, const XMLCh *name) const |
Converts an attribute to a double. More... | |
DoubleList | attributeAsDoubleList_ (const xercesc::Attributes &a, const XMLCh *name) const |
Converts an attribute to a DoubleList. More... | |
IntList | attributeAsIntList_ (const xercesc::Attributes &a, const XMLCh *name) const |
Converts an attribute to a IntList. More... | |
StringList | attributeAsStringList_ (const xercesc::Attributes &a, const XMLCh *name) const |
Converts an attribute to a StringList. More... | |
bool | optionalAttributeAsString_ (String &value, const xercesc::Attributes &a, const XMLCh *name) const |
Assigns the attribute content to the String value if the attribute is present. More... | |
bool | optionalAttributeAsInt_ (Int &value, const xercesc::Attributes &a, const XMLCh *name) const |
Assigns the attribute content to the Int value if the attribute is present. More... | |
bool | optionalAttributeAsUInt_ (UInt &value, const xercesc::Attributes &a, const XMLCh *name) const |
Assigns the attribute content to the UInt value if the attribute is present. More... | |
bool | optionalAttributeAsDouble_ (double &value, const xercesc::Attributes &a, const XMLCh *name) const |
Assigns the attribute content to the double value if the attribute is present. More... | |
bool | optionalAttributeAsDoubleList_ (DoubleList &value, const xercesc::Attributes &a, const XMLCh *name) const |
Assigns the attribute content to the DoubleList value if the attribute is present. More... | |
bool | optionalAttributeAsIntList_ (IntList &value, const xercesc::Attributes &a, const XMLCh *name) const |
Assigns the attribute content to the IntList value if the attribute is present. More... | |
bool | optionalAttributeAsStringList_ (StringList &value, const xercesc::Attributes &a, const XMLCh *name) const |
Assigns the attribute content to the StringList value if the attribute is present. More... | |
XMLHandler (const String &filename, const String &version) | |
Default constructor. More... | |
~XMLHandler () override | |
Destructor. More... | |
void | reset () |
Release internal memory used for parsing. More... | |
void | fatalError (const xercesc::SAXParseException &exception) override |
void | error (const xercesc::SAXParseException &exception) override |
void | warning (const xercesc::SAXParseException &exception) override |
void | fatalError (ActionMode mode, const String &msg, UInt line=0, UInt column=0) const |
Fatal error handler. Throws a ParseError exception. More... | |
void | error (ActionMode mode, const String &msg, UInt line=0, UInt column=0) const |
Error handler for recoverable errors. More... | |
void | warning (ActionMode mode, const String &msg, UInt line=0, UInt column=0) const |
Warning handler. More... | |
void | characters (const XMLCh *const chars, const XMLSize_t length) override |
Parsing method for character data. More... | |
void | startElement (const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname, const xercesc::Attributes &attrs) override |
Parsing method for opening tags. More... | |
void | endElement (const XMLCh *const uri, const XMLCh *const localname, const XMLCh *const qname) override |
Parsing method for closing tags. More... | |
virtual void | writeTo (std::ostream &) |
Writes the contents to a stream. More... | |
virtual LOADDETAIL | getLoadDetail () const |
Handlers that support partial loading implement this method. More... | |
virtual void | setLoadDetail (const LOADDETAIL d) |
Handlers that support partial loading implement this method. More... | |
DataValue | cvParamToValue (const ControlledVocabulary &cv, const String &parent_tag, const String &accession, const String &name, const String &value, const String &unit_accession) const |
Convert the value of a <cvParam value=.> (as commonly found in PSI schemata) to the DataValue with the correct type (e.g. int) according to the type stored in the CV (usually PSI-MS CV), as well as set its unit. More... | |
DataValue | cvParamToValue (const ControlledVocabulary &cv, const CVTerm &raw_term) const |
Convert the value of a <cvParam value=.> (as commonly found in PSI schemata) to the DataValue with the correct type (e.g. int) according to the type stored in the CV (usually PSI-MS CV), as well as set its unit. More... | |
void | checkUniqueIdentifiers_ (const std::vector< ProteinIdentification > &prot_ids) const |
Protected Member Functions inherited from XMLFile | |
void | parse_ (const String &filename, XMLHandler *handler) |
Parses the XML file given by filename using the handler given by handler. More... | |
void | parseBuffer_ (const std::string &buffer, XMLHandler *handler) |
Parses the in-memory buffer given by buffer using the handler given by handler. More... | |
void | save_ (const String &filename, XMLHandler *handler) const |
Stores the contents of the XML handler given by handler in the file given by filename. More... | |
void | enforceEncoding_ (const String &encoding) |
XMLFile () | |
Default constructor. More... | |
XMLFile (const String &schema_location, const String &version) | |
Constructor that sets the schema location. More... | |
virtual | ~XMLFile () |
Destructor. More... | |
bool | isValid (const String &filename, std::ostream &os) |
Checks if a file validates against the XML schema. More... | |
const String & | getVersion () const |
return the version of the schema More... | |
Private Member Functions | |
void | makeScanMap_ () |
Fill scan_map_. More... | |
void | readRTMZCharge_ (const xercesc::Attributes &attributes) |
Read RT, m/z, charge information from attributes of "spectrum_query". More... | |
bool | lookupAddFromHeader_ (double modification_mass, Size modification_position, std::vector< AminoAcidModification > const &header_mods) |
Private Attributes | |
std::vector< ProteinIdentification > * | proteins_ |
Pointer to the list of identified proteins. More... | |
std::vector< PeptideIdentification > * | peptides_ |
Pointer to the list of identified peptides. More... | |
const SpectrumMetaDataLookup * | lookup_ |
Pointer to wrapper for looking up spectrum meta data. More... | |
String | exp_name_ |
Name of the associated experiment (filename of the data file, extension will be removed) More... | |
String | search_engine_ |
Set name of search engine. More... | |
String | native_spectrum_name_ |
Several optional attributes of spectrum_query. More... | |
String | experiment_label_ |
String | swath_assay_ |
String | status_ |
bool | use_precursor_data_ {} |
Get RT and m/z for peptide ID from precursor scan (should only matter for RT)? More... | |
std::map< Size, Size > | scan_map_ |
Mapping between scan number in the pepXML file and index in the corresponding MSExperiment. More... | |
Element | hydrogen_ |
Hydrogen data (for mass types) More... | |
bool | analysis_summary_ |
Are we currently in an "analysis_summary" element (should be skipped)? More... | |
bool | keep_native_name_ |
Whether we should keep the native spectrum name of the pepXML. More... | |
bool | search_score_summary_ |
Are we currently in a "search_score_summary" element (should be skipped)? More... | |
bool | search_summary_ {} |
Are we currently in a "search_summary" element (should be skipped)? More... | |
bool | wrong_experiment_ {} |
Do current entries belong to the experiment of interest (for pepXML files that bundle results from different experiments)? More... | |
bool | seen_experiment_ {} |
Have we seen the experiment of interest at all? More... | |
bool | checked_base_name_ {} |
Have we checked the "base_name" attribute in the "msms_run_summary" element? More... | |
bool | has_decoys_ {} |
Does the file have decoys (e.g. from Comet's internal decoy search) More... | |
bool | parse_unknown_scores_ {} |
Also parse unknown scores as metavalues? More... | |
String | decoy_prefix_ |
In case it has decoys, what is the prefix? More... | |
String | current_base_name_ |
current base name More... | |
std::vector< std::vector< ProteinIdentification >::iterator > | current_proteins_ |
References to currently active ProteinIdentifications. More... | |
ProteinIdentification::SearchParameters | params_ |
Search parameters of the current identification run. More... | |
String | enzyme_ |
Enzyme name associated with the current identification run. More... | |
String | enzyme_cuttingsite_ |
PeptideIdentification | current_peptide_ |
PeptideIdentification instance currently being processed. More... | |
PeptideHit::PepXMLAnalysisResult | current_analysis_result_ |
Analysis result instance currently being processed. More... | |
PeptideHit | peptide_hit_ |
PeptideHit instance currently being processed. More... | |
String | current_sequence_ |
Sequence of the current peptide hit. More... | |
double | rt_ {} |
RT and m/z of current PeptideIdentification (=spectrum) More... | |
double | mz_ {} |
Size | scannr_ {} |
1-based scan nr. of current PeptideIdentification (=spectrum). Scannr is usually from the start_scan attribute More... | |
Int | charge_ {} |
Precursor ion charge. More... | |
UInt | search_id_ {} |
ID of current search result. More... | |
String | prot_id_ |
Identifier linking PeptideIdentifications and ProteinIdentifications. More... | |
DateTime | date_ |
Date the pepXML file was generated. More... | |
double | hydrogen_mass_ {} |
Mass of a hydrogen atom (monoisotopic/average depending on case) More... | |
std::vector< std::pair< const ResidueModification *, Size > > | current_modifications_ |
The modifications of the current peptide hit (position is 1-based) More... | |
std::vector< AminoAcidModification > | fixed_modifications_ |
Fixed aminoacid modifications as parsed from the header. More... | |
std::vector< AminoAcidModification > | variable_modifications_ |
Variable aminoacid modifications as parsed from the header. More... | |
std::vector< const ResidueModification * > | preferred_fixed_modifications_ |
std::vector< const ResidueModification * > | preferred_variable_modifications_ |
Static Private Attributes | |
static const double | mod_tol_ |
static const double | xtandem_artificial_mod_tol_ |
Additional Inherited Members | |
Protected Types inherited from XMLHandler | |
enum | ActionMode { LOAD , STORE } |
Action to set the current mode (for error messages) More... | |
enum | LOADDETAIL { LD_ALLDATA , LD_RAWCOUNTS , LD_COUNTS_WITHOPTIONS } |
Static Protected Member Functions inherited from XMLHandler | |
static String | writeXMLEscape (const String &to_escape) |
Escapes a string and returns the escaped string. More... | |
static DataValue | fromXSDString (const String &type, const String &value) |
Convert an XSD type (e.g. 'xsd:double') to a DataValue. More... | |
Protected Attributes inherited from XMLHandler | |
String | file_ |
File name. More... | |
String | version_ |
Schema version. More... | |
StringManager | sm_ |
Helper class for string conversion. More... | |
std::vector< String > | open_tags_ |
Stack of open XML tags. More... | |
LOADDETAIL | load_detail_ |
parse only until total number of scans and chroms have been determined from attributes More... | |
std::vector< std::vector< String > > | cv_terms_ |
Array of CV term lists (one sublist denotes one term and its children) More... | |
Protected Attributes inherited from XMLFile | |
String | schema_location_ |
XML schema file location. More... | |
String | schema_version_ |
Version string. More... | |
String | enforced_encoding_ |
Encoding string that replaces the encoding (system dependent or specified in the XML). Disabled if empty. Used as a workaround for XTandem output xml. More... | |
Used to load and store PepXML files.
This class is used to load and store documents that implement the schema of PepXML files.
A documented schema for this format comes with the TPP and can also be found at https://github.com/OpenMS/OpenMS/tree/develop/share/OpenMS/SCHEMAS
PepXMLFile | ( | ) |
Constructor.
|
override |
Destructor.
|
overrideprotected |
Docu in base class.
|
inline |
Whether we should keep the native spectrum name of the pepXML.
void load | ( | const String & | filename, |
std::vector< ProteinIdentification > & | proteins, | ||
std::vector< PeptideIdentification > & | peptides, | ||
const String & | experiment_name, | ||
const SpectrumMetaDataLookup & | lookup | ||
) |
Loads peptide sequences with modifications out of a PepXML file.
filename | PepXML file to load |
proteins | Protein identification output |
peptides | Peptide identification output |
experiment_name | Experiment file name, which is used to extract the corresponding search results from the PepXML file. |
lookup | Helper for looking up retention times (PepXML may contain only scan numbers). |
Exception::FileNotFound | is thrown if the file could not be opened |
Exception::ParseError | is thrown if an error occurs during parsing |
void load | ( | const String & | filename, |
std::vector< ProteinIdentification > & | proteins, | ||
std::vector< PeptideIdentification > & | peptides, | ||
const String & | experiment_name = "" |
||
) |
load function with empty defaults for some parameters (see above)
Exception::FileNotFound | is thrown if the file could not be opened |
Exception::ParseError | is thrown if an error occurs during parsing |
|
private |
Looks up the modification by modification_mass and the amino acid at current_sequence_[modification_position], and adds it to current_modifications_.
|
private |
Fill scan_map_.
|
private |
Read RT, m/z, charge information from attributes of "spectrum_query".
void setParseUnknownScores | ( | bool | parse_unknown_scores | ) |
sets if during load, unknown scores should be parsed
void setPreferredFixedModifications | ( | const std::vector< const ResidueModification * > & | mods | ) |
sets the preferred fixed modifications
void setPreferredVariableModifications | ( | const std::vector< const ResidueModification * > & | mods | ) |
sets the preferred variable modifications
|
overrideprotected |
Docu in base class.
void store | ( | const String & | filename, |
std::vector< ProteinIdentification > & | protein_ids, | ||
std::vector< PeptideIdentification > & | peptide_ids, | ||
const String & | mz_file = "" , |
||
const String & | mz_name = "" , |
||
bool | peptideprophet_analyzed = false , |
||
double | rt_tolerance = 0.01 |
||
) |
Stores idXML as PepXML file.
Exception::UnableToCreateFile | is thrown if the file could not be opened for writing |
|
private |
Are we currently in an "analysis_summary" element (should be skipped)?
|
private |
Have we checked the "base_name" attribute in the "msms_run_summary" element?
|
private |
Analysis result instance currently being processed.
|
private |
current base name
|
private |
The modifications of the current peptide hit (position is 1-based)
|
private |
PeptideIdentification instance currently being processed.
|
private |
References to currently active ProteinIdentifications.
|
private |
Sequence of the current peptide hit.
|
private |
In case it has decoys, what is the prefix?
|
private |
Enzyme name associated with the current identification run.
|
private |
|
private |
Name of the associated experiment (filename of the data file, extension will be removed)
|
private |
|
private |
Fixed aminoacid modifications as parsed from the header.
|
private |
Does the file have decoys (e.g. from Comet's internal decoy search)
|
private |
Hydrogen data (for mass types)
|
private |
Mass of a hydrogen atom (monoisotopic/average depending on case)
|
private |
Whether we should keep the native spectrum name of the pepXML.
|
private |
Pointer to wrapper for looking up spectrum meta data.
|
staticprivate |
|
private |
|
private |
Several optional attributes of spectrum_query.
|
private |
Search parameters of the current identification run.
|
private |
Also parse unknown scores as metavalues?
|
private |
PeptideHit instance currently being processed.
|
private |
Pointer to the list of identified peptides.
|
private |
Fixed modifications that should be preferred when parsing the header (e.g. when pepXML was produced through an adapter)
|
private |
Variable modifications that should be preferred when parsing the header (e.g. when pepXML was produced through an adapter)
|
private |
Identifier linking PeptideIdentifications and ProteinIdentifications.
|
private |
Pointer to the list of identified proteins.
|
private |
RT and m/z of current PeptideIdentification (=spectrum)
Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
|
private |
1-based scan nr. of current PeptideIdentification (=spectrum). Scannr is usually from the start_scan attribute
|
private |
Set name of search engine.
|
private |
ID of current search result.
|
private |
Are we currently in a "search_score_summary" element (should be skipped)?
|
private |
Are we currently in a "search_summary" element (should be skipped)?
|
private |
Have we seen the experiment of interest at all?
|
private |
|
private |
|
private |
Get RT and m/z for peptide ID from precursor scan (should only matter for RT)?
|
private |
Variable aminoacid modifications as parsed from the header.
|
private |
Do current entries belong to the experiment of interest (for pepXML files that bundle results from different experiments)?
|
staticprivate |