OpenMS
AccurateMassSearchEngine Class Reference

An algorithm to search for exact mass matches from a spectrum against a database (e.g. HMDB). More...

#include <OpenMS/ANALYSIS/ID/AccurateMassSearchEngine.h>

Inheritance diagram for AccurateMassSearchEngine:
[legend]
Collaboration diagram for AccurateMassSearchEngine:
[legend]

Classes

struct  CompareEntryAndMass_
 
struct  MappingEntry_
 

Public Member Functions

 AccurateMassSearchEngine ()
 Default constructor. More...
 
 ~AccurateMassSearchEngine () override
 Default destructor. More...
 
void queryByMZ (const double &observed_mz, const Int &observed_charge, const String &ion_mode, std::vector< AccurateMassSearchResult > &results, const EmpiricalFormula &observed_adduct=EmpiricalFormula()) const
 search for a specific observed mass by enumerating all possible adducts and search M+X against database. If use_feature_adducts is activated, queryByMZ uses annotated, observed adducts as EmpiricalFormulas, restricting M+X candidates. More...
 
void queryByFeature (const Feature &feature, const Size &feature_index, const String &ion_mode, std::vector< AccurateMassSearchResult > &results) const
 
void queryByConsensusFeature (const ConsensusFeature &cfeat, const Size &cf_index, const Size &number_of_maps, const String &ion_mode, std::vector< AccurateMassSearchResult > &results) const
 
void run (FeatureMap &, MzTab &) const
 
void run (FeatureMap &, MzTabM &) const
 
void run (ConsensusMap &, MzTab &) const
 
void init ()
 parse database and adduct files More...
 
- Public Member Functions inherited from DefaultParamHandler
 DefaultParamHandler (const String &name)
 Constructor with name that is displayed in error messages. More...
 
 DefaultParamHandler (const DefaultParamHandler &rhs)
 Copy constructor. More...
 
virtual ~DefaultParamHandler ()
 Destructor. More...
 
DefaultParamHandler & operator= (const DefaultParamHandler &rhs)
 Assignment operator. More...
 
virtual bool operator== (const DefaultParamHandler &rhs) const
 Equality operator. More...
 
void setParameters (const Param &param)
 Sets the parameters. More...
 
const Param & getParameters () const
 Non-mutable access to the parameters. More...
 
const Param & getDefaults () const
 Non-mutable access to the default parameters. More...
 
const String & getName () const
 Non-mutable access to the name. More...
 
void setName (const String &name)
 Mutable access to the name. More...
 
const std::vector< String > & getSubsections () const
 Non-mutable access to the registered subsections. More...
 
- Public Member Functions inherited from ProgressLogger
 ProgressLogger ()
 Constructor. More...
 
virtual ~ProgressLogger ()
 Destructor. More...
 
 ProgressLogger (const ProgressLogger &other)
 Copy constructor. More...
 
ProgressLogger & operator= (const ProgressLogger &other)
 Assignment Operator. More...
 
void setLogType (LogType type) const
 Sets the progress log that should be used. The default type is NONE! More...
 
LogType getLogType () const
 Returns the type of progress log being used. More...
 
void setLogger (ProgressLoggerImpl *logger)
 Sets the logger to be used for progress logging. More...
 
void startProgress (SignedSize begin, SignedSize end, const String &label) const
 Initializes the progress display. More...
 
void setProgress (SignedSize value) const
 Sets the current progress. More...
 
void endProgress (UInt64 bytes_processed=0) const
 
void nextProgress () const
 increment progress by 1 (according to range begin-end) More...
 

Static Public Attributes

static constexpr char search_engine_identifier [] = "AccurateMassSearchEngine"
 uses 'AccurateMassSearchEngine' as search engine id for protein and peptide ids which are generated by AMS More...
 

Protected Member Functions

void updateMembers_ () override
 This method is used to update extra member variables at the end of the setParameters() method. More...
 
- Protected Member Functions inherited from DefaultParamHandler
void defaultsToParam_ ()
 Updates the parameters after the defaults have been set in the constructor. More...
 

Private Types

typedef std::vector< std::vector< AccurateMassSearchResult > > QueryResultsTable
 
typedef std::vector< std::vector< String > > MassIDMapping
 private member variables More...
 
typedef std::map< String, std::vector< String > > HMDBPropsMapping
 

Private Member Functions

template<typename MAPTYPE >
String resolveAutoMode_ (const MAPTYPE &map) const
 private member functions More...
 
void parseMappingFile_ (const StringList &)
 
void parseStructMappingFile_ (const StringList &)
 
void parseAdductsFile_ (const String &filename, std::vector< AdductInfo > &result)
 
void searchMass_ (double neutral_query_mass, double diff_mass, std::pair< Size, Size > &hit_indices) const
 
void annotate_ (const std::vector< AccurateMassSearchResult > &, BaseFeature &) const
 Add search results to a Consensus/Feature. More...
 
std::vector< AccurateMassSearchResult > extractQueryResults_ (const Feature &feature, const Size &feature_index, const String &ion_mode_internal, Size &dummy_count) const
 Extract query results from feature. More...
 
void addMatchesToID_ (IdentificationData &id, const std::vector< AccurateMassSearchResult > &amr, const IdentificationData::InputFileRef &file_ref, const IdentificationData::ScoreTypeRef &mass_error_ppm_score_ref, const IdentificationData::ScoreTypeRef &mass_error_Da_score_ref, const IdentificationData::ProcessingStepRef &step_ref, BaseFeature &f) const
 Add resulting matches to IdentificationData. More...
 
double computeCosineSim_ (const std::vector< double > &x, const std::vector< double > &y) const
 
double computeIsotopePatternSimilarity_ (const Feature &feat, const EmpiricalFormula &form) const
 
void exportMzTab_ (const QueryResultsTable &overall_results, const Size number_of_maps, MzTab &mztab_out, const std::vector< String > &file_locations) const
 
void exportMzTabM_ (const FeatureMap &fmap, MzTabM &mztabm_out) const
 

Private Attributes

std::vector< MappingEntry_ > mass_mappings_
 
HMDBPropsMapping hmdb_properties_mapping_
 
bool is_initialized_
 true if init() was called without any subsequent param changes More...
 
bool legacyID_ = true
 
double mass_error_value_
 parameter stuff More...
 
String mass_error_unit_
 
String ion_mode_
 
bool iso_similarity_
 
String pos_adducts_fname_
 
String neg_adducts_fname_
 
StringList db_mapping_file_
 
StringList db_struct_file_
 
std::vector< AdductInfo > pos_adducts_
 
std::vector< AdductInfo > neg_adducts_
 
String database_name_
 
String database_version_
 
String database_location_
 
bool keep_unidentified_masses_
 

Additional Inherited Members

- Public Types inherited from ProgressLogger
enum  LogType { CMD , GUI , NONE }
 Possible log types. More...
 
- Static Public Member Functions inherited from DefaultParamHandler
static void writeParametersToMetaValues (const Param &write_this, MetaInfoInterface &write_here, const String &key_prefix="")
 Writes all parameters to meta values. More...
 
- Protected Attributes inherited from DefaultParamHandler
Param param_
 Container for current parameters. More...
 
Param defaults_
 Container for default parameters. This member should be filled in the constructor of derived classes! More...
 
std::vector< String > subsections_
 Container for registered subsections. This member should be filled in the constructor of derived classes! More...
 
String error_name_
 Name that is displayed in error messages during the parameter checking. More...
 
bool check_defaults_
 If this member is set to false, no checking of parameters is done. More...
 
bool warn_empty_defaults_
 If this member is set to false, no warning is emitted when defaults are empty. More...
 
- Protected Attributes inherited from ProgressLogger
LogType type_
 
time_t last_invoke_
 
ProgressLoggerImpl * current_logger_
 
- Static Protected Attributes inherited from ProgressLogger
static int recursion_depth_
 

Detailed Description

An algorithm to search for exact mass matches from a spectrum against a database (e.g. HMDB).

For each peak, neutral masses are reconstructed from observed (spectrum) m/z values by enumerating all possible adducts with matching charge. The resulting neutral masses (can be more than one, depending on list of possible adducts) are matched against masses from a database within a certain mass error (Da or ppm).

Supports any database which contains an identifier, chemical sum formula and (optional) mass. If masses in the database are not given (= set to 0), they are computed from sum formulas.

Both positive and negative ion modes are supported. Charge for (Consensus-)Features can be either positive or negative, but only the absolute value is used since many FeatureFinders will only report positive charges even in negative ion mode. Entities with charge=0 are treated as "unknown charge" and are tested with all potential adducts and subsequently matched against the database.

A file with a list of potential adducts can be given for each mode separately. Each line contains a chemical formula (plus quantor) and a charge (separated by semicolon), e.g. M+H;1+. The M can be preceded by a quantor (e.g. 2M, 3M), implicitly assumed as 1. The chemical formula can contain multiple segments, separated by + or - operators, e.g. M+H-H2O;+1 (water loss in positive mode). Brackets are implicit per segment, i.e. M+H-H2O is parsed as M + (H) - (H2O). Each segment can also be preceded by a quantor, e.g. M+H-2H2O would parse as M + (H) - 2x(H2O). If debug mode is enabled, the masses of each segment are printed for verification. In particular, typing H20 (twenty H) is different from H2O (water).

Ionization mode of the observed m/z values can be determined automatically if the input map (either FeatureMap or ConsensusMap) is annotated with a meta value, as done by FeatureFinderMetabo.


Class Documentation

◆ OpenMS::AccurateMassSearchEngine::MappingEntry_

struct OpenMS::AccurateMassSearchEngine::MappingEntry_
Collaboration diagram for AccurateMassSearchEngine::MappingEntry_:
[legend]
Class Members
String formula
double mass
vector< String > massIDs

Member Typedef Documentation

◆ HMDBPropsMapping

typedef std::map<String, std::vector<String> > HMDBPropsMapping
private

◆ MassIDMapping

typedef std::vector<std::vector<String> > MassIDMapping
private

private member variables

◆ QueryResultsTable

typedef std::vector<std::vector<AccurateMassSearchResult> > QueryResultsTable
private

Constructor & Destructor Documentation

◆ AccurateMassSearchEngine()

Default constructor.

◆ ~AccurateMassSearchEngine()

~AccurateMassSearchEngine ( )
override

Default destructor.

Member Function Documentation

◆ addMatchesToID_()

void addMatchesToID_ ( IdentificationData &  id,
const std::vector< AccurateMassSearchResult > &  amr,
const IdentificationData::InputFileRef &  file_ref,
const IdentificationData::ScoreTypeRef &  mass_error_ppm_score_ref,
const IdentificationData::ScoreTypeRef &  mass_error_Da_score_ref,
const IdentificationData::ProcessingStepRef &  step_ref,
BaseFeature &  f 
) const
private

Add resulting matches to IdentificationData.

◆ annotate_()

void annotate_ ( const std::vector< AccurateMassSearchResult > &  ,
BaseFeature &  
) const
private

Add search results to a Consensus/Feature.

◆ computeCosineSim_()

double computeCosineSim_ ( const std::vector< double > &  x,
const std::vector< double > &  y 
) const
private

For two vectors of identical length, compute the cosine of the angle between them. Since we look at the angle, scaling of the vectors does not change the result (when ignoring numerical instability).

◆ computeIsotopePatternSimilarity_()

double computeIsotopePatternSimilarity_ ( const Feature &  feat,
const EmpiricalFormula &  form 
) const
private

◆ exportMzTab_()

void exportMzTab_ ( const QueryResultsTable &  overall_results,
const Size  number_of_maps,
MzTab &  mztab_out,
const std::vector< String > &  file_locations 
) const
private

◆ exportMzTabM_()

void exportMzTabM_ ( const FeatureMap &  fmap,
MzTabM &  mztabm_out 
) const
private

◆ extractQueryResults_()

std::vector<AccurateMassSearchResult> extractQueryResults_ ( const Feature &  feature,
const Size &  feature_index,
const String &  ion_mode_internal,
Size &  dummy_count 
) const
private

Extract query results from feature.

◆ init()

void init ( )

parse database and adduct files

◆ parseAdductsFile_()

void parseAdductsFile_ ( const String &  filename,
std::vector< AdductInfo > &  result 
)
private

◆ parseMappingFile_()

void parseMappingFile_ ( const StringList &  )
private

◆ parseStructMappingFile_()

void parseStructMappingFile_ ( const StringList &  )
private

◆ queryByConsensusFeature()

void queryByConsensusFeature ( const ConsensusFeature &  cfeat,
const Size &  cf_index,
const Size &  number_of_maps,
const String &  ion_mode,
std::vector< AccurateMassSearchResult > &  results 
) const

◆ queryByFeature()

void queryByFeature ( const Feature &  feature,
const Size &  feature_index,
const String &  ion_mode,
std::vector< AccurateMassSearchResult > &  results 
) const

◆ queryByMZ()

void queryByMZ ( const double &  observed_mz,
const Int &  observed_charge,
const String &  ion_mode,
std::vector< AccurateMassSearchResult > &  results,
const EmpiricalFormula &  observed_adduct = EmpiricalFormula() 
) const

search for a specific observed mass by enumerating all possible adducts and search M+X against database. If use_feature_adducts is activated, queryByMZ uses annotated, observed adducts as EmpiricalFormulas, restricting M+X candidates.

◆ resolveAutoMode_()

String resolveAutoMode_ ( const MAPTYPE &  map) const
inline private

private member functions

if ion-mode is auto, this will set the internal mode according to input data

Exceptions
InvalidParameter if ion mode cannot be resolved

References File::basename(), and OPENMS_LOG_INFO.

◆ run() [1/3]

void run ( ConsensusMap &  ,
MzTab &  
) const

Main method of AccurateMassSearchEngine. The input map is not const, since it will get annotated with results.

Note
Call init() before calling run!

◆ run() [2/3]

void run ( FeatureMap &  ,
MzTab &  
) const

Main method of AccurateMassSearchEngine. The input map is not const, since it will get annotated with results.

◆ run() [3/3]

void run ( FeatureMap &  ,
MzTabM &  
) const

◆ searchMass_()

void searchMass_ ( double  neutral_query_mass,
double  diff_mass,
std::pair< Size, Size > &  hit_indices 
) const
private

◆ updateMembers_()

void updateMembers_ ( )
override protected virtual

This method is used to update extra member variables at the end of the setParameters() method.

Also call it at the end of the derived classes' copy constructor and assignment operator.

The default implementation is empty.

Reimplemented from DefaultParamHandler.

Member Data Documentation

◆ database_location_

String database_location_
private

◆ database_name_

String database_name_
private

◆ database_version_

String database_version_
private

◆ db_mapping_file_

StringList db_mapping_file_
private

◆ db_struct_file_

StringList db_struct_file_
private

◆ hmdb_properties_mapping_

HMDBPropsMapping hmdb_properties_mapping_
private

◆ ion_mode_

String ion_mode_
private

◆ is_initialized_

bool is_initialized_
private

true if init() was called without any subsequent param changes

◆ iso_similarity_

bool iso_similarity_
private

◆ keep_unidentified_masses_

bool keep_unidentified_masses_
private

◆ legacyID_

bool legacyID_ = true
private

◆ mass_error_unit_

String mass_error_unit_
private

◆ mass_error_value_

double mass_error_value_
private

parameter stuff

◆ mass_mappings_

std::vector<MappingEntry_> mass_mappings_
private

◆ neg_adducts_

std::vector<AdductInfo> neg_adducts_
private

◆ neg_adducts_fname_

String neg_adducts_fname_
private

◆ pos_adducts_

std::vector<AdductInfo> pos_adducts_
private

◆ pos_adducts_fname_

String pos_adducts_fname_
private

◆ search_engine_identifier

constexpr char search_engine_identifier[] = "AccurateMassSearchEngine"
static constexpr

uses 'AccurateMassSearchEngine' as search engine id for protein and peptide ids which are generated by AMS