21 #include <OpenMS/config.h>
27 #include <unordered_set>
74 template<
class HitType>
81 HasGoodScore(
double score_,
bool higher_score_better_) : score(score_), higher_score_better(higher_score_better_)
87 if (higher_score_better)
89 return hit.getScore() >= score;
91 return hit.getScore() <= score;
100 template<
class HitType>
110 throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"The cut-off value for rank filtering must not be zero!");
116 Size hit_rank = hit.getRank();
121 return hit_rank <= rank;
130 template<
class HitType>
148 return found == value;
153 template<
class HitType>
169 return double(found) <= value;
174 template<
class HitType>
189 return target_decoy(hit) || is_decoy(hit);
198 template<
class HitType>
212 if (accessions.count(it) > 0)
234 template<
class HitType>
248 if (accessions.count(it) > 0)
270 template<
class HitType,
class Entry>
278 for (
typename std::vector<Entry>::iterator rec_it = records.begin(); rec_it != records.end(); ++rec_it)
280 items[getKey(*rec_it)] = &(*rec_it);
295 return items.count(getHitKey(hit)) > 0;
305 if (!exists(evidence))
307 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Accession: '" + getHitKey(evidence) +
"'. peptide evidence accession not in data");
309 return *(items.find(getHitKey(evidence))->second);
324 struct HasMinPeptideLength;
330 struct HasLowMZError;
337 struct HasMatchingModification;
344 struct HasMatchingSequence;
347 struct HasNoEvidence;
377 const auto& fun = [&](
const Int missed_cleavages) {
378 bool max_filter = max_cleavages_ != disabledValue() ? missed_cleavages > max_cleavages_ :
false;
379 bool min_filter = min_cleavages_ != disabledValue() ? missed_cleavages < min_cleavages_ :
false;
380 return max_filter || min_filter;
387 hits.erase(std::remove_if(hits.begin(), hits.end(), (*
this)), hits.end());
407 accession_resolver_(entries), digestion_(digestion), ignore_missed_cleavages_(ignore_missed_cleavages), methionine_cleavage_(methionine_cleavage)
419 if (accession_resolver_.
exists(evidence))
422 ignore_missed_cleavages_, methionine_cleavage_);
428 OPENMS_LOG_WARN <<
"Peptide accession not available! Skipping Evidence." << std::endl;
440 IDFilter::FilterPeptideEvidences<IDFilter::DigestionFilter>(*
this, peptides);
451 template<
class IdentificationType>
457 return id.getHits().empty();
484 template<
class Container,
class Predicate>
487 items.erase(std::remove_if(items.begin(), items.end(), pred), items.end());
491 template<
class Container,
class Predicate>
494 items.erase(std::remove_if(items.begin(), items.end(), std::not_fn(pred)), items.end());
498 template<
class Container,
class Predicate>
501 auto part = std::partition(items.begin(), items.end(), std::not_fn(pred));
502 std::move(part, items.end(), std::back_inserter(target));
503 items.erase(part, items.end());
507 template<
class IDContainer,
class Predicate>
510 for (
auto& item : items)
512 removeMatchingItems(item.getHits(), pred);
517 template<
class IDContainer,
class Predicate>
520 for (
auto& item : items)
522 keepMatchingItems(item.getHits(), pred);
526 template<
class MapType,
class Predicate>
529 for (
auto& feat : prot_and_pep_ids)
531 keepMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
533 keepMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
536 template<
class MapType,
class Predicate>
539 for (
auto& feat : prot_and_pep_ids)
541 removeMatchingItemsUnroll(feat.getPeptideIdentifications(), pred);
543 removeMatchingItemsUnroll(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
546 template<
class MapType,
class Predicate>
549 for (
auto& feat : prot_and_pep_ids)
551 removeMatchingItems(feat.getPeptideIdentifications(), pred);
553 removeMatchingItems(prot_and_pep_ids.getUnassignedPeptideIdentifications(), pred);
563 template<
class IdentificationType>
567 for (
typename std::vector<IdentificationType>::const_iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
569 counter += id_it->getHits().size();
587 template<
class IdentificationType>
588 static bool getBestHit(
const std::vector<IdentificationType>& identifications,
bool assume_sorted,
typename IdentificationType::HitType& best_hit)
590 if (identifications.empty())
593 typename std::vector<IdentificationType>::const_iterator best_id_it = identifications.end();
594 typename std::vector<typename IdentificationType::HitType>::const_iterator best_hit_it;
596 for (
typename std::vector<IdentificationType>::const_iterator id_it = identifications.begin(); id_it != identifications.end(); ++id_it)
598 if (id_it->getHits().empty())
601 if (best_id_it == identifications.end())
604 best_hit_it = id_it->getHits().begin();
606 else if (best_id_it->getScoreType() != id_it->getScoreType())
608 throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
"Can't compare scores of different types", best_id_it->getScoreType() +
"/" + id_it->getScoreType());
611 bool higher_better = best_id_it->isHigherScoreBetter();
612 for (
typename std::vector<typename IdentificationType::HitType>::const_iterator hit_it = id_it->getHits().begin(); hit_it != id_it->getHits().end(); ++hit_it)
614 if ((higher_better && (hit_it->getScore() > best_hit_it->getScore())) || (!higher_better && (hit_it->getScore() < best_hit_it->getScore())))
616 best_hit_it = hit_it;
623 if (best_id_it == identifications.end())
628 best_hit = *best_hit_it;
639 static void extractPeptideSequences(
const std::vector<PeptideIdentification>& peptides, std::set<String>& sequences,
bool ignore_mods =
false);
653 template<
class EvidenceFilter>
656 for (std::vector<PeptideIdentification>::iterator pep_it = peptides.begin(); pep_it != peptides.end(); ++pep_it)
658 for (std::vector<PeptideHit>::iterator hit_it = pep_it->getHits().begin(); hit_it != pep_it->getHits().end(); ++hit_it)
660 std::vector<PeptideEvidence> evidences;
661 remove_copy_if(hit_it->getPeptideEvidences().begin(), hit_it->getPeptideEvidences().end(), back_inserter(evidences), std::not_fn(filter));
662 hit_it->setPeptideEvidences(evidences);
674 template<
class IdentificationType>
677 for (
typename std::vector<IdentificationType>::iterator it = ids.begin(); it != ids.end(); ++it)
699 static void updateProteinReferences(std::vector<PeptideIdentification>& peptides,
const std::vector<ProteinIdentification>& proteins,
bool remove_peptides_without_reference =
false);
727 static bool updateProteinGroups(std::vector<ProteinIdentification::ProteinGroup>& groups,
const std::vector<ProteinHit>& hits);
735 static void removeUngroupedProteins(
const std::vector<ProteinIdentification::ProteinGroup>& groups, std::vector<ProteinHit>& hits);
743 template<
class IdentificationType>
746 struct HasNoHits<IdentificationType> empty_filter;
747 removeMatchingItems(ids, empty_filter);
755 template<
class IdentificationType>
758 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
760 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id_it->isHigherScoreBetter());
761 keepMatchingItems(id_it->getHits(), score_filter);
771 static void filterGroupsByScore(std::vector<ProteinIdentification::ProteinGroup>& grps, double threshold_score, bool higher_better);
778 template<class IdentificationType>
779 static void filterHitsByScore(IdentificationType& id, double threshold_score)
781 struct HasGoodScore<typename IdentificationType::HitType> score_filter(threshold_score, id.isHigherScoreBetter());
782 keepMatchingItems(id.getHits(), score_filter);
790 template<class IdentificationType>
791 static void keepNBestHits(std::vector<IdentificationType>& ids, Size n)
793 for (
typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
796 if (n < id_it->getHits().size())
797 id_it->getHits().resize(n);
815 template<
class IdentificationType>
821 struct HasMaxRank<typename IdentificationType::HitType> rank_filter(min_rank - 1);
822 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
824 removeMatchingItems(id_it->getHits(), rank_filter);
827 if (max_rank >= min_rank)
829 struct HasMaxRank<typename IdentificationType::HitType> rank_filter(max_rank);
830 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
832 keepMatchingItems(id_it->getHits(), rank_filter);
844 template<
class IdentificationType>
848 for (typename std::vector<IdentificationType>::iterator id_it = ids.begin(); id_it != ids.end(); ++id_it)
850 removeMatchingItems(id_it->getHits(), decoy_filter);
861 template<
class IdentificationType>
865 for (auto& id_it : ids)
867 removeMatchingItems(id_it.getHits(), acc_filter);
878 template<
class IdentificationType>
882 for (auto& id_it : ids)
884 keepMatchingItems(id_it.getHits(), acc_filter);
923 static void filterPeptidesByRT(std::vector<PeptideIdentification>& peptides,
double min_rt,
double max_rt);
926 static void filterPeptidesByMZ(std::vector<PeptideIdentification>& peptides,
double min_mz,
double max_mz);
948 template<
class Filter>
1018 filterHitsByScore(exp_it->getPeptideIdentifications(), peptide_threshold_score);
1019 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1031 std::vector<PeptideIdentification> all_peptides;
1036 std::vector<PeptideIdentification>& peptides = exp_it->getPeptideIdentifications();
1037 keepNBestHits(peptides, n);
1038 removeEmptyIdentifications(peptides);
1040 all_peptides.insert(all_peptides.end(), peptides.begin(), peptides.end());
1051 template<
class MapType>
1056 for (
auto& feat : map)
1058 keepNBestHits(feat.getPeptideIdentifications(), n);
1060 keepNBestHits(map.getUnassignedPeptideIdentifications(), n);
1063 template<
class MapType>
1067 removeMatchingPeptideIdentifications(prot_and_pep_ids, pred);
1071 static void keepBestPerPeptide(std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1073 annotateBestPerPeptide(pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1075 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1078 static void keepBestPerPeptidePerRun(std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
Size nr_best_spectrum)
1080 annotateBestPerPeptidePerRun(prot_ids, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1082 keepMatchingItemsUnroll(pep_ids, best_per_peptide);
1086 template<
class MapType>
1092 for (
const auto& idrun : prot_ids)
1097 for (
auto& feat : prot_and_pep_ids)
1099 annotateBestPerPeptidePerRunWithData(best_peps_per_run, feat.getPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1102 annotateBestPerPeptidePerRunWithData(best_peps_per_run, prot_and_pep_ids.getUnassignedPeptideIdentifications(), ignore_mods, ignore_charges, nr_best_spectrum);
1105 template<
class MapType>
1108 annotateBestPerPeptidePerRun(prot_and_pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1110 keepMatchingPeptideHits(prot_and_pep_ids, best_per_peptide);
1115 static void annotateBestPerPeptidePerRun(
const std::vector<ProteinIdentification>& prot_ids, std::vector<PeptideIdentification>& pep_ids,
bool ignore_mods,
bool ignore_charges,
1116 Size nr_best_spectrum)
1119 for (
const auto&
id : prot_ids)
1123 annotateBestPerPeptidePerRunWithData(best_peps_per_run, pep_ids, ignore_mods, ignore_charges, nr_best_spectrum);
1130 Size nr_best_spectrum)
1132 for (
auto& pep : pep_ids)
1135 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1145 for (
auto& pep : pep_ids)
1147 annotateBestPerPeptideWithData(best_pep, pep, ignore_mods, ignore_charges, nr_best_spectrum);
1161 auto pepIt = pep.
getHits().begin();
1162 auto pepItEnd = nr_best_spectrum == 0 || pep.
getHits().size() <= nr_best_spectrum ? pep.
getHits().end() : pep.
getHits().begin() + nr_best_spectrum;
1163 for (; pepIt != pepItEnd; ++pepIt)
1177 int lookup_charge = 0;
1178 if (!ignore_charges)
1184 auto it_inserted = best_pep.emplace(std::move(lookup_seq),
ChargeToPepHitP());
1185 auto it_inserted_chg = it_inserted.first->second.emplace(lookup_charge, &hit);
1187 PeptideHit*& p = it_inserted_chg.first->second;
1188 if (!it_inserted_chg.second)
1212 std::set<String> accessions;
1213 for (std::vector<FASTAFile::FASTAEntry>::const_iterator it = proteins.begin(); it != proteins.end(); ++it)
1215 accessions.insert(it->identifier);
1225 if (exp_it->getMSLevel() == 2)
1227 keepHitsMatchingProteins(exp_it->getPeptideIdentifications(), accessions);
1228 removeEmptyIdentifications(exp_it->getPeptideIdentifications());
1229 updateHitRanks(exp_it->getPeptideIdentifications());
#define OPENMS_LOG_WARN
Macro for logging a warning, i.e. a piece of information which should be read by the user.
Definition: LogStream.h:444
String toString() const
returns the peptide as string with modifications embedded in brackets
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
A container for consensus elements.
Definition: ConsensusMap.h:66
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:33
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:362
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:38
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
A method or algorithm argument contains illegal values.
Definition: Exception.h:616
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:316
Invalid value exception.
Definition: Exception.h:305
const std::vector< ProteinIdentification > & getProteinIdentifications() const
returns a const reference to the protein ProteinIdentification vector
Filter Peptide Hit by its digestion product.
Definition: IDFilter.h:356
Int max_cleavages_
Definition: IDFilter.h:360
EnzymaticDigestion & digestion_
Definition: IDFilter.h:358
PeptideHit argument_type
Definition: IDFilter.h:363
Int min_cleavages_
Definition: IDFilter.h:359
bool operator()(PeptideHit &p) const
Definition: IDFilter.h:375
void filterPeptideSequences(std::vector< PeptideHit > &hits)
Definition: IDFilter.h:385
PeptideDigestionFilter(EnzymaticDigestion &digestion, Int min, Int max)
Definition: IDFilter.h:364
static Int disabledValue()
Definition: IDFilter.h:368
Collection of functions for filtering peptide and protein identifications.
Definition: IDFilter.h:53
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:756
static void moveMatchingItems(Container &items, const Predicate &pred, Container &target)
Move items that satisfy a condition to a container (e.g. vector)
Definition: IDFilter.h:499
static void filterPeptidesByLength(std::vector< PeptideIdentification > &peptides, Size min_length, Size max_length=UINT_MAX)
Filters peptide identifications according to peptide sequence length.
static void keepNBestHits(PeakMap &experiment, Size n)
Filters an MS/MS experiment by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1027
static void removeUnreferencedProteins(std::vector< ProteinIdentification > &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void keepBestMatchPerObservation(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref)
Filter IdentificationData to keep only the best match (e.g. PSM) for each observation (e....
static void annotateBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1087
static void removeUnreferencedProteins(ProteinIdentification &proteins, const std::vector< PeptideIdentification > &peptides)
Removes protein hits from proteins that are not referenced by a peptide in peptides.
static void removeDuplicatePeptideHits(std::vector< PeptideIdentification > &peptides, bool seq_only=false)
Removes duplicate peptide hits from each peptide identification, keeping only unique hits (per ID).
std::map< std::string, SequenceToChargeToPepHitP > RunToSequenceToChargeToPepHitP
Definition: IDFilter.h:64
static void keepMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:527
static void removeEmptyIdentifications(MapType &prot_and_pep_ids)
Definition: IDFilter.h:1064
static void annotateBestPerPeptidePerRun(const std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1115
static void removeMatchingItems(Container &items, const Predicate &pred)
Remove items that satisfy a condition from a container (e.g. vector)
Definition: IDFilter.h:485
static void annotateBestPerPeptidePerRunWithData(RunToSequenceToChargeToPepHitP &best_peps_per_run, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1129
std::unordered_map< std::string, ChargeToPepHitP > SequenceToChargeToPepHitP
Definition: IDFilter.h:63
static void removeEmptyIdentifications(std::vector< IdentificationType > &ids)
Removes peptide or protein identifications that have no hits in them.
Definition: IDFilter.h:744
IDFilter()=default
Constructor.
static void keepMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Keep Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Protei...
Definition: IDFilter.h:518
static void extractPeptideSequences(const std::vector< PeptideIdentification > &peptides, std::set< String > &sequences, bool ignore_mods=false)
Extracts all unique peptide sequences from a list of peptide IDs.
static void removeDecoys(IdentificationData &id_data)
Filter IdentificationData to remove parent sequences annotated as decoys.
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:845
static void removePeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Removes all peptide hits that have at least one of the given modifications.
virtual ~IDFilter()=default
Destructor.
static void keepMatchingItems(Container &items, const Predicate &pred)
Keep items that satisfy a condition in a container (e.g. vector), removing all others.
Definition: IDFilter.h:492
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
static void keepPeptidesWithMatchingModifications(std::vector< PeptideIdentification > &peptides, const std::set< String > &modifications)
Keeps only peptide hits that have at least one of the given modifications.
static void annotateBestPerPeptideWithData(SequenceToChargeToPepHitP &best_pep, PeptideIdentification &pep, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1155
static void filterObservationMatchesByScore(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref, double cutoff)
Filter observation matches (e.g. PSMs) in IdentificationData by score.
static void removeMatchingPeptideHits(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:537
static void keepBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Filters PeptideHits from PeptideIdentification by keeping only the best peptide hits for every peptid...
Definition: IDFilter.h:1071
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
static void filterPeptidesByCharge(std::vector< PeptideIdentification > &peptides, Int min_charge, Int max_charge)
Filters peptide identifications according to charge state.
static void filterPeptidesByMZError(std::vector< PeptideIdentification > &peptides, double mass_error, bool unit_ppm)
Filter peptide identifications according to mass deviation.
static void updateProteinReferences(ConsensusMap &cmap, const ProteinIdentification &ref_run, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void FilterPeptideEvidences(EvidenceFilter &filter, std::vector< PeptideIdentification > &peptides)
remove peptide evidences based on a filter
Definition: IDFilter.h:654
static void filterHitsByRank(std::vector< IdentificationType > &ids, Size min_rank, Size max_rank)
Filters peptide or protein identifications according to the ranking of the hits.
Definition: IDFilter.h:816
static void keepNBestPeptideHits(MapType &map, Size n)
Filters a Consensus/FeatureMap by keeping the N best peptide hits for every spectrum.
Definition: IDFilter.h:1052
static void removeMatchingItemsUnroll(IDContainer &items, const Predicate &pred)
Remove Hit items that satisfy a condition in one of our ID containers (e.g. vector of Peptide or Prot...
Definition: IDFilter.h:508
static std::map< String, std::vector< ProteinHit > > extractUnassignedProteins(ConsensusMap &cmap)
Extracts all proteins not matched by PSMs in features.
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static void removeMatchingPeptideIdentifications(MapType &prot_and_pep_ids, Predicate &pred)
Definition: IDFilter.h:547
static void filterPeptidesByRT(std::vector< PeptideIdentification > &peptides, double min_rt, double max_rt)
Filters peptide identifications by precursor RT, keeping only IDs in the given range.
static void filterPeptideEvidences(Filter &filter, std::vector< PeptideIdentification > &peptides)
Digest a collection of proteins and filter PeptideEvidences based on specificity PeptideEvidences of ...
static void removePeptidesWithMatchingRegEx(std::vector< PeptideIdentification > &peptides, const String &regex)
static void removePeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &bad_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that matches one in bad_peptides.
static Size countHits(const std::vector< IdentificationType > &ids)
Returns the total number of peptide/protein hits in a vector of peptide/protein identifications.
Definition: IDFilter.h:564
static void updateProteinReferences(ConsensusMap &cmap, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static void updateProteinReferences(std::vector< PeptideIdentification > &peptides, const std::vector< ProteinIdentification > &proteins, bool remove_peptides_without_reference=false)
Removes references to missing proteins.
static bool getBestHit(const std::vector< IdentificationType > &identifications, bool assume_sorted, typename IdentificationType::HitType &best_hit)
Finds the best-scoring hit in a vector of peptide or protein identifications.
Definition: IDFilter.h:588
static void updateHitRanks(std::vector< IdentificationType > &ids)
Updates the hit ranks on all peptide or protein IDs.
Definition: IDFilter.h:675
static void keepBestPerPeptidePerRun(MapType &prot_and_pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1106
static void keepHitsMatchingProteins(PeakMap &experiment, const std::vector< FASTAFile::FASTAEntry > &proteins)
Filters an MS/MS experiment according to the given proteins.
Definition: IDFilter.h:1210
static void removeHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > accessions)
Filters peptide or protein identifications according to the given proteins (negative).
Definition: IDFilter.h:862
static void keepBestPerPeptidePerRun(std::vector< ProteinIdentification > &prot_ids, std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1078
static void keepHitsMatchingProteins(std::vector< IdentificationType > &ids, const std::set< String > &accessions)
Filters peptide or protein identifications according to the given proteins (positive).
Definition: IDFilter.h:879
static void removeUngroupedProteins(const std::vector< ProteinIdentification::ProteinGroup > &groups, std::vector< ProteinHit > &hits)
Update protein hits after protein groups were filtered.
static void keepNBestSpectra(std::vector< PeptideIdentification > &peptides, Size n)
static void filterPeptidesByMZ(std::vector< PeptideIdentification > &peptides, double min_mz, double max_mz)
Filters peptide identifications by precursor m/z, keeping only IDs in the given range.
static void keepUniquePeptidesPerProtein(std::vector< PeptideIdentification > &peptides)
Removes all peptides that are not annotated as unique for a protein (by PeptideIndexer)
static void keepPeptidesWithMatchingSequences(std::vector< PeptideIdentification > &peptides, const std::vector< PeptideIdentification > &good_peptides, bool ignore_mods=false)
Removes all peptide hits with a sequence that does not match one in good_peptides.
static void removeUnreferencedProteins(ConsensusMap &cmap, bool include_unassigned)
static void annotateBestPerPeptide(std::vector< PeptideIdentification > &pep_ids, bool ignore_mods, bool ignore_charges, Size nr_best_spectrum)
Definition: IDFilter.h:1142
static void filterHitsByScore(PeakMap &experiment, double peptide_threshold_score, double protein_threshold_score)
Filters an MS/MS experiment according to score thresholds.
Definition: IDFilter.h:1008
std::map< Int, PeptideHit * > ChargeToPepHitP
Typedefs.
Definition: IDFilter.h:62
Definition: IdentificationData.h:87
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:45
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:76
Iterator begin() noexcept
Definition: MSExperiment.h:155
Iterator end()
Definition: MSExperiment.h:170
Representation of a peptide evidence.
Definition: PeptideEvidence.h:25
Int getStart() const
get the position in the protein (starting at 0 for the N-terminus). If not available UNKNOWN_POSITION...
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
bool hasValidLimits() const
start and end numbers in evidence represent actual numeric indices
Int getEnd() const
get the position of the last AA of the peptide in protein coordinates (starting at 0 for the N-termin...
Representation of a peptide hit.
Definition: PeptideHit.h:31
double getScore() const
returns the PSM score
const AASequence & getSequence() const
returns the peptide sequence
std::set< String > extractProteinAccessionsSet() const
extracts the set of non-empty protein accessions from peptide evidences
Int getCharge() const
returns the charge of the peptide
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:39
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
void sort()
Sorts the hits by score.
bool isHigherScoreBetter() const
returns the peptide score orientation
Class for the enzymatic digestion of proteins represented as AASequence or String.
Definition: ProteaseDigestion.h:32
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true, bool allow_nterm_protein_cleavage=false, bool allow_random_asp_pro_cleavage=false) const
Variant of EnzymaticDigestion::isValidProduct() with support for n-term protein cleavage and random D...
Representation of a protein hit.
Definition: ProteinHit.h:34
const String & getAccession() const
returns the accession of the protein
Representation of a protein identification run.
Definition: ProteinIdentification.h:50
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:46
String identifier
Definition: FASTAFile.h:47
Is peptide evidence digestion product of some protein.
Definition: IDFilter.h:397
DigestionFilter(std::vector< FASTAFile::FASTAEntry > &entries, ProteaseDigestion &digestion, bool ignore_missed_cleavages, bool methionine_cleavage)
Definition: IDFilter.h:406
GetMatchingItems< PeptideEvidence, FASTAFile::FASTAEntry > accession_resolver_
Definition: IDFilter.h:401
void filterPeptideEvidences(std::vector< PeptideIdentification > &peptides)
Definition: IDFilter.h:438
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:411
bool ignore_missed_cleavages_
Definition: IDFilter.h:403
PeptideEvidence argument_type
Definition: IDFilter.h:398
ProteaseDigestion & digestion_
Definition: IDFilter.h:402
bool methionine_cleavage_
Definition: IDFilter.h:404
Builds a map index of data that have a String index to find matches and return the objects.
Definition: IDFilter.h:271
std::map< String, Entry * > ItemMap
Definition: IDFilter.h:273
GetMatchingItems()
Definition: IDFilter.h:284
ItemMap items
Definition: IDFilter.h:274
HitType argument_type
Definition: IDFilter.h:272
bool exists(const HitType &hit) const
Definition: IDFilter.h:293
const Entry & getValue(const PeptideEvidence &evidence) const
Definition: IDFilter.h:303
GetMatchingItems(std::vector< Entry > &records)
Definition: IDFilter.h:276
const String & getKey(const FASTAFile::FASTAEntry &entry) const
Definition: IDFilter.h:288
const String & getHitKey(const PeptideEvidence &p) const
Definition: IDFilter.h:298
Is this a decoy hit?
Definition: IDFilter.h:175
bool operator()(const HitType &hit) const
Definition: IDFilter.h:184
HitType argument_type
Definition: IDFilter.h:176
HasDecoyAnnotation()
Definition: IDFilter.h:180
Is the score of this hit at least as good as the given value?
Definition: IDFilter.h:75
bool operator()(const HitType &hit) const
Definition: IDFilter.h:85
double score
Definition: IDFilter.h:78
HitType argument_type
Definition: IDFilter.h:76
HasGoodScore(double score_, bool higher_score_better_)
Definition: IDFilter.h:81
bool higher_score_better
Definition: IDFilter.h:79
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:199
HasMatchingAccessionUnordered(const std::unordered_set< String > &accessions_)
Definition: IDFilter.h:204
HitType argument_type
Definition: IDFilter.h:200
const std::unordered_set< String > & accessions
Definition: IDFilter.h:202
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:208
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:223
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:218
Given a list of protein accessions, do any occur in the annotation(s) of this hit?
Definition: IDFilter.h:235
HitType argument_type
Definition: IDFilter.h:236
bool operator()(const PeptideHit &hit) const
Definition: IDFilter.h:244
bool operator()(const PeptideEvidence &evidence) const
Definition: IDFilter.h:259
const std::set< String > & accessions
Definition: IDFilter.h:238
HasMatchingAccession(const std::set< String > &accessions_)
Definition: IDFilter.h:240
bool operator()(const ProteinHit &hit) const
Definition: IDFilter.h:254
Is the rank of this hit below or at the given cut-off?
Definition: IDFilter.h:101
bool operator()(const HitType &hit) const
Definition: IDFilter.h:114
HitType argument_type
Definition: IDFilter.h:102
Size rank
Definition: IDFilter.h:104
HasMaxRank(Size rank_)
Definition: IDFilter.h:106
Is the list of hits of this peptide/protein ID empty?
Definition: IDFilter.h:452
bool operator()(const IdentificationType &id) const
Definition: IDFilter.h:455
IdentificationType argument_type
Definition: IDFilter.h:453
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:20