RDKit
Open-source cheminformatics and machine learning.
DuplicatedSeedCache.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #pragma once
11 #include <map>
12 #include <vector>
13 #include <stdexcept>
14 #include <algorithm>
15 
16 namespace RDKit {
17  namespace FMCS {
19  public:
20  typedef bool TValue;
// Key identifying a seed by the atom and bond indices it contains.
//
// Both index lists are kept in ascending order by addAtom()/addBond(), so two
// keys built from the same indices compare equal regardless of the order in
// which the indices were added. Adding an index that is already present
// stores it a second time (no de-duplication is performed).
class TKey {
    std::vector<unsigned> AtomIdx;  // ascending, maintained by addAtom()
    std::vector<unsigned> BondIdx;  // ascending, maintained by addBond()
public:
    // Number of atom indices stored in the key.
    size_t getNumAtoms() const {
        return AtomIdx.size();
    }

    // Number of bond indices stored in the key.
    size_t getNumBonds() const {
        return BondIdx.size();
    }

    // Inserts atom index i at its sorted position.
    void addAtom(unsigned i) {
        AtomIdx.insert(std::lower_bound(AtomIdx.begin(), AtomIdx.end(), i), i);
    }

    // Inserts bond index i at its sorted position.
    void addBond(unsigned i) {
        BondIdx.insert(std::lower_bound(BondIdx.begin(), BondIdx.end(), i), i);
    }

    // True when both keys hold exactly the same atom and bond indices.
    //
    // std::vector's operator== compares the sizes before the elements, so
    // the cheap size rejection is preserved. Unlike memcmp on &vec[0], it is
    // well defined for empty keys and needs no <cstring>.
    bool operator==(const TKey& right) const {
        return AtomIdx == right.AtomIdx && BondIdx == right.BondIdx;
    }

    // Strict weak ordering, consistent with operator==.
    //
    // Keys are ordered by atom count, then by bond count. Keys of identical
    // shape are ordered by lexicographic comparison of the atom indices and
    // then of the bond indices. The tie-break compares index values
    // numerically, so it does not depend on host byte order (a byte-wise
    // memcmp of unsigned values does) and it is safe for empty keys.
    bool operator<(const TKey& right) const {
        if (AtomIdx.size() != right.AtomIdx.size())
            return AtomIdx.size() < right.AtomIdx.size();
        if (BondIdx.size() != right.BondIdx.size())
            return BondIdx.size() < right.BondIdx.size();

        // Same shape: fall back to comparing the contents.
        if (AtomIdx != right.AtomIdx)
            return AtomIdx < right.AtomIdx;
        return BondIdx < right.BondIdx;
    }
};
69  private:
70  std::map<TKey, TValue> Index;
71  size_t MaxAtoms; // max key in the cache for fast failed find
72  public:
73  DuplicatedSeedCache() : MaxAtoms(0) {}
74  void clear() {
75  Index.clear();
76  MaxAtoms=0;
77  }
78 
79  bool find(const TKey& key, TValue& value)const {
80  value = false;
81  if(key.getNumAtoms() > MaxAtoms)
82  return false;// fast check if key greater then max key in the cache
83 
84  std::map<TKey, TValue>::const_iterator entryit = Index.find(key);
85  if(Index.end() != entryit)
86  value = entryit->second;
87  return Index.end() != entryit;
88  }
89 
90  void add(const TKey& key, TValue found=true) {
91  if(key.getNumAtoms() > MaxAtoms)
92  MaxAtoms = key.getNumAtoms();
93 
94  Index.insert( std::pair<TKey, bool>(key, found));
95  }
96 
97  size_t size()const {
98  return Index.size(); // for statistics only
99  }
100  };
101 
102  }
103 }
bool operator==(const TKey &right) const
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
void add(const TKey &key, TValue found=true)
bool operator<(const TKey &right) const
bool find(const TKey &key, TValue &value) const