RDKit
Open-source cheminformatics and machine learning.
SubstructureCache.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #pragma once
11 #include <list>
12 #include <vector>
13 #include <string>
14 #include <stdexcept>
15 #include "../RDKitBase.h"
16 //#include "../Fingerprints/MorganFingerprints.h"
17 #include "Graph.h"
18 #include "Seed.h"
19 #include "DebugTrace.h" // algorithm filter definitions
20 
21 namespace RDKit {
22  namespace FMCS {
24  public:
25 
26 #pragma pack(push,1)
28  typedef unsigned long long TValue;
29  TValue Value;
30  public:
31  KeyNumericMetrics() : Value(0) {}
32  };
33 #pragma pack(pop)
34 
35  struct HashKey {
37  public:
38  void computeKey (const Seed& seed, const std::vector<unsigned>& queryAtomLabels, const std::vector<unsigned>& queryBondLabels) {
39  computeMorganCodeHash(seed, queryAtomLabels, queryBondLabels);
40  }
41  private:
42  void computeMorganCodeHash(const Seed& seed
43  , const std::vector<unsigned>& queryAtomLabels, const std::vector<unsigned>& queryBondLabels) {
44  size_t nv = seed.getNumAtoms();
45  size_t ne = seed.getNumBonds();
46  std::vector<unsigned long> currCodes(nv);
47  std::vector<unsigned long> prevCodes(nv);
48  size_t nIterations = seed.getNumBonds();
49  if (nIterations > 5)
50  nIterations = 5;
51 
52  for(unsigned seedAtomIdx = 0; seedAtomIdx < seed.getNumAtoms(); seedAtomIdx++)
53  currCodes[seedAtomIdx] = queryAtomLabels[seed.MoleculeFragment.AtomsIdx[seedAtomIdx]];
54 
55  for (size_t iter = 0; iter < nIterations; iter++) {
56  for (size_t i = 0; i < nv; i++)
57  prevCodes[i] = currCodes[i];
58 
59  for (size_t seedBondIdx= 0; seedBondIdx< ne; seedBondIdx++) {
60  const Bond* bond = seed.MoleculeFragment.Bonds[seedBondIdx];
61  unsigned order = queryBondLabels[seed.MoleculeFragment.BondsIdx[seedBondIdx]];
62  unsigned atom1= seed.MoleculeFragment.SeedAtomIdxMap.find(bond->getBeginAtomIdx())->second;
63  unsigned atom2= seed.MoleculeFragment.SeedAtomIdxMap.find(bond->getEndAtomIdx ())->second;
64  unsigned v1 = prevCodes[atom1];
65  unsigned v2 = prevCodes[atom2];
66 
67  currCodes[atom1] += v2*v2 + (v2 + 23) * (order + 1721);
68  currCodes[atom2] += v1*v1 + (v1 + 23) * (order + 1721);
69  }
70  }
71 
72  KeyNumericMetrics::TValue result = 0;
73  for(unsigned seedAtomIdx = 0; seedAtomIdx < nv; seedAtomIdx++) {
74  unsigned long code = currCodes[seedAtomIdx];
75  result += code * (code + 6849) + 29;
76  }
77 
78  NumericMetrics.Value = result;
79 
80  }
81 
82 //not implemented yet:
83  /*
84  void computeFingerprint(const Seed& seed)
85  {
86  unsigned int radius = seed.getNumBonds();
87  if (radius > 5)
88  radius = 5;
89  ExplicitBitVect *mf = RDKit::MorganFingerprints::getFingerprintAsBitVect(seed.GraphTopology, radius); //SLOW !!!
90  // ...
91  delete mf;
92  NumericMetrics.Field.hasFingerprint = 1;
93  }
94  */
95  };
96 
97  typedef HashKey TKey;
98  typedef std::list<FMCS::Graph> TIndexEntry; // hash-key is not unique key
99  private:
100  std::vector<TIndexEntry> ValueStorage;
101  std::map<KeyNumericMetrics::TValue, size_t> NumericIndex; // TIndexEntry
102  public:
103  // returns computed key, and pointer to index entry with a set of subgraphs corresponding to the key if found.
104  // then caller must find exactly matched subgraph in the result set with own search algorithm,
105  // including a resolving of collisions of hash key
106  TIndexEntry* find(const Seed& seed, const std::vector<unsigned>& queryAtomLabels
107  , const std::vector<unsigned>& queryBondLabels
108  , TKey& key) { // compute key and find entry
109  key.computeKey(seed, queryAtomLabels, queryBondLabels);
110  std::map<KeyNumericMetrics::TValue, size_t>::const_iterator
111  entryit = NumericIndex.find(key.NumericMetrics.Value);
112  if(NumericIndex.end() != entryit)
113  return & ValueStorage[entryit->second];
114  return NULL; // not found
115  }
116 
117  //if find() did not found any entry for this key of seed a new entry will be created
118  void add(const Seed& seed, TKey& key, TIndexEntry* entry) { // "compute" value and store it in NEW entry if not found
119  if(!entry) {
120  try {
121  ValueStorage.push_back(TIndexEntry());
122  } catch(...) {
123  return; // not enought memory room to add the item, but it's just a cache
124  }
125  entry = &ValueStorage.back();
126  }
127  entry->push_back(seed.Topology);
128 
129  if(!NumericIndex.insert( std::pair<KeyNumericMetrics::TValue, size_t>(key.NumericMetrics.Value, ValueStorage.size()-1) ).second)
130  return; // not enought memory room to add the item, but it is just cache
131  }
132 
133 
134  size_t keyssize() const { // for statistics only
135  return ValueStorage.size();
136  }
137 
138  size_t fullsize() const { // for statistics only
139  size_t n = 0;
140  for(std::vector<TIndexEntry>::const_iterator e = ValueStorage.begin(); e != ValueStorage.end(); e++)
141  n += e->size();
142  return n;
143  }
144  };
145  }
146 }
void computeKey(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels)
std::vector< unsigned > BondsIdx
Definition: Seed.h:27
std::map< unsigned, unsigned > SeedAtomIdxMap
Definition: Seed.h:28
void add(const Seed &seed, TKey &key, TIndexEntry *entry)
unsigned int getBeginAtomIdx() const
returns the index of our begin Atom
Definition: Bond.h:170
unsigned getNumBonds() const
Definition: Seed.h:105
Graph Topology
Definition: Seed.h:51
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:177
class for representing a bond
Definition: Bond.h:46
std::list< FMCS::Graph > TIndexEntry
MolFragment MoleculeFragment
Definition: Seed.h:50
std::vector< const Bond * > Bonds
Definition: Seed.h:25
TIndexEntry * find(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels, TKey &key)
unsigned getNumAtoms() const
Definition: Seed.h:102
std::vector< unsigned > AtomsIdx
Definition: Seed.h:26