RDKit
Open-source cheminformatics and machine learning.
SubstructureCache.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #pragma once
12 #include <list>
13 #include <vector>
14 #include <string>
15 #include <stdexcept>
16 #include "../RDKitBase.h"
17 //#include "../Fingerprints/MorganFingerprints.h"
18 #include "Graph.h"
19 #include "Seed.h"
20 #include "DebugTrace.h" // algorithm filter definitions
21 
22 namespace RDKit {
23 namespace FMCS {
25  public:
26 #pragma pack(push, 1)
28  typedef unsigned long long TValue;
29  TValue Value;
30 
31  public:
32  KeyNumericMetrics() : Value(0) {}
33  };
34 #pragma pack(pop)
35 
36  struct HashKey {
38 
39  public:
40  void computeKey(const Seed& seed,
41  const std::vector<unsigned>& queryAtomLabels,
42  const std::vector<unsigned>& queryBondLabels) {
43  computeMorganCodeHash(seed, queryAtomLabels, queryBondLabels);
44  }
45 
46  private:
47  void computeMorganCodeHash(const Seed& seed,
48  const std::vector<unsigned>& queryAtomLabels,
49  const std::vector<unsigned>& queryBondLabels) {
50  size_t nv = seed.getNumAtoms();
51  size_t ne = seed.getNumBonds();
52  std::vector<unsigned long> currCodes(nv);
53  std::vector<unsigned long> prevCodes(nv);
54  size_t nIterations = seed.getNumBonds();
55  if (nIterations > 5) nIterations = 5;
56 
57  for (unsigned seedAtomIdx = 0; seedAtomIdx < seed.getNumAtoms();
58  seedAtomIdx++)
59  currCodes[seedAtomIdx] =
60  queryAtomLabels[seed.MoleculeFragment.AtomsIdx[seedAtomIdx]];
61 
62  for (size_t iter = 0; iter < nIterations; iter++) {
63  for (size_t i = 0; i < nv; i++) prevCodes[i] = currCodes[i];
64 
65  for (size_t seedBondIdx = 0; seedBondIdx < ne; seedBondIdx++) {
66  const Bond* bond = seed.MoleculeFragment.Bonds[seedBondIdx];
67  unsigned order =
68  queryBondLabels[seed.MoleculeFragment.BondsIdx[seedBondIdx]];
69  unsigned atom1 =
71  ->second;
72  unsigned atom2 =
74  ->second;
75  unsigned v1 = prevCodes[atom1];
76  unsigned v2 = prevCodes[atom2];
77 
78  currCodes[atom1] += v2 * v2 + (v2 + 23) * (order + 1721);
79  currCodes[atom2] += v1 * v1 + (v1 + 23) * (order + 1721);
80  }
81  }
82 
83  KeyNumericMetrics::TValue result = 0;
84  for (unsigned seedAtomIdx = 0; seedAtomIdx < nv; seedAtomIdx++) {
85  unsigned long code = currCodes[seedAtomIdx];
86  result += code * (code + 6849) + 29;
87  }
88 
89  NumericMetrics.Value = result;
90  }
91 
92  // not implemented yet:
93  /*
94  void computeFingerprint(const Seed& seed)
95  {
96  unsigned int radius = seed.getNumBonds();
97  if (radius > 5)
98  radius = 5;
99  ExplicitBitVect *mf =
100  RDKit::MorganFingerprints::getFingerprintAsBitVect(seed.GraphTopology,
101  radius); //SLOW !!!
102  // ...
103  delete mf;
104  NumericMetrics.Field.hasFingerprint = 1;
105  }
106  */
107  };
108 
109  typedef HashKey TKey;
110  typedef std::list<FMCS::Graph> TIndexEntry; // hash-key is not unique key
111  private:
112  std::vector<TIndexEntry> ValueStorage;
113  std::map<KeyNumericMetrics::TValue, size_t> NumericIndex; // TIndexEntry
114  public:
115  // returns computed key, and pointer to index entry with a set of subgraphs
116  // corresponding to the key if found.
117  // then caller must find exactly matched subgraph in the result set with own
118  // search algorithm,
119  // including a resolving of collisions of hash key
120  TIndexEntry* find(const Seed& seed,
121  const std::vector<unsigned>& queryAtomLabels,
122  const std::vector<unsigned>& queryBondLabels,
123  TKey& key) { // compute key and find entry
124  key.computeKey(seed, queryAtomLabels, queryBondLabels);
125  std::map<KeyNumericMetrics::TValue, size_t>::const_iterator entryit =
126  NumericIndex.find(key.NumericMetrics.Value);
127  if (NumericIndex.end() != entryit) return &ValueStorage[entryit->second];
128  return NULL; // not found
129  }
130 
131  // if find() did not found any entry for this key of seed a new entry will be
132  // created
133  void add(const Seed& seed, TKey& key,
134  TIndexEntry* entry) { // "compute" value and store it in NEW entry
135  // if not found
136  if (!entry) {
137  try {
138  ValueStorage.push_back(TIndexEntry());
139  } catch (...) {
140  return; // not enought memory room to add the item, but it's just a
141  // cache
142  }
143  entry = &ValueStorage.back();
144  }
145  entry->push_back(seed.Topology);
146 
147  if (!NumericIndex.insert(std::pair<KeyNumericMetrics::TValue, size_t>(
148  key.NumericMetrics.Value,
149  ValueStorage.size() - 1)).second)
150  return; // not enought memory room to add the item, but it is just cache
151  }
152 
153  size_t keyssize() const { // for statistics only
154  return ValueStorage.size();
155  }
156 
157  size_t fullsize() const { // for statistics only
158  size_t n = 0;
159  for (std::vector<TIndexEntry>::const_iterator e = ValueStorage.begin();
160  e != ValueStorage.end(); e++)
161  n += e->size();
162  return n;
163  }
164 };
165 }
166 }
void computeKey(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels)
std::vector< unsigned > BondsIdx
Definition: Seed.h:28
std::map< unsigned, unsigned > SeedAtomIdxMap
Definition: Seed.h:29
void add(const Seed &seed, TKey &key, TIndexEntry *entry)
#define RDKIT_FMCS_EXPORT
Definition: export.h:190
unsigned getNumAtoms() const
Definition: Seed.h:126
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:182
Graph Topology
Definition: Seed.h:71
Std stuff.
Definition: Atom.h:30
class for representing a bond
Definition: Bond.h:47
std::list< FMCS::Graph > TIndexEntry
unsigned int getBeginAtomIdx() const
returns the index of our begin Atom
Definition: Bond.h:175
MolFragment MoleculeFragment
Definition: Seed.h:70
std::vector< const Bond * > Bonds
Definition: Seed.h:26
TIndexEntry * find(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels, TKey &key)
std::vector< unsigned > AtomsIdx
Definition: Seed.h:27
unsigned getNumBonds() const
Definition: Seed.h:127