RDKit
Open-source cheminformatics and machine learning.
SubstructLibrary.h
Go to the documentation of this file.
1 // Copyright (c) 2017-2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #ifndef RDK_SUBSTRUCT_LIBRARY
32 #define RDK_SUBSTRUCT_LIBRARY
33 #include <RDGeneral/export.h>
34 #include <GraphMol/RDKitBase.h>
35 #include <GraphMol/MolPickler.h>
40 #include <DataStructs/BitOps.h>
41 #include <GraphMol/MolOps.h>
42 
43 namespace RDKit {
44 
46 
47 //! Base class API for holding molecules to substructure search.
48 /*!
49  This is an API that hides the implementation details used for
50  indexing molecules for substructure searching. It simply
51  provides an API for adding and getting molecules from a set.
52  */
54  public:
55  virtual ~MolHolderBase() {}
56 
57  //! Add a new molecule to the substructure search library
58  //! Returns the molecule's index in the library
59  virtual unsigned int addMol(const ROMol &m) = 0;
60 
61  // implementations should throw IndexError on out of range
62  virtual boost::shared_ptr<ROMol> getMol(unsigned int) const = 0;
63 
64  //! Get the current library size
65  virtual unsigned int size() const = 0;
66 };
67 
68 //! Concrete class that holds molecules in memory
69 /*!
70  This is currently one of the faster implementations.
71  However it is very memory intensive.
72 */
74  std::vector<boost::shared_ptr<ROMol>> mols;
75 
76  public:
77  MolHolder() : MolHolderBase(), mols() {}
78 
79  virtual unsigned int addMol(const ROMol &m) {
80  mols.push_back(boost::make_shared<ROMol>(m));
81  return size() - 1;
82  }
83 
84  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
85  if (idx >= mols.size()) throw IndexErrorException(idx);
86  return mols[idx];
87  }
88 
89  virtual unsigned int size() const {
90  return rdcast<unsigned int>(mols.size());
91  }
92 
93  std::vector<boost::shared_ptr<ROMol>> &getMols() { return mols; }
94  const std::vector<boost::shared_ptr<ROMol>> &getMols() const { return mols; }
95 };
96 
97 //! Concrete class that holds binary cached molecules in memory
98 /*!
99  This implementation uses quite a bit less memory than the
100  non cached implementation. However, due to the reduced speed
101  it should be used in conjunction with a pattern fingerprinter.
102 
103  See RDKit::FPHolder
104 */
106  std::vector<std::string> mols;
107 
108  public:
110 
111  virtual unsigned int addMol(const ROMol &m) {
112  mols.push_back(std::string());
113  MolPickler::pickleMol(m, mols.back());
114  return size() - 1;
115  }
116 
117  //! Adds a pickled binary molecule, no validity checking of the input
118  //! is done.
119  unsigned int addBinary(const std::string &pickle) {
120  mols.push_back(pickle);
121  return size() - 1;
122  }
123 
124  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
125  if (idx >= mols.size()) throw IndexErrorException(idx);
126  boost::shared_ptr<ROMol> mol(new ROMol);
127  MolPickler::molFromPickle(mols[idx], mol.get());
128  return mol;
129  }
130 
131  virtual unsigned int size() const {
132  return rdcast<unsigned int>(mols.size());
133  }
134 
135  std::vector<std::string> &getMols() { return mols; }
136  const std::vector<std::string> &getMols() const { return mols; }
137 };
138 
139 //! Concrete class that holds smiles strings in memory
140 /*!
141  This implementation uses quite a bit less memory than the
142  cached binary or uncached implementation. However, due to the
143  reduced speed it should be used in conjunction with a pattern
144  fingerprinter.
145 
146  See RDKit::FPHolder
147 */
149  : public MolHolderBase {
150  std::vector<std::string> mols;
151 
152  public:
154 
155  virtual unsigned int addMol(const ROMol &m) {
156  bool doIsomericSmiles = true;
157  mols.push_back(MolToSmiles(m, doIsomericSmiles));
158  return size() - 1;
159  }
160 
161  //! Add a smiles to the dataset, no validation is done
162  //! to the inputs.
163  unsigned int addSmiles(const std::string &smiles) {
164  mols.push_back(smiles);
165  return size() - 1;
166  }
167 
168  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
169  if (idx >= mols.size()) throw IndexErrorException(idx);
170 
171  boost::shared_ptr<ROMol> mol(SmilesToMol(mols[idx]));
172  return mol;
173  }
174 
175  virtual unsigned int size() const {
176  return rdcast<unsigned int>(mols.size());
177  }
178 
179  std::vector<std::string> &getMols() { return mols; }
180  const std::vector<std::string> &getMols() const { return mols; }
181 };
182 
183 //! Concrete class that holds trusted smiles strings in memory
184 /*!
185  A trusted smiles is essentially a smiles string that
186  RDKit has generated. This indicates that fewer
187  sanitization steps are required. See
188  http://rdkit.blogspot.com/2016/09/avoiding-unnecessary-work-and.html
189 
190  This implementation uses quite a bit less memory than the
191  cached binary or uncached implementation. However, due to the
192  reduced speed it should be used in conjunction with a pattern
193  fingerprinter.
194 
195  See RDKit::FPHolder
196 */
198  : public MolHolderBase {
199  std::vector<std::string> mols;
200 
201  public:
203 
204  virtual unsigned int addMol(const ROMol &m) {
205  bool doIsomericSmiles = true;
206  mols.push_back(MolToSmiles(m, doIsomericSmiles));
207  return size() - 1;
208  }
209 
210  //! Add a smiles to the dataset, no validation is done
211  //! to the inputs.
212  unsigned int addSmiles(const std::string &smiles) {
213  mols.push_back(smiles);
214  return size() - 1;
215  }
216 
217  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
218  if (idx >= mols.size()) throw IndexErrorException(idx);
219 
220  RWMol *m = SmilesToMol(mols[idx], 0, false);
221  m->updatePropertyCache();
222  return boost::shared_ptr<ROMol>(m);
223  }
224 
225  virtual unsigned int size() const {
226  return rdcast<unsigned int>(mols.size());
227  }
228 
229  std::vector<std::string> &getMols() { return mols; }
230  const std::vector<std::string> &getMols() const { return mols; }
231 };
232 
233 //! Base API for the fingerprinter used to rule out impossible matches
235  std::vector<ExplicitBitVect *> fps;
236 
237  public:
238  virtual ~FPHolderBase() {
239  for (size_t i = 0; i < fps.size(); ++i) delete fps[i];
240  }
241 
242  virtual unsigned int size() const {
243  return rdcast<unsigned int>(fps.size());
244  }
245 
246  //! Adds a molecule to the fingerprinter
247  unsigned int addMol(const ROMol &m) {
248  fps.push_back(makeFingerprint(m));
249  return rdcast<unsigned int>(fps.size() - 1);
250  }
251 
252  //! Adds a raw bit vector to the fingerprinter
253  unsigned int addFingerprint(const ExplicitBitVect &v) {
254  fps.push_back(new ExplicitBitVect(v));
255  return rdcast<unsigned int>(fps.size() - 1);
256  }
257 
258  //! Return false if a substructure search can never match the molecule
259  bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const {
260  if (idx >= fps.size()) throw IndexErrorException(idx);
261 
262  return AllProbeBitsMatch(query, *fps[idx]);
263  }
264 
265  //! Get the bit vector at the specified index (throws IndexError if out of
266  //! range)
267  const ExplicitBitVect &getFingerprint(unsigned int idx) const {
268  if (idx >= fps.size()) throw IndexErrorException(idx);
269  return *fps[idx];
270  }
271 
272  //! make the query vector
273  //! Caller owns the vector!
274  virtual ExplicitBitVect *makeFingerprint(const ROMol &m) const = 0;
275 
276  std::vector<ExplicitBitVect *> &getFingerprints() { return fps; }
277  const std::vector<ExplicitBitVect *> &getFingerprints() const { return fps; }
278 };
279 
280 //! Uses the pattern fingerprinter to rule out matches
282  public:
283  //! Caller owns the vector!
284  virtual ExplicitBitVect *makeFingerprint(const ROMol &m) const {
285  return PatternFingerprintMol(m, 2048);
286  }
287 };
288 
289 //! Substructure Search a library of molecules
290 /*! This class allows for multithreaded substructure searches of
291  large datasets.
292 
293  The implementations can use fingerprints to speed up searches
294  and have molecules cached as binary forms to reduce memory
295  usage.
296 
297  basic usage:
298  \code
299  SubstructLibrary lib;
300  lib.addMol(mol);
301  std::vector<unsigned int> results = lib.getMatches(query);
302  for(std::vector<unsigned int>::const_iterator matchIndex=results.begin();
303  matchIndex != results.end();
304  ++matchIndex) {
305  boost::shared_ptr<ROMol> match = lib.getMol(*matchIndex);
306  }
307  \endcode
308 
309  Using different mol holders and pattern fingerprints.
310 
311  \code
312  boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder = \
313  boost::make_shared<CachedTrustedSmilesMolHolder>();
314  boost::shared_ptr<PatternHolder> patternHolder = \
315  boost::make_shared<PatternHolder>();
316 
317  SubstructLibrary lib(molHolder, patternHolder);
318  lib.addMol(mol);
319  \endcode
320 
321  Cached molecule holders create molecules on demand. There are currently
322  three styles of cached molecules.
323 
324  CachedMolHolder: stores molecules in the rdkit binary format.
325  CachedSmilesMolHolder: stores molecules in smiles format.
326  CachedTrustedSmilesMolHolder: stores molecules as trusted (RDKit-generated) smiles.
327 
328  The CachedTrustedSmilesMolHolder is made to add molecules from
329  a trusted source. This makes the basic assumption that RDKit was
330  used to sanitize and canonicalize the smiles string. In practice
331  this is considerably faster than using arbitrary smiles strings since
332  certain assumptions can be made. Molecules generated from trusted
333  smiles do not have ring information (although this is created
334  in the molecule being searched if necessary).
335 
336  When loading from external data, as opposed to using the "addMol" API,
337  care must be taken to ensure that the pattern fingerprints and smiles
338  are synchronized.
339 
340  Each pattern holder has an API point for making its fingerprint. This
341  is useful to ensure that the pattern stored in the database will be
342  compatible with the patterns made when analyzing queries.
343 
344  \code
345  boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder = \
346  boost::make_shared<CachedTrustedSmilesMolHolder>();
347  boost::shared_ptr<PatternHolder> patternHolder = \
348  boost::make_shared<PatternHolder>();
349 
350  // the PatternHolder instance is able to make fingerprints.
351  // These, of course, can be read from a file. For demonstration
352  // purposes we construct them here.
353  const std::string trustedSmiles = "c1ccccc1";
354  ROMol *m = SmilesToMol(trustedSmiles);
355  const ExplicitBitVect *bitVector = patternHolder->makeFingerprint(*m);
356 
357  // The trusted smiles and bitVector can be read from any source.
358  // This is the fastest way to load a substruct library.
359  molHolder->addSmiles( trustedSmiles );
360  patternHolder->addFingerprint( *bitVector );
361  SubstructLibrary lib(molHolder, patternHolder);
362  delete m;
363  delete bitVector;
364  \endcode
365 
366 */
368  boost::shared_ptr<MolHolderBase> molholder;
369  boost::shared_ptr<FPHolderBase> fpholder;
370  MolHolderBase *mols; // used for a small optimization
371  FPHolderBase *fps;
372 
373  public:
375  : molholder(new MolHolder),
376  fpholder(),
377  mols(molholder.get()),
378  fps(nullptr) {}
379 
380  SubstructLibrary(boost::shared_ptr<MolHolderBase> molecules)
381  : molholder(molecules), fpholder(), mols(molholder.get()), fps(0) {}
382 
383  SubstructLibrary(boost::shared_ptr<MolHolderBase> molecules,
384  boost::shared_ptr<FPHolderBase> fingerprints)
385  : molholder(molecules),
386  fpholder(fingerprints),
387  mols(molholder.get()),
388  fps(fpholder.get()) {}
389 
390  SubstructLibrary(const std::string &pickle)
391  : molholder(new MolHolder),
392  fpholder(),
393  mols(molholder.get()),
394  fps(nullptr) {
395  initFromString(pickle);
396  }
397 
398  //! Get the underlying molecule holder implementation
399  boost::shared_ptr<MolHolderBase> &getMolHolder() { return molholder; }
400 
401  const boost::shared_ptr<MolHolderBase> &getMolHolder() const {
402  return molholder;
403  }
404 
405  //! Get the underlying fingerprint holder implementation
406  boost::shared_ptr<FPHolderBase> &getFpHolder() { return fpholder; }
407 
408  //! Get the underlying fingerprint holder implementation
409  const boost::shared_ptr<FPHolderBase> &getFpHolder() const {
410  return fpholder;
411  }
412 
413  const MolHolderBase &getMolecules() const {
414  PRECONDITION(mols, "Molecule holder NULL in SubstructLibrary");
415  return *mols;
416  }
417 
418  //! Get the underlying fingerprint implementation.
419  /*! Throws a value error if no fingerprints have been set */
421  if (!fps)
422  throw ValueErrorException("Substruct Library does not have fingerprints");
423  return *fps;
424  }
425 
426  const FPHolderBase &getFingerprints() const {
427  if (!fps)
428  throw ValueErrorException("Substruct Library does not have fingerprints");
429  return *fps;
430  }
431 
432  //! Add a molecule to the library
433  /*!
434  \param mol Molecule to add
435 
436  returns index for the molecule in the library
437  */
438  unsigned int addMol(const ROMol &mol);
439 
440  //! Get the matching indices for the query
441  /*!
442  \param query Query to match against molecules
443  \param recursionPossible flags whether or not recursive matches are allowed
444  [ default true ]
445  \param useChirality use atomic CIP codes as part of the comparison [
446  default true ]
447  \param useQueryQueryMatches if set, the contents of atom and bond queries
448  will be used as part of the matching
449  [ default false ]
450  \param numThreads If -1 use all available processors [default -1]
451  \param maxResults Maximum results to return, -1 means return all [default
452  -1]
453  */
454  std::vector<unsigned int> getMatches(const ROMol &query,
455  bool recursionPossible = true,
456  bool useChirality = true,
457  bool useQueryQueryMatches = false,
458  int numThreads = -1,
459  int maxResults = -1);
460  //! Get the matching indices for the query between the given indices
461  /*!
462  \param query Query to match against molecules
463  \param startIdx Start index of the search
464  \param endIdx Ending idx (non-inclusive) of the search.
465  \param recursionPossible flags whether or not recursive matches are allowed
466  [ default true ]
467  \param useChirality use atomic CIP codes as part of the comparison [
468  default true ]
469  \param useQueryQueryMatches if set, the contents of atom and bond queries
470  will be used as part of the matching
471  [ default false ]
472  \param numThreads If -1 use all available processors [default -1]
473  \param maxResults Maximum results to return, -1 means return all [default
474  -1]
475  */
476  std::vector<unsigned int> getMatches(
477  const ROMol &query, unsigned int startIdx, unsigned int endIdx,
478  bool recursionPossible = true, bool useChirality = true,
479  bool useQueryQueryMatches = false, int numThreads = -1,
480  int maxResults = -1);
481 
482  //! Return the number of matches for the query
483  /*!
484  \param query Query to match against molecules
485  \param recursionPossible flags whether or not recursive matches are allowed
486  [ default true ]
487  \param useChirality use atomic CIP codes as part of the comparison [
488  default true ]
489  \param useQueryQueryMatches if set, the contents of atom and bond queries
490  will be used as part of the matching
491  [ default false ]
492  \param numThreads If -1 use all available processors [default -1]
493  */
494  unsigned int countMatches(const ROMol &query, bool recursionPossible = true,
495  bool useChirality = true,
496  bool useQueryQueryMatches = false,
497  int numThreads = -1);
498  //! Return the number of matches for the query between the given indices
499  /*!
500  \param query Query to match against molecules
501  \param startIdx Start index of the search
502  \param endIdx Ending idx (non-inclusive) of the search.
503  \param recursionPossible flags whether or not recursive matches are allowed
504  [ default true ]
505  \param useChirality use atomic CIP codes as part of the comparison [
506  default true ]
507  \param useQueryQueryMatches if set, the contents of atom and bond queries
508  will be used as part of the matching
509  [ default false ]
510  \param numThreads If -1 use all available processors [default -1]
511  */
512  unsigned int countMatches(const ROMol &query, unsigned int startIdx,
513  unsigned int endIdx, bool recursionPossible = true,
514  bool useChirality = true,
515  bool useQueryQueryMatches = false,
516  int numThreads = -1);
517 
518  //! Returns true if any match exists for the query
519  /*!
520  \param query Query to match against molecules
521  \param recursionPossible flags whether or not recursive matches are allowed
522  [ default true ]
523  \param useChirality use atomic CIP codes as part of the comparison [
524  default true ]
525  \param useQueryQueryMatches if set, the contents of atom and bond queries
526  will be used as part of the matching
527  [ default false ]
528  \param numThreads If -1 use all available processors [default -1]
529  */
530  bool hasMatch(const ROMol &query, bool recursionPossible = true,
531  bool useChirality = true, bool useQueryQueryMatches = false,
532  int numThreads = -1);
533  //! Returns true if any match exists for the query between the specified
534  //! indices
535  /*!
536  \param query Query to match against molecules
537  \param startIdx Start index of the search
538  \param endIdx Ending idx (inclusive) of the search.
539  \param recursionPossible flags whether or not recursive matches are allowed
540  [ default true ]
541  \param useChirality use atomic CIP codes as part of the comparison [
542  default true ]
543  \param useQueryQueryMatches if set, the contents of atom and bond queries
544  will be used as part of the matching
545  [ default false ]
546  \param numThreads If -1 use all available processors [default -1]
547  */
548  bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx,
549  bool recursionPossible = true, bool useChirality = true,
550  bool useQueryQueryMatches = false, int numThreads = -1);
551 
552  //! Returns the molecule at the given index
553  /*!
554  \param idx Index of the molecule in the library
555  */
556  boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
557  // expects implementation to throw IndexError if out of range
558  PRECONDITION(mols, "molholder is null in SubstructLibrary");
559  return mols->getMol(idx);
560  }
561 
562  //! Returns the molecule at the given index
563  /*!
564  \param idx Index of the molecule in the library
565  */
566  boost::shared_ptr<ROMol> operator[](unsigned int idx) {
567  // expects implementation to throw IndexError if out of range
568  PRECONDITION(mols, "molholder is null in SubstructLibrary");
569  return mols->getMol(idx);
570  }
571 
572  //! return the number of molecules in the library
573  unsigned int size() const {
574  PRECONDITION(mols, "molholder is null in SubstructLibrary");
575  return rdcast<unsigned int>(molholder->size());
576  }
577 
578  //! access required for serialization
579  void resetHolders() {
580  mols = molholder.get();
581  fps = fpholder.get();
582  }
583 
584  //! serializes (pickles) to a stream
585  void toStream(std::ostream &ss) const;
586  //! returns a string with a serialized (pickled) representation
587  std::string Serialize() const;
588  //! initializes from a stream pickle
589  void initFromStream(std::istream &ss);
590  //! initializes from a string pickle
591  void initFromString(const std::string &text);
592 };
593 } // namespace RDKit
594 
596 #endif
RDKit::MolHolder::size
virtual unsigned int size() const
Get the current library size.
Definition: SubstructLibrary.h:89
RDKit::CachedTrustedSmilesMolHolder::CachedTrustedSmilesMolHolder
CachedTrustedSmilesMolHolder()
Definition: SubstructLibrary.h:202
RDKit::FPHolderBase::makeFingerprint
virtual ExplicitBitVect * makeFingerprint(const ROMol &m) const =0
RDKit::CachedMolHolder::getMols
const std::vector< std::string > & getMols() const
Definition: SubstructLibrary.h:136
RDKit::CachedMolHolder::size
virtual unsigned int size() const
Get the current library size.
Definition: SubstructLibrary.h:131
RDKit::FPHolderBase::getFingerprints
const std::vector< ExplicitBitVect * > & getFingerprints() const
Definition: SubstructLibrary.h:277
RDKit::SubstructLibrary::hasMatch
bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1)
RDKit::EnumerationStrategyPickler::pickle
RDKIT_CHEMREACTIONS_EXPORT void pickle(const boost::shared_ptr< EnumerationStrategyBase > &enumerator, std::ostream &ss)
pickles a EnumerationStrategy and adds the results to a stream ss
RDKit::CachedSmilesMolHolder::size
virtual unsigned int size() const
Get the current library size.
Definition: SubstructLibrary.h:175
RDKit::CachedTrustedSmilesMolHolder
Concrete class that holds trusted smiles strings in memory.
Definition: SubstructLibrary.h:198
RDKit::SubstructLibraryCanSerialize
RDKIT_SUBSTRUCTLIBRARY_EXPORT bool SubstructLibraryCanSerialize()
BitOps.h
Contains general bit-comparison and similarity operations.
RDKit::MolHolder::MolHolder
MolHolder()
Definition: SubstructLibrary.h:77
MolOps.h
RDKit::CachedTrustedSmilesMolHolder::getMols
std::vector< std::string > & getMols()
Definition: SubstructLibrary.h:229
RDKit::CachedTrustedSmilesMolHolder::addSmiles
unsigned int addSmiles(const std::string &smiles)
Definition: SubstructLibrary.h:212
Fingerprints.h
RDKit::MolToSmiles
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles=true, bool doKekule=false, int rootedAtAtom=-1, bool canonical=true, bool allBondsExplicit=false, bool allHsExplicit=false, bool doRandom=false)
returns canonical SMILES for a molecule
RDKit::RWMol
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
SmilesWrite.h
RDKit::CachedSmilesMolHolder::addMol
virtual unsigned int addMol(const ROMol &m)
Definition: SubstructLibrary.h:155
RDKit::SubstructLibrary::getFpHolder
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying molecule holder implementation.
Definition: SubstructLibrary.h:406
SmilesParse.h
RDKit::SubstructLibrary::hasMatch
bool hasMatch(const ROMol &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1)
Returns true if any match exists for the query.
RDKit::SubstructLibrary
Substructure Search a library of molecules.
Definition: SubstructLibrary.h:367
RDKit::SubstructLibrary::getMatches
std::vector< unsigned int > getMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1)
Get the matching indices for the query between the given indices.
RDKit::SubstructLibrary::initFromStream
void initFromStream(std::istream &ss)
initializes from a stream pickle
RDKit::MolHolderBase::size
virtual unsigned int size() const =0
Get the current library size.
RDKit::FPHolderBase
Base FPI for the fingerprinter used to rule out impossible matches.
Definition: SubstructLibrary.h:234
RDKit::SubstructLibrary::operator[]
boost::shared_ptr< ROMol > operator[](unsigned int idx)
Returns the molecule at the given index.
Definition: SubstructLibrary.h:566
RDKit::FPHolderBase::getFingerprint
const ExplicitBitVect & getFingerprint(unsigned int idx) const
Definition: SubstructLibrary.h:267
ExplicitBitVect.h
SubstructLibrarySerialization.h
RDKit::MolHolder::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Definition: SubstructLibrary.h:84
RDKit::CachedTrustedSmilesMolHolder::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Definition: SubstructLibrary.h:217
RDKit::SubstructLibrary::getMolHolder
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
Definition: SubstructLibrary.h:399
RDKit::MolPickler::molFromPickle
static void molFromPickle(const std::string &pickle, ROMol *mol)
constructs a molecule from a pickle stored in a string
RDKit::PatternHolder::makeFingerprint
virtual ExplicitBitVect * makeFingerprint(const ROMol &m) const
Caller owns the vector!
Definition: SubstructLibrary.h:284
RDKit::MolHolder::getMols
const std::vector< boost::shared_ptr< ROMol > > & getMols() const
Definition: SubstructLibrary.h:94
RDKit::SmilesToMol
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
RDKit::SubstructLibrary::Serialize
std::string Serialize() const
returns a string with a serialized (pickled) representation
RDKit::FPHolderBase::getFingerprints
std::vector< ExplicitBitVect * > & getFingerprints()
Definition: SubstructLibrary.h:276
RDKit::MolHolderBase::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int) const =0
RDKit::MolHolderBase
Base class API for holding molecules to substructure search.
Definition: SubstructLibrary.h:53
RDKit::ROMol
Definition: ROMol.h:171
RDKit::SubstructLibrary::getFingerprints
const FPHolderBase & getFingerprints() const
Definition: SubstructLibrary.h:426
RDKit::CachedSmilesMolHolder::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Definition: SubstructLibrary.h:168
RDKitBase.h
pulls in the core RDKit functionality
RDKit::FPHolderBase::~FPHolderBase
virtual ~FPHolderBase()
Definition: SubstructLibrary.h:238
RDKit::FPHolderBase::addMol
unsigned int addMol(const ROMol &m)
Adds a molecule to the fingerprinter.
Definition: SubstructLibrary.h:247
RDKit::CachedSmilesMolHolder::getMols
const std::vector< std::string > & getMols() const
Definition: SubstructLibrary.h:180
RDKit::SubstructLibrary::getMatches
std::vector< unsigned int > getMatches(const ROMol &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1)
Get the matching indices for the query.
AllProbeBitsMatch
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch(const char *probe, const char *ref)
RDKit::CachedTrustedSmilesMolHolder::getMols
const std::vector< std::string > & getMols() const
Definition: SubstructLibrary.h:230
RDKit::CachedSmilesMolHolder
Concrete class that holds smiles strings in memory.
Definition: SubstructLibrary.h:149
RDKit::SubstructLibrary::getMolHolder
const boost::shared_ptr< MolHolderBase > & getMolHolder() const
Definition: SubstructLibrary.h:401
ValueErrorException
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:40
RDKit::SubstructLibrary::size
unsigned int size() const
return the number of molecules in the library
Definition: SubstructLibrary.h:573
RDKit::SubstructLibrary::SubstructLibrary
SubstructLibrary(const std::string &pickle)
Definition: SubstructLibrary.h:390
RDKit::CachedSmilesMolHolder::CachedSmilesMolHolder
CachedSmilesMolHolder()
Definition: SubstructLibrary.h:153
RDKit::MolHolder
Concrete class that holds molecules in memory.
Definition: SubstructLibrary.h:73
RDKit::SubstructLibrary::initFromString
void initFromString(const std::string &text)
initializes from a string pickle
RDKit::CachedTrustedSmilesMolHolder::addMol
virtual unsigned int addMol(const ROMol &m)
Definition: SubstructLibrary.h:204
RDKit::ROMol::updatePropertyCache
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
RDKit::SubstructLibrary::SubstructLibrary
SubstructLibrary()
Definition: SubstructLibrary.h:374
RDKit::SubstructLibrary::resetHolders
void resetHolders()
access required for serialization
Definition: SubstructLibrary.h:579
RDKit::MolHolderBase::addMol
virtual unsigned int addMol(const ROMol &m)=0
RDKit::CachedMolHolder::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Definition: SubstructLibrary.h:124
RDKit::MolPickler::pickleMol
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
RDKit::SubstructLibrary::toStream
void toStream(std::ostream &ss) const
serializes (pickles) to a stream
RDKit::FPHolderBase::size
virtual unsigned int size() const
Definition: SubstructLibrary.h:242
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::PatternFingerprintMol
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=0, ExplicitBitVect *setOnlyBits=0)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns.
IndexErrorException
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
Definition: Exceptions.h:19
MolPickler.h
RDKit::CachedMolHolder::getMols
std::vector< std::string > & getMols()
Definition: SubstructLibrary.h:135
RDKit::PatternHolder
Uses the pattern fingerprinter to rule out matches.
Definition: SubstructLibrary.h:281
RDKit::SubstructLibrary::getFingerprints
FPHolderBase & getFingerprints()
Get the underlying fingerprint implementation.
Definition: SubstructLibrary.h:420
RDKit::SubstructLibrary::countMatches
unsigned int countMatches(const ROMol &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1)
Return the number of matches for the query.
RDKit::SubstructLibrary::countMatches
unsigned int countMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1)
Return the number of matches for the query between the given indices.
RDKit::CachedMolHolder::addMol
virtual unsigned int addMol(const ROMol &m)
Definition: SubstructLibrary.h:111
RDKit::FPHolderBase::passesFilter
bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const
Return false if a substructure search can never match the molecule.
Definition: SubstructLibrary.h:259
RDKit::SubstructLibrary::addMol
unsigned int addMol(const ROMol &mol)
Add a molecule to the library.
PRECONDITION
#define PRECONDITION(expr, mess)
Definition: Invariant.h:110
RDKit::SubstructLibrary::SubstructLibrary
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints)
Definition: SubstructLibrary.h:383
RDKit::SubstructLibrary::getFpHolder
const boost::shared_ptr< FPHolderBase > & getFpHolder() const
Get the underlying molecule holder implementation.
Definition: SubstructLibrary.h:409
RDKit::MolHolder::getMols
std::vector< boost::shared_ptr< ROMol > > & getMols()
Definition: SubstructLibrary.h:93
RDKit::CachedMolHolder
Concrete class that holds binary cached molecules in memory.
Definition: SubstructLibrary.h:105
RDKit::CachedMolHolder::CachedMolHolder
CachedMolHolder()
Definition: SubstructLibrary.h:109
RDKit::CachedTrustedSmilesMolHolder::size
virtual unsigned int size() const
Get the current library size.
Definition: SubstructLibrary.h:225
RDKit::CachedMolHolder::addBinary
unsigned int addBinary(const std::string &pickle)
Definition: SubstructLibrary.h:119
RDKit::MolHolderBase::~MolHolderBase
virtual ~MolHolderBase()
Definition: SubstructLibrary.h:55
RDKit::SubstructLibrary::getMolecules
const MolHolderBase & getMolecules() const
Definition: SubstructLibrary.h:413
RDKit::SubstructLibrary::getMol
boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Returns the molecule at the given index.
Definition: SubstructLibrary.h:556
RDKit::FPHolderBase::addFingerprint
unsigned int addFingerprint(const ExplicitBitVect &v)
Adds a raw bit vector to the fingerprinter.
Definition: SubstructLibrary.h:253
RDKit::CachedSmilesMolHolder::getMols
std::vector< std::string > & getMols()
Definition: SubstructLibrary.h:179
RDKit::CachedSmilesMolHolder::addSmiles
unsigned int addSmiles(const std::string &smiles)
Definition: SubstructLibrary.h:163
RDKit::MolHolder::addMol
virtual unsigned int addMol(const ROMol &m)
Definition: SubstructLibrary.h:79
RDKIT_SUBSTRUCTLIBRARY_EXPORT
#define RDKIT_SUBSTRUCTLIBRARY_EXPORT
Definition: export.h:684
ExplicitBitVect
a class for bit vectors that are densely occupied
Definition: ExplicitBitVect.h:29
export.h
RDKit::SubstructLibrary::SubstructLibrary
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules)
Definition: SubstructLibrary.h:380