RDKit
Open-source cheminformatics and machine learning.
EnumerationStrategyBase.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 #include <RDGeneral/export.h>
33 #ifndef ENUMERATION_STRATEGY_H
34 #define ENUMERATION_STRATEGY_H
35 
36 #include "EnumerateTypes.h"
37 #include "../Reaction.h"
38 #include <vector>
40 #include <boost/cstdint.hpp>
41 #ifdef RDK_USE_BOOST_SERIALIZATION
42 #include <boost/serialization/assume_abstract.hpp>
43 #include <boost/serialization/vector.hpp>
44 // the next two includes need to be there for boost 1.56
45 #include <boost/serialization/singleton.hpp>
46 #include <boost/serialization/extended_type_info.hpp>
47 #include <boost/serialization/shared_ptr.hpp>
48 #endif
50 
51 #include <GraphMol/RDKitBase.h>
52 
53 namespace RDKit {
54 
55 //! class for flagging enumeration strategy errors
57  public:
58  EnumerationStrategyException(const char *msg) : _msg(msg){};
59  EnumerationStrategyException(const std::string &msg) : _msg(msg){};
60  const char *message() const { return _msg.c_str(); };
62 
63  private:
64  std::string _msg;
65 };
66 
67 //! Return the number of elements per input vector
68 /*! \param bbs vector<vector<T> >
69 
70  \result vector<uint64_t> number of elements in each vector
71  */
72 template <class T>
74  const std::vector<std::vector<T> > &bbs) {
76  for (size_t i = 0; i < bbs.size(); ++i) sizes.push_back(bbs[i].size());
77  return sizes;
78 }
79 
80 //! getSizesFromReactants
81 //! Helper function for enumeration, bbs are stored in a
82 //! std::vector< std::vector<boost::shared_ptr<ROMol> > >
83 //
85  const std::vector<MOL_SPTR_VECT> &bbs);
86 
87 //! getReactantsFromRGroups
88 //! Helper function for enumeration, bbs are stored in a
89 //! std::vector< std::vector<boost::shared_ptr<ROMol> > >
90 //
91 RDKIT_CHEMREACTIONS_EXPORT MOL_SPTR_VECT getReactantsFromRGroups(const std::vector<MOL_SPTR_VECT> &bbs,
92  const EnumerationTypes::RGROUPS &rgroups);
93 
94 //! computeNumProducts
95 //! Returns the number of possible product combinations from
96 //! the given numbers of building blocks for each rgroup,
97 //! or EnumerationStrategyBase::EnumerationOverflow if the
98 //! number will not fit into the machine's integer type.
99 //! n.b. An overflow simply means there are a lot of products,
100 //! not that they cannot be enumerated
102 
103 //! Base Class for enumeration strategies
104 //! Usage:
105 //! EnumerationStrategyBase must be initialized with both a reaction
106 //! and the building block (molecule) vector to be sampled.
107 //!
108 //! \verbatim
109 //! EnumerationStrategyBase &eb = ...
110 //! if(eb) { // can we get another entry
111 //! const EnumerationTypes::RGROUPS &v = eb.next();
112 //! v[0] // RGroup 0 position
113 //! v[1] // RGroup 1 position...
114 //! }
115 //! \endverbatim
116 
118  protected:
119  EnumerationTypes::RGROUPS m_permutation; // where are we currently?
121  m_permutationSizes; // m_permutationSizes num bbs per group
122  boost::uint64_t
123  m_numPermutations; // total number of permutations for this group
124  // EnumerationOverflow (i.e. uint64 -1) if > ssize_t::max
125  public:
126  static const boost::uint64_t EnumerationOverflow =
127  static_cast<boost::uint64_t>(-1);
129  : m_permutation(), m_permutationSizes(), m_numPermutations() {}
130 
132  //! Returns the string literal "EnumerationStrategyBase"; virtual, so subclasses may report their own name
133  virtual const char *type() const { return "EnumerationStrategyBase"; }
134 
135  //! Initialize the enumerator based on the reaction and the
136  //! supplied building blocks.
137  //! This is the standard API point: it resets the base-class state, then calls initializeStrategy().
138  void initialize(const ChemicalReaction &reaction,
139  const EnumerationTypes::BBS &building_blocks) {
140  // default initialization, may be overridden (sets the # reactants
141  // and computes the default # of permutations)
142  m_permutationSizes = getSizesFromBBs(building_blocks);  // one size per building-block vector
143  m_permutation.resize(m_permutationSizes.size());  // one position slot per size entry
144 
145  m_numPermutations = computeNumProducts(m_permutationSizes);  // may be EnumerationOverflow
146  std::fill(m_permutation.begin(), m_permutation.end(), 0);  // restart every position at 0
147 
148  initializeStrategy(reaction, building_blocks);  // subclass hook, runs after the base state is set
149  }
150 
151  //! Initialize the derived class; called at the end of initialize().
152  //! Pure virtual, so it must exist; EnumerationStrategyBase structures are already initialized
153  virtual void initializeStrategy(
154  const ChemicalReaction &reaction,
155  const EnumerationTypes::BBS &building_blocks) = 0;
156 
157  //! returns true if there are more permutations left
158  //! random enumerators may always return true...
159  virtual operator bool() const = 0;
160 
161  //! The current permutation {r1, r2, ...}
162  virtual const EnumerationTypes::RGROUPS &next() = 0;
163 
164  //! copy the enumeration strategy complete with current state
165  virtual EnumerationStrategyBase *copy() const = 0;
166 
167  //! The current position in the enumeration
168  const EnumerationTypes::RGROUPS &getPosition() const { return m_permutation; }
169 
170  //! Returns the total number of permutations. A result of
171  //! EnumerationOverflow indicates that the number is not representable
172  //! in the 64-bit unsigned type (boost::uint64_t) used here.
173  boost::uint64_t getNumPermutations() const { return m_numPermutations; }
174 
175  //! Returns how many permutations have been processed by this strategy
176  virtual boost::uint64_t getPermutationIdx() const = 0;
177 
178  //! Skip the specified number of permutations (useful for
179  //! resetting state to a known position). Calls next() skipCount times.
180  bool skip(boost::uint64_t skipCount) {
181  for (boost::uint64_t i = 0; i < skipCount; ++i) next();  // one next() call per skipped entry; results discarded
182  return true;  // this implementation always returns true
183  }
184 
185  protected:
186  //! Initialize the internal data structures
187  //! e.g. RGROUPS = {10,40,50};
189  m_permutation.resize(rgroups.size());
190  m_permutationSizes = rgroups;
191  m_numPermutations = computeNumProducts(m_permutationSizes);
192  std::fill(m_permutation.begin(), m_permutation.end(), 0);
193  }
194 
195  private:
196  friend class boost::serialization::access;
197  template <class Archive>  // Boost.Serialization hook: archives the three state members
198  void serialize(Archive &ar, const unsigned int /*version*/) {  // version argument is unused
199  ar &m_permutation;  // current position
200  ar &m_permutationSizes;  // number of building blocks per group
201  ar &m_numPermutations;  // total permutation count (or EnumerationOverflow)
202  }
203 };
204 #ifdef RDK_USE_BOOST_SERIALIZATION
205 BOOST_SERIALIZATION_ASSUME_ABSTRACT(EnumerationStrategyBase)
206 #endif
207 }
208 
209 #ifdef RDK_USE_BOOST_SERIALIZATION
210 BOOST_CLASS_VERSION(RDKit::EnumerationStrategyBase, 1)
211 #endif
212 
213 #endif
virtual const char * type() const
RDKIT_CHEMREACTIONS_EXPORT EnumerationTypes::RGROUPS getSizesFromReactants(const std::vector< MOL_SPTR_VECT > &bbs)
void initialize(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:118
pulls in the core RDKit functionality
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:20
EnumerationStrategyException(const std::string &msg)
EnumerationTypes::RGROUPS m_permutation
std::vector< MOL_SPTR_VECT > BBS
Std stuff.
Definition: Atom.h:30
const EnumerationTypes::RGROUPS & getPosition() const
The current position in the enumeration.
RDKIT_CHEMREACTIONS_EXPORT MOL_SPTR_VECT getReactantsFromRGroups(const std::vector< MOL_SPTR_VECT > &bbs, const EnumerationTypes::RGROUPS &rgroups)
RDKIT_CHEMREACTIONS_EXPORT boost::uint64_t computeNumProducts(const EnumerationTypes::RGROUPS &sizes)
std::vector< boost::uint64_t > RGROUPS
#define RDKIT_CHEMREACTIONS_EXPORT
Definition: export.h:60
bool skip(boost::uint64_t skipCount)
boost::uint64_t getNumPermutations() const
void internalInitialize(const EnumerationTypes::RGROUPS &rgroups)
EnumerationTypes::RGROUPS m_permutationSizes
class for flagging enumeration strategy errors
EnumerationTypes::RGROUPS getSizesFromBBs(const std::vector< std::vector< T > > &bbs)
Return the number of elements per input vector.