RDKit
Open-source cheminformatics and machine learning.
Reaction.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2007-2014, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 
32 #ifndef __RD_REACTION_H_17Aug2006__
33 #define __RD_REACTION_H_17Aug2006__
34 
35 #include <GraphMol/RDKitBase.h>
37 #include <vector>
38 
39 namespace RDKit{
40  class ReactionPickler;
41 
42  //! used to indicate an error in the chemical reaction engine
43  class ChemicalReactionException : public std::exception {
44  public:
45  //! construct with an error message
46  explicit ChemicalReactionException(const char *msg) : _msg(msg) {};
47  //! construct with an error message
48  explicit ChemicalReactionException(const std::string msg) : _msg(msg) {};
49  //! get the error message
50  const char *message () const { return _msg.c_str(); };
52  private:
53  std::string _msg;
54  };
55 
56  //! This is a class for storing and applying general chemical reactions.
57  /*!
58  basic usage will be something like:
59 
60  \verbatim
61  ChemicalReaction rxn;
62  rxn.addReactantTemplate(r1);
63  rxn.addReactantTemplate(r2);
64  rxn.addProductTemplate(p1);
65  rxn.initReactantMatchers();
66 
67  MOL_SPTR_VECT prods;
68  for(MOL_SPTR_VECT::const_iterator r1It=reactantSet1.begin();
69  r1It!=reactantSet1.end();++r1It;){
70  for(MOL_SPTR_VECT::const_iterator r2It=reactantSet2.begin();
71  r2It!=reactantSet2.end();++r2It;){
72  MOL_SPTR_VECT rVect(2);
73  rVect[0] = *r1It;
74  rVect[1] = *r2It;
75 
76  std::vector<MOL_SPTR_VECT> lprods;
77  lprods = rxn.runReactants(rVect);
78  for(std::vector<MOL_SPTR_VECT>::const_iterator lpIt=lprods.begin();
79  lpIt!=lprods.end();++lpIt){
80  // we know this is a single-product reaction:
81  prods.push_back((*lpIt)[0]);
82  }
83  }
84  }
85  \endverbatim
86 
87  NOTES:
88  - to allow more control over the reaction, it is possible to flag reactant
89  atoms as being protected by setting the common_properties::_protected property on those
90  atoms. Here's an example:
91  \verbatim
92  std::string smi="[O:1]>>[N:1]";
93  ChemicalReaction *rxn = RxnSmartsToChemicalReaction(smi);
94  rxn->initReactantMatchers();
95 
96  MOL_SPTR_VECT reacts;
97  reacts.clear();
98  smi = "OCO";
99  ROMol *mol = SmilesToMol(smi);
100  reacts.push_back(ROMOL_SPTR(mol));
101  std::vector<MOL_SPTR_VECT> prods;
102  prods = rxn->runReactants(reacts);
103  // here prods has two entries, because there are two Os in the
104  // reactant.
105 
106  reacts[0]->getAtomWithIdx(0)->setProp(common_properties::_protected,1);
107  prods = rxn->runReactants(reacts);
108  // here prods only has one entry, the reaction at atom 0
109  // has been blocked by the _protected property
110  \endverbatim
111 
112  */
114  friend class ReactionPickler;
115 
116  public:
117  ChemicalReaction() : df_needsInit(true), df_implicitProperties(false) {};
119  df_needsInit=other.df_needsInit;
120  df_implicitProperties=other.df_implicitProperties;
121  m_reactantTemplates=other.m_reactantTemplates;
122  m_productTemplates=other.m_productTemplates;
123  m_agentTemplates=other.m_agentTemplates;
124  }
125  //! construct a reaction from a pickle string
126  ChemicalReaction(const std::string &binStr);
127 
128  //! Adds a new reactant template
129  /*!
130  \return the number of reactants
131 
132  */
133  unsigned int addReactantTemplate(ROMOL_SPTR mol){
134  this->df_needsInit = true;
135  this->m_reactantTemplates.push_back(mol);
136  return this->m_reactantTemplates.size();
137  }
138 
139  //! Adds a new agent template
140  /*!
141  \return the number of agent
142 
143  */
144  unsigned int addAgentTemplate(ROMOL_SPTR mol){
145  this->m_agentTemplates.push_back(mol);
146  return this->m_agentTemplates.size();
147  }
148 
149  //! Adds a new product template
150  /*!
151  \return the number of products
152 
153  */
154  unsigned int addProductTemplate(ROMOL_SPTR mol){
155  this->m_productTemplates.push_back(mol);
156  return this->m_productTemplates.size();
157  }
158 
159  //! Removes the reactant templates from a reaction if atom mapping ratio is below a given threshold
160  /*! By default the removed reactant templates were attached to the agent templates.
161  An alternative will be to provide a pointer to a molecule vector where these reactants should be saved.
162  */
163  void removeUnmappedReactantTemplates(
164  double thresholdUnmappedAtoms = 0.2,
165  bool moveToAgentTemplates = true,
166  MOL_SPTR_VECT *targetVector = NULL);
167 
168  //! Removes the product templates from a reaction if its atom mapping ratio is below a given threshold
169  /*! By default the removed products templates were attached to the agent templates.
170  An alternative will be to provide a pointer to a molecule vector where these products should be saved.
171  */
172  void removeUnmappedProductTemplates(
173  double thresholdUnmappedAtoms =0.2,
174  bool moveToAgentTemplates = true,
175  MOL_SPTR_VECT *targetVector = NULL);
176 
177  //! Runs the reaction on a set of reactants
178  /*!
179 
180  \param reactants: the reactants to be used. The length of this must be equal to
181  this->getNumReactantTemplates()
182 
183  \return a vector of vectors of products. Each subvector will be
184  this->getNumProductTemplates() long.
185 
186  We return a vector of vectors of products because each individual template may
187  map multiple times onto its reactant. This leads to multiple possible result
188  sets.
189  */
190  std::vector<MOL_SPTR_VECT> runReactants(const MOL_SPTR_VECT reactants) const;
191 
192  const MOL_SPTR_VECT & getReactants() const { return this->m_reactantTemplates; }
193  const MOL_SPTR_VECT & getAgents() const { return this->m_agentTemplates; }
194  const MOL_SPTR_VECT & getProducts() const { return this->m_productTemplates; }
195 
196  MOL_SPTR_VECT::const_iterator beginReactantTemplates() const {
197  return this->m_reactantTemplates.begin();
198  }
199  MOL_SPTR_VECT::const_iterator endReactantTemplates() const {
200  return this->m_reactantTemplates.end();
201  }
202 
203  MOL_SPTR_VECT::const_iterator beginProductTemplates() const {
204  return this->m_productTemplates.begin();
205  }
206  MOL_SPTR_VECT::const_iterator endProductTemplates() const {
207  return this->m_productTemplates.end();
208  }
209 
210  MOL_SPTR_VECT::const_iterator beginAgentTemplates() const {
211  return this->m_agentTemplates.begin();
212  }
213  MOL_SPTR_VECT::const_iterator endAgentTemplates() const {
214  return this->m_agentTemplates.end();
215  }
216 
217  MOL_SPTR_VECT::iterator beginReactantTemplates() {
218  return this->m_reactantTemplates.begin();
219  }
220  MOL_SPTR_VECT::iterator endReactantTemplates() {
221  return this->m_reactantTemplates.end();
222  }
223 
224  MOL_SPTR_VECT::iterator beginProductTemplates() {
225  return this->m_productTemplates.begin();
226  }
227  MOL_SPTR_VECT::iterator endProductTemplates() {
228  return this->m_productTemplates.end();
229  }
230 
231  MOL_SPTR_VECT::iterator beginAgentTemplates() {
232  return this->m_agentTemplates.begin();
233  }
234  MOL_SPTR_VECT::iterator endAgentTemplates() {
235  return this->m_agentTemplates.end();
236  }
237  unsigned int getNumReactantTemplates() const { return this->m_reactantTemplates.size(); };
238  unsigned int getNumProductTemplates() const { return this->m_productTemplates.size(); };
239  unsigned int getNumAgentTemplates() const { return this->m_agentTemplates.size(); };
240 
241  //! initializes our internal reactant-matching datastructures.
242  /*!
243  This must be called after adding reactants and before calling
244  runReactants.
245  */
246  void initReactantMatchers();
247 
248  bool isInitialized() const { return !df_needsInit; };
249 
250  //! validates the reactants and products to make sure the reaction seems "reasonable"
251  /*!
252  \return true if the reaction validates without errors (warnings do not stop
253  validation)
254 
255  \param numWarnings: used to return the number of validation warnings
256  \param numErrors: used to return the number of validation errors
257 
258  \param silent: If this bool is true, no messages will be logged during the validation.
259  By default, validation problems are reported to the warning and error
260  logs depending on their severity.
261 
262  */
263  bool validate(unsigned int &numWarnings,unsigned int &numErrors,bool silent=false) const;
264 
265 
266  //! returns whether or not the reaction uses implicit
267  //! properties on the product atoms
268  /*!
269 
270  This toggles whether or not unspecified atomic properties in the
271  products are considered to be implicit and should be copied from
272  the actual reactants. This is necessary due to a semantic difference
273  between the "reaction SMARTS" approach and the MDL RXN
274  approach:
275  In "reaction SMARTS", this reaction:
276  [C:1]-[Br:2].[O-:3]>>[C:1]-[O:3].[Br-:2]
277  applied to [CH4+]Br should yield [CH4+]O
278  Something similar drawn in an rxn file, and applied to
279  [CH4+]Br should yield [CH3]O.
280  In rxn there is no charge on the product C because nothing is
281  specified in the rxn file; in "SMARTS" the charge from the
282  actual reactants is not *removed* because no charge is
283  specified in the reaction.
284 
285  */
286  bool getImplicitPropertiesFlag() const { return df_implicitProperties; };
287  //! sets the implicit properties flag. See the documentation for
288  //! getImplicitProertiesFlag() for a discussion of what this means.
289  void setImplicitPropertiesFlag(bool val) { df_implicitProperties=val; };
290 
291  private:
292  bool df_needsInit;
293  bool df_implicitProperties;
294  MOL_SPTR_VECT m_reactantTemplates,m_productTemplates,m_agentTemplates;
295  ChemicalReaction &operator=(const ChemicalReaction &); // disable assignment
296  };
297 
298  //! tests whether or not the molecule has a substructure match
299  //! to any of the reaction's reactants
300  //! the \c which argument is used to return which of the reactants
301  //! the molecule matches. If there's no match, it is equal to the number
302  //! of reactants on return
303  bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn,const ROMol &mol,
304  unsigned int &which);
305  //! \overload
306  bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn,const ROMol &mol);
307 
308  //! tests whether or not the molecule has a substructure match
309  //! to any of the reaction's products
310  //! the \c which argument is used to return which of the products
311  //! the molecule matches. If there's no match, it is equal to the number
312  //! of products on return
313  bool isMoleculeProductOfReaction(const ChemicalReaction &rxn,const ROMol &mol,
314  unsigned int &which);
315  //! \overload
316  bool isMoleculeProductOfReaction(const ChemicalReaction &rxn,const ROMol &mol);
317 
318  //! tests whether or not the molecule has a substructure match
319  //! to any of the reaction's agents
320  //! the \c which argument is used to return which of the agents
321  //! the molecule matches. If there's no match, it is equal to the number
322  //! of agents on return
323  bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn,const ROMol &mol,
324  unsigned int &which);
325  //! \overload
326  bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn,const ROMol &mol);
327 
328  //! returns indices of the atoms in each reactant that are changed
329  //! in the reaction
330  /*!
331  \param rxn the reaction we are interested in
332 
333  \param mappedAtomsOnly if set, atoms that are not mapped will not be included in
334  the list of changed atoms (otherwise they are automatically included)
335 
336  How are changed atoms recognized?
337  1) Atoms whose degree changes
338  2) Atoms whose bonding pattern changes
339  3) unmapped atoms (unless the mappedAtomsOnly flag is set)
340  4) Atoms connected to unmapped atoms
341  5) Atoms whose atomic number changes (unless the
342  corresponding product atom is a dummy)
343  6) Atoms with more than one atomic number query (unless the
344  corresponding product atom is a dummy)
345 
346  Note that the atomic number of a query atom depends on how it's constructed.
347  When coming from SMARTS: if the first query is an atomic label/number that
348  sets the atomic number, otherwise it's zero.
349  For example [O;$(OC)] is atomic number 8 while [$(OC);O] is atomic
350  number 0.
351  When coming from RXN: the atomic number of the atom in the rxn file sets
352  the value.
353  */
354  VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn,bool mappedAtomsOnly=false);
355 
356  //! add the recursive queries to the reactants of a reaction
357  /*!
358  This does its work using RDKit::addRecursiveQueries()
359 
360  \param rxn the reaction we are interested in
361  \param queries - the dictionary of named queries to add
362  \param propName - the atom property to use to get query names
363  optional:
364  \param reactantLabels - to store pairs of (atom index, query string)
365  per reactant
366 
367  NOTES:
368  - existing query information, if present, will be supplemented (AND logic)
369  - non-query atoms will be replaced with query atoms using only the query
370  logic
371  - query names can be present as comma separated lists, they will then
372  be combined using OR logic.
373  - throws a KeyErrorException if a particular query name is not present
374  in \c queries
375 
376  */
378  const std::map<std::string,ROMOL_SPTR> &queries,
379  std::string propName,
380  std::vector<std::vector<std::pair<unsigned int,std::string> > > *reactantLabels=NULL);
381 
382 } // end of RDKit namespace
383 
384 namespace RDDepict {
385  //! \brief Generate 2D coordinates (a depiction) for a reaction
386  /*!
387 
388  \param rxn the reaction we are interested in
389 
390  \param spacing the spacing between components of the reaction
391 
392  \param updateProps if set, properties such as conjugation and
393  hybridization will be calculated for the reactant and product
394  templates before generating coordinates. This should result in
395  better depictions, but can lead to errors in some cases.
396 
397  \param canonOrient canonicalize the orientation so that the long
398  axes align with the x-axis etc.
399 
400  \param nFlipsPerSample - the number of rotatable bonds that are
401  flipped at random for each sample
402 
403  \param nSamples - the number of samples
404 
405  \param sampleSeed - seed for the random sampling process
406 
407  \param permuteDeg4Nodes - try permuting the drawing order of bonds around
408  atoms with four neighbors in order to improve the depiction
409 
410  for the other parameters see the documentation for compute2DCoords()
411 
412  */
414  double spacing=2.0,
415  bool updateProps=true,
416  bool canonOrient=false,
417  unsigned int nFlipsPerSample=0,
418  unsigned int nSamples=0,
419  int sampleSeed=0,
420  bool permuteDeg4Nodes=false);
421 
422 } // end of RDDepict namespace
423 
424 #endif
MOL_SPTR_VECT::iterator beginProductTemplates()
Definition: Reaction.h:224
MOL_SPTR_VECT::const_iterator endAgentTemplates() const
Definition: Reaction.h:213
unsigned int getNumProductTemplates() const
Definition: Reaction.h:238
unsigned int getNumReactantTemplates() const
Definition: Reaction.h:237
bool getImplicitPropertiesFlag() const
Definition: Reaction.h:286
bool isMoleculeReactantOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
const MOL_SPTR_VECT & getProducts() const
Definition: Reaction.h:194
const char * message() const
get the error message
Definition: Reaction.h:50
unsigned int getNumAgentTemplates() const
Definition: Reaction.h:239
VECT_INT_VECT getReactingAtoms(const ChemicalReaction &rxn, bool mappedAtomsOnly=false)
ChemicalReaction(const ChemicalReaction &other)
Definition: Reaction.h:118
void setImplicitPropertiesFlag(bool val)
Definition: Reaction.h:289
bool isInitialized() const
Definition: Reaction.h:248
unsigned int addAgentTemplate(ROMOL_SPTR mol)
Adds a new agent template.
Definition: Reaction.h:144
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:113
pulls in the core RDKit functionality
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:20
MOL_SPTR_VECT::iterator endProductTemplates()
Definition: Reaction.h:227
std::vector< INT_VECT > VECT_INT_VECT
Definition: types.h:160
MOL_SPTR_VECT::iterator beginReactantTemplates()
Definition: Reaction.h:217
bool isMoleculeAgentOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
MOL_SPTR_VECT::const_iterator endReactantTemplates() const
Definition: Reaction.h:199
MOL_SPTR_VECT::const_iterator endProductTemplates() const
Definition: Reaction.h:206
boost::shared_ptr< ROMol > ROMOL_SPTR
MOL_SPTR_VECT::const_iterator beginAgentTemplates() const
Definition: Reaction.h:210
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
used to indicate an error in the chemical reaction engine
Definition: Reaction.h:43
const MOL_SPTR_VECT & getAgents() const
Definition: Reaction.h:193
unsigned int addProductTemplate(ROMOL_SPTR mol)
Adds a new product template.
Definition: Reaction.h:154
bool isMoleculeProductOfReaction(const ChemicalReaction &rxn, const ROMol &mol, unsigned int &which)
unsigned int addReactantTemplate(ROMOL_SPTR mol)
Adds a new reactant template.
Definition: Reaction.h:133
MOL_SPTR_VECT::const_iterator beginReactantTemplates() const
Definition: Reaction.h:196
MOL_SPTR_VECT::const_iterator beginProductTemplates() const
Definition: Reaction.h:203
void compute2DCoordsForReaction(RDKit::ChemicalReaction &rxn, double spacing=2.0, bool updateProps=true, bool canonOrient=false, unsigned int nFlipsPerSample=0, unsigned int nSamples=0, int sampleSeed=0, bool permuteDeg4Nodes=false)
Generate 2D coordinates (a depiction) for a reaction.
ChemicalReactionException(const std::string msg)
construct with an error message
Definition: Reaction.h:48
const MOL_SPTR_VECT & getReactants() const
Definition: Reaction.h:192
MOL_SPTR_VECT::iterator endReactantTemplates()
Definition: Reaction.h:220
handles pickling (serializing) reactions
MOL_SPTR_VECT::iterator beginAgentTemplates()
Definition: Reaction.h:231
void addRecursiveQueriesToReaction(ChemicalReaction &rxn, const std::map< std::string, ROMOL_SPTR > &queries, std::string propName, std::vector< std::vector< std::pair< unsigned int, std::string > > > *reactantLabels=NULL)
add the recursive queries to the reactants of a reaction
MOL_SPTR_VECT::iterator endAgentTemplates()
Definition: Reaction.h:234
ChemicalReactionException(const char *msg)
construct with an error message
Definition: Reaction.h:46