RDKit
Open-source cheminformatics and machine learning.
Embedder.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2004-2012 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #ifndef _RD_EMBEDDER_H_
12 #define _RD_EMBEDDER_H_
13 
14 #include <map>
15 #include <Geometry/point.h>
16 #include <GraphMol/ROMol.h>
17 
18 namespace RDKit {
19  namespace DGeomHelpers {
20 
21  //! Compute an embedding (in 3D) for the specified molecule using Distance Geometry
22  /*!
23  The following operations are performed (in order) here:
24  -# Build a distance bounds matrix based on the topology, including 1-5
25  distances but not VDW scaling
26  -# Triangle smooth this bounds matrix
27  -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW
28  scaling, and repeat step 2
29  -# Pick a distance matrix at random using the bounds matrix
30  -# Compute initial coordinates from the distance matrix
31  -# Repeat steps 4 and 5 until maxIterations is reached or embedding is successful
32  -# Adjust initial coordinates by minimizing a Distance Violation error function
33 
34  **NOTE**: if the molecule has multiple fragments, they will be embedded separately,
35  this means that they will likely occupy the same region of space.
36 
37  \param mol Molecule of interest
38  \param maxIterations Max. number of times the embedding will be tried if coordinates are
39  not obtained successfully. The default value is 10x the number of atoms.
40  \param seed provides a seed for the random number generator (so that the same
41  coordinates can be obtained for a molecule on multiple runs)
42  If negative, the RNG will not be seeded.
43  \param clearConfs Clear all existing conformations on the molecule
44  \param useRandomCoords Start the embedding from random coordinates instead of
45  using eigenvalues of the distance matrix.
46  \param boxSizeMult Determines the size of the box that is used for
47  random coordinates. If this is a positive number, the
48  side length will equal the largest element of the distance
49  matrix times \c boxSizeMult. If this is a negative number,
50  the side length will equal \c -boxSizeMult (i.e. independent
51  of the elements of the distance matrix).
52  \param randNegEig Picks coordinates at random when an embedding process produces
53  negative eigenvalues
54  \param numZeroFail Fail embedding if we find this many or more zero eigenvalues
55  (within a tolerance)
56  \param coordMap a map of int to Point3D, between atom IDs and
57  their locations. If this container is provided, the coordinates
58  are used to set distance constraints on the embedding. The resulting
59  conformer(s) should have distances between the specified atoms that
60  reproduce those between the points in \c coordMap. Because the embedding
61  produces a molecule in an arbitrary reference frame, an alignment step
62  is required to actually reproduce the provided coordinates.
63  \param optimizerForceTol set the tolerance on forces in the distgeom optimizer
64  (this shouldn't normally be altered in client code).
65  \param ignoreSmoothingFailures try to embed the molecule even if triangle bounds
66  smoothing fails
67  \param basinThresh set the basin threshold for the DGeom force field,
68  (this shouldn't normally be altered in client code).
69 
70  \return ID of the conformation added to the molecule, -1 if the embedding failed
71  */
72  int EmbedMolecule(ROMol &mol, unsigned int maxIterations=0, int seed=-1,
73  bool clearConfs=true,
74  bool useRandomCoords=false,double boxSizeMult=2.0,
75  bool randNegEig=true,
76  unsigned int numZeroFail=1,
77  const std::map<int,RDGeom::Point3D> *coordMap=0,
78  double optimizerForceTol=1e-3,
79  bool ignoreSmoothingFailures=false,
80  double basinThresh=5.0
81  );
82 
83  //! Embed multiple conformations for a molecule
84  /*!
85  This is kind of equivalent to calling EmbedMolecule multiple times - just that the bounds
86  matrix is computed only once from the topology
87 
88  **NOTE**: if the molecule has multiple fragments, they will be embedded separately,
89  this means that they will likely occupy the same region of space.
90 
91 
92  \param mol Molecule of interest
93  \param res Used to return the resulting conformer ids
94  \param numConfs Number of conformations to be generated
95  \param numThreads Sets the number of threads to use (more than one thread will only
96  be used if the RDKit was built with multithread support)
97  \param maxIterations Max. number of times the embedding will be tried if coordinates are
98  not obtained successfully. The default value is 30.
99  \param seed provides a seed for the random number generator (so that the same
100  coordinates can be obtained for a molecule on multiple runs).
101  If negative, the RNG will not be seeded.
102  \param clearConfs Clear all existing conformations on the molecule
103  \param useRandomCoords Start the embedding from random coordinates instead of
104  using eigenvalues of the distance matrix.
105  \param boxSizeMult Determines the size of the box that is used for
106  random coordinates. If this is a positive number, the
107  side length will equal the largest element of the distance
108  matrix times \c boxSizeMult. If this is a negative number,
109  the side length will equal \c -boxSizeMult (i.e. independent
110  of the elements of the distance matrix).
111  \param randNegEig Picks coordinates at random when an embedding process produces
112  negative eigenvalues
113  \param numZeroFail Fail embedding if we find this many or more zero eigenvalues
114  (within a tolerance)
115  \param pruneRmsThresh Retain only the conformations out of 'numConfs' after embedding that are
116  at least this far apart from each other. RMSD is computed on the heavy atoms.
117  Pruning is greedy; i.e. the first embedded conformation is retained and from
118  then on only those that are at least pruneRmsThresh away from already
119  retained conformations are kept. The pruning is done after embedding and
120  bounds violation minimization. No pruning by default.
121  \param coordMap a map of int to Point3D, between atom IDs and
122  their locations. If this container is provided, the coordinates
123  are used to set distance constraints on the embedding. The resulting
124  conformer(s) should have distances between the specified atoms that
125  reproduce those between the points in \c coordMap. Because the embedding
126  produces a molecule in an arbitrary reference frame, an alignment step
127  is required to actually reproduce the provided coordinates.
128 
129  \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
130  (this shouldn't normally be altered in client code).
131 
132  \param ignoreSmoothingFailures try to embed the molecule even if triangle bounds
133  smoothing fails
134 
135  \param basinThresh set the basin threshold for the DGeom force field,
136  (this shouldn't normally be altered in client code).
137 
138 
139 
140  */
141  void EmbedMultipleConfs(ROMol &mol,
142  INT_VECT &res,
143  unsigned int numConfs=10,
144  int numThreads=1,
145  unsigned int maxIterations=30,
146  int seed=-1, bool clearConfs=true,
147  bool useRandomCoords=false,double boxSizeMult=2.0,
148  bool randNegEig=true, unsigned int numZeroFail=1,
149  double pruneRmsThresh=-1.0,
150  const std::map<int,RDGeom::Point3D> *coordMap=0,
151  double optimizerForceTol=1e-3,
152  bool ignoreSmoothingFailures=false,
153  double basinThresh=5.0);
154  //! \overload
155  INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs=10,
156  unsigned int maxIterations=30,
157  int seed=-1, bool clearConfs=true,
158  bool useRandomCoords=false,double boxSizeMult=2.0,
159  bool randNegEig=true, unsigned int numZeroFail=1,
160  double pruneRmsThresh=-1.0,
161  const std::map<int,RDGeom::Point3D> *coordMap=0,
162  double optimizerForceTol=1e-3,
163  bool ignoreSmoothingFailures=false,
164  double basinThresh=5.0);
165 
166  }
167 }
168 
169 #endif
Defines the primary molecule class ROMol as well as associated typedefs.
int EmbedMolecule(ROMol &mol, unsigned int maxIterations=0, int seed=-1, bool clearConfs=true, bool useRandomCoords=false, double boxSizeMult=2.0, bool randNegEig=true, unsigned int numZeroFail=1, const std::map< int, RDGeom::Point3D > *coordMap=0, double optimizerForceTol=1e-3, bool ignoreSmoothingFailures=false, double basinThresh=5.0)
Compute an embedding (in 3D) for the specified molecule using Distance Geometry.
void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs=10, int numThreads=1, unsigned int maxIterations=30, int seed=-1, bool clearConfs=true, bool useRandomCoords=false, double boxSizeMult=2.0, bool randNegEig=true, unsigned int numZeroFail=1, double pruneRmsThresh=-1.0, const std::map< int, RDGeom::Point3D > *coordMap=0, double optimizerForceTol=1e-3, bool ignoreSmoothingFailures=false, double basinThresh=5.0)
std::vector< int > INT_VECT
Definition: types.h:146
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28