RDKit
Open-source cheminformatics and machine learning.
Embedder.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2004-2017 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_EMBEDDER_H_GUARD
13 #define RD_EMBEDDER_H_GUARD
14 
15 #include <map>
16 #include <Geometry/point.h>
17 #include <GraphMol/ROMol.h>
18 
19 namespace RDKit {
20 namespace DGeomHelpers {
21 
22 //! Parameter object for controlling embedding
23 /*!
24  numConfs Number of conformations to be generated
25 
26  numThreads Sets the number of threads to use (more than one thread
27  will only be used if the RDKit was built with multithread
28  support). If set to zero, the max supported by the system will
29  be used.
30 
31  maxIterations Max. number of times the embedding will be tried if
32  coordinates are not obtained successfully. The default
33  value is 10x the number of atoms.
34 
35  randomSeed provides a seed for the random number generator (so that
36  the same coordinates can be obtained for a
37  molecule on multiple runs) If -1, the
38  RNG will not be seeded.
39 
40  clearConfs Clear all existing conformations on the molecule
41 
42  useRandomCoords Start the embedding from random coordinates instead of
43  using eigenvalues of the distance matrix.
44 
45  boxSizeMult Determines the size of the box that is used for
46  random coordinates. If this is a positive number, the
47  side length will equal the largest element of the distance
48  matrix times \c boxSizeMult. If this is a negative number,
49  the side length will equal \c -boxSizeMult (i.e. independent
50  of the elements of the distance matrix).
51 
52  randNegEig Picks coordinates at random when an embedding process produces
53  negative eigenvalues
54 
55  numZeroFail Fail embedding if we find this many or more zero eigenvalues
56  (within a tolerance)
57 
58  pruneRmsThresh Retain only the conformations out of 'numConfs' after
59  embedding that are at least this far apart from each other.
60  RMSD is computed on the heavy atoms.
61  Pruning is greedy; i.e. the first embedded conformation is
62  retained and from then on only those that are at least
63  \c pruneRmsThresh away from already
64  retained conformations are kept. The pruning is done
65  after embedding and bounds violation minimization.
66  No pruning by default.
67 
68  coordMap a map of int to Point3D, between atom IDs and
69  their locations. If this container is provided, the
70  coordinates are used to set distance constraints on the
71  embedding. The resulting conformer(s) should have distances
72  between the specified atoms that reproduce those between the
73  points in \c coordMap. Because the embedding produces a
74  molecule in an arbitrary reference frame, an alignment step
75  is required to actually reproduce the provided coordinates.
76 
77  optimizerForceTol set the tolerance on forces in the DGeom optimizer
78  (this shouldn't normally be altered in client code).
79 
80  ignoreSmoothingFailures try to embed the molecule even if triangle bounds
81  smoothing fails
82 
83  enforceChirality enforce the correct chirality if chiral centers are present
84 
85  useExpTorsionAnglePrefs impose experimental torsion-angle preferences
86 
87  useBasicKnowledge impose "basic knowledge" terms such as flat
88  aromatic rings, ketones, etc.
89 
90  ETversion version of the experimental torsion-angle preferences
91 
92  verbose print output of experimental torsion-angle preferences
93 
94  basinThresh set the basin threshold for the DGeom force field,
95  (this shouldn't normally be altered in client code).
96 
97  onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
98 */
100  unsigned int maxIterations;
105  double boxSizeMult;
107  unsigned int numZeroFail;
108  const std::map<int, RDGeom::Point3D> *coordMap;
114  bool verbose;
115  double basinThresh;
118  unsigned int ETversion;
120  : maxIterations(0),
121  numThreads(1),
122  randomSeed(-1),
123  clearConfs(true),
124  useRandomCoords(false),
125  boxSizeMult(2.0),
126  randNegEig(true),
127  numZeroFail(1),
128  coordMap(NULL),
129  optimizerForceTol(1e-3),
130  ignoreSmoothingFailures(false),
131  enforceChirality(true),
132  useExpTorsionAnglePrefs(false),
133  useBasicKnowledge(false),
134  verbose(false),
135  basinThresh(5.0),
136  pruneRmsThresh(-1.0),
137  onlyHeavyAtomsForRMS(false),
138  ETversion(1){};
  //! Construct a parameter object with every option given explicitly.
  /*!
    Each argument is copied into the member of the same name; the meaning of
    the individual options is given in the description of EmbedParameters.
    \c ETversion is the only argument with a default value (1).
  */
  EmbedParameters(unsigned int maxIterations, int numThreads, int randomSeed,
                  bool clearConfs, bool useRandomCoords, double boxSizeMult,
                  bool randNegEig, unsigned int numZeroFail,
                  const std::map<int, RDGeom::Point3D> *coordMap,
                  double optimizerForceTol, bool ignoreSmoothingFailures,
                  bool enforceChirality, bool useExpTorsionAnglePrefs,
                  bool useBasicKnowledge, bool verbose, double basinThresh,
                  double pruneRmsThresh, bool onlyHeavyAtomsForRMS,
                  unsigned int ETversion = 1)
      : maxIterations(maxIterations),
        numThreads(numThreads),
        randomSeed(randomSeed),
        clearConfs(clearConfs),
        useRandomCoords(useRandomCoords),
        boxSizeMult(boxSizeMult),
        randNegEig(randNegEig),
        numZeroFail(numZeroFail),
        coordMap(coordMap),
        optimizerForceTol(optimizerForceTol),
        ignoreSmoothingFailures(ignoreSmoothingFailures),
        enforceChirality(enforceChirality),
        useExpTorsionAnglePrefs(useExpTorsionAnglePrefs),
        useBasicKnowledge(useBasicKnowledge),
        verbose(verbose),
        basinThresh(basinThresh),
        pruneRmsThresh(pruneRmsThresh),
        onlyHeavyAtomsForRMS(onlyHeavyAtomsForRMS),
        ETversion(ETversion){};
167 };
168 
//! Embed multiple conformations for a molecule
/*!
  \param mol       Molecule of interest
  \param res       Used to return the resulting conformer ids
  \param numConfs  Number of conformations to be generated
  \param params    EmbedParameters object controlling the embedding
*/
RDKIT_DISTGEOMHELPERS_EXPORT void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs,
                                                     const EmbedParameters &params);
172 inline INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs,
173  const EmbedParameters &params) {
174  INT_VECT res;
175  EmbedMultipleConfs(mol, res, numConfs, params);
176  return res;
177 }
178 
179 //! Compute an embedding (in 3D) for the specified molecule using Distance
180 // Geometry
181 inline int EmbedMolecule(ROMol &mol, const EmbedParameters &params) {
182  INT_VECT confIds;
183  EmbedMultipleConfs(mol, confIds, 1, params);
184 
185  int res;
186  if (confIds.size()) {
187  res = confIds[0];
188  } else {
189  res = -1;
190  }
191  return res;
192 }
193 
194 //! Compute an embedding (in 3D) for the specified molecule using Distance
195 //! Geometry
196 /*!
197  The following operations are performed (in order) here:
198  -# Build a distance bounds matrix based on the topology, including 1-5
199  distances but not VDW scaling
200  -# Triangle smooth this bounds matrix
201  -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW
202  scaling, and repeat step 2
203  -# Pick a distance matrix at random using the bounds matrix
204  -# Compute initial coordinates from the distance matrix
205  -# Repeat steps 3 and 4 until maxIterations is reached or embedding is
206  successful
207  -# Adjust initial coordinates by minimizing a Distance Violation error
208  function
209 
210  **NOTE**: if the molecule has multiple fragments, they will be embedded
211  separately,
212  this means that they will likely occupy the same region of space.
213 
214  \param mol Molecule of interest
215  \param maxIterations Max. number of times the embedding will be tried if
216  coordinates are not obtained successfully. The default
217  value is 10x the number of atoms.
218  \param seed provides a seed for the random number generator (so that
219  the same coordinates can be obtained for a molecule on
220  multiple runs). If negative, the RNG will not be seeded.
221  \param clearConfs Clear all existing conformations on the molecule
222  \param useRandomCoords Start the embedding from random coordinates instead of
223  using eigenvalues of the distance matrix.
224  \param boxSizeMult Determines the size of the box that is used for
225  random coordinates. If this is a positive number, the
226  side length will equal the largest element of the
227  distance matrix times \c boxSizeMult. If this is a
228  negative number, the side length will equal
229  \c -boxSizeMult (i.e. independent of the elements of the
230  distance matrix).
231  \param randNegEig Picks coordinates at random when a embedding process
232  produces negative eigenvalues
233  \param numZeroFail Fail embedding if we find this many or more zero
234  eigenvalues (within a tolerance)
235  \param coordMap a map of int to Point3D, between atom IDs and
236  their locations. If this container is provided, the
237  coordinates are used to set distance constraints on the
238  embedding. The resulting conformer(s) should have distances
239  between the specified atoms that reproduce those between the
240  points in \c coordMap. Because the embedding produces a
241  molecule in an arbitrary reference frame, an alignment step
242  is required to actually reproduce the provided coordinates.
243  \param optimizerForceTol set the tolerance on forces in the distgeom optimizer
244  (this shouldn't normally be altered in client code).
245  \param ignoreSmoothingFailures try to embed the molecule even if triangle
246  bounds smoothing fails
247  \param enforceChirality enforce the correct chirality if chiral centers are
248  present
249  \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
250  \param useBasicKnowledge impose "basic knowledge" terms such as flat
251  aromatic rings, ketones, etc.
252  \param verbose print output of experimental torsion-angle preferences
253 
254  \param basinThresh set the basin threshold for the DGeom force field,
255  (this shouldn't normally be altered in client code).
256 
257  \param onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
258 
259  \return ID of the conformation added to the molecule, -1 if the embedding
260  failed
261 */
262 inline int EmbedMolecule(ROMol &mol, unsigned int maxIterations = 0,
263  int seed = -1, bool clearConfs = true,
264  bool useRandomCoords = false, double boxSizeMult = 2.0,
265  bool randNegEig = true, unsigned int numZeroFail = 1,
266  const std::map<int, RDGeom::Point3D> *coordMap = 0,
267  double optimizerForceTol = 1e-3,
268  bool ignoreSmoothingFailures = false,
269  bool enforceChirality = true,
270  bool useExpTorsionAnglePrefs = false,
271  bool useBasicKnowledge = false, bool verbose = false,
272  double basinThresh = 5.0,
273  bool onlyHeavyAtomsForRMS = false) {
274  EmbedParameters params(
275  maxIterations, 1, seed, clearConfs, useRandomCoords, boxSizeMult,
276  randNegEig, numZeroFail, coordMap, optimizerForceTol,
277  ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs,
278  useBasicKnowledge, verbose, basinThresh, -1.0, onlyHeavyAtomsForRMS);
279  return EmbedMolecule(mol, params);
280 };
281 
282 //! Embed multiple conformations for a molecule
283 /*!
284  This is roughly equivalent to calling EmbedMolecule multiple times,
285  except that the bounds
286  matrix is computed only once from the topology.
287 
288  **NOTE**: if the molecule has multiple fragments, they will be embedded
289  separately,
290  this means that they will likely occupy the same region of space.
291 
292 
293  \param mol Molecule of interest
294  \param res Used to return the resulting conformer ids
295  \param numConfs Number of conformations to be generated
296  \param numThreads Sets the number of threads to use (more than one thread
297  will only be used if the RDKit was built with
298  multithread
299  support). If set to zero, the max supported by the
300  system
301  will be used.
302  \param maxIterations Max. number of times the embedding will be tried if
303  coordinates are not obtained successfully. The default
304  value is 10x the number of atoms.
305  \param seed provides a seed for the random number generator (so that
306  the same coordinates can be obtained for a molecule on
307  multiple runs). If negative, the RNG will not be seeded.
308  \param clearConfs Clear all existing conformations on the molecule
309  \param useRandomCoords Start the embedding from random coordinates instead of
310  using eigenvalues of the distance matrix.
311  \param boxSizeMult Determines the size of the box that is used for
312  random coordinates. If this is a positive number, the
313  side length will equal the largest element of the
314  distance matrix times \c boxSizeMult. If this is a
315  negative number, the side length will equal
316  \c -boxSizeMult (i.e. independent of the elements of the
317  distance matrix).
318  \param randNegEig Picks coordinates at random when a embedding process
319  produces negative eigenvalues
320  \param numZeroFail Fail embedding if we find this many or more zero
321  eigenvalues (within a tolerance)
322  \param pruneRmsThresh Retain only the conformations out of 'numConfs' after
323  embedding that are at least this far apart from each
324  other. RMSD is computed on the heavy atoms.
325  Pruning is greedy; i.e. the first embedded conformation
326  is retained and from then on only those that are at
327  least
328  pruneRmsThresh away from already retained conformations
329  are kept. The pruning is done after embedding and
330  bounds violation minimization. No pruning by default.
331  \param coordMap a map of int to Point3D, between atom IDs and
332  their locations. If this container is provided, the
333  coordinates are used to set distance constraints on the
334  embedding. The resulting conformer(s) should have distances
335  between the specified atoms that reproduce those between the
336  points in \c coordMap. Because the embedding produces a
337  molecule in an arbitrary reference frame, an alignment step
338  is required to actually reproduce the provided coordinates.
339  \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
340  (this shouldn't normally be altered in client code).
341  \param ignoreSmoothingFailures try to embed the molecule even if triangle
342  bounds smoothing fails
343  \param enforceChirality enforce the correct chirality if chiral centers are
344  present
345  \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
346  \param useBasicKnowledge impose "basic knowledge" terms such as flat
347  aromatic rings, ketones, etc.
348  \param verbose print output of experimental torsion-angle preferences
349  \param basinThresh set the basin threshold for the DGeom force field,
350  (this shouldn't normally be altered in client code).
351  \param onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
352 
353 */
354 inline void EmbedMultipleConfs(
355  ROMol &mol, INT_VECT &res, unsigned int numConfs = 10, int numThreads = 1,
356  unsigned int maxIterations = 30, int seed = -1, bool clearConfs = true,
357  bool useRandomCoords = false, double boxSizeMult = 2.0,
358  bool randNegEig = true, unsigned int numZeroFail = 1,
359  double pruneRmsThresh = -1.0,
360  const std::map<int, RDGeom::Point3D> *coordMap = 0,
361  double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
362  bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
363  bool useBasicKnowledge = false, bool verbose = false,
364  double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false) {
365  EmbedParameters params(maxIterations, numThreads, seed, clearConfs,
366  useRandomCoords, boxSizeMult, randNegEig, numZeroFail,
367  coordMap, optimizerForceTol, ignoreSmoothingFailures,
368  enforceChirality, useExpTorsionAnglePrefs,
369  useBasicKnowledge, verbose, basinThresh,
370  pruneRmsThresh, onlyHeavyAtomsForRMS);
371  EmbedMultipleConfs(mol, res, numConfs, params);
372 };
373 //! \overload
375  ROMol &mol, unsigned int numConfs = 10, unsigned int maxIterations = 30,
376  int seed = -1, bool clearConfs = true, bool useRandomCoords = false,
377  double boxSizeMult = 2.0, bool randNegEig = true,
378  unsigned int numZeroFail = 1, double pruneRmsThresh = -1.0,
379  const std::map<int, RDGeom::Point3D> *coordMap = 0,
380  double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
381  bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
382  bool useBasicKnowledge = false, bool verbose = false,
383  double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false) {
384  EmbedParameters params(maxIterations, 1, seed, clearConfs, useRandomCoords,
385  boxSizeMult, randNegEig, numZeroFail, coordMap,
386  optimizerForceTol, ignoreSmoothingFailures,
387  enforceChirality, useExpTorsionAnglePrefs,
388  useBasicKnowledge, verbose, basinThresh,
389  pruneRmsThresh, onlyHeavyAtomsForRMS);
390  INT_VECT res;
391  EmbedMultipleConfs(mol, res, numConfs, params);
392  return res;
393 };
394 
395 //! Parameters corresponding to Sereina Riniker's KDG approach
397 //! Parameters corresponding to Sereina Riniker's ETDG approach
399 //! Parameters corresponding to Sereina Riniker's ETKDG approach
401 //! Parameters corresponding to Sereina Riniker's ETKDG approach - version 2
403 }
404 }
405 
406 #endif
int EmbedMolecule(ROMol &mol, const EmbedParameters &params)
Compute an embedding (in 3D) for the specified molecule using Distance Geometry.
Definition: Embedder.h:181
Defines the primary molecule class ROMol as well as associated typedefs.
const std::map< int, RDGeom::Point3D > * coordMap
Definition: Embedder.h:108
RDKIT_DISTGEOMHELPERS_EXPORT void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs, const EmbedParameters &params)
Parameter object for controlling embedding.
Definition: Embedder.h:99
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETKDGv2
Parameters corresponding to Sereina Riniker's ETKDG approach - version 2.
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETKDG
Parameters corresponding to Sereina Riniker's ETKDG approach.
std::vector< int > INT_VECT
Definition: types.h:247
EmbedParameters(unsigned int maxIterations, int numThreads, int randomSeed, bool clearConfs, bool useRandomCoords, double boxSizeMult, bool randNegEig, unsigned int numZeroFail, const std::map< int, RDGeom::Point3D > *coordMap, double optimizerForceTol, bool ignoreSmoothingFailures, bool enforceChirality, bool useExpTorsionAnglePrefs, bool useBasicKnowledge, bool verbose, double basinThresh, double pruneRmsThresh, bool onlyHeavyAtomsForRMS, unsigned int ETversion=1)
Definition: Embedder.h:139
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters ETDG
Parameters corresponding to Sereina Riniker's ETDG approach.
Std stuff.
Definition: Atom.h:30
#define RDKIT_DISTGEOMHELPERS_EXPORT
Definition: export.h:151
RDKIT_DISTGEOMHELPERS_EXPORT const EmbedParameters KDG
Parameters corresponding to Sereina Riniker's KDG approach.