RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2017 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOL_OPS_H_
13 #define _RD_MOL_OPS_H_
14 
15 #include <vector>
16 #include <map>
17 #include <list>
19 #include <boost/smart_ptr.hpp>
20 #include <boost/dynamic_bitset.hpp>
22 #include <RDGeneral/types.h>
23 
24 RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
25 namespace RDKit {
26 class ROMol;
27 class RWMol;
28 class Atom;
29 class Bond;
30 class Conformer;
31 typedef std::vector<double> INVAR_VECT;
32 typedef INVAR_VECT::iterator INVAR_VECT_I;
33 typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
34 
35 //! \brief Groups a variety of molecular query and transformation operations.
36 namespace MolOps {
37 
38 //! return the number of electrons available on an atom to donate for
39 //! aromaticity
40 /*!
41  The result is determined using the default valency, number of lone pairs,
42  number of bonds and the formal charge. Note that the atom may not donate
43  all of these electrons to a ring for aromaticity (also used in Conjugation
44  and hybridization code).
45 
46  \param at the atom of interest
47 
48  \return the number of electrons
49 */
51 
52 //! sums up all atomic formal charges and returns the result
54 
55 //! returns whether or not the given Atom is involved in a conjugated bond
57 
58 //! find fragments (disconnected components of the molecular graph)
59 /*!
60 
61  \param mol the molecule of interest
62  \param mapping used to return the mapping of Atoms->fragments.
63  On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
64  and will contain the fragment assignment for each Atom
65 
66  \return the number of fragments found.
67 
68 */
69 RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
70  std::vector<int> &mapping);
71 //! find fragments (disconnected components of the molecular graph)
72 /*!
73 
74  \param mol the molecule of interest
75  \param frags used to return the Atoms in each fragment
76  On return \c frags will be \c numFrags long, and each entry
77  will contain the indices of the Atoms in that fragment.
78 
79  \return the number of fragments found.
80 
81 */
83  const ROMol &mol, std::vector<std::vector<int>> &frags);
84 
85 //! splits a molecule into its component fragments
86 //! (disconnected components of the molecular graph)
87 /*!
88 
89  \param mol the molecule of interest
90  \param sanitizeFrags toggles sanitization of the fragments after
91  they are built
92  \param frags used to return the mapping of Atoms->fragments.
93  if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
94  on return and will contain the fragment assignment for each Atom
95  \param fragsMolAtomMapping used to return the Atoms in each fragment
96  On return \c fragsMolAtomMapping will be \c numFrags long, and each entry
97  will contain the indices of the Atoms in that fragment.
98  \param copyConformers toggles copying conformers of the fragments after
99  they are built
100  \return a vector of the fragments as smart pointers to ROMols
101 
102 */
103 RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
104  const ROMol &mol, bool sanitizeFrags = true, std::vector<int> *frags = 0,
105  std::vector<std::vector<int>> *fragsMolAtomMapping = 0,
106  bool copyConformers = true);
107 
108 //! splits a molecule into pieces based on labels assigned using a query
109 /*!
110 
111  \param mol the molecule of interest
112  \param query the query used to "label" the molecule for fragmentation: a function taking the molecule and an Atom and returning a label of type \c T
113  \param sanitizeFrags toggles sanitization of the fragments after
114  they are built
115  \param whiteList if provided, only labels in the list will be kept
116  \param negateList if true, the white list logic will be inverted: only labels
117  not in the list will be kept
118 
119  \return a map from each label to the fragment (as a smart pointer to an ROMol) carrying that label
120 
121 */
122 template <typename T>
123 RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
124 getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
125  bool sanitizeFrags = true,
126  const std::vector<T> *whiteList = 0,
127  bool negateList = false);
128 
129 #if 0
130  //! finds a molecule's minimum spanning tree (MST)
131  /*!
132  \param mol the molecule of interest
133  \param mst used to return the MST as a vector of bond indices
134  */
135  RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
136 #endif
137 
138 //! calculates Balaban's J index for the molecule
139 /*!
140  \param mol the molecule of interest
141  \param useBO toggles inclusion of the bond order in the calculation
142  (when false, we're not really calculating the J value)
143  \param force forces the calculation (instead of using cached results)
144  \param bondPath when included, only paths using bonds whose indices occur
145  in this vector will be included in the calculation
146  \param cacheIt If this is true, the calculated value will be cached
147  as a property on the molecule
148  \return the J index
149 
150 */
152  const ROMol &mol, bool useBO = true, bool force = false,
153  const std::vector<int> *bondPath = 0, bool cacheIt = true);
154 //! \overload
155 RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(double *distMat, int nb, int nAts);
156 
157 //! \name Dealing with hydrogens
158 //@{
159 
160 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
161 /*!
162  \param mol the molecule to add Hs to
163  \param explicitOnly (optional) if this is \c true, only explicit Hs will be
164  added
165  \param addCoords (optional) If this is true, estimates for the atomic
166  coordinates
167  of the added Hs will be used.
168  \param onlyOnAtoms (optional) if provided, this should be a vector of
169  IDs of the atoms that will be considered for H addition.
170  \param addResidueInfo (optional) if this is true, add residue info to
171  hydrogen atoms (useful for PDB files).
172 
173  \return the new molecule
174 
175  <b>Notes:</b>
176  - it makes no sense to use the \c addCoords option if the molecule's
177  heavy
178  atoms don't already have coordinates.
179  - the caller is responsible for <tt>delete</tt>ing the pointer this
180  returns.
181  */
182 RDKIT_GRAPHMOL_EXPORT ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
183  bool addCoords = false,
184  const UINT_VECT *onlyOnAtoms = NULL,
185  bool addResidueInfo = false);
186 //! \overload
187 // modifies the molecule in place
188 RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, bool explicitOnly = false,
189  bool addCoords = false,
190  const UINT_VECT *onlyOnAtoms = NULL,
191  bool addResidueInfo = false);
192 
193 //! returns a copy of a molecule with hydrogens removed
194 /*!
195  \param mol the molecule to remove Hs from
196  \param implicitOnly (optional) if this is \c true, only implicit Hs will be
197  removed
198  \param updateExplicitCount (optional) If this is \c true, when explicit Hs
199  are removed
200  from the graph, the heavy atom to which they are bound will have its
201  counter of
202  explicit Hs increased.
203  \param sanitize (optional) If this is \c true, the final molecule will be
204  sanitized
205 
206  \return the new molecule
207 
208  <b>Notes:</b>
209  - Hydrogens which aren't connected to a heavy atom will not be
210  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
211  all atoms removed.
212  - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
213  will not be removed.
214  - two coordinate Hs, like the central H in C[H-]C, will not be removed
215  - Hs connected to dummy atoms will not be removed
216  - Hs that are part of the definition of double bond Stereochemistry
217  will not be removed
218  - Hs that are not connected to anything else will not be removed
219 
220  - the caller is responsible for <tt>delete</tt>ing the pointer this
221  returns.
222 */
224  bool implicitOnly = false,
225  bool updateExplicitCount = false,
226  bool sanitize = true);
227 //! \overload
228 // modifies the molecule in place
229 RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, bool implicitOnly = false,
230  bool updateExplicitCount = false,
231  bool sanitize = true);
232 
233 //! returns a copy of a molecule with hydrogens removed and added as queries
234 //! to the heavy atoms to which they are bound.
235 /*!
236  This is really intended to be used with molecules that contain QueryAtoms
237 
238  \param mol the molecule to remove Hs from
239 
240  \return the new molecule
241 
242  <b>Notes:</b>
243  - Atoms that do not already have hydrogen count queries will have one
244  added, other H-related queries will not be touched. Examples:
245  - C[H] -> [C;!H0]
246  - [C;H1][H] -> [C;H1]
247  - [C;H2][H] -> [C;H2]
248  - Hydrogens which aren't connected to a heavy atom will not be
249  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
250  all atoms removed.
251  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
252  - By default all hydrogens are removed, however if
253  mergeUnmappedOnly is true, any hydrogen participating
254  in an atom map will be retained
255 
256 */
258  bool mergeUnmappedOnly = false);
259 //! \overload
260 // modifies the molecule in place
262  bool mergeUnmappedOnly = false);
263 
264 typedef enum {
271  ADJUST_IGNOREALL = 0xFFFFFFF
273 
275  bool adjustDegree; /**< add degree queries */
276  boost::uint32_t adjustDegreeFlags;
277  bool adjustRingCount; /**< add ring-count queries */
278  boost::uint32_t adjustRingCountFlags;
279 
280  bool makeDummiesQueries; /**< convert dummy atoms without isotope labels to
281  any-atom queries */
283  bool makeBondsGeneric; /**< convert bonds to generic queries (any bonds) */
284  boost::uint32_t makeBondsGenericFlags;
285  bool makeAtomsGeneric; /**< convert atoms to generic queries (any atoms) */
286  boost::uint32_t makeAtomsGenericFlags;
287  bool adjustHeavyDegree; /**< adjust the heavy-atom degree instead of overall
288  degree */
289  boost::uint32_t adjustHeavyDegreeFlags;
290  bool adjustRingChain; /**< add ring-chain queries */
291  boost::uint32_t adjustRingChainFlags;
292 
294  : adjustDegree(true),
295  adjustDegreeFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
296  adjustRingCount(false),
297  adjustRingCountFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
298  makeDummiesQueries(true),
299  aromatizeIfPossible(true),
300  makeBondsGeneric(false),
301  makeBondsGenericFlags(ADJUST_IGNORENONE),
302  makeAtomsGeneric(false),
303  makeAtomsGenericFlags(ADJUST_IGNORENONE),
304  adjustHeavyDegree(false),
305  adjustHeavyDegreeFlags(ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS),
306  adjustRingChain(false),
307  adjustRingChainFlags(ADJUST_IGNORENONE) {}
308 };
309 //! returns a copy of a molecule with query properties adjusted
310 /*!
311  \param mol the molecule to adjust
312  \param params controls the adjustments made
313 
314  \return the new molecule
315 */
317  const ROMol &mol, const AdjustQueryParameters *params = NULL);
318 //! \overload
319 // modifies the molecule in place
321  RWMol &mol, const AdjustQueryParameters *params = NULL);
322 
323 //! returns a copy of a molecule with the atoms renumbered
324 /*!
325 
326  \param mol the molecule to work with
327  \param newOrder the new ordering of the atoms (should be numAtoms long)
328  for example: if newOrder is [3,2,0,1], then atom 3 in the original
329  molecule will be atom 0 in the new one
330 
331  \return the new molecule
332 
333  <b>Notes:</b>
334  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
335 
336 */
338  const ROMol &mol, const std::vector<unsigned int> &newOrder);
339 
340 //@}
341 
342 //! \name Sanitization
343 //@{
344 
345 typedef enum {
357  SANITIZE_ALL = 0xFFFFFFF
358 } SanitizeFlags;
359 
360 //! \brief carries out a collection of tasks for cleaning up a molecule and
361 //! ensuring
362 //! that it makes "chemical sense"
363 /*!
364  This function calls the following in sequence
365  -# MolOps::cleanUp()
366  -# mol.updatePropertyCache()
367  -# MolOps::symmetrizeSSSR()
368  -# MolOps::Kekulize()
369  -# MolOps::assignRadicals()
370  -# MolOps::setAromaticity()
371  -# MolOps::setConjugation()
372  -# MolOps::setHybridization()
373  -# MolOps::cleanupChirality()
374  -# MolOps::adjustHs()
375 
376  \param mol : the RWMol to be cleaned
377 
378  \param operationThatFailed : the first (if any) sanitization operation that
379  fails is set here.
380  The values are taken from the \c SanitizeFlags
381  enum.
382  On success, the value is \c
383  SanitizeFlags::SANITIZE_NONE
384 
385  \param sanitizeOps : the bits here are used to set which sanitization
386  operations are carried
387  out. The elements of the \c SanitizeFlags enum define
388  the operations.
389 
390  <b>Notes:</b>
391  - If there is a failure in the sanitization, a \c SanitException
392  will be thrown.
393  - in general the user of this function should cast the molecule following
394  this
395  function to a ROMol, so that new atoms and bonds cannot be added to the
396  molecule and screw up the sanitizing that has been done here
397 */
399  unsigned int &operationThatFailed,
400  unsigned int sanitizeOps = SANITIZE_ALL);
401 //! \overload
403 
404 //! Possible aromaticity models
405 /*!
406 - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
407 - \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
408 Book)
409 - \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
410 does not consider the outer envelope of fused rings)
411 - \c AROMATICITY_MDL
412 - \c AROMATICITY_CUSTOM uses a caller-provided function
413 */
414 typedef enum {
415  AROMATICITY_DEFAULT = 0x0, ///< future proofing
419  AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
421 
422 //! Sets up the aromaticity for a molecule
423 /*!
424 
425  This is what happens here:
426  -# find all the simple rings by calling the findSSSR function
427  -# loop over all the Atoms in each ring and mark them if they are
428  candidates
429  for aromaticity. A ring atom is a candidate if it can spare electrons
430  to the ring and if it's from the first two rows of the periodic table.
431  -# based on the candidate atoms, mark the rings to be either candidates
432  or non-candidates. A ring is a candidate only if all its atoms are
433  candidates
434  -# apply Hueckel rule to each of the candidate rings to check if the ring
435  can be
436  aromatic
437 
438  \param mol the RWMol of interest
439  \param model the aromaticity model to use
440  \param func a custom function for assigning aromaticity (only used when
441  model=\c AROMATICITY_CUSTOM)
442 
443  \return >0 on success, <= 0 otherwise
444 
445  <b>Assumptions:</b>
446  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
447  been called)
448 
449 */
452  int (*func)(RWMol &) = NULL);
453 
454 //! Designed to be called by the sanitizer to handle special cases before
455 //! anything is done.
456 /*!
457 
458  Currently this:
459  - modifies nitro groups, so that the nitrogen does not have an unreasonable
460  valence of 5, as follows:
461  - the nitrogen gets a positive charge
462  - one of the oxygens gets a negative charge and the double bond to this
463  oxygen is changed to a single bond
464  The net result is that nitro groups can be counted on to be:
465  \c "[N+](=O)[O-]"
466  - modifies halogen-oxygen containing species as follows:
467  \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
468  \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
469  \c [Cl,Br,I](=O)O -> [X+]([O-])O
470  - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
471 
472  \param mol the molecule of interest
473 
474 */
476 
477 //! Called by the sanitizer to assign radical counts to atoms
479 
480 //! adjust the number of implicit and explicit Hs for special cases
481 /*!
482 
483  Currently this:
484  - modifies aromatic nitrogens so that, when appropriate, they have an
485  explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
486 
487  \param mol the molecule of interest
488 
489  <b>Assumptions</b>
490  - this is called after the molecule has been sanitized,
491  aromaticity has been perceived, and the implicit valence of
492  everything has been calculated.
493 
494 */
496 
497 //! Kekulizes the molecule
498 /*!
499 
500  \param mol the molecule of interest
501  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
502  on both the Bonds and Atoms are turned to false
503  following
504  the Kekulization, otherwise they are left alone in
505  their
506  original state.
507  \param maxBackTracks the maximum number of attempts at back-tracking. The
508  algorithm
509  uses a back-tracking procedure to revisit a previous
510  setting of
511  double bond if we hit a wall in the kekulization
512  process
513 
514  <b>Notes:</b>
515  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
516  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
517  RDKit::Bond::SINGLE
518  or RDKit::Bond::DOUBLE during Kekulization.
519 
520 */
521 RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,
522  unsigned int maxBackTracks = 100);
523 
524 //! flags the molecule's conjugated bonds
526 
527 //! calculates and sets the hybridization of all a molecule's Atoms
529 
530 // @}
531 
532 //! \name Ring finding and SSSR
533 //@{
534 
535 //! finds a molecule's Smallest Set of Smallest Rings
536 /*!
537  Currently this implements a modified form of Figueras algorithm
538  (JCICS - Vol. 36, No. 5, 1996, 986-991)
539 
540  \param mol the molecule of interest
541  \param res used to return the vector of rings. Each entry is a vector with
542  atom indices. This information is also stored in the molecule's
543  RingInfo structure, so this argument is optional (see overload)
544 
545  \return number of smallest rings found
546 
547  Base algorithm:
548  - The original algorithm starts by finding representative degree 2
549  nodes.
550  - Representative because if a series of deg 2 nodes are found only
551  one of them is picked.
552  - The smallest ring around each of them is found.
553  - The bonds that connect to this degree 2 node are then chopped off,
554  yielding
555  new deg two nodes
556  - The process is repeated on the new deg 2 nodes.
557  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
558  with it is found. A bond from this is "carefully" (look in the paper)
559  selected and chopped, yielding deg 2 nodes. The process is same as
560  above once this is done.
561 
562  Our Modifications:
563  - If available, more than one smallest ring around a representative deg 2
564  node will be computed and stored
565  - Typically 3 rings are found around a degree 3 node (when no deg 2s are
566  available)
567  and all the bonds to that node are chopped.
568  - The extra rings that were found in this process are removed after all the
569  nodes
570  have been covered.
571 
572  These changes were motivated by several factors:
573  - We believe the original algorithm fails to find the correct SSSR
574  (finds the correct number of them but the wrong ones) on some sample mols
575  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be done.
576  The extra rings this process adds can be quite useful.
577 */
578 RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol,
579  std::vector<std::vector<int>> &res);
580 //! \overload
581 RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol,
582  std::vector<std::vector<int>> *res = 0);
583 
584 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
585 /*!
586  \b NOTE: though the RingInfo structure is populated by this function,
587  the only really reliable calls that can be made are to check if
588  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
589  return values >0
590 */
591 RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol);
592 
593 //! symmetrize the molecule's Smallest Set of Smallest Rings
594 /*!
595  SSSR rings obtained from "findSSSR" can be non-unique in some cases.
596  For example, cubane has five SSSR rings, not six as one would hope.
597 
598  This function adds additional rings to the SSSR list if necessary
599  to make the list symmetric, e.g. all atoms in cubane will be part of the same
600  number
601  of SSSRs. This function chooses these extra rings from the extra rings
602  computed
603  and discarded during findSSSR. The new rings are chosen such that:
604  - replacing a same sized ring in the SSSR list with an extra ring yields
605  the same union of bond IDs as the original SSSR list
606 
607  \param mol - the molecule of interest
608  \param res used to return the vector of rings. Each entry is a vector with
609  atom indices. This information is also stored in the molecule's
610  RingInfo structure, so this argument is optional (see overload)
611 
612  \return the total number of rings = (new rings + old SSSRs)
613 
614  <b>Notes:</b>
615  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
616  first
617 */
619  std::vector<std::vector<int>> &res);
620 //! \overload
622 
623 //@}
624 
625 //! \name Shortest paths and other matrices
626 //@{
627 
628 //! returns a molecule's adjacency matrix
629 /*!
630  \param mol the molecule of interest
631  \param useBO toggles use of bond orders in the matrix
632  \param emptyVal sets the empty value (for non-adjacent atoms)
633  \param force forces calculation of the matrix, even if already
634  computed
635  \param propNamePrefix used to set the cached property name
636 
637  \return the adjacency matrix.
638 
639  <b>Notes</b>
640  - The result of this is cached in the molecule's local property dictionary,
641  which will handle deallocation. The caller should <b>not</b> \c delete
642  this pointer.
643 
644 */
646  const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
647  const char *propNamePrefix = 0,
648  const boost::dynamic_bitset<> *bondsToUse = 0);
649 
650 //! Computes the molecule's topological distance matrix
651 /*!
652  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
653 
654  \param mol the molecule of interest
655  \param useBO toggles use of bond orders in the matrix
656  \param useAtomWts sets the diagonal elements of the result to
657  6.0/(atomic number) so that the matrix can be used to calculate
658  Balaban J values. This does not affect the bond weights.
659  \param force forces calculation of the matrix, even if already
660  computed
661  \param propNamePrefix used to set the cached property name
662 
663  \return the distance matrix.
664 
665  <b>Notes</b>
666  - The result of this is cached in the molecule's local property dictionary,
667  which will handle deallocation. The caller should <b>not</b> \c delete
668  this pointer.
669 
670 
671 */
672 RDKIT_GRAPHMOL_EXPORT double *getDistanceMat(const ROMol &mol,
673  bool useBO = false,
674  bool useAtomWts = false,
675  bool force = false,
676  const char *propNamePrefix = 0);
677 
678 //! Computes the molecule's topological distance matrix
679 /*!
680  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
681 
682  \param mol the molecule of interest
683  \param activeAtoms only elements corresponding to these atom indices
684  will be included in the calculation
685  \param bonds only bonds found in this list will be included in the
686  calculation
687  \param useBO toggles use of bond orders in the matrix
688  \param useAtomWts sets the diagonal elements of the result to
689  6.0/(atomic number) so that the matrix can be used to calculate
690  Balaban J values. This does not affect the bond weights.
691 
692  \return the distance matrix.
693 
694  <b>Notes</b>
695  - The results of this call are not cached, the caller <b>should</b> \c
696  delete
697  this pointer.
698 
699 
700 */
702  const ROMol &mol, const std::vector<int> &activeAtoms,
703  const std::vector<const Bond *> &bonds, bool useBO = false,
704  bool useAtomWts = false);
705 
706 //! Computes the molecule's 3D distance matrix
707 /*!
708 
709  \param mol the molecule of interest
710  \param confId the conformer to use
711  \param useAtomWts sets the diagonal elements of the result to
712  6.0/(atomic number)
713  \param force forces calculation of the matrix, even if already
714  computed
715  \param propNamePrefix used to set the cached property name
716  (if set to an empty string, the matrix will not be
717  cached)
718 
719  \return the distance matrix.
720 
721  <b>Notes</b>
722  - If propNamePrefix is not empty the result of this is cached in the
723  molecule's local property dictionary, which will handle deallocation.
724  In other cases the caller is responsible for freeing the memory.
725 
726 */
727 RDKIT_GRAPHMOL_EXPORT double *get3DDistanceMat(const ROMol &mol,
728  int confId = -1,
729  bool useAtomWts = false,
730  bool force = false,
731  const char *propNamePrefix = 0);
732 //! Find the shortest path between two atoms
733 /*!
734  Uses the Bellman-Ford algorithm
735 
736  \param mol molecule of interest
737  \param aid1 index of the first atom
738  \param aid2 index of the second atom
739 
740  \return an std::list with the indices of the atoms along the shortest
741  path
742 
743  <b>Notes:</b>
744  - the starting and end atoms are included in the path
745  - if no path is found, an empty path is returned
746 
747 */
748 RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
749  int aid2);
750 
751 //@}
752 
753 #if 0
754  //! \name Canonicalization
755  //@{
756 
757  //! assign a canonical ordering to a molecule's atoms
758  /*!
759  The algorithm used here is a modification of the published Daylight canonical
760  smiles algorithm (i.e. it uses atom invariants and products of primes).
761 
762  \param mol the molecule of interest
763  \param ranks used to return the ranks
764  \param breakTies toggles breaking of ties (see below)
765  \param includeChirality toggles inclusion of chirality in the invariants
766  \param includeIsotopes toggles inclusion of isotopes in the invariants
767  \param rankHistory used to return the rank history (see below)
768 
769  <b>Notes:</b>
770  - Tie breaking should be done when it's important to have a full ordering
771  of the atoms (e.g. when generating canonical traversal trees). If it's
772  acceptable to have ties between symmetry-equivalent atoms (e.g. when
773  generating CIP codes), tie breaking can/should be skipped.
774  - if the \c rankHistory argument is provided, the evolution of the ranks of
775  individual atoms will be tracked. The \c rankHistory pointer should be
776  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
777  */
778  RDKIT_GRAPHMOL_EXPORT void rankAtoms(const ROMol &mol,std::vector<int> &ranks,
779  bool breakTies=true,
780  bool includeChirality=true,
781  bool includeIsotopes=true,
782  std::vector<std::vector<int> > *rankHistory=0);
783  //! assign a canonical ordering to a sub-molecule's atoms
784  /*!
785  The algorithm used here is a modification of the published Daylight canonical
786  smiles algorithm (i.e. it uses atom invariants and products of primes).
787 
788  \param mol the molecule of interest
789  \param atomsToUse atoms to be included
790  \param bondsToUse bonds to be included
791  \param atomSymbols symbols to use for the atoms in the output (these are
792  used in place of atomic number and isotope information)
793  \param ranks used to return the ranks
794  \param breakTies toggles breaking of ties (see below)
795  \param rankHistory used to return the rank history (see below)
796 
797  <b>Notes:</b>
798  - Tie breaking should be done when it's important to have a full ordering
799  of the atoms (e.g. when generating canonical traversal trees). If it's
800  acceptable to have ties between symmetry-equivalent atoms (e.g. when
801  generating CIP codes), tie breaking can/should be skipped.
802  - if the \c rankHistory argument is provided, the evolution of the ranks of
803  individual atoms will be tracked. The \c rankHistory pointer should be
804  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
805  */
806  RDKIT_GRAPHMOL_EXPORT void rankAtomsInFragment(const ROMol &mol,std::vector<int> &ranks,
807  const boost::dynamic_bitset<> &atomsToUse,
808  const boost::dynamic_bitset<> &bondsToUse,
809  const std::vector<std::string> *atomSymbols=0,
810  const std::vector<std::string> *bondSymbols=0,
811  bool breakTies=true,
812  std::vector<std::vector<int> > *rankHistory=0);
813 
814  // @}
815 #endif
816 //! \name Stereochemistry
817 //@{
818 
819 //! removes bogus chirality markers (those on non-sp3 centers):
821 
822 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
823 /*!
824  \param mol the molecule of interest
825  \param confId the conformer to use
826  \param replaceExistingTags if this flag is true, any existing atomic chiral
827  tags will be replaced
828 
829  If the conformer provided is not a 3D conformer, nothing will be done.
830 */
832  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
833 
834 //! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
835 //! stereo flags to its bonds
836 /*!
837 
838  \param mol the molecule of interest
839  \param confId the conformer to use
840  \param replaceExistingTags if this flag is true, any existing info about
841  stereochemistry will be replaced
842 
843  If the conformer provided is not a 3D conformer, nothing will be done.
844 */
846  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
847 
848 //! \brief Uses a conformer to assign directionality to the single bonds
849 //! around double bonds
850 /*!
851 
852  \param mol the molecule of interest
853  \param confId the conformer to use
854 */
856  int confId = -1);
858  ROMol &mol, const Conformer *conf = NULL);
859 
860 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
861 /*!
862  Does the CIP stereochemistry assignment for the molecule's atoms
863  (R/S) and double bond (Z/E). Chiral atoms will have a property
864  '_CIPCode' indicating their chiral code.
865 
866  \param mol the molecule to use
867  \param cleanIt if true, atoms with a chiral specifier that aren't
868  actually chiral (e.g. atoms with duplicate
869  substituents or only 2 substituents, etc.) will have
870  their chiral code set to CHI_UNSPECIFIED. Bonds with
871  STEREOCIS/STEREOTRANS specified that have duplicate
872  substituents based upon the CIP atom ranks will be
873  marked STEREONONE.
874  \param force causes the calculation to be repeated even if it has
875  already been done
876  \param flagPossibleStereoCenters set the _ChiralityPossible property on
877  atoms that are possible stereocenters
878 
879  <b>Notes:</b>
880  - Throughout we assume that we're working with a hydrogen-suppressed
881  graph.
882 
883 */
885  ROMol &mol, bool cleanIt = false, bool force = false,
886  bool flagPossibleStereoCenters = false);
887 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
888 //! (i.e. Z/E)
889 /*!
890 
891  \param mol the molecule of interest
892 */
894 
895 //! \brief finds bonds that could be cis/trans in a molecule and mark them as
896 //! Bond::STEREOANY.
897 /*!
898  \param mol the molecule of interest
899  \param cleanIt toggles removal of stereo flags from double bonds that can
900  not have stereochemistry
901 
902  This function finds any double bonds that can potentially be part of
903  a cis/trans system. No attempt is made here to mark them cis or
904  trans. No attempt is made to detect double bond stereo in ring systems.
905 
906  This function is useful in the following situations:
907  - when parsing a mol file; for the bonds marked here, coordinate
908  information on the neighbors can be used to identify cis or trans states
909  - when writing a mol file; bonds that can be cis/trans but not marked as
910  either need to be specially marked in the mol file
911  - finding double bonds with unspecified stereochemistry so they
912  can be enumerated for downstream 3D tools
913 
914  The CIPranks on the neighboring atoms are checked in this function. The
915  _CIPCode property if set to any on the double bond.
916 */
918  bool cleanIt = false);
919 //@}
920 
921 //! returns the number of atoms which have a particular property set
923  const ROMol &mol, std::string prop);
924 
925 }; // end of namespace MolOps
926 }; // end of namespace RDKit
927 
928 #endif
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Atoms
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's 3D distance matrix.
boost::uint32_t adjustDegreeFlags
Definition: MolOps.h:276
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
Designed to be called by the sanitizer to handle special cases before.
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
return the number of electrons available on an atom to donate for
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=NULL)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
boost::uint32_t makeBondsGenericFlags
Definition: MolOps.h:284
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=0, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
AromaticityModel
Possible aromaticity models.
Definition: MolOps.h:414
boost::uint32_t adjustRingChainFlags
Definition: MolOps.h:291
boost::uint32_t adjustRingCountFlags
Definition: MolOps.h:278
boost::uint32_t makeAtomsGenericFlags
Definition: MolOps.h:286
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
Uses a conformer to assign directionality to the single bonds around double bonds.
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:32
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=NULL)
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=NULL)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and marks them as Bond::STEREOANY.
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:294
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int >> &res)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
boost::uint32_t adjustHeavyDegreeFlags
Definition: MolOps.h:289
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:33
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
Std stuff.
Definition: Atom.h:30
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int >> &res)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds...
RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(const ROMol &mol, bool useBO=true, bool force=false, const std::vector< int > *bondPath=0, bool cacheIt=true)
calculates Balaban's J index for the molecule
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
The class for representing 2D or 3D conformation of a molecule.
Definition: Conformer.h:42
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
Removes all stereochemistry information from atoms (i.e. R/S) and bonds.
std::vector< UINT > UINT_VECT
Definition: types.h:266
std::vector< double > INVAR_VECT
Definition: MolOps.h:31
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=0, const boost::dynamic_bitset<> *bondsToUse=0)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and
AdjustQueryWhichFlags
Definition: MolOps.h:264
The class for representing atoms.
Definition: Atom.h:69
RDKIT_GRAPHMOL_EXPORT ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=NULL, bool addResidueInfo=false)
returns a copy of a molecule with hydrogens added in as explicit Atoms