RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2012 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #ifndef _RD_MOL_OPS_H_
12 #define _RD_MOL_OPS_H_
13 
14 #include <vector>
15 #include <map>
16 #include <list>
17 #include <boost/smart_ptr.hpp>
18 #include <boost/dynamic_bitset.hpp>
19 
20 extern const int ci_LOCAL_INF;
21 namespace RDKit{
22  class ROMol;
23  class RWMol;
24  class Atom;
25  class Bond;
26  typedef std::vector<double> INVAR_VECT;
27  typedef INVAR_VECT::iterator INVAR_VECT_I;
28  typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
29 
30  //! \brief Groups a variety of molecular query and transformation operations.
31  namespace MolOps {
32 
33  //! return the number of electrons available on an atom to donate for aromaticity
34  /*!
35  The result is determined using the default valency, number of lone pairs,
36  number of bonds and the formal charge. Note that the atom may not donate
37  all of these electrons to a ring for aromaticity (also used in Conjugation
38  and hybridization code).
39 
40  \param at the atom of interest
41 
42  \return the number of electrons
43  */
44  int countAtomElec(const Atom *at);
45 
46  //! sums up all atomic formal charges and returns the result
47  int getFormalCharge(const ROMol &mol);
48 
49  //! returns whether or not the given Atom is involved in a conjugated bond
50  bool atomHasConjugatedBond(const Atom *at);
51 
52  //! find fragments (disconnected components of the molecular graph)
53  /*!
54 
55  \param mol the molecule of interest
56  \param mapping used to return the mapping of Atoms->fragments.
57  On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
58  and will contain the fragment assignment for each Atom
59 
60  \return the number of fragments found.
61 
62  */
63  unsigned int getMolFrags(const ROMol &mol,std::vector<int> &mapping);
64  //! find fragments (disconnected components of the molecular graph)
65  /*!
66 
67  \param mol the molecule of interest
68  \param frags used to return the Atoms in each fragment
69  On return \c frags will be \c numFrags long, and each entry
70  will contain the indices of the Atoms in that fragment.
71 
72  \return the number of fragments found.
73 
74  */
75  unsigned int getMolFrags(const ROMol &mol, std::vector<std::vector<int> > &frags);
76 
77  //! splits a molecule into its component fragments
78  // (disconnected components of the molecular graph)
79  /*!
80 
81  \param mol the molecule of interest
82  \param sanitizeFrags toggles sanitization of the fragments after
83  they are built
84  \param frags used to return the mapping of Atoms->fragments.
85  if provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
86  on return and will contain the fragment assignment for each Atom
87  \param fragsMolAtomMapping used to return the Atoms in each fragment
88  If provided, \c fragsMolAtomMapping will be \c numFrags long on return, and each entry
89  will contain the indices of the Atoms in that fragment.
90  \param copyConformers toggles copying conformers of the fragments after
91  they are built
92  \return a vector of the fragments as smart pointers to ROMols
93 
94  */
95  std::vector<boost::shared_ptr<ROMol> > getMolFrags(const ROMol &mol,
96  bool sanitizeFrags=true,
97  std::vector<int> *frags=0,
98  std::vector<std::vector<int> > *fragsMolAtomMapping=0,
99  bool copyConformers=true);
100 
101  //! splits a molecule into pieces based on labels assigned using a query
102  /*!
103 
104  \param mol the molecule of interest
105  \param query the query used to "label" the molecule for fragmentation
106  \param sanitizeFrags toggles sanitization of the fragments after
107  they are built
108  \param whiteList if provided, only labels in the list will be kept
109  \param negateList if true, the white list logic will be inverted: only labels
110  not in the list will be kept
111 
112  \return a map of the fragments and their labels
113 
114  */
115  template <typename T>
116  std::map<T,boost::shared_ptr<ROMol> > getMolFragsWithQuery(const ROMol &mol,
117  T (*query)(const ROMol &,const Atom *),
118  bool sanitizeFrags=true,
119  const std::vector<T> *whiteList=0,
120  bool negateList=false);
121 
122 
123 #if 0
124  //! finds a molecule's minimum spanning tree (MST)
125  /*!
126  \param mol the molecule of interest
127  \param mst used to return the MST as a vector of bond indices
128  */
129  void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
130 #endif
131 
132  //! calculates Balaban's J index for the molecule
133  /*!
134  \param mol the molecule of interest
135  \param useBO toggles inclusion of the bond order in the calculation
136  (when false, we're not really calculating the J value)
137  \param force forces the calculation (instead of using cached results)
138  \param bondPath when included, only paths using bonds whose indices occur
139  in this vector will be included in the calculation
140  \param cacheIt If this is true, the calculated value will be cached
141  as a property on the molecule
142  \return the J index
143 
144  */
145  double computeBalabanJ(const ROMol &mol,
146  bool useBO=true,
147  bool force=false,
148  const std::vector<int> *bondPath=0,
149  bool cacheIt=true);
150  //! \overload
151  double computeBalabanJ(double *distMat, int nb, int nAts);
152 
153  //! \name Dealing with hydrogens
154  //@{
155 
156  //! returns a copy of a molecule with hydrogens added in as explicit Atoms
157  /*!
158  \param mol the molecule to add Hs to
159  \param explicitOnly (optional) if this is \c true, only explicit Hs will be added
160  \param addCoords (optional) If this is \c true, estimates for the atomic coordinates
161  of the added Hs will be used.
162 
163  \return the new molecule
164 
165  <b>Notes:</b>
166  - it makes no sense to use the \c addCoords option if the molecule's heavy
167  atoms don't already have coordinates.
168  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
169  */
170  ROMol *addHs(const ROMol &mol,bool explicitOnly=false,bool addCoords=false);
171  //! \overload
172  // modifies the molecule in place
173  void addHs(RWMol &mol,bool explicitOnly=false,bool addCoords=false);
174 
175  //! returns a copy of a molecule with hydrogens removed
176  /*!
177  \param mol the molecule to remove Hs from
178  \param implicitOnly (optional) if this is \c true, only implicit Hs will be removed
179  \param updateExplicitCount (optional) If this is \c true, when explicit Hs are removed
180  from the graph, the heavy atom to which they are bound will have its counter of
181  explicit Hs increased.
182  \param sanitize (optional) If this is \c true, the final molecule will be
183  sanitized
184 
185  \return the new molecule
186 
187  <b>Notes:</b>
188  - Hydrogens which aren't connected to a heavy atom will not be
189  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
190  all atoms removed.
191  - Labelled hydrogens (e.g. atoms with atomic number=1, but mass > 1)
192  will not be removed.
193  - two coordinate Hs, like the central H in C[H-]C, will not be removed
194  - Hs connected to dummy atoms will not be removed
195 
196  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
197  */
198  ROMol *removeHs(const ROMol &mol,bool implicitOnly=false,
199  bool updateExplicitCount=false,bool sanitize=true);
200  //! \overload
201  // modifies the molecule in place
202  void removeHs(RWMol &mol,bool implicitOnly=false,
203  bool updateExplicitCount=false,bool sanitize=true);
204 
205  //! returns a copy of a molecule with hydrogens removed and added as queries
206  //! to the heavy atoms to which they are bound.
207  /*!
208  This is really intended to be used with molecules that contain QueryAtoms
209 
210  \param mol the molecule to remove Hs from
211 
212  \return the new molecule
213 
214  <b>Notes:</b>
215  - Atoms that do not already have hydrogen count queries will have one
216  added, other H-related queries will not be touched. Examples:
217  - C[H] -> [C;!H0]
218  - [C;H1][H] -> [C;H1]
219  - [C;H2][H] -> [C;H2]
220  - Hydrogens which aren't connected to a heavy atom will not be
221  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
222  all atoms removed.
223  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
224  - By default all hydrogens are removed, however if
225  mergeUnmappedOnly is true, any hydrogen participating
226  in an atom map will be retained
227 
228  */
229  ROMol *mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false);
230  //! \overload
231  // modifies the molecule in place
232  void mergeQueryHs(RWMol &mol, bool mergeUnmappedOnly=false);
233 
234  //! returns a copy of a molecule with the atoms renumbered
235  /*!
236 
237  \param mol the molecule to work with
238  \param newOrder the new ordering of the atoms (should be numAtoms long)
239  for example: if newOrder is [3,2,0,1], then atom 3 in the original
240  molecule will be atom 0 in the new one
241 
242  \return the new molecule
243 
244  <b>Notes:</b>
245  - the caller is responsible for <tt>delete</tt>ing the pointer this returns.
246 
247  */
248  ROMol *renumberAtoms(const ROMol &mol,const std::vector<unsigned int> &newOrder);
249 
250  //@}
251 
252  //! \name Sanitization
253  //@{
254 
255  typedef enum {
267  SANITIZE_ALL=0xFFFFFFF
268  } SanitizeFlags;
269 
270  //! \brief carries out a collection of tasks for cleaning up a molecule and ensuring
271  //! that it makes "chemical sense"
272  /*!
273  This function calls the following in sequence
274  -# MolOps::cleanUp()
275  -# mol.updatePropertyCache()
276  -# MolOps::symmetrizeSSSR()
277  -# MolOps::Kekulize()
278  -# MolOps::assignRadicals()
279  -# MolOps::setAromaticity()
280  -# MolOps::setConjugation()
281  -# MolOps::setHybridization()
282  -# MolOps::cleanupChirality()
283  -# MolOps::adjustHs()
284 
285  \param mol : the RWMol to be cleaned
286 
287  \param operationThatFailed : the first (if any) sanitization operation that fails is set here.
288  The values are taken from the \c SanitizeFlags enum.
289  On success, the value is \c SanitizeFlags::SANITIZE_NONE
290 
291  \param sanitizeOps : the bits here are used to set which sanitization operations are carried
292  out. The elements of the \c SanitizeFlags enum define the operations.
293 
294  <b>Notes:</b>
295  - If there is a failure in the sanitization, a \c SanitException
296  will be thrown.
297  - in general the user of this function should cast the molecule following this
298  function to a ROMol, so that new atoms and bonds cannot be added to the
299  molecule and invalidate the sanitization that has been done here
300  */
301  void sanitizeMol(RWMol &mol,unsigned int &operationThatFailed,
302  unsigned int sanitizeOps=SANITIZE_ALL);
303  //! \overload
304  void sanitizeMol(RWMol &mol);
305 
306  //! Sets up the aromaticity for a molecule
307  /*!
308 
309  This is what happens here:
310  -# find all the simple rings by calling the findSSSR function
311  -# loop over all the Atoms in each ring and mark them if they are candidates
312  for aromaticity. A ring atom is a candidate if it can spare electrons
313  to the ring and if it's from the first two rows of the periodic table.
314  -# based on the candidate atoms, mark the rings to be either candidates
315  or non-candidates. A ring is a candidate only if all its atoms are candidates
316  -# apply Hueckel rule to each of the candidate rings to check if the ring can be
317  aromatic
318 
319  \param mol the RWMol of interest
320 
321  \return 1 on success, 0 otherwise
322 
323  <b>Assumptions:</b>
324  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
325  been called)
326 
327  */
328  int setAromaticity(RWMol &mol);
329 
330 
331  //! Designed to be called by the sanitizer to handle special cases before anything is done.
332  /*!
333 
334  Currently this:
335  - modifies nitro groups, so that the nitrogen does not have an unreasonable
336  valence of 5, as follows:
337  - the nitrogen gets a positive charge
338  - one of the oxygens gets a negative charge and the double bond to this
339  oxygen is changed to a single bond
340  The net result is that nitro groups can be counted on to be:
341  \c "[N+](=O)[O-]"
342 
343  \param mol the molecule of interest
344 
345  */
346  void cleanUp(RWMol &mol);
347 
348  //! Called by the sanitizer to assign radical counts to atoms
349  void assignRadicals(RWMol &mol);
350 
351  //! adjust the number of implicit and explicit Hs for special cases
352  /*!
353 
354  Currently this:
355  - modifies aromatic nitrogens so that, when appropriate, they have an
356  explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
357 
358  \param mol the molecule of interest
359 
360  <b>Assumptions</b>
361  - this is called after the molecule has been sanitized,
362  aromaticity has been perceived, and the implicit valence of
363  everything has been calculated.
364 
365  */
366  void adjustHs(RWMol &mol);
367 
368  //! Kekulizes the molecule
369  /*!
370 
371  \param mol the molecule of interest
372  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
373  on both the Bonds and Atoms are turned to false following
374  the Kekulization, otherwise they are left alone in their
375  original state.
376  \param maxBackTracks the maximum number of attempts at back-tracking. The algorithm
377  uses a back-tracking procedure to revisit a previous setting of
378  a double bond if we hit a wall in the kekulization process
379 
380  <b>Notes:</b>
381  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
382  bonds will be changed from \c RDKit::Bond::AROMATIC to \c RDKit::Bond::SINGLE
383  or \c RDKit::Bond::DOUBLE during Kekulization.
384 
385  */
386  void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100);
387 
388  //! flags the molecule's conjugated bonds
389  void setConjugation(ROMol &mol);
390 
391  //! calculates and sets the hybridization of all a molecule's atoms
392  void setHybridization(ROMol &mol);
393 
394 
395  // @}
396 
397  //! \name Ring finding and SSSR
398  //@{
399 
400  //! finds a molecule's Smallest Set of Smallest Rings
401  /*!
402  Currently this implements a modified form of Figueras algorithm
403  (JCICS - Vol. 36, No. 5, 1996, 986-991)
404 
405  \param mol the molecule of interest
406  \param res used to return the vector of rings. Each entry is a vector with
407  atom indices. This information is also stored in the molecule's
408  RingInfo structure, so this argument is optional (see overload)
409 
410  \return number of smallest rings found
411 
412  Base algorithm:
413  - The original algorithm starts by finding representative degree 2
414  nodes.
415  - Representative because if a series of deg 2 nodes are found only
416  one of them is picked.
417  - The smallest ring around each of them is found.
418  - The bonds that connect to this degree 2 node are then chopped off, yielding
419  new deg two nodes
420  - The process is repeated on the new deg 2 nodes.
421  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
422  with it is found. A bond from this is "carefully" (look in the paper)
423  selected and chopped, yielding deg 2 nodes. The process is same as
424  above once this is done.
425 
426  Our Modifications:
427  - If available, more than one smallest ring around a representative deg 2
428  node will be computed and stored
429  - Typically 3 rings are found around a degree 3 node (when no deg 2s are available)
430  and all the bonds to that node are chopped.
431  - The extra rings that were found in this process are removed after all the nodes
432  have been covered.
433 
434  These changes were motivated by several factors:
435  - We believe the original algorithm fails to find the correct SSSR
436  (finds the correct number of them but the wrong ones) on some sample mols
437  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be done.
438  The extra rings this process adds can be quite useful.
439  */
440  int findSSSR(const ROMol &mol, std::vector<std::vector<int> > &res);
441  //! \overload
442  int findSSSR(const ROMol &mol, std::vector<std::vector<int> > *res=0);
443 
444  //! use a DFS algorithm to identify ring bonds and atoms in a molecule
445  /*!
446  \b NOTE: though the RingInfo structure is populated by this function,
447  the only really reliable calls that can be made are to check if
448  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
449  return values >0
450  */
451  void fastFindRings(const ROMol &mol);
452 
453 
454  //! symmetrize the molecule's Smallest Set of Smallest Rings
455  /*!
456  SSSR rings obtained from "findSSSR" can be non-unique in some cases.
457  For example, cubane has five SSSR rings, not six as one would hope.
458 
459  This function adds additional rings to the SSSR list if necessary
460  to make the list symmetric, e.g. all atoms in cubane will be part of the same number
461  of SSSRs. This function chooses these extra rings from the extra rings computed
462  and discarded during findSSSR. The new rings are chosen such that:
463  - replacing a same sized ring in the SSSR list with an extra ring yields
464  the same union of bond IDs as the original SSSR list
465 
466  \param mol - the molecule of interest
467  \param res used to return the vector of rings. Each entry is a vector with
468  atom indices. This information is also stored in the molecule's
469  RingInfo structure, so this argument is optional (see overload)
470 
471  \return the total number of rings = (new rings + old SSSRs)
472 
473  <b>Notes:</b>
474  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called first
475  */
476  int symmetrizeSSSR(ROMol &mol, std::vector<std::vector<int> > &res);
477  //! \overload
478  int symmetrizeSSSR(ROMol &mol);
479 
480  //@}
481 
482  //! \name Shortest paths and other matrices
483  //@{
484 
485  //! returns a molecule's adjacency matrix
486  /*!
487  \param mol the molecule of interest
488  \param useBO toggles use of bond orders in the matrix
489  \param emptyVal sets the empty value (for non-adjacent atoms)
490  \param force forces calculation of the matrix, even if already computed
491  \param propNamePrefix used to set the cached property name
492  \param bondsToUse (optional, defaults to 0) NOTE(review): presumably limits the matrix to the bonds whose bits are set - confirm against the implementation
493  \return the adjacency matrix.
494 
495  <b>Notes</b>
496  - The result of this is cached in the molecule's local property dictionary,
497  which will handle deallocation. So the caller should <b>not</b> \c delete
498  this pointer.
499 
500  */
501  double * getAdjacencyMatrix(const ROMol &mol,
502  bool useBO=false,
503  int emptyVal=0,
504  bool force=false,
505  const char *propNamePrefix=0,
506  const boost::dynamic_bitset<> *bondsToUse=0
507  );
508 
509  //! Computes the molecule's topological distance matrix
510  /*!
511  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
512 
513  \param mol the molecule of interest
514  \param useBO toggles use of bond orders in the matrix
515  \param useAtomWts sets the diagonal elements of the result to
516  6.0/(atomic number) so that the matrix can be used to calculate
517  Balaban J values. This does not affect the bond weights.
518  \param force forces calculation of the matrix, even if already computed
519  \param propNamePrefix used to set the cached property name
520 
521  \return the distance matrix.
522 
523  <b>Notes</b>
524  - The result of this is cached in the molecule's local property dictionary,
525  which will handle deallocation. So the caller should <b>not</b> \c delete
526  this pointer.
527 
528 
529  */
530  double *getDistanceMat(const ROMol &mol,
531  bool useBO=false,
532  bool useAtomWts=false,
533  bool force=false,
534  const char *propNamePrefix=0);
535 
536 
537  //! Computes the molecule's topological distance matrix
538  /*!
539  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
540 
541  \param mol the molecule of interest
542  \param activeAtoms only elements corresponding to these atom indices
543  will be included in the calculation
544  \param bonds only bonds found in this list will be included in the
545  calculation
546  \param useBO toggles use of bond orders in the matrix
547  \param useAtomWts sets the diagonal elements of the result to
548  6.0/(atomic number) so that the matrix can be used to calculate
549  Balaban J values. This does not affect the bond weights.
550 
551  \return the distance matrix.
552 
553  <b>Notes</b>
554  - The results of this call are not cached, the caller <b>should</b> \c delete
555  this pointer.
556 
557 
558  */
559  double *getDistanceMat(const ROMol &mol,
560  const std::vector<int> &activeAtoms,
561  const std::vector<const Bond *> &bonds,
562  bool useBO=false,
563  bool useAtomWts=false);
564 
565 
566  //! Computes the molecule's 3D distance matrix
567  /*!
568 
569  \param mol the molecule of interest
570  \param confId the conformer to use
571  \param useAtomWts sets the diagonal elements of the result to
572  6.0/(atomic number)
573  \param force forces calculation of the matrix, even if already computed
574  \param propNamePrefix used to set the cached property name
575 
576  \return the distance matrix.
577 
578  <b>Notes</b>
579  - The result of this is cached in the molecule's local property dictionary,
580  which will handle deallocation. So the caller should <b>not</b> \c delete
581  this pointer.
582 
583  */
584  double *get3DDistanceMat(const ROMol &mol,
585  int confId=-1,
586  bool useAtomWts=false,
587  bool force=false,
588  const char *propNamePrefix=0);
589  //! Find the shortest path between two atoms
590  /*!
591  Uses the Bellman-Ford algorithm
592 
593  \param mol molecule of interest
594  \param aid1 index of the first atom
595  \param aid2 index of the second atom
596 
597  \return an std::list with the indices of the atoms along the shortest
598  path
599 
600  <b>Notes:</b>
601  - the starting and end atoms are included in the path
602  - if no path is found, an empty path is returned
603 
604  */
605  std::list<int> getShortestPath(const ROMol &mol, int aid1, int aid2);
606 
607  //@}
608 
609 #if 0
610  //! \name Canonicalization
611  //@{
612 
613  //! assign a canonical ordering to a molecule's atoms
614  /*!
615  The algorithm used here is a modification of the published Daylight canonical
616  smiles algorithm (i.e. it uses atom invariants and products of primes).
617 
618  \param mol the molecule of interest
619  \param ranks used to return the ranks
620  \param breakTies toggles breaking of ties (see below)
621  \param includeChirality toggles inclusion of chirality in the invariants
622  \param includeIsotopes toggles inclusion of isotopes in the invariants
623  \param rankHistory used to return the rank history (see below)
624 
625  <b>Notes:</b>
626  - Tie breaking should be done when it's important to have a full ordering
627  of the atoms (e.g. when generating canonical traversal trees). If it's
628  acceptable to have ties between symmetry-equivalent atoms (e.g. when
629  generating CIP codes), tie breaking can/should be skipped.
630  - if the \c rankHistory argument is provided, the evolution of the ranks of
631  individual atoms will be tracked. The \c rankHistory pointer should be
632  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
633  */
634  void rankAtoms(const ROMol &mol,std::vector<int> &ranks,
635  bool breakTies=true,
636  bool includeChirality=true,
637  bool includeIsotopes=true,
638  std::vector<std::vector<int> > *rankHistory=0);
639  //! assign a canonical ordering to a sub-molecule's atoms
640  /*!
641  The algorithm used here is a modification of the published Daylight canonical
642  smiles algorithm (i.e. it uses atom invariants and products of primes).
643 
644  \param mol the molecule of interest
645  \param atomsToUse atoms to be included
646  \param bondsToUse bonds to be included
647  \param atomSymbols symbols to use for the atoms in the output (these are
648  used in place of atomic number and isotope information)
649  \param ranks used to return the ranks
650  \param breakTies toggles breaking of ties (see below)
651  \param rankHistory used to return the rank history (see below)
652 
653  <b>Notes:</b>
654  - Tie breaking should be done when it's important to have a full ordering
655  of the atoms (e.g. when generating canonical traversal trees). If it's
656  acceptable to have ties between symmetry-equivalent atoms (e.g. when
657  generating CIP codes), tie breaking can/should be skipped.
658  - if the \c rankHistory argument is provided, the evolution of the ranks of
659  individual atoms will be tracked. The \c rankHistory pointer should be
660  to a VECT_INT_VECT that has at least \c mol.getNumAtoms() elements.
661  */
662  void rankAtomsInFragment(const ROMol &mol,std::vector<int> &ranks,
663  const boost::dynamic_bitset<> &atomsToUse,
664  const boost::dynamic_bitset<> &bondsToUse,
665  const std::vector<std::string> *atomSymbols=0,
666  const std::vector<std::string> *bondSymbols=0,
667  bool breakTies=true,
668  std::vector<std::vector<int> > *rankHistory=0);
669 
670  // @}
671 #endif
672  //! \name Stereochemistry
673  //@{
674 
675  //! removes bogus chirality markers (those on non-sp3 centers):
676  void cleanupChirality(RWMol &mol);
677 
678  //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
679  /*!
680  \param mol the molecule of interest
681  \param confId the conformer to use
682  \param replaceExistingTags if this flag is true, any existing atomic chiral
683  tags will be replaced
684 
685  If the conformer provided is not a 3D conformer, nothing will be done.
686  */
687  void assignChiralTypesFrom3D(ROMol &mol,int confId=-1,bool replaceExistingTags=true);
688 
689  //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
690  /*!
691 
692  \param mol the molecule of interest
693  \param cleanIt toggles removal of stereo flags from double bonds that can
694  not have stereochemistry
695  \param force forces the calculation to be repeated even if it has
696  already been done
697  \param flagPossibleStereoCenters set the _ChiralityPossible property on
698  atoms that are possible stereocenters
699 
700  <b>Notes:</b>
701  - Throughout we assume that we're working with a hydrogen-suppressed
702  graph.
703 
704  */
705  void assignStereochemistry(ROMol &mol,bool cleanIt=false,bool force=false,
706  bool flagPossibleStereoCenters=false);
707  //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds (i.e. Z/E)
708  /*!
709 
710  \param mol the molecule of interest
711  */
712  void removeStereochemistry(ROMol &mol);
713 
714  //! \brief finds bonds that could be cis/trans in a molecule and mark them as
715  //! Bond::STEREONONE
716  /*!
717  \param mol the molecule of interest
718  \param cleanIt toggles removal of stereo flags from double bonds that can
719  not have stereochemistry
720 
721  This function is useful in two situations
722  - when parsing a mol file; for the bonds marked here, coordinate information
723  on the neighbors can be used to identify cis or trans states
724  - when writing a mol file; bonds that can be cis/trans but not marked as either
725  need to be specially marked in the mol file
726  */
727  void findPotentialStereoBonds(ROMol &mol,bool cleanIt=false);
728  //@}
729 
730  //! returns the number of atoms which have a particular property set
731  unsigned getNumAtomsWithDistinctProperty(const ROMol& mol, std::string prop);
732 
733  }; // end of namespace MolOps
734 }; // end of namespace RDKit
735 
736 #endif
ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
double computeBalabanJ(const ROMol &mol, bool useBO=true, bool force=false, const std::vector< int > *bondPath=0, bool cacheIt=true)
calculates Balaban's J index for the molecule
void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res)
finds a molecule's Smallest Set of Smallest Rings
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:30
std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=0, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
const int ci_LOCAL_INF
int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res)
symmetrize the molecule's Smallest Set of Smallest Rings
int setAromaticity(RWMol &mol)
Sets up the aromaticity for a molecule.
void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sen...
int countAtomElec(const Atom *at)
return the number of electrons available on an atom to donate for aromaticity
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:27
void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's topological distance matrix.
void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:28
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
void removeStereochemistry(ROMol &mol)
Removes all stereochemistry information from atoms (i.e. R/S) and bonds (i.e. Z/E) ...
class for representing a bond
Definition: Bond.h:46
void cleanUp(RWMol &mol)
Designed to be called by the sanitizer to handle special cases before anything is done...
void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's atoms
double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=0, const boost::dynamic_bitset<> *bondsToUse=0)
returns a molecule's adjacency matrix
std::vector< double > INVAR_VECT
Definition: MolOps.h:25
ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false)
returns a copy of a molecule with hydrogens added in as explicit Atoms
double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=0)
Computes the molecule's 3D distance matrix.
void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREONONE ...
The class for representing atoms.
Definition: Atom.h:67