RDKit
Open-source cheminformatics and machine learning.
EmbeddedFrag.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_EMBEDDED_FRAG_H_
11 #define _RD_EMBEDDED_FRAG_H_
12 
13 #include <RDGeneral/types.h>
14 #include <Geometry/Transform2D.h>
15 #include <Geometry/point.h>
16 #include "DepictUtils.h"
17 #include <boost/smart_ptr.hpp>
18 
19 
20 namespace RDKit {
21  class ROMol;
22  class Bond;
23 }
24 
25 namespace RDDepict {
26  typedef boost::shared_array<double> DOUBLE_SMART_PTR;
27 
28  //! Class that contains the data for an atoms that has alredy been embedded
29  class EmbeddedAtom {
30  public:
31  typedef enum {
32  UNSPECIFIED=0,
34  RING} EAtomType;
35 
36  EmbeddedAtom() : aid(0), angle(-1.0), nbr1(-1), nbr2(-1),
37  CisTransNbr(-1), ccw(true), rotDir(0), d_density(-1.0){
38  neighs.clear();
39  }
40 
41  EmbeddedAtom(unsigned int aid, const RDGeom::Point2D &pos) :
42  aid(aid), angle(-1.0), nbr1(-1), nbr2(-1),
43  CisTransNbr(-1), ccw(true), rotDir(0), d_density(-1.0) {
44  loc = pos;
45  }
46 
48  loc = other.loc;
49  angle = other.angle;
50  nbr1 = other.nbr1;
51  nbr2 = other.nbr2;
52  CisTransNbr = other.CisTransNbr;
53  rotDir = other.rotDir;
54  normal = other.normal;
55  ccw = other.ccw;
56  neighs = other.neighs;
57  d_density = other.d_density;
58  return *this;
59  }
60 
61  void Transform(const RDGeom::Transform2D &trans) {
62  RDGeom::Point2D temp = loc + normal;
63  trans.TransformPoint(loc);
64  trans.TransformPoint(temp);
65  normal = temp - loc;
66  }
67 
68  void Reflect(const RDGeom::Point2D &loc1,
69  const RDGeom::Point2D &loc2) {
70  RDGeom::Point2D temp = loc + normal;
71  loc = reflectPoint(loc, loc1, loc2);
72  temp = reflectPoint(temp, loc1, loc2);
73  normal = temp - loc;
74  ccw = (!ccw);
75  }
76 
77  unsigned int aid; // the id of the atom
78 
79  //! the angle that is already takes at this atom, so any new atom attaching to this
80  //! atom with have to fall in the available part
81  double angle;
82 
83  //! the first neighbor of this atom that form the 'angle'
84  int nbr1;
85 
86  //! the second neighbor of atom that from the 'angle'
87  int nbr2;
88 
89  //! is this is a cis/trans atom the neighbor of this atom that is involved in the
90  //! cis/trans system - defaults to -1
92 
93  //! which direction do we rotate this normal to add the next bond
94  //! if ccw is true we rotate counter cloack wise, otherwise rotate clock wise, by an angle that is
95  //! <= PI/2
96  bool ccw;
97 
98  //! rotation direction around this atom when adding new atoms,
99  //! we determine this for the first neighbor and stick to this direction after that
100  //! useful only on atoms that are degree >= 4
101  int rotDir;
102 
103  RDGeom::Point2D loc; // the current location of this atom
104  //! this is a normal vector to one of the bonds that added this atom
105  //! it provides the side on which we want to add a new bond to this atom,
106  //! this is only relevant when we are dealing with non ring atoms. We would like to draw chains in
107  //! a zig-zag manner
109 
110  //! and these are the atom IDs of the neighbors that still need to be embedded
112 
113  // density of the atoms around this atoms
114  // - this is sum of inverse of the square of distances to other atoms from this atom
115  // Used in the collision removal code - initialized to -1.0
116  double d_density;
117  };
118 
119 
120 
121  typedef std::map<unsigned int, EmbeddedAtom> INT_EATOM_MAP;
122  typedef INT_EATOM_MAP::iterator INT_EATOM_MAP_I;
123  typedef INT_EATOM_MAP::const_iterator INT_EATOM_MAP_CI;
124 
125 
126  //! Class containing a fragment of a molecule that has already been embedded
127  /*
128  Here is how this class is designed to be used
129  - find a set of fused rings and compte the coordinates for the atoms in those ring
130  - them grow this sytem either by adding non ring neighbors
131  - or by adding other embedded fragment
132  - so at the end of the process the whole molecule end up being one these embedded frag objects
133  */
134  class EmbeddedFrag {
135  // REVIEW: think about moving member functions up to global level and just using
136  // this class as a container
137 
138  public:
139  //! Default constructor
140  EmbeddedFrag() : d_done(false), dp_mol(0){
141  d_eatoms.clear();
142  d_attachPts.clear();
143  };
144 
145  //! Intializer from a single atom id
146  /*!
147  A single Embedded Atom with this atom ID is added and placed at the origin
148  */
149  EmbeddedFrag(unsigned int aid, const RDKit::ROMol *mol);
150 
151  //! Constructor when the coordinates have been specified for a set of atoms
152  /*!
153  This simply initialized a set of EmbeddedAtom to have the same coordinates as the
154  one's specified. No testing is done to verify any kind of ocrrectlness. Also
155  this fragment is less ready (to expand and add new neighbors) than when using other
156  constructors. This is because:
157  - the user may have specified coords for only a part of the atoms in a fused ring systems
158  in which case we need to find these atoms and merge these ring systems to this fragment
159  - The atoms are not yet aware of their neighbor (what is left to add etc.) this again
160  depends on atoms properly so that new
161  neighbors can be added to them
162  */
163  EmbeddedFrag(const RDKit::ROMol *mol, const RDGeom::INT_POINT2D_MAP &coordMap);
164 
165  //! Initializer from a set of fused rings
166  /*!
167  ARGUMENTS:
168  \param mol the molecule of interest
169  \param fusedRings a vector of rings, each ring is a list of atom ids
170  */
171  EmbeddedFrag(const RDKit::ROMol *mol, const RDKit::VECT_INT_VECT &fusedRings);
172 
173  //! Initializer for a cis/trans system using the double bond
174  /*!
175  ARGUMENTS:
176  \param dblBond the double bond that is involed in the cis/trans configuration
177  */
178  explicit EmbeddedFrag(const RDKit::Bond* dblBond);
179 
180  //! Expand this embedded system by adding negihboring atoms or other embedded systems
181  /*!
182 
183  Note that both nratms and efrags are modified in this function
184  as we start merging them with the current fragment
185 
186  */
187  void expandEfrag(RDKit::INT_LIST &nratms, std::list<EmbeddedFrag> &efrags);
188 
189  //! Add a new non-ring atom to this object
190  /*
191  ARGUMENTS:
192  \param aid ID of the atom to be added
193  \param toAid ID of the atom that is already in this object to which this atom is added
194  */
195  void addNonRingAtom(unsigned int aid, unsigned int toAid);
196 
197  //! Merge this embedded object with another embedded fragment
198  /*!
199 
200  The transformation (rotation + translation required to attached
201  the passed in object will be computed and applied. The
202  coordinates of the atoms in this object will remain fixed We
203  will assume that there are no common atoms between the two
204  fragments to start with
205 
206  ARGUMENTS:
207  \param embObj another EmbeddedFrag object to be merged with this object
208  \param toAid the atom in this embedded fragment to which the new object will be attached
209  \param nbrAid the atom in the other fragment to attach to
210  */
211  void mergeNoCommon(EmbeddedFrag &embObj, unsigned int toAid,
212  unsigned int nbrAid);
213 
214  //! Merge this embedded object with another embedded fragment
215  /*!
216 
217  The transformation (rotation + translation required to attached
218  the passed in object will be computed and applied. The
219  coordinates of the atoms in this object will remain fixed This
220  already know there are a atoms in common and we will use them to
221  merge things
222 
223  ARGUMENTS:
224  \param embObj another EmbeddedFrag object to be merged with this object
225  \param commAtms a vector of ids of the common atoms
226 
227  */
228  void mergeWithCommon(EmbeddedFrag &embObj, RDKit::INT_VECT &commAtms);
229 
230  void mergeFragsWithComm(std::list<EmbeddedFrag> &efrags);
231 
232  //! Mark this fragment to be done for final embedding
233  void markDone() {
234  d_done = true;
235  }
236 
237  //! If this fragment done for the final embedding
238  bool isDone() {
239  return d_done;
240  }
241 
242  //! Get the molecule that this embedded fragmetn blongs to
243  const RDKit::ROMol *getMol() const { return dp_mol;}
244 
245  //! Find the common atom ids between this fragment and a second one
246  RDKit::INT_VECT findCommonAtoms(const EmbeddedFrag &efrag2);
247 
248  //! Find a neighbor to a non-ring atom among the already embedded atoms
249  /*!
250  ARGUMENTS:
251  \param aid the atom id of interest
252 
253  RETURNS:
254  \return the id of the atom if we found a neighbor
255  -1 otherwise
256 
257  NOTE: by definition we can have only one neighbor in the embdded system.
258  */
259  int findNeighbor(unsigned int aid);
260 
261  //! Tranform this object to a new coordinates system
262  /*!
263  ARGUMENTS:
264  \param trans : the transformation that need to be applied to the atoms in this object
265  */
266  void Transform(const RDGeom::Transform2D &trans);
267 
268  void Reflect(const RDGeom::Point2D &loc1,
269  const RDGeom::Point2D &loc2);
270 
271  const INT_EATOM_MAP &GetEmbeddedAtoms() const {
272  return d_eatoms;
273  }
274 
275  void Translate(const RDGeom::Point2D &shift) {
276  INT_EATOM_MAP_I eari;
277  for (eari = d_eatoms.begin(); eari != d_eatoms.end(); eari++) {
278  eari->second.loc += shift;
279  }
280  }
281 
282  EmbeddedAtom GetEmbeddedAtom(unsigned int aid) const {
283  INT_EATOM_MAP_CI posi = d_eatoms.find(aid);
284  if (posi == d_eatoms.end()) {
285  PRECONDITION(0, "Embedded atom does not contain embedded atom specified");
286  }
287  return posi->second;
288  }
289 
290  //! the number of atoms in the embedded system
291  int Size() const {
292  return d_eatoms.size();
293  }
294 
295  //! \brief compute a box that encloses the fragment
296  void computeBox();
297 
298  //! \brief Flip atoms on one side of a bond - used in removing colissions
299  /*!
300  ARGUMENTS:
301  \param bondId - the bond used as the mirror to flip
302  \param flipEnd - flip the atoms at the end of the bond
303 
304  */
305  void flipAboutBond(unsigned int bondId,bool flipEnd=true);
306 
307  void openAngles(const double *dmat, unsigned int aid1, unsigned int aid2);
308 
309  std::vector<PAIR_I_I> findCollisions(const double *dmat, bool includeBonds=1);
310 
311  void computeDistMat(DOUBLE_SMART_PTR &dmat);
312 
313  double mimicDistMatAndDensityCostFunc(const DOUBLE_SMART_PTR *dmat,
314  double mimicDmatWt);
315 
316  void permuteBonds(unsigned int aid, unsigned int aid1, unsigned int aid2);
317 
318  void randomSampleFlipsAndPermutations(unsigned int nBondsPerSample=3,
319  unsigned int nSamples=100, int seed=100,
320  const DOUBLE_SMART_PTR *dmat=0,
321  double mimicDmatWt=0.0,
322  bool permuteDeg4Nodes=false);
323 
324  //! Remove collisions in a structure by flipping rotable bonds
325  //! along the shortest path between two colliding atoms
326  void removeCollisionsBondFlip();
327 
328  //! Remove collision by opening angles at the offending atoms
329  void removeCollisionsOpenAngles();
330 
331  //! Remove collisions by shortening bonds along the shortest path between the atoms
332  void removeCollisionsShortenBonds();
333 
334  //! helpers funtions to
335 
336 
337  //! \brief make list of neighbors for each atom in the embedded system that
338  //! still need to be embedded
339  void setupNewNeighs();
340 
341  //! update the unembedded neighbor atom list for a specified atom
342  void updateNewNeighs(unsigned int aid);
343 
344  //! \brief Find all atoms in this embedded system that are
345  //! within a specified distant of a point
346  int findNumNeigh(const RDGeom::Point2D &pt, double radius);
347 
348 
349  inline double getBoxPx() {return d_px;}
350  inline double getBoxNx() {return d_nx;}
351  inline double getBoxPy() {return d_py;}
352  inline double getBoxNy() {return d_ny;}
353 
354  void canonicalizeOrientation();
355 
356 
357  private:
358 
359  double totalDensity();
360 
361  void embedFusedRings(const RDKit::VECT_INT_VECT &fusedRings);
362 
363  //! \brief Find a transform to join a ring to the current embedded frag when we
364  //! have only on common atom
365  /*!
366  So this is the state of affairs assumed here:
367  - we already have some rings in the fused system embeded and the
368  coordinates for the atoms
369  - the coordinates for the atoms in the new ring (with the center
370  of rings at the origin) are available nringCors. we want to
371  translate and rotate this ring to join with the already
372  embeded rings.
373  - only one atom is common between this new ring and the atoms
374  that are already embedded
375  - so we need to compute a transform that includes a translation
376  so that the common atom overlaps and the rotation to minimize
377  overalp with other atoms.
378 
379  Here's what is done:
380  - we bisect the remaining sweep angle at the common atom and
381  attach the new ring such that the center of the new ring falls
382  on this bisecting line
383 
384  NOTE: It is assumed here that the original coordinates for the
385  new ring are such that the center is at the origin (this is the
386  way rings come out of embedRing)
387  */
388  RDGeom::Transform2D computeOneAtomTrans(unsigned int commAid,
389  const EmbeddedFrag &other);
390 
391 
392  RDGeom::Transform2D computeTwoAtomTrans(unsigned int aid1, unsigned int aid2,
393  const RDGeom::INT_POINT2D_MAP &nringCor);
394 
395  //! Merge a ring with already embedded atoms
396  /*!
397  It is assumed that the new rings has already been oriented
398  correctly etc. This function just update all the relevant data,
399  like the neighbor information and the sweep angle
400  */
401  void mergeRing(const EmbeddedFrag &embRing, unsigned int nCommon,
402  const RDKit::INT_VECT &pinAtoms);
403 
404  //! Reflect a fragment if necessary through a line connecting two atoms
405  /*!
406 
407  We want add the new fragment such that, most of its atoms fall
408  on the side opoiste to where the atoms already embedded are aid1
409  and aid2 give the atoms that were used to align the new ring to
410  the embedded atoms and we will assume that that process has
411  already taken place (i.e. transformRing has been called)
412 
413  */
414  void reflectIfNecessaryDensity(EmbeddedFrag &embFrag, unsigned int aid1,
415  unsigned int aid2);
416 
417  //! Reflect a fragment if necessary based on the cis/trans specification
418  /*!
419 
420  we want to add the new fragment such that the cis/trans
421  specification on bond between aid1 and aid2 is not violated. We
422  will assume that aid1 and aid2 from this fragments as well as
423  embFrag are already aligned to each other.
424 
425  \param embFrag the fragment that will be reflected if necessary
426  \param ctCase which fragment if the cis/trans dbl bond
427  - 1 means embFrag is the cis/trans fragment
428  - 2 mean "this" is the cis/trans fragment
429  \param aid1 first atom that forms the plane (line) of reflection
430  \param aid2 seconf atom that forms the plane of reflection
431  */
432  void reflectIfNecessaryCisTrans(EmbeddedFrag &embFrag, unsigned int ctCase,
433  unsigned int aid1, unsigned int aid2);
434 
435  //! Reflect a fragment if necessary based on a thrid common point
436  /*!
437 
438  we want add the new fragment such that the thrid point falls on
439  the same side of aid1 and aid2. We will assume that aid1 and
440  aid2 from this fragments as well as embFrag are already aligned
441  to each other.
442 
443  */
444  void reflectIfNecessaryThirdPt(EmbeddedFrag &embFrag, unsigned int aid1, unsigned int aid2,
445  unsigned int aid3);
446 
447  //! \brief Initialize this fragment from a ring and coordinates for its atoms
448  /*!
449  ARGUMENTS:
450  /param ring a vector of atom ids in the ring; it is assumed that there in
451  clockwise or anti-clockwise order
452  /param nringMap a map of atomId to coordinate map for the atoms in the ring
453  */
454  void initFromRingCoords(const RDKit::INT_VECT &ring,
455  const RDGeom::INT_POINT2D_MAP &nringMap);
456 
457  //! Helper function to addNonRingAtom to a specified atoms in the fragment
458  /*
459  Add an atom to this embedded fragment when the fragment already
460  has a atleast two previously added neighbors to 'toAid'. In this
461  case we have to choose where the the new neighbor goes based on
462  the angle that is already taken around the atom.
463 
464  ARGUMENTS:
465  \param aid ID of the atom to be added
466  \param toAid ID of the atom that is already in this object to which this atom is added
467  */
468  void addAtomToAtomWithAng(unsigned int aid, unsigned int toAid);
469 
470 
471  //! Helper function to addNonRingAtom to a specified atoms in the fragment
472  /*!
473 
474  Add an atom (aid) to an atom (toAid) in this embedded fragment
475  when 'toAid' has one or no neighbors previously added. In this
476  case where the new atom should fall is determined by the degree
477  of 'toAid' and the congestion around it.
478 
479  ARGUMENTS:
480  \param aid ID of the atom to be added
481  \param toAid ID of the atom that is already in this object to which this atom is added
482  \param mol the molecule we are dealing with
483  */
484  void addAtomToAtomWithNoAng(unsigned int aid,
485  unsigned int toAid); //, const RDKit::ROMol *mol);
486 
487  //! Helper funtion to contructor that takes predefined coordinates
488  /*!
489 
490  Given an atom with more than 2 neighbors all embedded in this
491  fragment this function tries to determine
492 
493  - how much of an angle if left for any new neighbors yet to be
494  added
495  - which atom should we rotate when we add a new neighbor and in
496  which direction (clockwise or anticlockwise
497 
498  This is how it works
499  - find the pair of nbrs that have the largest angle
500  - this will most likely be the angle that is available - unless
501  we have fused rings and we found on of the ring angle !!!! -
502  in this cae we find the next best
503  - find the smallest anngle that contains one of these nbrs -
504  this determined which
505  - way we want to rotate
506 
507  ARGUMENTS:
508  \param aid the atom id where we are centered right now
509  \param doneNbrs list of neighbors that are already embedded around aid
510  */
511  void computeNbrsAndAng(unsigned int aid, const RDKit::INT_VECT &doneNbrs);
512  //const RDKit::ROMol *mol);
513 
514  //! are we embedded with the final (molecule) coordinates
515  bool d_done;
516  double d_px, d_nx, d_py, d_ny;
517 
518  //! a map that takes one from teh atom id to the embeddedatom object for that atom.
519  INT_EATOM_MAP d_eatoms;
520 
521  //RDKit::INT_DEQUE d_attachPts;
522  RDKit::INT_LIST d_attachPts;
523 
524  // pointer to the owning molecule
525  const RDKit::ROMol *dp_mol;
526 
527  };
528 
529 
530 }
531 
532 #endif
std::list< int > INT_LIST
Definition: types.h:152
boost::shared_array< double > DOUBLE_SMART_PTR
Definition: EmbeddedFrag.h:26
EmbeddedAtom & operator=(const EmbeddedAtom &other)
Definition: EmbeddedFrag.h:47
const RDKit::ROMol * getMol() const
Get the molecule that this embedded fragmetn blongs to.
Definition: EmbeddedFrag.h:243
EmbeddedAtom(unsigned int aid, const RDGeom::Point2D &pos)
Definition: EmbeddedFrag.h:41
RDGeom::Point2D loc
Definition: EmbeddedFrag.h:103
RDGeom::Point2D normal
Definition: EmbeddedFrag.h:108
void Transform(const RDGeom::Transform2D &trans)
Definition: EmbeddedFrag.h:61
void Translate(const RDGeom::Point2D &shift)
Definition: EmbeddedFrag.h:275
ROMol is a molecule class that is intended to have a fixed topology.
Definition: ROMol.h:105
std::vector< INT_VECT > VECT_INT_VECT
Definition: types.h:160
std::map< int, Point2D > INT_POINT2D_MAP
Definition: point.h:533
const INT_EATOM_MAP & GetEmbeddedAtoms() const
Definition: EmbeddedFrag.h:271
std::vector< int > INT_VECT
Definition: types.h:146
Class containing a fragment of a molecule that has already been embedded.
Definition: EmbeddedFrag.h:134
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
Class that contains the data for an atoms that has alredy been embedded.
Definition: EmbeddedFrag.h:29
int Size() const
the number of atoms in the embedded system
Definition: EmbeddedFrag.h:291
int nbr2
the second neighbor of atom that from the &#39;angle&#39;
Definition: EmbeddedFrag.h:87
class for representing a bond
Definition: Bond.h:46
std::map< unsigned int, EmbeddedAtom > INT_EATOM_MAP
Definition: EmbeddedFrag.h:121
RDGeom::Point2D reflectPoint(const RDGeom::Point2D &point, const RDGeom::Point2D &loc1, const RDGeom::Point2D &loc2)
EmbeddedFrag()
Default constructor.
Definition: EmbeddedFrag.h:140
INT_EATOM_MAP::const_iterator INT_EATOM_MAP_CI
Definition: EmbeddedFrag.h:123
void Reflect(const RDGeom::Point2D &loc1, const RDGeom::Point2D &loc2)
Definition: EmbeddedFrag.h:68
void markDone()
Mark this fragment to be done for final embedding.
Definition: EmbeddedFrag.h:233
bool isDone()
If this fragment done for the final embedding.
Definition: EmbeddedFrag.h:238
#define PRECONDITION(expr, mess)
Definition: Invariant.h:119
EmbeddedAtom GetEmbeddedAtom(unsigned int aid) const
Definition: EmbeddedFrag.h:282
INT_EATOM_MAP::iterator INT_EATOM_MAP_I
Definition: EmbeddedFrag.h:122
RDKit::INT_VECT neighs
and these are the atom IDs of the neighbors that still need to be embedded
Definition: EmbeddedFrag.h:111
void TransformPoint(Point2D &pt) const
int nbr1
the first neighbor of this atom that form the &#39;angle&#39;
Definition: EmbeddedFrag.h:84